Restoring authorship annotation for <orivej@yandex-team.ru>. Commit 2 of 2.

author: orivej <orivej@yandex-team.ru> 2022-02-10 16:45:01 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:45:01 +0300
commit: 2d37894b1b037cf24231090eda8589bbb44fb6fc (patch)
tree: be835aa92c6248212e705f25388ebafcf84bc7a1 /contrib/libs/llvm12/lib/Transforms/Instrumentation
parent: 718c552901d703c502ccbefdfc3c9028d608b947 (diff)
download: ydb-2d37894b1b037cf24231090eda8589bbb44fb6fc.tar.gz
22 files changed, 21670 insertions, 21670 deletions
diff --git a/contrib/libs/llvm12/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/contrib/libs/llvm12/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 7212096f1b..f4e471706d 100644
--- a/contrib/libs/llvm12/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -1,140 +1,140 @@
-//===- AddressSanitizer.cpp - memory error detector -----------------------===// 
-// 
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 
-// 
-//===----------------------------------------------------------------------===// 
-// 
-// This file is a part of AddressSanitizer, an address sanity checker. 
-// Details of the algorithm: 
-//  https://github.com/google/sanitizers/wiki/AddressSanitizerAlgorithm 
-// 
-// FIXME: This sanitizer does not yet handle scalable vectors 
-// 
-//===----------------------------------------------------------------------===// 
- 
-#include "llvm/Transforms/Instrumentation/AddressSanitizer.h" 
-#include "llvm/ADT/ArrayRef.h" 
-#include "llvm/ADT/DenseMap.h" 
-#include "llvm/ADT/DepthFirstIterator.h" 
-#include "llvm/ADT/SmallPtrSet.h" 
-#include "llvm/ADT/SmallVector.h" 
-#include "llvm/ADT/Statistic.h" 
-#include "llvm/ADT/StringExtras.h" 
-#include "llvm/ADT/StringRef.h" 
-#include "llvm/ADT/Triple.h" 
-#include "llvm/ADT/Twine.h" 
-#include "llvm/Analysis/MemoryBuiltins.h" 
-#include "llvm/Analysis/TargetLibraryInfo.h" 
-#include "llvm/Analysis/ValueTracking.h" 
-#include "llvm/BinaryFormat/MachO.h" 
-#include "llvm/IR/Argument.h" 
-#include "llvm/IR/Attributes.h" 
-#include "llvm/IR/BasicBlock.h" 
-#include "llvm/IR/Comdat.h" 
-#include "llvm/IR/Constant.h" 
-#include "llvm/IR/Constants.h" 
-#include "llvm/IR/DIBuilder.h" 
-#include "llvm/IR/DataLayout.h" 
-#include "llvm/IR/DebugInfoMetadata.h" 
-#include "llvm/IR/DebugLoc.h" 
-#include "llvm/IR/DerivedTypes.h" 
-#include "llvm/IR/Dominators.h" 
-#include "llvm/IR/Function.h" 
-#include "llvm/IR/GlobalAlias.h" 
-#include "llvm/IR/GlobalValue.h" 
-#include "llvm/IR/GlobalVariable.h" 
-#include "llvm/IR/IRBuilder.h" 
-#include "llvm/IR/InlineAsm.h" 
-#include "llvm/IR/InstVisitor.h" 
-#include "llvm/IR/InstrTypes.h" 
-#include "llvm/IR/Instruction.h" 
-#include "llvm/IR/Instructions.h" 
-#include "llvm/IR/IntrinsicInst.h" 
-#include "llvm/IR/Intrinsics.h" 
-#include "llvm/IR/LLVMContext.h" 
-#include "llvm/IR/MDBuilder.h" 
-#include "llvm/IR/Metadata.h" 
-#include "llvm/IR/Module.h" 
-#include "llvm/IR/Type.h" 
-#include "llvm/IR/Use.h" 
-#include "llvm/IR/Value.h" 
-#include "llvm/InitializePasses.h" 
-#include "llvm/MC/MCSectionMachO.h" 
-#include "llvm/Pass.h" 
-#include "llvm/Support/Casting.h" 
-#include "llvm/Support/CommandLine.h" 
-#include "llvm/Support/Debug.h" 
-#include "llvm/Support/ErrorHandling.h" 
-#include "llvm/Support/MathExtras.h" 
-#include "llvm/Support/ScopedPrinter.h" 
-#include "llvm/Support/raw_ostream.h" 
-#include "llvm/Transforms/Instrumentation.h" 
-#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h" 
-#include "llvm/Transforms/Utils/ASanStackFrameLayout.h" 
-#include "llvm/Transforms/Utils/BasicBlockUtils.h" 
-#include "llvm/Transforms/Utils/Local.h" 
-#include "llvm/Transforms/Utils/ModuleUtils.h" 
-#include "llvm/Transforms/Utils/PromoteMemToReg.h" 
-#include <algorithm> 
-#include <cassert> 
-#include <cstddef> 
-#include <cstdint> 
-#include <iomanip> 
-#include <limits> 
-#include <memory> 
-#include <sstream> 
-#include <string> 
-#include <tuple> 
- 
-using namespace llvm; 
- 
-#define DEBUG_TYPE "asan" 
- 
-static const uint64_t kDefaultShadowScale = 3; 
-static const uint64_t kDefaultShadowOffset32 = 1ULL << 29; 
-static const uint64_t kDefaultShadowOffset64 = 1ULL << 44; 
-static const uint64_t kDynamicShadowSentinel = 
-    std::numeric_limits<uint64_t>::max(); 
-static const uint64_t kSmallX86_64ShadowOffsetBase = 0x7FFFFFFF;  // < 2G. 
-static const uint64_t kSmallX86_64ShadowOffsetAlignMask = ~0xFFFULL; 
-static const uint64_t kLinuxKasan_ShadowOffset64 = 0xdffffc0000000000; 
-static const uint64_t kPPC64_ShadowOffset64 = 1ULL << 44; 
-static const uint64_t kSystemZ_ShadowOffset64 = 1ULL << 52; 
-static const uint64_t kMIPS32_ShadowOffset32 = 0x0aaa0000; 
-static const uint64_t kMIPS64_ShadowOffset64 = 1ULL << 37; 
-static const uint64_t kAArch64_ShadowOffset64 = 1ULL << 36; 
+//===- AddressSanitizer.cpp - memory error detector -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+// Details of the algorithm:
+//  https://github.com/google/sanitizers/wiki/AddressSanitizerAlgorithm
+//
+// FIXME: This sanitizer does not yet handle scalable vectors
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Comdat.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ScopedPrinter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
+#include "llvm/Transforms/Utils/ASanStackFrameLayout.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iomanip>
+#include <limits>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <tuple>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "asan"
+
+static const uint64_t kDefaultShadowScale = 3;
+static const uint64_t kDefaultShadowOffset32 = 1ULL << 29;
+static const uint64_t kDefaultShadowOffset64 = 1ULL << 44;
+static const uint64_t kDynamicShadowSentinel =
+    std::numeric_limits<uint64_t>::max();
+static const uint64_t kSmallX86_64ShadowOffsetBase = 0x7FFFFFFF;  // < 2G.
+static const uint64_t kSmallX86_64ShadowOffsetAlignMask = ~0xFFFULL;
+static const uint64_t kLinuxKasan_ShadowOffset64 = 0xdffffc0000000000;
+static const uint64_t kPPC64_ShadowOffset64 = 1ULL << 44;
+static const uint64_t kSystemZ_ShadowOffset64 = 1ULL << 52;
+static const uint64_t kMIPS32_ShadowOffset32 = 0x0aaa0000;
+static const uint64_t kMIPS64_ShadowOffset64 = 1ULL << 37;
+static const uint64_t kAArch64_ShadowOffset64 = 1ULL << 36;
 static const uint64_t kRISCV64_ShadowOffset64 = 0x20000000;
-static const uint64_t kFreeBSD_ShadowOffset32 = 1ULL << 30; 
-static const uint64_t kFreeBSD_ShadowOffset64 = 1ULL << 46; 
-static const uint64_t kNetBSD_ShadowOffset32 = 1ULL << 30; 
-static const uint64_t kNetBSD_ShadowOffset64 = 1ULL << 46; 
-static const uint64_t kNetBSDKasan_ShadowOffset64 = 0xdfff900000000000; 
-static const uint64_t kPS4CPU_ShadowOffset64 = 1ULL << 40; 
-static const uint64_t kWindowsShadowOffset32 = 3ULL << 28; 
-static const uint64_t kEmscriptenShadowOffset = 0; 
- 
-static const uint64_t kMyriadShadowScale = 5; 
-static const uint64_t kMyriadMemoryOffset32 = 0x80000000ULL; 
-static const uint64_t kMyriadMemorySize32 = 0x20000000ULL; 
-static const uint64_t kMyriadTagShift = 29; 
-static const uint64_t kMyriadDDRTag = 4; 
-static const uint64_t kMyriadCacheBitMask32 = 0x40000000ULL; 
- 
-// The shadow memory space is dynamically allocated. 
-static const uint64_t kWindowsShadowOffset64 = kDynamicShadowSentinel; 
- 
-static const size_t kMinStackMallocSize = 1 << 6;   // 64B 
-static const size_t kMaxStackMallocSize = 1 << 16;  // 64K 
-static const uintptr_t kCurrentStackFrameMagic = 0x41B58AB3; 
-static const uintptr_t kRetiredStackFrameMagic = 0x45E0360E; 
- 
+static const uint64_t kFreeBSD_ShadowOffset32 = 1ULL << 30;
+static const uint64_t kFreeBSD_ShadowOffset64 = 1ULL << 46;
+static const uint64_t kNetBSD_ShadowOffset32 = 1ULL << 30;
+static const uint64_t kNetBSD_ShadowOffset64 = 1ULL << 46;
+static const uint64_t kNetBSDKasan_ShadowOffset64 = 0xdfff900000000000;
+static const uint64_t kPS4CPU_ShadowOffset64 = 1ULL << 40;
+static const uint64_t kWindowsShadowOffset32 = 3ULL << 28;
+static const uint64_t kEmscriptenShadowOffset = 0;
+
+static const uint64_t kMyriadShadowScale = 5;
+static const uint64_t kMyriadMemoryOffset32 = 0x80000000ULL;
+static const uint64_t kMyriadMemorySize32 = 0x20000000ULL;
+static const uint64_t kMyriadTagShift = 29;
+static const uint64_t kMyriadDDRTag = 4;
+static const uint64_t kMyriadCacheBitMask32 = 0x40000000ULL;
+
+// The shadow memory space is dynamically allocated.
+static const uint64_t kWindowsShadowOffset64 = kDynamicShadowSentinel;
+
+static const size_t kMinStackMallocSize = 1 << 6;   // 64B
+static const size_t kMaxStackMallocSize = 1 << 16;  // 64K
+static const uintptr_t kCurrentStackFrameMagic = 0x41B58AB3;
+static const uintptr_t kRetiredStackFrameMagic = 0x45E0360E;
+
 const char kAsanModuleCtorName[] = "asan.module_ctor";
 const char kAsanModuleDtorName[] = "asan.module_dtor";
-static const uint64_t kAsanCtorAndDtorPriority = 1; 
-// On Emscripten, the system needs more than one priorities for constructors. 
-static const uint64_t kAsanEmscriptenCtorAndDtorPriority = 50; 
+static const uint64_t kAsanCtorAndDtorPriority = 1;
+// On Emscripten, the system needs more than one priority for constructors.
+static const uint64_t kAsanEmscriptenCtorAndDtorPriority = 50;
 const char kAsanReportErrorTemplate[] = "__asan_report_";
 const char kAsanRegisterGlobalsName[] = "__asan_register_globals";
 const char kAsanUnregisterGlobalsName[] = "__asan_unregister_globals";
@@ -150,7 +150,7 @@ const char kAsanVersionCheckNamePrefix[] = "__asan_version_mismatch_check_v";
 const char kAsanPtrCmp[] = "__sanitizer_ptr_cmp";
 const char kAsanPtrSub[] = "__sanitizer_ptr_sub";
 const char kAsanHandleNoReturnName[] = "__asan_handle_no_return";
-static const int kMaxAsanStackMallocSizeClass = 10; 
+static const int kMaxAsanStackMallocSizeClass = 10;
 const char kAsanStackMallocNameTemplate[] = "__asan_stack_malloc_";
 const char kAsanStackFreeNameTemplate[] = "__asan_stack_free_";
 const char kAsanGenPrefix[] = "___asan_gen_";
@@ -159,808 +159,808 @@ const char kSanCovGenPrefix[] = "__sancov_gen_";
 const char kAsanSetShadowPrefix[] = "__asan_set_shadow_";
 const char kAsanPoisonStackMemoryName[] = "__asan_poison_stack_memory";
 const char kAsanUnpoisonStackMemoryName[] = "__asan_unpoison_stack_memory";
- 
-// ASan version script has __asan_* wildcard. Triple underscore prevents a 
-// linker (gold) warning about attempting to export a local symbol. 
+
+// ASan version script has __asan_* wildcard. Triple underscore prevents a
+// linker (gold) warning about attempting to export a local symbol.
 const char kAsanGlobalsRegisteredFlagName[] = "___asan_globals_registered";
- 
+
 const char kAsanOptionDetectUseAfterReturn[] =
-    "__asan_option_detect_stack_use_after_return"; 
- 
+    "__asan_option_detect_stack_use_after_return";
+
 const char kAsanShadowMemoryDynamicAddress[] =
-    "__asan_shadow_memory_dynamic_address"; 
- 
+    "__asan_shadow_memory_dynamic_address";
+
 const char kAsanAllocaPoison[] = "__asan_alloca_poison";
 const char kAsanAllocasUnpoison[] = "__asan_allocas_unpoison";
- 
-// Accesses sizes are powers of two: 1, 2, 4, 8, 16. 
-static const size_t kNumberOfAccessSizes = 5; 
- 
-static const unsigned kAllocaRzSize = 32; 
- 
-// Command-line flags. 
- 
-static cl::opt<bool> ClEnableKasan( 
-    "asan-kernel", cl::desc("Enable KernelAddressSanitizer instrumentation"), 
-    cl::Hidden, cl::init(false)); 
- 
-static cl::opt<bool> ClRecover( 
-    "asan-recover", 
-    cl::desc("Enable recovery mode (continue-after-error)."), 
-    cl::Hidden, cl::init(false)); 
- 
-static cl::opt<bool> ClInsertVersionCheck( 
-    "asan-guard-against-version-mismatch", 
-    cl::desc("Guard against compiler/runtime version mismatch."), 
-    cl::Hidden, cl::init(true)); 
- 
-// This flag may need to be replaced with -f[no-]asan-reads. 
-static cl::opt<bool> ClInstrumentReads("asan-instrument-reads", 
-                                       cl::desc("instrument read instructions"), 
-                                       cl::Hidden, cl::init(true)); 
- 
-static cl::opt<bool> ClInstrumentWrites( 
-    "asan-instrument-writes", cl::desc("instrument write instructions"), 
-    cl::Hidden, cl::init(true)); 
- 
-static cl::opt<bool> ClInstrumentAtomics( 
-    "asan-instrument-atomics", 
-    cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden, 
-    cl::init(true)); 
- 
-static cl::opt<bool> 
-    ClInstrumentByval("asan-instrument-byval", 
-                      cl::desc("instrument byval call arguments"), cl::Hidden, 
-                      cl::init(true)); 
- 
-static cl::opt<bool> ClAlwaysSlowPath( 
-    "asan-always-slow-path", 
-    cl::desc("use instrumentation with slow path for all accesses"), cl::Hidden, 
-    cl::init(false)); 
- 
-static cl::opt<bool> ClForceDynamicShadow( 
-    "asan-force-dynamic-shadow", 
-    cl::desc("Load shadow address into a local variable for each function"), 
-    cl::Hidden, cl::init(false)); 
- 
-static cl::opt<bool> 
-    ClWithIfunc("asan-with-ifunc", 
-                cl::desc("Access dynamic shadow through an ifunc global on " 
-                         "platforms that support this"), 
-                cl::Hidden, cl::init(true)); 
- 
-static cl::opt<bool> ClWithIfuncSuppressRemat( 
-    "asan-with-ifunc-suppress-remat", 
-    cl::desc("Suppress rematerialization of dynamic shadow address by passing " 
-             "it through inline asm in prologue."), 
-    cl::Hidden, cl::init(true)); 
- 
-// This flag limits the number of instructions to be instrumented 
-// in any given BB. Normally, this should be set to unlimited (INT_MAX), 
-// but due to http://llvm.org/bugs/show_bug.cgi?id=12652 we temporary 
-// set it to 10000. 
-static cl::opt<int> ClMaxInsnsToInstrumentPerBB( 
-    "asan-max-ins-per-bb", cl::init(10000), 
-    cl::desc("maximal number of instructions to instrument in any given BB"), 
-    cl::Hidden); 
- 
-// This flag may need to be replaced with -f[no]asan-stack. 
-static cl::opt<bool> ClStack("asan-stack", cl::desc("Handle stack memory"), 
-                             cl::Hidden, cl::init(true)); 
-static cl::opt<uint32_t> ClMaxInlinePoisoningSize( 
-    "asan-max-inline-poisoning-size", 
-    cl::desc( 
-        "Inline shadow poisoning for blocks up to the given size in bytes."), 
-    cl::Hidden, cl::init(64)); 
- 
-static cl::opt<bool> ClUseAfterReturn("asan-use-after-return", 
-                                      cl::desc("Check stack-use-after-return"), 
-                                      cl::Hidden, cl::init(true)); 
- 
-static cl::opt<bool> ClRedzoneByvalArgs("asan-redzone-byval-args", 
-                                        cl::desc("Create redzones for byval " 
-                                                 "arguments (extra copy " 
-                                                 "required)"), cl::Hidden, 
-                                        cl::init(true)); 
- 
-static cl::opt<bool> ClUseAfterScope("asan-use-after-scope", 
-                                     cl::desc("Check stack-use-after-scope"), 
-                                     cl::Hidden, cl::init(false)); 
- 
-// This flag may need to be replaced with -f[no]asan-globals. 
-static cl::opt<bool> ClGlobals("asan-globals", 
-                               cl::desc("Handle global objects"), cl::Hidden, 
-                               cl::init(true)); 
- 
-static cl::opt<bool> ClInitializers("asan-initialization-order", 
-                                    cl::desc("Handle C++ initializer order"), 
-                                    cl::Hidden, cl::init(true)); 
- 
-static cl::opt<bool> ClInvalidPointerPairs( 
-    "asan-detect-invalid-pointer-pair", 
-    cl::desc("Instrument <, <=, >, >=, - with pointer operands"), cl::Hidden, 
-    cl::init(false)); 
- 
-static cl::opt<bool> ClInvalidPointerCmp( 
-    "asan-detect-invalid-pointer-cmp", 
-    cl::desc("Instrument <, <=, >, >= with pointer operands"), cl::Hidden, 
-    cl::init(false)); 
- 
-static cl::opt<bool> ClInvalidPointerSub( 
-    "asan-detect-invalid-pointer-sub", 
-    cl::desc("Instrument - operations with pointer operands"), cl::Hidden, 
-    cl::init(false)); 
- 
-static cl::opt<unsigned> ClRealignStack( 
-    "asan-realign-stack", 
-    cl::desc("Realign stack to the value of this flag (power of two)"), 
-    cl::Hidden, cl::init(32)); 
- 
-static cl::opt<int> ClInstrumentationWithCallsThreshold( 
-    "asan-instrumentation-with-call-threshold", 
-    cl::desc( 
-        "If the function being instrumented contains more than " 
-        "this number of memory accesses, use callbacks instead of " 
-        "inline checks (-1 means never use callbacks)."), 
-    cl::Hidden, cl::init(7000)); 
- 
-static cl::opt<std::string> ClMemoryAccessCallbackPrefix( 
-    "asan-memory-access-callback-prefix", 
-    cl::desc("Prefix for memory access callbacks"), cl::Hidden, 
-    cl::init("__asan_")); 
- 
-static cl::opt<bool> 
-    ClInstrumentDynamicAllocas("asan-instrument-dynamic-allocas", 
-                               cl::desc("instrument dynamic allocas"), 
-                               cl::Hidden, cl::init(true)); 
- 
-static cl::opt<bool> ClSkipPromotableAllocas( 
-    "asan-skip-promotable-allocas", 
-    cl::desc("Do not instrument promotable allocas"), cl::Hidden, 
-    cl::init(true)); 
- 
-// These flags allow to change the shadow mapping. 
-// The shadow mapping looks like 
-//    Shadow = (Mem >> scale) + offset 
- 
-static cl::opt<int> ClMappingScale("asan-mapping-scale", 
-                                   cl::desc("scale of asan shadow mapping"), 
-                                   cl::Hidden, cl::init(0)); 
- 
-static cl::opt<uint64_t> 
-    ClMappingOffset("asan-mapping-offset", 
-                    cl::desc("offset of asan shadow mapping [EXPERIMENTAL]"), 
-                    cl::Hidden, cl::init(0)); 
- 
-// Optimization flags. Not user visible, used mostly for testing 
-// and benchmarking the tool. 
- 
-static cl::opt<bool> ClOpt("asan-opt", cl::desc("Optimize instrumentation"), 
-                           cl::Hidden, cl::init(true)); 
- 
-static cl::opt<bool> ClOptSameTemp( 
-    "asan-opt-same-temp", cl::desc("Instrument the same temp just once"), 
-    cl::Hidden, cl::init(true)); 
- 
-static cl::opt<bool> ClOptGlobals("asan-opt-globals", 
-                                  cl::desc("Don't instrument scalar globals"), 
-                                  cl::Hidden, cl::init(true)); 
- 
-static cl::opt<bool> ClOptStack( 
-    "asan-opt-stack", cl::desc("Don't instrument scalar stack variables"), 
-    cl::Hidden, cl::init(false)); 
- 
-static cl::opt<bool> ClDynamicAllocaStack( 
-    "asan-stack-dynamic-alloca", 
-    cl::desc("Use dynamic alloca to represent stack variables"), cl::Hidden, 
-    cl::init(true)); 
- 
-static cl::opt<uint32_t> ClForceExperiment( 
-    "asan-force-experiment", 
-    cl::desc("Force optimization experiment (for testing)"), cl::Hidden, 
-    cl::init(0)); 
- 
-static cl::opt<bool> 
-    ClUsePrivateAlias("asan-use-private-alias", 
-                      cl::desc("Use private aliases for global variables"), 
-                      cl::Hidden, cl::init(false)); 
- 
-static cl::opt<bool> 
-    ClUseOdrIndicator("asan-use-odr-indicator", 
-                      cl::desc("Use odr indicators to improve ODR reporting"), 
-                      cl::Hidden, cl::init(false)); 
- 
-static cl::opt<bool> 
-    ClUseGlobalsGC("asan-globals-live-support", 
-                   cl::desc("Use linker features to support dead " 
-                            "code stripping of globals"), 
-                   cl::Hidden, cl::init(true)); 
- 
-// This is on by default even though there is a bug in gold: 
-// https://sourceware.org/bugzilla/show_bug.cgi?id=19002 
-static cl::opt<bool> 
-    ClWithComdat("asan-with-comdat", 
-                 cl::desc("Place ASan constructors in comdat sections"), 
-                 cl::Hidden, cl::init(true)); 
- 
-// Debug flags. 
- 
-static cl::opt<int> ClDebug("asan-debug", cl::desc("debug"), cl::Hidden, 
-                            cl::init(0)); 
- 
-static cl::opt<int> ClDebugStack("asan-debug-stack", cl::desc("debug stack"), 
-                                 cl::Hidden, cl::init(0)); 
- 
-static cl::opt<std::string> ClDebugFunc("asan-debug-func", cl::Hidden, 
-                                        cl::desc("Debug func")); 
- 
-static cl::opt<int> ClDebugMin("asan-debug-min", cl::desc("Debug min inst"), 
-                               cl::Hidden, cl::init(-1)); 
- 
-static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug max inst"), 
-                               cl::Hidden, cl::init(-1)); 
- 
-STATISTIC(NumInstrumentedReads, "Number of instrumented reads"); 
-STATISTIC(NumInstrumentedWrites, "Number of instrumented writes"); 
-STATISTIC(NumOptimizedAccessesToGlobalVar, 
-          "Number of optimized accesses to global vars"); 
-STATISTIC(NumOptimizedAccessesToStackVar, 
-          "Number of optimized accesses to stack vars"); 
- 
-namespace { 
- 
-/// This struct defines the shadow mapping using the rule: 
-///   shadow = (mem >> Scale) ADD-or-OR Offset. 
-/// If InGlobal is true, then 
-///   extern char __asan_shadow[]; 
-///   shadow = (mem >> Scale) + &__asan_shadow 
-struct ShadowMapping { 
-  int Scale; 
-  uint64_t Offset; 
-  bool OrShadowOffset; 
-  bool InGlobal; 
-}; 
- 
-} // end anonymous namespace 
- 
-static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize, 
-                                      bool IsKasan) { 
-  bool IsAndroid = TargetTriple.isAndroid(); 
-  bool IsIOS = TargetTriple.isiOS() || TargetTriple.isWatchOS(); 
+
+// Access sizes are powers of two: 1, 2, 4, 8, 16.
+static const size_t kNumberOfAccessSizes = 5;
+
+static const unsigned kAllocaRzSize = 32;
+
+// Command-line flags.
+
+static cl::opt<bool> ClEnableKasan(
+    "asan-kernel", cl::desc("Enable KernelAddressSanitizer instrumentation"),
+    cl::Hidden, cl::init(false));
+
+static cl::opt<bool> ClRecover(
+    "asan-recover",
+    cl::desc("Enable recovery mode (continue-after-error)."),
+    cl::Hidden, cl::init(false));
+
+static cl::opt<bool> ClInsertVersionCheck(
+    "asan-guard-against-version-mismatch",
+    cl::desc("Guard against compiler/runtime version mismatch."),
+    cl::Hidden, cl::init(true));
+
+// This flag may need to be replaced with -f[no-]asan-reads.
+static cl::opt<bool> ClInstrumentReads("asan-instrument-reads",
+                                       cl::desc("instrument read instructions"),
+                                       cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClInstrumentWrites(
+    "asan-instrument-writes", cl::desc("instrument write instructions"),
+    cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClInstrumentAtomics(
+    "asan-instrument-atomics",
+    cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,
+    cl::init(true));
+
+static cl::opt<bool>
+    ClInstrumentByval("asan-instrument-byval",
+                      cl::desc("instrument byval call arguments"), cl::Hidden,
+                      cl::init(true));
+
+static cl::opt<bool> ClAlwaysSlowPath(
+    "asan-always-slow-path",
+    cl::desc("use instrumentation with slow path for all accesses"), cl::Hidden,
+    cl::init(false));
+
+static cl::opt<bool> ClForceDynamicShadow(
+    "asan-force-dynamic-shadow",
+    cl::desc("Load shadow address into a local variable for each function"),
+    cl::Hidden, cl::init(false));
+
+static cl::opt<bool>
+    ClWithIfunc("asan-with-ifunc",
+                cl::desc("Access dynamic shadow through an ifunc global on "
+                         "platforms that support this"),
+                cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClWithIfuncSuppressRemat(
+    "asan-with-ifunc-suppress-remat",
+    cl::desc("Suppress rematerialization of dynamic shadow address by passing "
+             "it through inline asm in prologue."),
+    cl::Hidden, cl::init(true));
+
+// This flag limits the number of instructions to be instrumented
+// in any given BB. Normally, this should be set to unlimited (INT_MAX),
+// but due to http://llvm.org/bugs/show_bug.cgi?id=12652 we temporarily
+// set it to 10000.
+static cl::opt<int> ClMaxInsnsToInstrumentPerBB(
+    "asan-max-ins-per-bb", cl::init(10000),
+    cl::desc("maximal number of instructions to instrument in any given BB"),
+    cl::Hidden);
+
+// This flag may need to be replaced with -f[no]asan-stack.
+static cl::opt<bool> ClStack("asan-stack", cl::desc("Handle stack memory"),
+                             cl::Hidden, cl::init(true));
+static cl::opt<uint32_t> ClMaxInlinePoisoningSize(
+    "asan-max-inline-poisoning-size",
+    cl::desc(
+        "Inline shadow poisoning for blocks up to the given size in bytes."),
+    cl::Hidden, cl::init(64));
+
+static cl::opt<bool> ClUseAfterReturn("asan-use-after-return",
+                                      cl::desc("Check stack-use-after-return"),
+                                      cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClRedzoneByvalArgs("asan-redzone-byval-args",
+                                        cl::desc("Create redzones for byval "
+                                                 "arguments (extra copy "
+                                                 "required)"), cl::Hidden,
+                                        cl::init(true));
+
+static cl::opt<bool> ClUseAfterScope("asan-use-after-scope",
+                                     cl::desc("Check stack-use-after-scope"),
+                                     cl::Hidden, cl::init(false));
+
+// This flag may need to be replaced with -f[no]asan-globals.
+static cl::opt<bool> ClGlobals("asan-globals",
+                               cl::desc("Handle global objects"), cl::Hidden,
+                               cl::init(true));
+
+static cl::opt<bool> ClInitializers("asan-initialization-order",
+                                    cl::desc("Handle C++ initializer order"),
+                                    cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClInvalidPointerPairs(
+    "asan-detect-invalid-pointer-pair",
+    cl::desc("Instrument <, <=, >, >=, - with pointer operands"), cl::Hidden,
+    cl::init(false));
+
+static cl::opt<bool> ClInvalidPointerCmp(
+    "asan-detect-invalid-pointer-cmp",
+    cl::desc("Instrument <, <=, >, >= with pointer operands"), cl::Hidden,
+    cl::init(false));
+
+static cl::opt<bool> ClInvalidPointerSub(
+    "asan-detect-invalid-pointer-sub",
+    cl::desc("Instrument - operations with pointer operands"), cl::Hidden,
+    cl::init(false));
+
+static cl::opt<unsigned> ClRealignStack(
+    "asan-realign-stack",
+    cl::desc("Realign stack to the value of this flag (power of two)"),
+    cl::Hidden, cl::init(32));
+
+static cl::opt<int> ClInstrumentationWithCallsThreshold(
+    "asan-instrumentation-with-call-threshold",
+    cl::desc(
+        "If the function being instrumented contains more than "
+        "this number of memory accesses, use callbacks instead of "
+        "inline checks (-1 means never use callbacks)."),
+    cl::Hidden, cl::init(7000));
+
+static cl::opt<std::string> ClMemoryAccessCallbackPrefix(
+    "asan-memory-access-callback-prefix",
+    cl::desc("Prefix for memory access callbacks"), cl::Hidden,
+    cl::init("__asan_"));
+
+static cl::opt<bool>
+    ClInstrumentDynamicAllocas("asan-instrument-dynamic-allocas",
+                               cl::desc("instrument dynamic allocas"),
+                               cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClSkipPromotableAllocas(
+    "asan-skip-promotable-allocas",
+    cl::desc("Do not instrument promotable allocas"), cl::Hidden,
+    cl::init(true));
+
+// These flags allow changing the shadow mapping.
+// The shadow mapping looks like
+//    Shadow = (Mem >> scale) + offset
+
+static cl::opt<int> ClMappingScale("asan-mapping-scale",
+                                   cl::desc("scale of asan shadow mapping"),
+                                   cl::Hidden, cl::init(0));
+
+static cl::opt<uint64_t>
+    ClMappingOffset("asan-mapping-offset",
+                    cl::desc("offset of asan shadow mapping [EXPERIMENTAL]"),
+                    cl::Hidden, cl::init(0));
+
+// Optimization flags. Not user visible, used mostly for testing
+// and benchmarking the tool.
+
+static cl::opt<bool> ClOpt("asan-opt", cl::desc("Optimize instrumentation"),
+                           cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClOptSameTemp(
+    "asan-opt-same-temp", cl::desc("Instrument the same temp just once"),
+    cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClOptGlobals("asan-opt-globals",
+                                  cl::desc("Don't instrument scalar globals"),
+                                  cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClOptStack(
+    "asan-opt-stack", cl::desc("Don't instrument scalar stack variables"),
+    cl::Hidden, cl::init(false));
+
+static cl::opt<bool> ClDynamicAllocaStack(
+    "asan-stack-dynamic-alloca",
+    cl::desc("Use dynamic alloca to represent stack variables"), cl::Hidden,
+    cl::init(true));
+
+static cl::opt<uint32_t> ClForceExperiment(
+    "asan-force-experiment",
+    cl::desc("Force optimization experiment (for testing)"), cl::Hidden,
+    cl::init(0));
+
+static cl::opt<bool>
+    ClUsePrivateAlias("asan-use-private-alias",
+                      cl::desc("Use private aliases for global variables"),
+                      cl::Hidden, cl::init(false));
+
+static cl::opt<bool>
+    ClUseOdrIndicator("asan-use-odr-indicator",
+                      cl::desc("Use odr indicators to improve ODR reporting"),
+                      cl::Hidden, cl::init(false));
+
+static cl::opt<bool>
+    ClUseGlobalsGC("asan-globals-live-support",
+                   cl::desc("Use linker features to support dead "
+                            "code stripping of globals"),
+                   cl::Hidden, cl::init(true));
+
+// This is on by default even though there is a bug in gold:
+// https://sourceware.org/bugzilla/show_bug.cgi?id=19002
+static cl::opt<bool>
+    ClWithComdat("asan-with-comdat",
+                 cl::desc("Place ASan constructors in comdat sections"),
+                 cl::Hidden, cl::init(true));
+
+// Debug flags.
+
+static cl::opt<int> ClDebug("asan-debug", cl::desc("debug"), cl::Hidden,
+                            cl::init(0));
+
+static cl::opt<int> ClDebugStack("asan-debug-stack", cl::desc("debug stack"),
+                                 cl::Hidden, cl::init(0));
+
+static cl::opt<std::string> ClDebugFunc("asan-debug-func", cl::Hidden,
+                                        cl::desc("Debug func"));
+
+static cl::opt<int> ClDebugMin("asan-debug-min", cl::desc("Debug min inst"),
+                               cl::Hidden, cl::init(-1));
+
+static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug max inst"),
+                               cl::Hidden, cl::init(-1));
+
+STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
+STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
+STATISTIC(NumOptimizedAccessesToGlobalVar,
+          "Number of optimized accesses to global vars");
+STATISTIC(NumOptimizedAccessesToStackVar,
+          "Number of optimized accesses to stack vars");
+
+namespace {
+
+/// This struct defines the shadow mapping using the rule:
+///   shadow = (mem >> Scale) ADD-or-OR Offset.
+/// If InGlobal is true, then
+///   extern char __asan_shadow[];
+///   shadow = (mem >> Scale) + &__asan_shadow
+struct ShadowMapping {
+  int Scale;
+  uint64_t Offset;
+  bool OrShadowOffset;
+  bool InGlobal;
+};
+
+} // end anonymous namespace
+
+static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
+                                      bool IsKasan) {
+  bool IsAndroid = TargetTriple.isAndroid();
+  bool IsIOS = TargetTriple.isiOS() || TargetTriple.isWatchOS();
   bool IsMacOS = TargetTriple.isMacOSX();
-  bool IsFreeBSD = TargetTriple.isOSFreeBSD(); 
-  bool IsNetBSD = TargetTriple.isOSNetBSD(); 
-  bool IsPS4CPU = TargetTriple.isPS4CPU(); 
-  bool IsLinux = TargetTriple.isOSLinux(); 
-  bool IsPPC64 = TargetTriple.getArch() == Triple::ppc64 || 
-                 TargetTriple.getArch() == Triple::ppc64le; 
-  bool IsSystemZ = TargetTriple.getArch() == Triple::systemz; 
-  bool IsX86_64 = TargetTriple.getArch() == Triple::x86_64; 
-  bool IsMIPS32 = TargetTriple.isMIPS32(); 
-  bool IsMIPS64 = TargetTriple.isMIPS64(); 
-  bool IsArmOrThumb = TargetTriple.isARM() || TargetTriple.isThumb(); 
-  bool IsAArch64 = TargetTriple.getArch() == Triple::aarch64; 
+  bool IsFreeBSD = TargetTriple.isOSFreeBSD();
+  bool IsNetBSD = TargetTriple.isOSNetBSD();
+  bool IsPS4CPU = TargetTriple.isPS4CPU();
+  bool IsLinux = TargetTriple.isOSLinux();
+  bool IsPPC64 = TargetTriple.getArch() == Triple::ppc64 ||
+                 TargetTriple.getArch() == Triple::ppc64le;
+  bool IsSystemZ = TargetTriple.getArch() == Triple::systemz;
+  bool IsX86_64 = TargetTriple.getArch() == Triple::x86_64;
+  bool IsMIPS32 = TargetTriple.isMIPS32();
+  bool IsMIPS64 = TargetTriple.isMIPS64();
+  bool IsArmOrThumb = TargetTriple.isARM() || TargetTriple.isThumb();
+  bool IsAArch64 = TargetTriple.getArch() == Triple::aarch64;
   bool IsRISCV64 = TargetTriple.getArch() == Triple::riscv64;
-  bool IsWindows = TargetTriple.isOSWindows(); 
-  bool IsFuchsia = TargetTriple.isOSFuchsia(); 
-  bool IsMyriad = TargetTriple.getVendor() == llvm::Triple::Myriad; 
-  bool IsEmscripten = TargetTriple.isOSEmscripten(); 
- 
-  ShadowMapping Mapping; 
- 
-  Mapping.Scale = IsMyriad ? kMyriadShadowScale : kDefaultShadowScale; 
-  if (ClMappingScale.getNumOccurrences() > 0) { 
-    Mapping.Scale = ClMappingScale; 
-  } 
- 
-  if (LongSize == 32) { 
-    if (IsAndroid) 
-      Mapping.Offset = kDynamicShadowSentinel; 
-    else if (IsMIPS32) 
-      Mapping.Offset = kMIPS32_ShadowOffset32; 
-    else if (IsFreeBSD) 
-      Mapping.Offset = kFreeBSD_ShadowOffset32; 
-    else if (IsNetBSD) 
-      Mapping.Offset = kNetBSD_ShadowOffset32; 
-    else if (IsIOS) 
-      Mapping.Offset = kDynamicShadowSentinel; 
-    else if (IsWindows) 
-      Mapping.Offset = kWindowsShadowOffset32; 
-    else if (IsEmscripten) 
-      Mapping.Offset = kEmscriptenShadowOffset; 
-    else if (IsMyriad) { 
-      uint64_t ShadowOffset = (kMyriadMemoryOffset32 + kMyriadMemorySize32 - 
-                               (kMyriadMemorySize32 >> Mapping.Scale)); 
-      Mapping.Offset = ShadowOffset - (kMyriadMemoryOffset32 >> Mapping.Scale); 
-    } 
-    else 
-      Mapping.Offset = kDefaultShadowOffset32; 
-  } else {  // LongSize == 64 
-    // Fuchsia is always PIE, which means that the beginning of the address 
-    // space is always available. 
-    if (IsFuchsia) 
-      Mapping.Offset = 0; 
-    else if (IsPPC64) 
-      Mapping.Offset = kPPC64_ShadowOffset64; 
-    else if (IsSystemZ) 
-      Mapping.Offset = kSystemZ_ShadowOffset64; 
-    else if (IsFreeBSD && !IsMIPS64) 
-      Mapping.Offset = kFreeBSD_ShadowOffset64; 
-    else if (IsNetBSD) { 
-      if (IsKasan) 
-        Mapping.Offset = kNetBSDKasan_ShadowOffset64; 
-      else 
-        Mapping.Offset = kNetBSD_ShadowOffset64; 
-    } else if (IsPS4CPU) 
-      Mapping.Offset = kPS4CPU_ShadowOffset64; 
-    else if (IsLinux && IsX86_64) { 
-      if (IsKasan) 
-        Mapping.Offset = kLinuxKasan_ShadowOffset64; 
-      else 
-        Mapping.Offset = (kSmallX86_64ShadowOffsetBase & 
-                          (kSmallX86_64ShadowOffsetAlignMask << Mapping.Scale)); 
-    } else if (IsWindows && IsX86_64) { 
-      Mapping.Offset = kWindowsShadowOffset64; 
-    } else if (IsMIPS64) 
-      Mapping.Offset = kMIPS64_ShadowOffset64; 
-    else if (IsIOS) 
-      Mapping.Offset = kDynamicShadowSentinel; 
+  bool IsWindows = TargetTriple.isOSWindows();
+  bool IsFuchsia = TargetTriple.isOSFuchsia();
+  bool IsMyriad = TargetTriple.getVendor() == llvm::Triple::Myriad;
+  bool IsEmscripten = TargetTriple.isOSEmscripten();
+
+  ShadowMapping Mapping;
+
+  Mapping.Scale = IsMyriad ? kMyriadShadowScale : kDefaultShadowScale;
+  if (ClMappingScale.getNumOccurrences() > 0) {
+    Mapping.Scale = ClMappingScale;
+  }
+
+  if (LongSize == 32) {
+    if (IsAndroid)
+      Mapping.Offset = kDynamicShadowSentinel;
+    else if (IsMIPS32)
+      Mapping.Offset = kMIPS32_ShadowOffset32;
+    else if (IsFreeBSD)
+      Mapping.Offset = kFreeBSD_ShadowOffset32;
+    else if (IsNetBSD)
+      Mapping.Offset = kNetBSD_ShadowOffset32;
+    else if (IsIOS)
+      Mapping.Offset = kDynamicShadowSentinel;
+    else if (IsWindows)
+      Mapping.Offset = kWindowsShadowOffset32;
+    else if (IsEmscripten)
+      Mapping.Offset = kEmscriptenShadowOffset;
+    else if (IsMyriad) {
+      uint64_t ShadowOffset = (kMyriadMemoryOffset32 + kMyriadMemorySize32 -
+                               (kMyriadMemorySize32 >> Mapping.Scale));
+      Mapping.Offset = ShadowOffset - (kMyriadMemoryOffset32 >> Mapping.Scale);
+    }
+    else
+      Mapping.Offset = kDefaultShadowOffset32;
+  } else {  // LongSize == 64
+    // Fuchsia is always PIE, which means that the beginning of the address
+    // space is always available.
+    if (IsFuchsia)
+      Mapping.Offset = 0;
+    else if (IsPPC64)
+      Mapping.Offset = kPPC64_ShadowOffset64;
+    else if (IsSystemZ)
+      Mapping.Offset = kSystemZ_ShadowOffset64;
+    else if (IsFreeBSD && !IsMIPS64)
+      Mapping.Offset = kFreeBSD_ShadowOffset64;
+    else if (IsNetBSD) {
+      if (IsKasan)
+        Mapping.Offset = kNetBSDKasan_ShadowOffset64;
+      else
+        Mapping.Offset = kNetBSD_ShadowOffset64;
+    } else if (IsPS4CPU)
+      Mapping.Offset = kPS4CPU_ShadowOffset64;
+    else if (IsLinux && IsX86_64) {
+      if (IsKasan)
+        Mapping.Offset = kLinuxKasan_ShadowOffset64;
+      else
+        Mapping.Offset = (kSmallX86_64ShadowOffsetBase &
+                          (kSmallX86_64ShadowOffsetAlignMask << Mapping.Scale));
+    } else if (IsWindows && IsX86_64) {
+      Mapping.Offset = kWindowsShadowOffset64;
+    } else if (IsMIPS64)
+      Mapping.Offset = kMIPS64_ShadowOffset64;
+    else if (IsIOS)
+      Mapping.Offset = kDynamicShadowSentinel;
     else if (IsMacOS && IsAArch64)
       Mapping.Offset = kDynamicShadowSentinel;
-    else if (IsAArch64) 
-      Mapping.Offset = kAArch64_ShadowOffset64; 
+    else if (IsAArch64)
+      Mapping.Offset = kAArch64_ShadowOffset64;
     else if (IsRISCV64)
       Mapping.Offset = kRISCV64_ShadowOffset64;
-    else 
-      Mapping.Offset = kDefaultShadowOffset64; 
-  } 
- 
-  if (ClForceDynamicShadow) { 
-    Mapping.Offset = kDynamicShadowSentinel; 
-  } 
- 
-  if (ClMappingOffset.getNumOccurrences() > 0) { 
-    Mapping.Offset = ClMappingOffset; 
-  } 
- 
-  // OR-ing shadow offset if more efficient (at least on x86) if the offset 
-  // is a power of two, but on ppc64 we have to use add since the shadow 
-  // offset is not necessary 1/8-th of the address space.  On SystemZ, 
-  // we could OR the constant in a single instruction, but it's more 
-  // efficient to load it once and use indexed addressing. 
-  Mapping.OrShadowOffset = !IsAArch64 && !IsPPC64 && !IsSystemZ && !IsPS4CPU && 
+    else
+      Mapping.Offset = kDefaultShadowOffset64;
+  }
+
+  if (ClForceDynamicShadow) {
+    Mapping.Offset = kDynamicShadowSentinel;
+  }
+
+  if (ClMappingOffset.getNumOccurrences() > 0) {
+    Mapping.Offset = ClMappingOffset;
+  }
+
+  // OR-ing shadow offset if more efficient (at least on x86) if the offset
+  // is a power of two, but on ppc64 we have to use add since the shadow
+  // offset is not necessary 1/8-th of the address space.  On SystemZ,
+  // we could OR the constant in a single instruction, but it's more
+  // efficient to load it once and use indexed addressing.
+  Mapping.OrShadowOffset = !IsAArch64 && !IsPPC64 && !IsSystemZ && !IsPS4CPU &&
                            !IsRISCV64 &&
-                           !(Mapping.Offset & (Mapping.Offset - 1)) && 
-                           Mapping.Offset != kDynamicShadowSentinel; 
-  bool IsAndroidWithIfuncSupport = 
-      IsAndroid && !TargetTriple.isAndroidVersionLT(21); 
-  Mapping.InGlobal = ClWithIfunc && IsAndroidWithIfuncSupport && IsArmOrThumb; 
- 
-  return Mapping; 
-} 
- 
-static uint64_t getRedzoneSizeForScale(int MappingScale) { 
-  // Redzone used for stack and globals is at least 32 bytes. 
-  // For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively. 
-  return std::max(32U, 1U << MappingScale); 
-} 
- 
-static uint64_t GetCtorAndDtorPriority(Triple &TargetTriple) { 
-  if (TargetTriple.isOSEmscripten()) { 
-    return kAsanEmscriptenCtorAndDtorPriority; 
-  } else { 
-    return kAsanCtorAndDtorPriority; 
-  } 
-} 
- 
-namespace { 
- 
-/// Module analysis for getting various metadata about the module. 
-class ASanGlobalsMetadataWrapperPass : public ModulePass { 
-public: 
-  static char ID; 
- 
-  ASanGlobalsMetadataWrapperPass() : ModulePass(ID) { 
-    initializeASanGlobalsMetadataWrapperPassPass( 
-        *PassRegistry::getPassRegistry()); 
-  } 
- 
-  bool runOnModule(Module &M) override { 
-    GlobalsMD = GlobalsMetadata(M); 
-    return false; 
-  } 
- 
-  StringRef getPassName() const override { 
-    return "ASanGlobalsMetadataWrapperPass"; 
-  } 
- 
-  void getAnalysisUsage(AnalysisUsage &AU) const override { 
-    AU.setPreservesAll(); 
-  } 
- 
-  GlobalsMetadata &getGlobalsMD() { return GlobalsMD; } 
- 
-private: 
-  GlobalsMetadata GlobalsMD; 
-}; 
- 
-char ASanGlobalsMetadataWrapperPass::ID = 0; 
- 
-/// AddressSanitizer: instrument the code in module to find memory bugs. 
-struct AddressSanitizer { 
-  AddressSanitizer(Module &M, const GlobalsMetadata *GlobalsMD, 
-                   bool CompileKernel = false, bool Recover = false, 
-                   bool UseAfterScope = false) 
-      : CompileKernel(ClEnableKasan.getNumOccurrences() > 0 ? ClEnableKasan 
-                                                            : CompileKernel), 
-        Recover(ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover), 
-        UseAfterScope(UseAfterScope || ClUseAfterScope), GlobalsMD(*GlobalsMD) { 
-    C = &(M.getContext()); 
-    LongSize = M.getDataLayout().getPointerSizeInBits(); 
-    IntptrTy = Type::getIntNTy(*C, LongSize); 
-    TargetTriple = Triple(M.getTargetTriple()); 
- 
-    Mapping = getShadowMapping(TargetTriple, LongSize, this->CompileKernel); 
-  } 
- 
-  uint64_t getAllocaSizeInBytes(const AllocaInst &AI) const { 
-    uint64_t ArraySize = 1; 
-    if (AI.isArrayAllocation()) { 
-      const ConstantInt *CI = dyn_cast<ConstantInt>(AI.getArraySize()); 
-      assert(CI && "non-constant array size"); 
-      ArraySize = CI->getZExtValue(); 
-    } 
-    Type *Ty = AI.getAllocatedType(); 
-    uint64_t SizeInBytes = 
-        AI.getModule()->getDataLayout().getTypeAllocSize(Ty); 
-    return SizeInBytes * ArraySize; 
-  } 
- 
-  /// Check if we want (and can) handle this alloca. 
-  bool isInterestingAlloca(const AllocaInst &AI); 
- 
-  bool ignoreAccess(Value *Ptr); 
-  void getInterestingMemoryOperands( 
-      Instruction *I, SmallVectorImpl<InterestingMemoryOperand> &Interesting); 
- 
-  void instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, 
-                     InterestingMemoryOperand &O, bool UseCalls, 
-                     const DataLayout &DL); 
-  void instrumentPointerComparisonOrSubtraction(Instruction *I); 
-  void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore, 
-                         Value *Addr, uint32_t TypeSize, bool IsWrite, 
-                         Value *SizeArgument, bool UseCalls, uint32_t Exp); 
-  void instrumentUnusualSizeOrAlignment(Instruction *I, 
-                                        Instruction *InsertBefore, Value *Addr, 
-                                        uint32_t TypeSize, bool IsWrite, 
-                                        Value *SizeArgument, bool UseCalls, 
-                                        uint32_t Exp); 
-  Value *createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong, 
-                           Value *ShadowValue, uint32_t TypeSize); 
-  Instruction *generateCrashCode(Instruction *InsertBefore, Value *Addr, 
-                                 bool IsWrite, size_t AccessSizeIndex, 
-                                 Value *SizeArgument, uint32_t Exp); 
-  void instrumentMemIntrinsic(MemIntrinsic *MI); 
-  Value *memToShadow(Value *Shadow, IRBuilder<> &IRB); 
-  bool suppressInstrumentationSiteForDebug(int &Instrumented); 
-  bool instrumentFunction(Function &F, const TargetLibraryInfo *TLI); 
-  bool maybeInsertAsanInitAtFunctionEntry(Function &F); 
-  bool maybeInsertDynamicShadowAtFunctionEntry(Function &F); 
-  void markEscapedLocalAllocas(Function &F); 
- 
-private: 
-  friend struct FunctionStackPoisoner; 
- 
-  void initializeCallbacks(Module &M); 
- 
-  bool LooksLikeCodeInBug11395(Instruction *I); 
-  bool GlobalIsLinkerInitialized(GlobalVariable *G); 
-  bool isSafeAccess(ObjectSizeOffsetVisitor &ObjSizeVis, Value *Addr, 
-                    uint64_t TypeSize) const; 
- 
-  /// Helper to cleanup per-function state. 
-  struct FunctionStateRAII { 
-    AddressSanitizer *Pass; 
- 
-    FunctionStateRAII(AddressSanitizer *Pass) : Pass(Pass) { 
-      assert(Pass->ProcessedAllocas.empty() && 
-             "last pass forgot to clear cache"); 
-      assert(!Pass->LocalDynamicShadow); 
-    } 
- 
-    ~FunctionStateRAII() { 
-      Pass->LocalDynamicShadow = nullptr; 
-      Pass->ProcessedAllocas.clear(); 
-    } 
-  }; 
- 
-  LLVMContext *C; 
-  Triple TargetTriple; 
-  int LongSize; 
-  bool CompileKernel; 
-  bool Recover; 
-  bool UseAfterScope; 
-  Type *IntptrTy; 
-  ShadowMapping Mapping; 
-  FunctionCallee AsanHandleNoReturnFunc; 
-  FunctionCallee AsanPtrCmpFunction, AsanPtrSubFunction; 
-  Constant *AsanShadowGlobal; 
- 
-  // These arrays is indexed by AccessIsWrite, Experiment and log2(AccessSize). 
-  FunctionCallee AsanErrorCallback[2][2][kNumberOfAccessSizes]; 
-  FunctionCallee AsanMemoryAccessCallback[2][2][kNumberOfAccessSizes]; 
- 
-  // These arrays is indexed by AccessIsWrite and Experiment. 
-  FunctionCallee AsanErrorCallbackSized[2][2]; 
-  FunctionCallee AsanMemoryAccessCallbackSized[2][2]; 
- 
-  FunctionCallee AsanMemmove, AsanMemcpy, AsanMemset; 
-  Value *LocalDynamicShadow = nullptr; 
-  const GlobalsMetadata &GlobalsMD; 
-  DenseMap<const AllocaInst *, bool> ProcessedAllocas; 
-}; 
- 
-class AddressSanitizerLegacyPass : public FunctionPass { 
-public: 
-  static char ID; 
- 
-  explicit AddressSanitizerLegacyPass(bool CompileKernel = false, 
-                                      bool Recover = false, 
-                                      bool UseAfterScope = false) 
-      : FunctionPass(ID), CompileKernel(CompileKernel), Recover(Recover), 
-        UseAfterScope(UseAfterScope) { 
-    initializeAddressSanitizerLegacyPassPass(*PassRegistry::getPassRegistry()); 
-  } 
- 
-  StringRef getPassName() const override { 
-    return "AddressSanitizerFunctionPass"; 
-  } 
- 
-  void getAnalysisUsage(AnalysisUsage &AU) const override { 
-    AU.addRequired<ASanGlobalsMetadataWrapperPass>(); 
-    AU.addRequired<TargetLibraryInfoWrapperPass>(); 
-  } 
- 
-  bool runOnFunction(Function &F) override { 
-    GlobalsMetadata &GlobalsMD = 
-        getAnalysis<ASanGlobalsMetadataWrapperPass>().getGlobalsMD(); 
-    const TargetLibraryInfo *TLI = 
-        &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 
-    AddressSanitizer ASan(*F.getParent(), &GlobalsMD, CompileKernel, Recover, 
-                          UseAfterScope); 
-    return ASan.instrumentFunction(F, TLI); 
-  } 
- 
-private: 
-  bool CompileKernel; 
-  bool Recover; 
-  bool UseAfterScope; 
-}; 
- 
-class ModuleAddressSanitizer { 
-public: 
-  ModuleAddressSanitizer(Module &M, const GlobalsMetadata *GlobalsMD, 
-                         bool CompileKernel = false, bool Recover = false, 
-                         bool UseGlobalsGC = true, bool UseOdrIndicator = false) 
-      : GlobalsMD(*GlobalsMD), 
-        CompileKernel(ClEnableKasan.getNumOccurrences() > 0 ? ClEnableKasan 
-                                                            : CompileKernel), 
-        Recover(ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover), 
-        UseGlobalsGC(UseGlobalsGC && ClUseGlobalsGC && !this->CompileKernel), 
-        // Enable aliases as they should have no downside with ODR indicators. 
-        UsePrivateAlias(UseOdrIndicator || ClUsePrivateAlias), 
-        UseOdrIndicator(UseOdrIndicator || ClUseOdrIndicator), 
-        // Not a typo: ClWithComdat is almost completely pointless without 
-        // ClUseGlobalsGC (because then it only works on modules without 
-        // globals, which are rare); it is a prerequisite for ClUseGlobalsGC; 
-        // and both suffer from gold PR19002 for which UseGlobalsGC constructor 
-        // argument is designed as workaround. Therefore, disable both 
-        // ClWithComdat and ClUseGlobalsGC unless the frontend says it's ok to 
-        // do globals-gc. 
-        UseCtorComdat(UseGlobalsGC && ClWithComdat && !this->CompileKernel) { 
-    C = &(M.getContext()); 
-    int LongSize = M.getDataLayout().getPointerSizeInBits(); 
-    IntptrTy = Type::getIntNTy(*C, LongSize); 
-    TargetTriple = Triple(M.getTargetTriple()); 
-    Mapping = getShadowMapping(TargetTriple, LongSize, this->CompileKernel); 
-  } 
- 
-  bool instrumentModule(Module &); 
- 
-private: 
-  void initializeCallbacks(Module &M); 
- 
-  bool InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool *CtorComdat); 
-  void InstrumentGlobalsCOFF(IRBuilder<> &IRB, Module &M, 
-                             ArrayRef<GlobalVariable *> ExtendedGlobals, 
-                             ArrayRef<Constant *> MetadataInitializers); 
-  void InstrumentGlobalsELF(IRBuilder<> &IRB, Module &M, 
-                            ArrayRef<GlobalVariable *> ExtendedGlobals, 
-                            ArrayRef<Constant *> MetadataInitializers, 
-                            const std::string &UniqueModuleId); 
-  void InstrumentGlobalsMachO(IRBuilder<> &IRB, Module &M, 
-                              ArrayRef<GlobalVariable *> ExtendedGlobals, 
-                              ArrayRef<Constant *> MetadataInitializers); 
-  void 
-  InstrumentGlobalsWithMetadataArray(IRBuilder<> &IRB, Module &M, 
-                                     ArrayRef<GlobalVariable *> ExtendedGlobals, 
-                                     ArrayRef<Constant *> MetadataInitializers); 
- 
-  GlobalVariable *CreateMetadataGlobal(Module &M, Constant *Initializer, 
-                                       StringRef OriginalName); 
-  void SetComdatForGlobalMetadata(GlobalVariable *G, GlobalVariable *Metadata, 
-                                  StringRef InternalSuffix); 
-  Instruction *CreateAsanModuleDtor(Module &M); 
- 
-  const GlobalVariable *getExcludedAliasedGlobal(const GlobalAlias &GA) const; 
-  bool shouldInstrumentGlobal(GlobalVariable *G) const; 
-  bool ShouldUseMachOGlobalsSection() const; 
-  StringRef getGlobalMetadataSection() const; 
-  void poisonOneInitializer(Function &GlobalInit, GlobalValue *ModuleName); 
-  void createInitializerPoisonCalls(Module &M, GlobalValue *ModuleName); 
-  uint64_t getMinRedzoneSizeForGlobal() const { 
-    return getRedzoneSizeForScale(Mapping.Scale); 
-  } 
-  uint64_t getRedzoneSizeForGlobal(uint64_t SizeInBytes) const; 
-  int GetAsanVersion(const Module &M) const; 
- 
-  const GlobalsMetadata &GlobalsMD; 
-  bool CompileKernel; 
-  bool Recover; 
-  bool UseGlobalsGC; 
-  bool UsePrivateAlias; 
-  bool UseOdrIndicator; 
-  bool UseCtorComdat; 
-  Type *IntptrTy; 
-  LLVMContext *C; 
-  Triple TargetTriple; 
-  ShadowMapping Mapping; 
-  FunctionCallee AsanPoisonGlobals; 
-  FunctionCallee AsanUnpoisonGlobals; 
-  FunctionCallee AsanRegisterGlobals; 
-  FunctionCallee AsanUnregisterGlobals; 
-  FunctionCallee AsanRegisterImageGlobals; 
-  FunctionCallee AsanUnregisterImageGlobals; 
-  FunctionCallee AsanRegisterElfGlobals; 
-  FunctionCallee AsanUnregisterElfGlobals; 
- 
-  Function *AsanCtorFunction = nullptr; 
-  Function *AsanDtorFunction = nullptr; 
-}; 
- 
-class ModuleAddressSanitizerLegacyPass : public ModulePass { 
-public: 
-  static char ID; 
- 
-  explicit ModuleAddressSanitizerLegacyPass(bool CompileKernel = false, 
-                                            bool Recover = false, 
-                                            bool UseGlobalGC = true, 
-                                            bool UseOdrIndicator = false) 
-      : ModulePass(ID), CompileKernel(CompileKernel), Recover(Recover), 
-        UseGlobalGC(UseGlobalGC), UseOdrIndicator(UseOdrIndicator) { 
-    initializeModuleAddressSanitizerLegacyPassPass( 
-        *PassRegistry::getPassRegistry()); 
-  } 
- 
-  StringRef getPassName() const override { return "ModuleAddressSanitizer"; } 
- 
-  void getAnalysisUsage(AnalysisUsage &AU) const override { 
-    AU.addRequired<ASanGlobalsMetadataWrapperPass>(); 
-  } 
- 
-  bool runOnModule(Module &M) override { 
-    GlobalsMetadata &GlobalsMD = 
-        getAnalysis<ASanGlobalsMetadataWrapperPass>().getGlobalsMD(); 
-    ModuleAddressSanitizer ASanModule(M, &GlobalsMD, CompileKernel, Recover, 
-                                      UseGlobalGC, UseOdrIndicator); 
-    return ASanModule.instrumentModule(M); 
-  } 
- 
-private: 
-  bool CompileKernel; 
-  bool Recover; 
-  bool UseGlobalGC; 
-  bool UseOdrIndicator; 
-}; 
- 
-// Stack poisoning does not play well with exception handling. 
-// When an exception is thrown, we essentially bypass the code 
-// that unpoisones the stack. This is why the run-time library has 
-// to intercept __cxa_throw (as well as longjmp, etc) and unpoison the entire 
-// stack in the interceptor. This however does not work inside the 
-// actual function which catches the exception. Most likely because the 
-// compiler hoists the load of the shadow value somewhere too high. 
-// This causes asan to report a non-existing bug on 453.povray. 
-// It sounds like an LLVM bug. 
-struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { 
-  Function &F; 
-  AddressSanitizer &ASan; 
-  DIBuilder DIB; 
-  LLVMContext *C; 
-  Type *IntptrTy; 
-  Type *IntptrPtrTy; 
-  ShadowMapping Mapping; 
- 
-  SmallVector<AllocaInst *, 16> AllocaVec; 
-  SmallVector<AllocaInst *, 16> StaticAllocasToMoveUp; 
-  SmallVector<Instruction *, 8> RetVec; 
-  unsigned StackAlignment; 
- 
-  FunctionCallee AsanStackMallocFunc[kMaxAsanStackMallocSizeClass + 1], 
-      AsanStackFreeFunc[kMaxAsanStackMallocSizeClass + 1]; 
-  FunctionCallee AsanSetShadowFunc[0x100] = {}; 
-  FunctionCallee AsanPoisonStackMemoryFunc, AsanUnpoisonStackMemoryFunc; 
-  FunctionCallee AsanAllocaPoisonFunc, AsanAllocasUnpoisonFunc; 
- 
-  // Stores a place and arguments of poisoning/unpoisoning call for alloca. 
-  struct AllocaPoisonCall { 
-    IntrinsicInst *InsBefore; 
-    AllocaInst *AI; 
-    uint64_t Size; 
-    bool DoPoison; 
-  }; 
-  SmallVector<AllocaPoisonCall, 8> DynamicAllocaPoisonCallVec; 
-  SmallVector<AllocaPoisonCall, 8> StaticAllocaPoisonCallVec; 
-  bool HasUntracedLifetimeIntrinsic = false; 
- 
-  SmallVector<AllocaInst *, 1> DynamicAllocaVec; 
-  SmallVector<IntrinsicInst *, 1> StackRestoreVec; 
-  AllocaInst *DynamicAllocaLayout = nullptr; 
-  IntrinsicInst *LocalEscapeCall = nullptr; 
- 
-  bool HasInlineAsm = false; 
-  bool HasReturnsTwiceCall = false; 
- 
-  FunctionStackPoisoner(Function &F, AddressSanitizer &ASan) 
-      : F(F), ASan(ASan), DIB(*F.getParent(), /*AllowUnresolved*/ false), 
-        C(ASan.C), IntptrTy(ASan.IntptrTy), 
-        IntptrPtrTy(PointerType::get(IntptrTy, 0)), Mapping(ASan.Mapping), 
-        StackAlignment(1 << Mapping.Scale) {} 
- 
-  bool runOnFunction() { 
-    if (!ClStack) return false; 
- 
-    if (ClRedzoneByvalArgs) 
-      copyArgsPassedByValToAllocas(); 
- 
-    // Collect alloca, ret, lifetime instructions etc. 
-    for (BasicBlock *BB : depth_first(&F.getEntryBlock())) visit(*BB); 
- 
-    if (AllocaVec.empty() && DynamicAllocaVec.empty()) return false; 
- 
-    initializeCallbacks(*F.getParent()); 
- 
-    if (HasUntracedLifetimeIntrinsic) { 
-      // If there are lifetime intrinsics which couldn't be traced back to an 
-      // alloca, we may not know exactly when a variable enters scope, and 
-      // therefore should "fail safe" by not poisoning them. 
-      StaticAllocaPoisonCallVec.clear(); 
-      DynamicAllocaPoisonCallVec.clear(); 
-    } 
- 
-    processDynamicAllocas(); 
-    processStaticAllocas(); 
- 
-    if (ClDebugStack) { 
-      LLVM_DEBUG(dbgs() << F); 
-    } 
-    return true; 
-  } 
- 
-  // Arguments marked with the "byval" attribute are implicitly copied without 
-  // using an alloca instruction.  To produce redzones for those arguments, we 
-  // copy them a second time into memory allocated with an alloca instruction. 
-  void copyArgsPassedByValToAllocas(); 
- 
-  // Finds all Alloca instructions and puts 
-  // poisoned red zones around all of them. 
-  // Then unpoison everything back before the function returns. 
-  void processStaticAllocas(); 
-  void processDynamicAllocas(); 
- 
-  void createDynamicAllocasInitStorage(); 
- 
-  // ----------------------- Visitors. 
+                           !(Mapping.Offset & (Mapping.Offset - 1)) &&
+                           Mapping.Offset != kDynamicShadowSentinel;
+  bool IsAndroidWithIfuncSupport =
+      IsAndroid && !TargetTriple.isAndroidVersionLT(21);
+  Mapping.InGlobal = ClWithIfunc && IsAndroidWithIfuncSupport && IsArmOrThumb;
+
+  return Mapping;
+}
+
+static uint64_t getRedzoneSizeForScale(int MappingScale) {
+  // Redzone used for stack and globals is at least 32 bytes.
+  // For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively.
+  return std::max(32U, 1U << MappingScale);
+}
+
+static uint64_t GetCtorAndDtorPriority(Triple &TargetTriple) {
+  if (TargetTriple.isOSEmscripten()) {
+    return kAsanEmscriptenCtorAndDtorPriority;
+  } else {
+    return kAsanCtorAndDtorPriority;
+  }
+}
+
+namespace {
+
+/// Module analysis for getting various metadata about the module.
+class ASanGlobalsMetadataWrapperPass : public ModulePass {
+public:
+  static char ID;
+
+  ASanGlobalsMetadataWrapperPass() : ModulePass(ID) {
+    initializeASanGlobalsMetadataWrapperPassPass(
+        *PassRegistry::getPassRegistry());
+  }
+
+  bool runOnModule(Module &M) override {
+    GlobalsMD = GlobalsMetadata(M);
+    return false;
+  }
+
+  StringRef getPassName() const override {
+    return "ASanGlobalsMetadataWrapperPass";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesAll();
+  }
+
+  GlobalsMetadata &getGlobalsMD() { return GlobalsMD; }
+
+private:
+  GlobalsMetadata GlobalsMD;
+};
+
+char ASanGlobalsMetadataWrapperPass::ID = 0;
+
+/// AddressSanitizer: instrument the code in module to find memory bugs.
+struct AddressSanitizer {
+  AddressSanitizer(Module &M, const GlobalsMetadata *GlobalsMD,
+                   bool CompileKernel = false, bool Recover = false,
+                   bool UseAfterScope = false)
+      : CompileKernel(ClEnableKasan.getNumOccurrences() > 0 ? ClEnableKasan
+                                                            : CompileKernel),
+        Recover(ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover),
+        UseAfterScope(UseAfterScope || ClUseAfterScope), GlobalsMD(*GlobalsMD) {
+    C = &(M.getContext());
+    LongSize = M.getDataLayout().getPointerSizeInBits();
+    IntptrTy = Type::getIntNTy(*C, LongSize);
+    TargetTriple = Triple(M.getTargetTriple());
+
+    Mapping = getShadowMapping(TargetTriple, LongSize, this->CompileKernel);
+  }
+
+  uint64_t getAllocaSizeInBytes(const AllocaInst &AI) const {
+    uint64_t ArraySize = 1;
+    if (AI.isArrayAllocation()) {
+      const ConstantInt *CI = dyn_cast<ConstantInt>(AI.getArraySize());
+      assert(CI && "non-constant array size");
+      ArraySize = CI->getZExtValue();
+    }
+    Type *Ty = AI.getAllocatedType();
+    uint64_t SizeInBytes =
+        AI.getModule()->getDataLayout().getTypeAllocSize(Ty);
+    return SizeInBytes * ArraySize;
+  }
+
+  /// Check if we want (and can) handle this alloca.
+  bool isInterestingAlloca(const AllocaInst &AI);
+
+  bool ignoreAccess(Value *Ptr);
+  void getInterestingMemoryOperands(
+      Instruction *I, SmallVectorImpl<InterestingMemoryOperand> &Interesting);
+
+  void instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis,
+                     InterestingMemoryOperand &O, bool UseCalls,
+                     const DataLayout &DL);
+  void instrumentPointerComparisonOrSubtraction(Instruction *I);
+  void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore,
+                         Value *Addr, uint32_t TypeSize, bool IsWrite,
+                         Value *SizeArgument, bool UseCalls, uint32_t Exp);
+  void instrumentUnusualSizeOrAlignment(Instruction *I,
+                                        Instruction *InsertBefore, Value *Addr,
+                                        uint32_t TypeSize, bool IsWrite,
+                                        Value *SizeArgument, bool UseCalls,
+                                        uint32_t Exp);
+  Value *createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
+                           Value *ShadowValue, uint32_t TypeSize);
+  Instruction *generateCrashCode(Instruction *InsertBefore, Value *Addr,
+                                 bool IsWrite, size_t AccessSizeIndex,
+                                 Value *SizeArgument, uint32_t Exp);
+  void instrumentMemIntrinsic(MemIntrinsic *MI);
+  Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
+  bool suppressInstrumentationSiteForDebug(int &Instrumented);
+  bool instrumentFunction(Function &F, const TargetLibraryInfo *TLI);
+  bool maybeInsertAsanInitAtFunctionEntry(Function &F);
+  bool maybeInsertDynamicShadowAtFunctionEntry(Function &F);
+  void markEscapedLocalAllocas(Function &F);
+
+private:
+  friend struct FunctionStackPoisoner;
+
+  void initializeCallbacks(Module &M);
+
+  bool LooksLikeCodeInBug11395(Instruction *I);
+  bool GlobalIsLinkerInitialized(GlobalVariable *G);
+  bool isSafeAccess(ObjectSizeOffsetVisitor &ObjSizeVis, Value *Addr,
+                    uint64_t TypeSize) const;
+
+  /// Helper to cleanup per-function state.
+  struct FunctionStateRAII {
+    AddressSanitizer *Pass;
+
+    FunctionStateRAII(AddressSanitizer *Pass) : Pass(Pass) {
+      assert(Pass->ProcessedAllocas.empty() &&
+             "last pass forgot to clear cache");
+      assert(!Pass->LocalDynamicShadow);
+    }
+
+    ~FunctionStateRAII() {
+      Pass->LocalDynamicShadow = nullptr;
+      Pass->ProcessedAllocas.clear();
+    }
+  };
+
+  LLVMContext *C;
+  Triple TargetTriple;
+  int LongSize;
+  bool CompileKernel;
+  bool Recover;
+  bool UseAfterScope;
+  Type *IntptrTy;
+  ShadowMapping Mapping;
+  FunctionCallee AsanHandleNoReturnFunc;
+  FunctionCallee AsanPtrCmpFunction, AsanPtrSubFunction;
+  Constant *AsanShadowGlobal;
+
+  // These arrays are indexed by AccessIsWrite, Experiment and log2(AccessSize).
+  FunctionCallee AsanErrorCallback[2][2][kNumberOfAccessSizes];
+  FunctionCallee AsanMemoryAccessCallback[2][2][kNumberOfAccessSizes];
+
+  // These arrays are indexed by AccessIsWrite and Experiment.
+  FunctionCallee AsanErrorCallbackSized[2][2];
+  FunctionCallee AsanMemoryAccessCallbackSized[2][2];
+
+  FunctionCallee AsanMemmove, AsanMemcpy, AsanMemset;
+  Value *LocalDynamicShadow = nullptr;
+  const GlobalsMetadata &GlobalsMD;
+  DenseMap<const AllocaInst *, bool> ProcessedAllocas;
+};
+
+class AddressSanitizerLegacyPass : public FunctionPass {
+public:
+  static char ID;
+
+  explicit AddressSanitizerLegacyPass(bool CompileKernel = false,
+                                      bool Recover = false,
+                                      bool UseAfterScope = false)
+      : FunctionPass(ID), CompileKernel(CompileKernel), Recover(Recover),
+        UseAfterScope(UseAfterScope) {
+    initializeAddressSanitizerLegacyPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override {
+    return "AddressSanitizerFunctionPass";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<ASanGlobalsMetadataWrapperPass>();
+    AU.addRequired<TargetLibraryInfoWrapperPass>();
+  }
+
+  bool runOnFunction(Function &F) override {
+    GlobalsMetadata &GlobalsMD =
+        getAnalysis<ASanGlobalsMetadataWrapperPass>().getGlobalsMD();
+    const TargetLibraryInfo *TLI =
+        &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+    AddressSanitizer ASan(*F.getParent(), &GlobalsMD, CompileKernel, Recover,
+                          UseAfterScope);
+    return ASan.instrumentFunction(F, TLI);
+  }
+
+private:
+  bool CompileKernel;
+  bool Recover;
+  bool UseAfterScope;
+};
+
+class ModuleAddressSanitizer {
+public:
+  ModuleAddressSanitizer(Module &M, const GlobalsMetadata *GlobalsMD,
+                         bool CompileKernel = false, bool Recover = false,
+                         bool UseGlobalsGC = true, bool UseOdrIndicator = false)
+      : GlobalsMD(*GlobalsMD),
+        CompileKernel(ClEnableKasan.getNumOccurrences() > 0 ? ClEnableKasan
+                                                            : CompileKernel),
+        Recover(ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover),
+        UseGlobalsGC(UseGlobalsGC && ClUseGlobalsGC && !this->CompileKernel),
+        // Enable aliases as they should have no downside with ODR indicators.
+        UsePrivateAlias(UseOdrIndicator || ClUsePrivateAlias),
+        UseOdrIndicator(UseOdrIndicator || ClUseOdrIndicator),
+        // Not a typo: ClWithComdat is almost completely pointless without
+        // ClUseGlobalsGC (because then it only works on modules without
+        // globals, which are rare); it is a prerequisite for ClUseGlobalsGC;
+        // and both suffer from gold PR19002 for which UseGlobalsGC constructor
+        // argument is designed as workaround. Therefore, disable both
+        // ClWithComdat and ClUseGlobalsGC unless the frontend says it's ok to
+        // do globals-gc.
+        UseCtorComdat(UseGlobalsGC && ClWithComdat && !this->CompileKernel) {
+    C = &(M.getContext());
+    int LongSize = M.getDataLayout().getPointerSizeInBits();
+    IntptrTy = Type::getIntNTy(*C, LongSize);
+    TargetTriple = Triple(M.getTargetTriple());
+    Mapping = getShadowMapping(TargetTriple, LongSize, this->CompileKernel);
+  }
+
+  bool instrumentModule(Module &);
+
+private:
+  void initializeCallbacks(Module &M);
+
+  bool InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool *CtorComdat);
+  void InstrumentGlobalsCOFF(IRBuilder<> &IRB, Module &M,
+                             ArrayRef<GlobalVariable *> ExtendedGlobals,
+                             ArrayRef<Constant *> MetadataInitializers);
+  void InstrumentGlobalsELF(IRBuilder<> &IRB, Module &M,
+                            ArrayRef<GlobalVariable *> ExtendedGlobals,
+                            ArrayRef<Constant *> MetadataInitializers,
+                            const std::string &UniqueModuleId);
+  void InstrumentGlobalsMachO(IRBuilder<> &IRB, Module &M,
+                              ArrayRef<GlobalVariable *> ExtendedGlobals,
+                              ArrayRef<Constant *> MetadataInitializers);
+  void
+  InstrumentGlobalsWithMetadataArray(IRBuilder<> &IRB, Module &M,
+                                     ArrayRef<GlobalVariable *> ExtendedGlobals,
+                                     ArrayRef<Constant *> MetadataInitializers);
+
+  GlobalVariable *CreateMetadataGlobal(Module &M, Constant *Initializer,
+                                       StringRef OriginalName);
+  void SetComdatForGlobalMetadata(GlobalVariable *G, GlobalVariable *Metadata,
+                                  StringRef InternalSuffix);
+  Instruction *CreateAsanModuleDtor(Module &M);
+
+  const GlobalVariable *getExcludedAliasedGlobal(const GlobalAlias &GA) const;
+  bool shouldInstrumentGlobal(GlobalVariable *G) const;
+  bool ShouldUseMachOGlobalsSection() const;
+  StringRef getGlobalMetadataSection() const;
+  void poisonOneInitializer(Function &GlobalInit, GlobalValue *ModuleName);
+  void createInitializerPoisonCalls(Module &M, GlobalValue *ModuleName);
+  uint64_t getMinRedzoneSizeForGlobal() const {
+    return getRedzoneSizeForScale(Mapping.Scale);
+  }
+  uint64_t getRedzoneSizeForGlobal(uint64_t SizeInBytes) const;
+  int GetAsanVersion(const Module &M) const;
+
+  const GlobalsMetadata &GlobalsMD;
+  bool CompileKernel;
+  bool Recover;
+  bool UseGlobalsGC;
+  bool UsePrivateAlias;
+  bool UseOdrIndicator;
+  bool UseCtorComdat;
+  Type *IntptrTy;
+  LLVMContext *C;
+  Triple TargetTriple;
+  ShadowMapping Mapping;
+  FunctionCallee AsanPoisonGlobals;
+  FunctionCallee AsanUnpoisonGlobals;
+  FunctionCallee AsanRegisterGlobals;
+  FunctionCallee AsanUnregisterGlobals;
+  FunctionCallee AsanRegisterImageGlobals;
+  FunctionCallee AsanUnregisterImageGlobals;
+  FunctionCallee AsanRegisterElfGlobals;
+  FunctionCallee AsanUnregisterElfGlobals;
+
+  Function *AsanCtorFunction = nullptr;
+  Function *AsanDtorFunction = nullptr;
+};
+
+class ModuleAddressSanitizerLegacyPass : public ModulePass {
+public:
+  static char ID;
+
+  explicit ModuleAddressSanitizerLegacyPass(bool CompileKernel = false,
+                                            bool Recover = false,
+                                            bool UseGlobalGC = true,
+                                            bool UseOdrIndicator = false)
+      : ModulePass(ID), CompileKernel(CompileKernel), Recover(Recover),
+        UseGlobalGC(UseGlobalGC), UseOdrIndicator(UseOdrIndicator) {
+    initializeModuleAddressSanitizerLegacyPassPass(
+        *PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override { return "ModuleAddressSanitizer"; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<ASanGlobalsMetadataWrapperPass>();
+  }
+
+  bool runOnModule(Module &M) override {
+    GlobalsMetadata &GlobalsMD =
+        getAnalysis<ASanGlobalsMetadataWrapperPass>().getGlobalsMD();
+    ModuleAddressSanitizer ASanModule(M, &GlobalsMD, CompileKernel, Recover,
+                                      UseGlobalGC, UseOdrIndicator);
+    return ASanModule.instrumentModule(M);
+  }
+
+private:
+  bool CompileKernel;
+  bool Recover;
+  bool UseGlobalGC;
+  bool UseOdrIndicator;
+};
+
+// Stack poisoning does not play well with exception handling.
+// When an exception is thrown, we essentially bypass the code
+// that unpoisons the stack. This is why the run-time library has
+// to intercept __cxa_throw (as well as longjmp, etc) and unpoison the entire
+// stack in the interceptor. This however does not work inside the
+// actual function which catches the exception. Most likely because the
+// compiler hoists the load of the shadow value somewhere too high.
+// This causes asan to report a non-existing bug on 453.povray.
+// It sounds like an LLVM bug.
+struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
+  Function &F;
+  AddressSanitizer &ASan;
+  DIBuilder DIB;
+  LLVMContext *C;
+  Type *IntptrTy;
+  Type *IntptrPtrTy;
+  ShadowMapping Mapping;
+
+  SmallVector<AllocaInst *, 16> AllocaVec;
+  SmallVector<AllocaInst *, 16> StaticAllocasToMoveUp;
+  SmallVector<Instruction *, 8> RetVec;
+  unsigned StackAlignment;
+
+  FunctionCallee AsanStackMallocFunc[kMaxAsanStackMallocSizeClass + 1],
+      AsanStackFreeFunc[kMaxAsanStackMallocSizeClass + 1];
+  FunctionCallee AsanSetShadowFunc[0x100] = {};
+  FunctionCallee AsanPoisonStackMemoryFunc, AsanUnpoisonStackMemoryFunc;
+  FunctionCallee AsanAllocaPoisonFunc, AsanAllocasUnpoisonFunc;
+
+  // Stores a place and arguments of poisoning/unpoisoning call for alloca.
+  struct AllocaPoisonCall {
+    IntrinsicInst *InsBefore;
+    AllocaInst *AI;
+    uint64_t Size;
+    bool DoPoison;
+  };
+  SmallVector<AllocaPoisonCall, 8> DynamicAllocaPoisonCallVec;
+  SmallVector<AllocaPoisonCall, 8> StaticAllocaPoisonCallVec;
+  bool HasUntracedLifetimeIntrinsic = false;
+
+  SmallVector<AllocaInst *, 1> DynamicAllocaVec;
+  SmallVector<IntrinsicInst *, 1> StackRestoreVec;
+  AllocaInst *DynamicAllocaLayout = nullptr;
+  IntrinsicInst *LocalEscapeCall = nullptr;
+
+  bool HasInlineAsm = false;
+  bool HasReturnsTwiceCall = false;
+
+  FunctionStackPoisoner(Function &F, AddressSanitizer &ASan)
+      : F(F), ASan(ASan), DIB(*F.getParent(), /*AllowUnresolved*/ false),
+        C(ASan.C), IntptrTy(ASan.IntptrTy),
+        IntptrPtrTy(PointerType::get(IntptrTy, 0)), Mapping(ASan.Mapping),
+        StackAlignment(1 << Mapping.Scale) {}
+
+  bool runOnFunction() {
+    if (!ClStack) return false;
+
+    if (ClRedzoneByvalArgs)
+      copyArgsPassedByValToAllocas();
+
+    // Collect alloca, ret, lifetime instructions etc.
+    for (BasicBlock *BB : depth_first(&F.getEntryBlock())) visit(*BB);
+
+    if (AllocaVec.empty() && DynamicAllocaVec.empty()) return false;
+
+    initializeCallbacks(*F.getParent());
+
+    if (HasUntracedLifetimeIntrinsic) {
+      // If there are lifetime intrinsics which couldn't be traced back to an
+      // alloca, we may not know exactly when a variable enters scope, and
+      // therefore should "fail safe" by not poisoning them.
+      StaticAllocaPoisonCallVec.clear();
+      DynamicAllocaPoisonCallVec.clear();
+    }
+
+    processDynamicAllocas();
+    processStaticAllocas();
+
+    if (ClDebugStack) {
+      LLVM_DEBUG(dbgs() << F);
+    }
+    return true;
+  }
+
+  // Arguments marked with the "byval" attribute are implicitly copied without
+  // using an alloca instruction.  To produce redzones for those arguments, we
+  // copy them a second time into memory allocated with an alloca instruction.
+  void copyArgsPassedByValToAllocas();
+
+  // Finds all Alloca instructions and puts
+  // poisoned red zones around all of them.
+  // Then unpoison everything back before the function returns.
+  void processStaticAllocas();
+  void processDynamicAllocas();
+
+  void createDynamicAllocasInitStorage();
+
+  // ----------------------- Visitors.
   /// Collect all Ret instructions, or the musttail call instruction if it
   /// precedes the return instruction.
   void visitReturnInst(ReturnInst &RI) {
@@ -969,910 +969,910 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
     else
       RetVec.push_back(&RI);
   }
- 
-  /// Collect all Resume instructions. 
-  void visitResumeInst(ResumeInst &RI) { RetVec.push_back(&RI); } 
- 
-  /// Collect all CatchReturnInst instructions. 
-  void visitCleanupReturnInst(CleanupReturnInst &CRI) { RetVec.push_back(&CRI); } 
- 
-  void unpoisonDynamicAllocasBeforeInst(Instruction *InstBefore, 
-                                        Value *SavedStack) { 
-    IRBuilder<> IRB(InstBefore); 
-    Value *DynamicAreaPtr = IRB.CreatePtrToInt(SavedStack, IntptrTy); 
-    // When we insert _asan_allocas_unpoison before @llvm.stackrestore, we 
-    // need to adjust extracted SP to compute the address of the most recent 
-    // alloca. We have a special @llvm.get.dynamic.area.offset intrinsic for 
-    // this purpose. 
-    if (!isa<ReturnInst>(InstBefore)) { 
-      Function *DynamicAreaOffsetFunc = Intrinsic::getDeclaration( 
-          InstBefore->getModule(), Intrinsic::get_dynamic_area_offset, 
-          {IntptrTy}); 
- 
-      Value *DynamicAreaOffset = IRB.CreateCall(DynamicAreaOffsetFunc, {}); 
- 
-      DynamicAreaPtr = IRB.CreateAdd(IRB.CreatePtrToInt(SavedStack, IntptrTy), 
-                                     DynamicAreaOffset); 
-    } 
- 
-    IRB.CreateCall( 
-        AsanAllocasUnpoisonFunc, 
-        {IRB.CreateLoad(IntptrTy, DynamicAllocaLayout), DynamicAreaPtr}); 
-  } 
- 
-  // Unpoison dynamic allocas redzones. 
-  void unpoisonDynamicAllocas() { 
+
+  /// Collect all Resume instructions.
+  void visitResumeInst(ResumeInst &RI) { RetVec.push_back(&RI); }
+
+  /// Collect all CleanupReturnInst instructions.
+  void visitCleanupReturnInst(CleanupReturnInst &CRI) { RetVec.push_back(&CRI); }
+
+  void unpoisonDynamicAllocasBeforeInst(Instruction *InstBefore,
+                                        Value *SavedStack) {
+    IRBuilder<> IRB(InstBefore);
+    Value *DynamicAreaPtr = IRB.CreatePtrToInt(SavedStack, IntptrTy);
+    // When we insert __asan_allocas_unpoison before @llvm.stackrestore, we
+    // need to adjust extracted SP to compute the address of the most recent
+    // alloca. We have a special @llvm.get.dynamic.area.offset intrinsic for
+    // this purpose.
+    if (!isa<ReturnInst>(InstBefore)) {
+      Function *DynamicAreaOffsetFunc = Intrinsic::getDeclaration(
+          InstBefore->getModule(), Intrinsic::get_dynamic_area_offset,
+          {IntptrTy});
+
+      Value *DynamicAreaOffset = IRB.CreateCall(DynamicAreaOffsetFunc, {});
+
+      DynamicAreaPtr = IRB.CreateAdd(IRB.CreatePtrToInt(SavedStack, IntptrTy),
+                                     DynamicAreaOffset);
+    }
+
+    IRB.CreateCall(
+        AsanAllocasUnpoisonFunc,
+        {IRB.CreateLoad(IntptrTy, DynamicAllocaLayout), DynamicAreaPtr});
+  }
+
+  // Unpoison dynamic allocas redzones.
+  void unpoisonDynamicAllocas() {
     for (Instruction *Ret : RetVec)
-      unpoisonDynamicAllocasBeforeInst(Ret, DynamicAllocaLayout); 
- 
+      unpoisonDynamicAllocasBeforeInst(Ret, DynamicAllocaLayout);
+
     for (Instruction *StackRestoreInst : StackRestoreVec)
-      unpoisonDynamicAllocasBeforeInst(StackRestoreInst, 
-                                       StackRestoreInst->getOperand(0)); 
-  } 
- 
-  // Deploy and poison redzones around dynamic alloca call. To do this, we 
-  // should replace this call with another one with changed parameters and 
-  // replace all its uses with new address, so 
-  //   addr = alloca type, old_size, align 
-  // is replaced by 
-  //   new_size = (old_size + additional_size) * sizeof(type) 
-  //   tmp = alloca i8, new_size, max(align, 32) 
-  //   addr = tmp + 32 (first 32 bytes are for the left redzone). 
-  // Additional_size is added to make new memory allocation contain not only 
-  // requested memory, but also left, partial and right redzones. 
-  void handleDynamicAllocaCall(AllocaInst *AI); 
- 
-  /// Collect Alloca instructions we want (and can) handle. 
-  void visitAllocaInst(AllocaInst &AI) { 
-    if (!ASan.isInterestingAlloca(AI)) { 
-      if (AI.isStaticAlloca()) { 
-        // Skip over allocas that are present *before* the first instrumented 
-        // alloca, we don't want to move those around. 
-        if (AllocaVec.empty()) 
-          return; 
- 
-        StaticAllocasToMoveUp.push_back(&AI); 
-      } 
-      return; 
-    } 
- 
-    StackAlignment = std::max(StackAlignment, AI.getAlignment()); 
-    if (!AI.isStaticAlloca()) 
-      DynamicAllocaVec.push_back(&AI); 
-    else 
-      AllocaVec.push_back(&AI); 
-  } 
- 
-  /// Collect lifetime intrinsic calls to check for use-after-scope 
-  /// errors. 
-  void visitIntrinsicInst(IntrinsicInst &II) { 
-    Intrinsic::ID ID = II.getIntrinsicID(); 
-    if (ID == Intrinsic::stackrestore) StackRestoreVec.push_back(&II); 
-    if (ID == Intrinsic::localescape) LocalEscapeCall = &II; 
-    if (!ASan.UseAfterScope) 
-      return; 
-    if (!II.isLifetimeStartOrEnd()) 
-      return; 
-    // Found lifetime intrinsic, add ASan instrumentation if necessary. 
-    auto *Size = cast<ConstantInt>(II.getArgOperand(0)); 
-    // If size argument is undefined, don't do anything. 
-    if (Size->isMinusOne()) return; 
-    // Check that size doesn't saturate uint64_t and can 
-    // be stored in IntptrTy. 
-    const uint64_t SizeValue = Size->getValue().getLimitedValue(); 
-    if (SizeValue == ~0ULL || 
-        !ConstantInt::isValueValidForType(IntptrTy, SizeValue)) 
-      return; 
-    // Find alloca instruction that corresponds to llvm.lifetime argument. 
+      unpoisonDynamicAllocasBeforeInst(StackRestoreInst,
+                                       StackRestoreInst->getOperand(0));
+  }
+
+  // Deploy and poison redzones around dynamic alloca call. To do this, we
+  // should replace this call with another one with changed parameters and
+  // replace all its uses with new address, so
+  //   addr = alloca type, old_size, align
+  // is replaced by
+  //   new_size = (old_size + additional_size) * sizeof(type)
+  //   tmp = alloca i8, new_size, max(align, 32)
+  //   addr = tmp + 32 (first 32 bytes are for the left redzone).
+  // Additional_size is added to make new memory allocation contain not only
+  // requested memory, but also left, partial and right redzones.
+  void handleDynamicAllocaCall(AllocaInst *AI);
+
+  /// Collect Alloca instructions we want (and can) handle.
+  void visitAllocaInst(AllocaInst &AI) {
+    if (!ASan.isInterestingAlloca(AI)) {
+      if (AI.isStaticAlloca()) {
+        // Skip over allocas that are present *before* the first instrumented
+        // alloca, we don't want to move those around.
+        if (AllocaVec.empty())
+          return;
+
+        StaticAllocasToMoveUp.push_back(&AI);
+      }
+      return;
+    }
+
+    StackAlignment = std::max(StackAlignment, AI.getAlignment());
+    if (!AI.isStaticAlloca())
+      DynamicAllocaVec.push_back(&AI);
+    else
+      AllocaVec.push_back(&AI);
+  }
+
+  /// Collect lifetime intrinsic calls to check for use-after-scope
+  /// errors.
+  void visitIntrinsicInst(IntrinsicInst &II) {
+    Intrinsic::ID ID = II.getIntrinsicID();
+    if (ID == Intrinsic::stackrestore) StackRestoreVec.push_back(&II);
+    if (ID == Intrinsic::localescape) LocalEscapeCall = &II;
+    if (!ASan.UseAfterScope)
+      return;
+    if (!II.isLifetimeStartOrEnd())
+      return;
+    // Found lifetime intrinsic, add ASan instrumentation if necessary.
+    auto *Size = cast<ConstantInt>(II.getArgOperand(0));
+    // If size argument is undefined, don't do anything.
+    if (Size->isMinusOne()) return;
+    // Check that size doesn't saturate uint64_t and can
+    // be stored in IntptrTy.
+    const uint64_t SizeValue = Size->getValue().getLimitedValue();
+    if (SizeValue == ~0ULL ||
+        !ConstantInt::isValueValidForType(IntptrTy, SizeValue))
+      return;
+    // Find alloca instruction that corresponds to llvm.lifetime argument.
     // Currently we can only handle lifetime markers pointing to the
     // beginning of the alloca.
     AllocaInst *AI = findAllocaForValue(II.getArgOperand(1), true);
-    if (!AI) { 
-      HasUntracedLifetimeIntrinsic = true; 
-      return; 
-    } 
-    // We're interested only in allocas we can handle. 
-    if (!ASan.isInterestingAlloca(*AI)) 
-      return; 
-    bool DoPoison = (ID == Intrinsic::lifetime_end); 
-    AllocaPoisonCall APC = {&II, AI, SizeValue, DoPoison}; 
-    if (AI->isStaticAlloca()) 
-      StaticAllocaPoisonCallVec.push_back(APC); 
-    else if (ClInstrumentDynamicAllocas) 
-      DynamicAllocaPoisonCallVec.push_back(APC); 
-  } 
- 
-  void visitCallBase(CallBase &CB) { 
-    if (CallInst *CI = dyn_cast<CallInst>(&CB)) { 
-      HasInlineAsm |= CI->isInlineAsm() && &CB != ASan.LocalDynamicShadow; 
-      HasReturnsTwiceCall |= CI->canReturnTwice(); 
-    } 
-  } 
- 
-  // ---------------------- Helpers. 
-  void initializeCallbacks(Module &M); 
- 
-  // Copies bytes from ShadowBytes into shadow memory for indexes where 
-  // ShadowMask is not zero. If ShadowMask[i] is zero, we assume that 
-  // ShadowBytes[i] is constantly zero and doesn't need to be overwritten. 
-  void copyToShadow(ArrayRef<uint8_t> ShadowMask, ArrayRef<uint8_t> ShadowBytes, 
-                    IRBuilder<> &IRB, Value *ShadowBase); 
-  void copyToShadow(ArrayRef<uint8_t> ShadowMask, ArrayRef<uint8_t> ShadowBytes, 
-                    size_t Begin, size_t End, IRBuilder<> &IRB, 
-                    Value *ShadowBase); 
-  void copyToShadowInline(ArrayRef<uint8_t> ShadowMask, 
-                          ArrayRef<uint8_t> ShadowBytes, size_t Begin, 
-                          size_t End, IRBuilder<> &IRB, Value *ShadowBase); 
- 
-  void poisonAlloca(Value *V, uint64_t Size, IRBuilder<> &IRB, bool DoPoison); 
- 
-  Value *createAllocaForLayout(IRBuilder<> &IRB, const ASanStackFrameLayout &L, 
-                               bool Dynamic); 
-  PHINode *createPHI(IRBuilder<> &IRB, Value *Cond, Value *ValueIfTrue, 
-                     Instruction *ThenTerm, Value *ValueIfFalse); 
-}; 
- 
-} // end anonymous namespace 
- 
-void LocationMetadata::parse(MDNode *MDN) { 
-  assert(MDN->getNumOperands() == 3); 
-  MDString *DIFilename = cast<MDString>(MDN->getOperand(0)); 
-  Filename = DIFilename->getString(); 
-  LineNo = mdconst::extract<ConstantInt>(MDN->getOperand(1))->getLimitedValue(); 
-  ColumnNo = 
-      mdconst::extract<ConstantInt>(MDN->getOperand(2))->getLimitedValue(); 
-} 
- 
-// FIXME: It would be cleaner to instead attach relevant metadata to the globals 
-// we want to sanitize instead and reading this metadata on each pass over a 
-// function instead of reading module level metadata at first. 
-GlobalsMetadata::GlobalsMetadata(Module &M) { 
-  NamedMDNode *Globals = M.getNamedMetadata("llvm.asan.globals"); 
-  if (!Globals) 
-    return; 
-  for (auto MDN : Globals->operands()) { 
-    // Metadata node contains the global and the fields of "Entry". 
-    assert(MDN->getNumOperands() == 5); 
-    auto *V = mdconst::extract_or_null<Constant>(MDN->getOperand(0)); 
-    // The optimizer may optimize away a global entirely. 
-    if (!V) 
-      continue; 
-    auto *StrippedV = V->stripPointerCasts(); 
-    auto *GV = dyn_cast<GlobalVariable>(StrippedV); 
-    if (!GV) 
-      continue; 
-    // We can already have an entry for GV if it was merged with another 
-    // global. 
-    Entry &E = Entries[GV]; 
-    if (auto *Loc = cast_or_null<MDNode>(MDN->getOperand(1))) 
-      E.SourceLoc.parse(Loc); 
-    if (auto *Name = cast_or_null<MDString>(MDN->getOperand(2))) 
-      E.Name = Name->getString(); 
-    ConstantInt *IsDynInit = mdconst::extract<ConstantInt>(MDN->getOperand(3)); 
-    E.IsDynInit |= IsDynInit->isOne(); 
-    ConstantInt *IsExcluded = 
-        mdconst::extract<ConstantInt>(MDN->getOperand(4)); 
-    E.IsExcluded |= IsExcluded->isOne(); 
-  } 
-} 
- 
-AnalysisKey ASanGlobalsMetadataAnalysis::Key; 
- 
-GlobalsMetadata ASanGlobalsMetadataAnalysis::run(Module &M, 
-                                                 ModuleAnalysisManager &AM) { 
-  return GlobalsMetadata(M); 
-} 
- 
-AddressSanitizerPass::AddressSanitizerPass(bool CompileKernel, bool Recover, 
-                                           bool UseAfterScope) 
-    : CompileKernel(CompileKernel), Recover(Recover), 
-      UseAfterScope(UseAfterScope) {} 
- 
-PreservedAnalyses AddressSanitizerPass::run(Function &F, 
-                                            AnalysisManager<Function> &AM) { 
-  auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F); 
-  Module &M = *F.getParent(); 
-  if (auto *R = MAMProxy.getCachedResult<ASanGlobalsMetadataAnalysis>(M)) { 
-    const TargetLibraryInfo *TLI = &AM.getResult<TargetLibraryAnalysis>(F); 
-    AddressSanitizer Sanitizer(M, R, CompileKernel, Recover, UseAfterScope); 
-    if (Sanitizer.instrumentFunction(F, TLI)) 
-      return PreservedAnalyses::none(); 
-    return PreservedAnalyses::all(); 
-  } 
- 
-  report_fatal_error( 
-      "The ASanGlobalsMetadataAnalysis is required to run before " 
-      "AddressSanitizer can run"); 
-  return PreservedAnalyses::all(); 
-} 
- 
-ModuleAddressSanitizerPass::ModuleAddressSanitizerPass(bool CompileKernel, 
-                                                       bool Recover, 
-                                                       bool UseGlobalGC, 
-                                                       bool UseOdrIndicator) 
-    : CompileKernel(CompileKernel), Recover(Recover), UseGlobalGC(UseGlobalGC), 
-      UseOdrIndicator(UseOdrIndicator) {} 
- 
-PreservedAnalyses ModuleAddressSanitizerPass::run(Module &M, 
-                                                  AnalysisManager<Module> &AM) { 
-  GlobalsMetadata &GlobalsMD = AM.getResult<ASanGlobalsMetadataAnalysis>(M); 
-  ModuleAddressSanitizer Sanitizer(M, &GlobalsMD, CompileKernel, Recover, 
-                                   UseGlobalGC, UseOdrIndicator); 
-  if (Sanitizer.instrumentModule(M)) 
-    return PreservedAnalyses::none(); 
-  return PreservedAnalyses::all(); 
-} 
- 
-INITIALIZE_PASS(ASanGlobalsMetadataWrapperPass, "asan-globals-md", 
-                "Read metadata to mark which globals should be instrumented " 
-                "when running ASan.", 
-                false, true) 
- 
-char AddressSanitizerLegacyPass::ID = 0; 
- 
-INITIALIZE_PASS_BEGIN( 
-    AddressSanitizerLegacyPass, "asan", 
-    "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false, 
-    false) 
-INITIALIZE_PASS_DEPENDENCY(ASanGlobalsMetadataWrapperPass) 
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 
-INITIALIZE_PASS_END( 
-    AddressSanitizerLegacyPass, "asan", 
-    "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false, 
-    false) 
- 
-FunctionPass *llvm::createAddressSanitizerFunctionPass(bool CompileKernel, 
-                                                       bool Recover, 
-                                                       bool UseAfterScope) { 
-  assert(!CompileKernel || Recover); 
-  return new AddressSanitizerLegacyPass(CompileKernel, Recover, UseAfterScope); 
-} 
- 
-char ModuleAddressSanitizerLegacyPass::ID = 0; 
- 
-INITIALIZE_PASS( 
-    ModuleAddressSanitizerLegacyPass, "asan-module", 
-    "AddressSanitizer: detects use-after-free and out-of-bounds bugs." 
-    "ModulePass", 
-    false, false) 
- 
-ModulePass *llvm::createModuleAddressSanitizerLegacyPassPass( 
-    bool CompileKernel, bool Recover, bool UseGlobalsGC, bool UseOdrIndicator) { 
-  assert(!CompileKernel || Recover); 
-  return new ModuleAddressSanitizerLegacyPass(CompileKernel, Recover, 
-                                              UseGlobalsGC, UseOdrIndicator); 
-} 
- 
-static size_t TypeSizeToSizeIndex(uint32_t TypeSize) { 
-  size_t Res = countTrailingZeros(TypeSize / 8); 
-  assert(Res < kNumberOfAccessSizes); 
-  return Res; 
-} 
- 
-/// Create a global describing a source location. 
-static GlobalVariable *createPrivateGlobalForSourceLoc(Module &M, 
-                                                       LocationMetadata MD) { 
-  Constant *LocData[] = { 
-      createPrivateGlobalForString(M, MD.Filename, true, kAsanGenPrefix), 
-      ConstantInt::get(Type::getInt32Ty(M.getContext()), MD.LineNo), 
-      ConstantInt::get(Type::getInt32Ty(M.getContext()), MD.ColumnNo), 
-  }; 
-  auto LocStruct = ConstantStruct::getAnon(LocData); 
-  auto GV = new GlobalVariable(M, LocStruct->getType(), true, 
-                               GlobalValue::PrivateLinkage, LocStruct, 
-                               kAsanGenPrefix); 
-  GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); 
-  return GV; 
-} 
- 
-/// Check if \p G has been created by a trusted compiler pass. 
-static bool GlobalWasGeneratedByCompiler(GlobalVariable *G) { 
-  // Do not instrument @llvm.global_ctors, @llvm.used, etc. 
-  if (G->getName().startswith("llvm.")) 
-    return true; 
- 
-  // Do not instrument asan globals. 
-  if (G->getName().startswith(kAsanGenPrefix) || 
-      G->getName().startswith(kSanCovGenPrefix) || 
-      G->getName().startswith(kODRGenPrefix)) 
-    return true; 
- 
-  // Do not instrument gcov counter arrays. 
-  if (G->getName() == "__llvm_gcov_ctr") 
-    return true; 
- 
-  return false; 
-} 
- 
-Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) { 
-  // Shadow >> scale 
-  Shadow = IRB.CreateLShr(Shadow, Mapping.Scale); 
-  if (Mapping.Offset == 0) return Shadow; 
-  // (Shadow >> scale) | offset 
-  Value *ShadowBase; 
-  if (LocalDynamicShadow) 
-    ShadowBase = LocalDynamicShadow; 
-  else 
-    ShadowBase = ConstantInt::get(IntptrTy, Mapping.Offset); 
-  if (Mapping.OrShadowOffset) 
-    return IRB.CreateOr(Shadow, ShadowBase); 
-  else 
-    return IRB.CreateAdd(Shadow, ShadowBase); 
-} 
- 
-// Instrument memset/memmove/memcpy 
-void AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) { 
-  IRBuilder<> IRB(MI); 
-  if (isa<MemTransferInst>(MI)) { 
-    IRB.CreateCall( 
-        isa<MemMoveInst>(MI) ? AsanMemmove : AsanMemcpy, 
-        {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()), 
-         IRB.CreatePointerCast(MI->getOperand(1), IRB.getInt8PtrTy()), 
-         IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)}); 
-  } else if (isa<MemSetInst>(MI)) { 
-    IRB.CreateCall( 
-        AsanMemset, 
-        {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()), 
-         IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false), 
-         IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)}); 
-  } 
-  MI->eraseFromParent(); 
-} 
- 
-/// Check if we want (and can) handle this alloca. 
-bool AddressSanitizer::isInterestingAlloca(const AllocaInst &AI) { 
-  auto PreviouslySeenAllocaInfo = ProcessedAllocas.find(&AI); 
- 
-  if (PreviouslySeenAllocaInfo != ProcessedAllocas.end()) 
-    return PreviouslySeenAllocaInfo->getSecond(); 
- 
-  bool IsInteresting = 
-      (AI.getAllocatedType()->isSized() && 
-       // alloca() may be called with 0 size, ignore it. 
-       ((!AI.isStaticAlloca()) || getAllocaSizeInBytes(AI) > 0) && 
-       // We are only interested in allocas not promotable to registers. 
-       // Promotable allocas are common under -O0. 
-       (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI)) && 
-       // inalloca allocas are not treated as static, and we don't want 
-       // dynamic alloca instrumentation for them as well. 
-       !AI.isUsedWithInAlloca() && 
-       // swifterror allocas are register promoted by ISel 
-       !AI.isSwiftError()); 
- 
-  ProcessedAllocas[&AI] = IsInteresting; 
-  return IsInteresting; 
-} 
- 
-bool AddressSanitizer::ignoreAccess(Value *Ptr) { 
-  // Do not instrument acesses from different address spaces; we cannot deal 
-  // with them. 
-  Type *PtrTy = cast<PointerType>(Ptr->getType()->getScalarType()); 
-  if (PtrTy->getPointerAddressSpace() != 0) 
-    return true; 
- 
-  // Ignore swifterror addresses. 
-  // swifterror memory addresses are mem2reg promoted by instruction 
-  // selection. As such they cannot have regular uses like an instrumentation 
-  // function and it makes no sense to track them as memory. 
-  if (Ptr->isSwiftError()) 
-    return true; 
- 
-  // Treat memory accesses to promotable allocas as non-interesting since they 
-  // will not cause memory violations. This greatly speeds up the instrumented 
-  // executable at -O0. 
-  if (auto AI = dyn_cast_or_null<AllocaInst>(Ptr)) 
-    if (ClSkipPromotableAllocas && !isInterestingAlloca(*AI)) 
-      return true; 
- 
-  return false; 
-} 
- 
-void AddressSanitizer::getInterestingMemoryOperands( 
-    Instruction *I, SmallVectorImpl<InterestingMemoryOperand> &Interesting) { 
-  // Skip memory accesses inserted by another instrumentation. 
-  if (I->hasMetadata("nosanitize")) 
-    return; 
- 
-  // Do not instrument the load fetching the dynamic shadow address. 
-  if (LocalDynamicShadow == I) 
-    return; 
- 
-  if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 
-    if (!ClInstrumentReads || ignoreAccess(LI->getPointerOperand())) 
-      return; 
-    Interesting.emplace_back(I, LI->getPointerOperandIndex(), false, 
-                             LI->getType(), LI->getAlign()); 
-  } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { 
-    if (!ClInstrumentWrites || ignoreAccess(SI->getPointerOperand())) 
-      return; 
-    Interesting.emplace_back(I, SI->getPointerOperandIndex(), true, 
-                             SI->getValueOperand()->getType(), SI->getAlign()); 
-  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) { 
-    if (!ClInstrumentAtomics || ignoreAccess(RMW->getPointerOperand())) 
-      return; 
-    Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true, 
-                             RMW->getValOperand()->getType(), None); 
-  } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) { 
-    if (!ClInstrumentAtomics || ignoreAccess(XCHG->getPointerOperand())) 
-      return; 
-    Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true, 
-                             XCHG->getCompareOperand()->getType(), None); 
-  } else if (auto CI = dyn_cast<CallInst>(I)) { 
-    auto *F = CI->getCalledFunction(); 
-    if (F && (F->getName().startswith("llvm.masked.load.") || 
-              F->getName().startswith("llvm.masked.store."))) { 
-      bool IsWrite = F->getName().startswith("llvm.masked.store."); 
-      // Masked store has an initial operand for the value. 
-      unsigned OpOffset = IsWrite ? 1 : 0; 
-      if (IsWrite ? !ClInstrumentWrites : !ClInstrumentReads) 
-        return; 
- 
-      auto BasePtr = CI->getOperand(OpOffset); 
-      if (ignoreAccess(BasePtr)) 
-        return; 
-      auto Ty = cast<PointerType>(BasePtr->getType())->getElementType(); 
-      MaybeAlign Alignment = Align(1); 
-      // Otherwise no alignment guarantees. We probably got Undef. 
-      if (auto *Op = dyn_cast<ConstantInt>(CI->getOperand(1 + OpOffset))) 
-        Alignment = Op->getMaybeAlignValue(); 
-      Value *Mask = CI->getOperand(2 + OpOffset); 
-      Interesting.emplace_back(I, OpOffset, IsWrite, Ty, Alignment, Mask); 
-    } else { 
-      for (unsigned ArgNo = 0; ArgNo < CI->getNumArgOperands(); ArgNo++) { 
-        if (!ClInstrumentByval || !CI->isByValArgument(ArgNo) || 
-            ignoreAccess(CI->getArgOperand(ArgNo))) 
-          continue; 
-        Type *Ty = CI->getParamByValType(ArgNo); 
-        Interesting.emplace_back(I, ArgNo, false, Ty, Align(1)); 
-      } 
-    } 
-  } 
-} 
- 
-static bool isPointerOperand(Value *V) { 
-  return V->getType()->isPointerTy() || isa<PtrToIntInst>(V); 
-} 
- 
-// This is a rough heuristic; it may cause both false positives and 
-// false negatives. The proper implementation requires cooperation with 
-// the frontend. 
-static bool isInterestingPointerComparison(Instruction *I) { 
-  if (ICmpInst *Cmp = dyn_cast<ICmpInst>(I)) { 
-    if (!Cmp->isRelational()) 
-      return false; 
-  } else { 
-    return false; 
-  } 
-  return isPointerOperand(I->getOperand(0)) && 
-         isPointerOperand(I->getOperand(1)); 
-} 
- 
-// This is a rough heuristic; it may cause both false positives and 
-// false negatives. The proper implementation requires cooperation with 
-// the frontend. 
-static bool isInterestingPointerSubtraction(Instruction *I) { 
-  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) { 
-    if (BO->getOpcode() != Instruction::Sub) 
-      return false; 
-  } else { 
-    return false; 
-  } 
-  return isPointerOperand(I->getOperand(0)) && 
-         isPointerOperand(I->getOperand(1)); 
-} 
- 
-bool AddressSanitizer::GlobalIsLinkerInitialized(GlobalVariable *G) { 
-  // If a global variable does not have dynamic initialization we don't 
-  // have to instrument it.  However, if a global does not have initializer 
-  // at all, we assume it has dynamic initializer (in other TU). 
-  // 
-  // FIXME: Metadata should be attched directly to the global directly instead 
-  // of being added to llvm.asan.globals. 
-  return G->hasInitializer() && !GlobalsMD.get(G).IsDynInit; 
-} 
- 
-void AddressSanitizer::instrumentPointerComparisonOrSubtraction( 
-    Instruction *I) { 
-  IRBuilder<> IRB(I); 
-  FunctionCallee F = isa<ICmpInst>(I) ? AsanPtrCmpFunction : AsanPtrSubFunction; 
-  Value *Param[2] = {I->getOperand(0), I->getOperand(1)}; 
-  for (Value *&i : Param) { 
-    if (i->getType()->isPointerTy()) 
-      i = IRB.CreatePointerCast(i, IntptrTy); 
-  } 
-  IRB.CreateCall(F, Param); 
-} 
- 
-static void doInstrumentAddress(AddressSanitizer *Pass, Instruction *I, 
-                                Instruction *InsertBefore, Value *Addr, 
-                                MaybeAlign Alignment, unsigned Granularity, 
-                                uint32_t TypeSize, bool IsWrite, 
-                                Value *SizeArgument, bool UseCalls, 
-                                uint32_t Exp) { 
-  // Instrument a 1-, 2-, 4-, 8-, or 16- byte access with one check 
-  // if the data is properly aligned. 
-  if ((TypeSize == 8 || TypeSize == 16 || TypeSize == 32 || TypeSize == 64 || 
-       TypeSize == 128) && 
-      (!Alignment || *Alignment >= Granularity || *Alignment >= TypeSize / 8)) 
-    return Pass->instrumentAddress(I, InsertBefore, Addr, TypeSize, IsWrite, 
-                                   nullptr, UseCalls, Exp); 
-  Pass->instrumentUnusualSizeOrAlignment(I, InsertBefore, Addr, TypeSize, 
-                                         IsWrite, nullptr, UseCalls, Exp); 
-} 
- 
-static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass, 
-                                        const DataLayout &DL, Type *IntptrTy, 
-                                        Value *Mask, Instruction *I, 
-                                        Value *Addr, MaybeAlign Alignment, 
-                                        unsigned Granularity, uint32_t TypeSize, 
-                                        bool IsWrite, Value *SizeArgument, 
-                                        bool UseCalls, uint32_t Exp) { 
-  auto *VTy = cast<FixedVectorType>( 
-      cast<PointerType>(Addr->getType())->getElementType()); 
-  uint64_t ElemTypeSize = DL.getTypeStoreSizeInBits(VTy->getScalarType()); 
-  unsigned Num = VTy->getNumElements(); 
-  auto Zero = ConstantInt::get(IntptrTy, 0); 
-  for (unsigned Idx = 0; Idx < Num; ++Idx) { 
-    Value *InstrumentedAddress = nullptr; 
-    Instruction *InsertBefore = I; 
-    if (auto *Vector = dyn_cast<ConstantVector>(Mask)) { 
-      // dyn_cast as we might get UndefValue 
-      if (auto *Masked = dyn_cast<ConstantInt>(Vector->getOperand(Idx))) { 
-        if (Masked->isZero()) 
-          // Mask is constant false, so no instrumentation needed. 
-          continue; 
-        // If we have a true or undef value, fall through to doInstrumentAddress 
-        // with InsertBefore == I 
-      } 
-    } else { 
-      IRBuilder<> IRB(I); 
-      Value *MaskElem = IRB.CreateExtractElement(Mask, Idx); 
-      Instruction *ThenTerm = SplitBlockAndInsertIfThen(MaskElem, I, false); 
-      InsertBefore = ThenTerm; 
-    } 
- 
-    IRBuilder<> IRB(InsertBefore); 
-    InstrumentedAddress = 
-        IRB.CreateGEP(VTy, Addr, {Zero, ConstantInt::get(IntptrTy, Idx)}); 
-    doInstrumentAddress(Pass, I, InsertBefore, InstrumentedAddress, Alignment, 
-                        Granularity, ElemTypeSize, IsWrite, SizeArgument, 
-                        UseCalls, Exp); 
-  } 
-} 
- 
-void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, 
-                                     InterestingMemoryOperand &O, bool UseCalls, 
-                                     const DataLayout &DL) { 
-  Value *Addr = O.getPtr(); 
- 
-  // Optimization experiments. 
-  // The experiments can be used to evaluate potential optimizations that remove 
-  // instrumentation (assess false negatives). Instead of completely removing 
-  // some instrumentation, you set Exp to a non-zero value (mask of optimization 
-  // experiments that want to remove instrumentation of this instruction). 
-  // If Exp is non-zero, this pass will emit special calls into runtime 
-  // (e.g. __asan_report_exp_load1 instead of __asan_report_load1). These calls 
-  // make runtime terminate the program in a special way (with a different 
-  // exit status). Then you run the new compiler on a buggy corpus, collect 
-  // the special terminations (ideally, you don't see them at all -- no false 
-  // negatives) and make the decision on the optimization. 
-  uint32_t Exp = ClForceExperiment; 
- 
-  if (ClOpt && ClOptGlobals) { 
-    // If initialization order checking is disabled, a simple access to a 
-    // dynamically initialized global is always valid. 
+    if (!AI) {
+      HasUntracedLifetimeIntrinsic = true;
+      return;
+    }
+    // We're interested only in allocas we can handle.
+    if (!ASan.isInterestingAlloca(*AI))
+      return;
+    bool DoPoison = (ID == Intrinsic::lifetime_end);
+    AllocaPoisonCall APC = {&II, AI, SizeValue, DoPoison};
+    if (AI->isStaticAlloca())
+      StaticAllocaPoisonCallVec.push_back(APC);
+    else if (ClInstrumentDynamicAllocas)
+      DynamicAllocaPoisonCallVec.push_back(APC);
+  }
+
+  void visitCallBase(CallBase &CB) {
+    if (CallInst *CI = dyn_cast<CallInst>(&CB)) {
+      HasInlineAsm |= CI->isInlineAsm() && &CB != ASan.LocalDynamicShadow;
+      HasReturnsTwiceCall |= CI->canReturnTwice();
+    }
+  }
+
+  // ---------------------- Helpers.
+  void initializeCallbacks(Module &M);
+
+  // Copies bytes from ShadowBytes into shadow memory for indexes where
+  // ShadowMask is not zero. If ShadowMask[i] is zero, we assume that
+  // ShadowBytes[i] is constantly zero and doesn't need to be overwritten.
+  void copyToShadow(ArrayRef<uint8_t> ShadowMask, ArrayRef<uint8_t> ShadowBytes,
+                    IRBuilder<> &IRB, Value *ShadowBase);
+  void copyToShadow(ArrayRef<uint8_t> ShadowMask, ArrayRef<uint8_t> ShadowBytes,
+                    size_t Begin, size_t End, IRBuilder<> &IRB,
+                    Value *ShadowBase);
+  void copyToShadowInline(ArrayRef<uint8_t> ShadowMask,
+                          ArrayRef<uint8_t> ShadowBytes, size_t Begin,
+                          size_t End, IRBuilder<> &IRB, Value *ShadowBase);
+
+  void poisonAlloca(Value *V, uint64_t Size, IRBuilder<> &IRB, bool DoPoison);
+
+  Value *createAllocaForLayout(IRBuilder<> &IRB, const ASanStackFrameLayout &L,
+                               bool Dynamic);
+  PHINode *createPHI(IRBuilder<> &IRB, Value *Cond, Value *ValueIfTrue,
+                     Instruction *ThenTerm, Value *ValueIfFalse);
+};
+
+} // end anonymous namespace
+
+void LocationMetadata::parse(MDNode *MDN) {
+  assert(MDN->getNumOperands() == 3);
+  MDString *DIFilename = cast<MDString>(MDN->getOperand(0));
+  Filename = DIFilename->getString();
+  LineNo = mdconst::extract<ConstantInt>(MDN->getOperand(1))->getLimitedValue();
+  ColumnNo =
+      mdconst::extract<ConstantInt>(MDN->getOperand(2))->getLimitedValue();
+}
+
+// FIXME: It would be cleaner to attach the relevant metadata directly to the
+// globals we want to sanitize and to read that metadata on each pass over a
+// function, instead of reading module-level metadata up front.
+GlobalsMetadata::GlobalsMetadata(Module &M) {
+  NamedMDNode *Globals = M.getNamedMetadata("llvm.asan.globals");
+  if (!Globals)
+    return;
+  for (auto MDN : Globals->operands()) {
+    // Metadata node contains the global and the fields of "Entry".
+    assert(MDN->getNumOperands() == 5);
+    auto *V = mdconst::extract_or_null<Constant>(MDN->getOperand(0));
+    // The optimizer may optimize away a global entirely.
+    if (!V)
+      continue;
+    auto *StrippedV = V->stripPointerCasts();
+    auto *GV = dyn_cast<GlobalVariable>(StrippedV);
+    if (!GV)
+      continue;
+    // We can already have an entry for GV if it was merged with another
+    // global.
+    Entry &E = Entries[GV];
+    if (auto *Loc = cast_or_null<MDNode>(MDN->getOperand(1)))
+      E.SourceLoc.parse(Loc);
+    if (auto *Name = cast_or_null<MDString>(MDN->getOperand(2)))
+      E.Name = Name->getString();
+    ConstantInt *IsDynInit = mdconst::extract<ConstantInt>(MDN->getOperand(3));
+    E.IsDynInit |= IsDynInit->isOne();
+    ConstantInt *IsExcluded =
+        mdconst::extract<ConstantInt>(MDN->getOperand(4));
+    E.IsExcluded |= IsExcluded->isOne();
+  }
+}
+
+AnalysisKey ASanGlobalsMetadataAnalysis::Key;
+
+GlobalsMetadata ASanGlobalsMetadataAnalysis::run(Module &M,
+                                                 ModuleAnalysisManager &AM) {
+  return GlobalsMetadata(M);
+}
+
+AddressSanitizerPass::AddressSanitizerPass(bool CompileKernel, bool Recover,
+                                           bool UseAfterScope)
+    : CompileKernel(CompileKernel), Recover(Recover),
+      UseAfterScope(UseAfterScope) {}
+
+PreservedAnalyses AddressSanitizerPass::run(Function &F,
+                                            AnalysisManager<Function> &AM) {
+  auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
+  Module &M = *F.getParent();
+  if (auto *R = MAMProxy.getCachedResult<ASanGlobalsMetadataAnalysis>(M)) {
+    const TargetLibraryInfo *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
+    AddressSanitizer Sanitizer(M, R, CompileKernel, Recover, UseAfterScope);
+    if (Sanitizer.instrumentFunction(F, TLI))
+      return PreservedAnalyses::none();
+    return PreservedAnalyses::all();
+  }
+
+  report_fatal_error(
+      "The ASanGlobalsMetadataAnalysis is required to run before "
+      "AddressSanitizer can run");
+  return PreservedAnalyses::all();
+}
+
+ModuleAddressSanitizerPass::ModuleAddressSanitizerPass(bool CompileKernel,
+                                                       bool Recover,
+                                                       bool UseGlobalGC,
+                                                       bool UseOdrIndicator)
+    : CompileKernel(CompileKernel), Recover(Recover), UseGlobalGC(UseGlobalGC),
+      UseOdrIndicator(UseOdrIndicator) {}
+
+PreservedAnalyses ModuleAddressSanitizerPass::run(Module &M,
+                                                  AnalysisManager<Module> &AM) {
+  GlobalsMetadata &GlobalsMD = AM.getResult<ASanGlobalsMetadataAnalysis>(M);
+  ModuleAddressSanitizer Sanitizer(M, &GlobalsMD, CompileKernel, Recover,
+                                   UseGlobalGC, UseOdrIndicator);
+  if (Sanitizer.instrumentModule(M))
+    return PreservedAnalyses::none();
+  return PreservedAnalyses::all();
+}
+
+INITIALIZE_PASS(ASanGlobalsMetadataWrapperPass, "asan-globals-md",
+                "Read metadata to mark which globals should be instrumented "
+                "when running ASan.",
+                false, true)
+
+char AddressSanitizerLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(
+    AddressSanitizerLegacyPass, "asan",
+    "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false,
+    false)
+INITIALIZE_PASS_DEPENDENCY(ASanGlobalsMetadataWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(
+    AddressSanitizerLegacyPass, "asan",
+    "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false,
+    false)
+
+FunctionPass *llvm::createAddressSanitizerFunctionPass(bool CompileKernel,
+                                                       bool Recover,
+                                                       bool UseAfterScope) {
+  assert(!CompileKernel || Recover);
+  return new AddressSanitizerLegacyPass(CompileKernel, Recover, UseAfterScope);
+}
+
+char ModuleAddressSanitizerLegacyPass::ID = 0;
+
+INITIALIZE_PASS(
+    ModuleAddressSanitizerLegacyPass, "asan-module",
+    "AddressSanitizer: detects use-after-free and out-of-bounds bugs."
+    "ModulePass",
+    false, false)
+
+ModulePass *llvm::createModuleAddressSanitizerLegacyPassPass(
+    bool CompileKernel, bool Recover, bool UseGlobalsGC, bool UseOdrIndicator) {
+  assert(!CompileKernel || Recover);
+  return new ModuleAddressSanitizerLegacyPass(CompileKernel, Recover,
+                                              UseGlobalsGC, UseOdrIndicator);
+}
+
+static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
+  size_t Res = countTrailingZeros(TypeSize / 8);
+  assert(Res < kNumberOfAccessSizes);
+  return Res;
+}
+
+/// Create a global describing a source location.
+static GlobalVariable *createPrivateGlobalForSourceLoc(Module &M,
+                                                       LocationMetadata MD) {
+  Constant *LocData[] = {
+      createPrivateGlobalForString(M, MD.Filename, true, kAsanGenPrefix),
+      ConstantInt::get(Type::getInt32Ty(M.getContext()), MD.LineNo),
+      ConstantInt::get(Type::getInt32Ty(M.getContext()), MD.ColumnNo),
+  };
+  auto LocStruct = ConstantStruct::getAnon(LocData);
+  auto GV = new GlobalVariable(M, LocStruct->getType(), true,
+                               GlobalValue::PrivateLinkage, LocStruct,
+                               kAsanGenPrefix);
+  GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+  return GV;
+}
+
+/// Check if \p G has been created by a trusted compiler pass.
+static bool GlobalWasGeneratedByCompiler(GlobalVariable *G) {
+  // Do not instrument @llvm.global_ctors, @llvm.used, etc.
+  if (G->getName().startswith("llvm."))
+    return true;
+
+  // Do not instrument asan globals.
+  if (G->getName().startswith(kAsanGenPrefix) ||
+      G->getName().startswith(kSanCovGenPrefix) ||
+      G->getName().startswith(kODRGenPrefix))
+    return true;
+
+  // Do not instrument gcov counter arrays.
+  if (G->getName() == "__llvm_gcov_ctr")
+    return true;
+
+  return false;
+}
+
+Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
+  // Shadow >> scale
+  Shadow = IRB.CreateLShr(Shadow, Mapping.Scale);
+  if (Mapping.Offset == 0) return Shadow;
+  // (Shadow >> scale) | offset
+  Value *ShadowBase;
+  if (LocalDynamicShadow)
+    ShadowBase = LocalDynamicShadow;
+  else
+    ShadowBase = ConstantInt::get(IntptrTy, Mapping.Offset);
+  if (Mapping.OrShadowOffset)
+    return IRB.CreateOr(Shadow, ShadowBase);
+  else
+    return IRB.CreateAdd(Shadow, ShadowBase);
+}
+
+// Instrument memset/memmove/memcpy
+void AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
+  IRBuilder<> IRB(MI);
+  if (isa<MemTransferInst>(MI)) {
+    IRB.CreateCall(
+        isa<MemMoveInst>(MI) ? AsanMemmove : AsanMemcpy,
+        {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
+         IRB.CreatePointerCast(MI->getOperand(1), IRB.getInt8PtrTy()),
+         IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
+  } else if (isa<MemSetInst>(MI)) {
+    IRB.CreateCall(
+        AsanMemset,
+        {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
+         IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
+         IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
+  }
+  MI->eraseFromParent();
+}
+
+/// Check if we want (and can) handle this alloca.
+bool AddressSanitizer::isInterestingAlloca(const AllocaInst &AI) {
+  auto PreviouslySeenAllocaInfo = ProcessedAllocas.find(&AI);
+
+  if (PreviouslySeenAllocaInfo != ProcessedAllocas.end())
+    return PreviouslySeenAllocaInfo->getSecond();
+
+  bool IsInteresting =
+      (AI.getAllocatedType()->isSized() &&
+       // alloca() may be called with 0 size, ignore it.
+       ((!AI.isStaticAlloca()) || getAllocaSizeInBytes(AI) > 0) &&
+       // We are only interested in allocas not promotable to registers.
+       // Promotable allocas are common under -O0.
+       (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI)) &&
+       // inalloca allocas are not treated as static, and we don't want
+       // dynamic alloca instrumentation for them as well.
+       !AI.isUsedWithInAlloca() &&
+       // swifterror allocas are register promoted by ISel
+       !AI.isSwiftError());
+
+  ProcessedAllocas[&AI] = IsInteresting;
+  return IsInteresting;
+}
+
+bool AddressSanitizer::ignoreAccess(Value *Ptr) {
+  // Do not instrument accesses from different address spaces; we cannot deal
+  // with them.
+  Type *PtrTy = cast<PointerType>(Ptr->getType()->getScalarType());
+  if (PtrTy->getPointerAddressSpace() != 0)
+    return true;
+
+  // Ignore swifterror addresses.
+  // swifterror memory addresses are mem2reg promoted by instruction
+  // selection. As such they cannot have regular uses like an instrumentation
+  // function and it makes no sense to track them as memory.
+  if (Ptr->isSwiftError())
+    return true;
+
+  // Treat memory accesses to promotable allocas as non-interesting since they
+  // will not cause memory violations. This greatly speeds up the instrumented
+  // executable at -O0.
+  if (auto AI = dyn_cast_or_null<AllocaInst>(Ptr))
+    if (ClSkipPromotableAllocas && !isInterestingAlloca(*AI))
+      return true;
+
+  return false;
+}
+
+void AddressSanitizer::getInterestingMemoryOperands(
+    Instruction *I, SmallVectorImpl<InterestingMemoryOperand> &Interesting) {
+  // Skip memory accesses inserted by another instrumentation.
+  if (I->hasMetadata("nosanitize"))
+    return;
+
+  // Do not instrument the load fetching the dynamic shadow address.
+  if (LocalDynamicShadow == I)
+    return;
+
+  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+    if (!ClInstrumentReads || ignoreAccess(LI->getPointerOperand()))
+      return;
+    Interesting.emplace_back(I, LI->getPointerOperandIndex(), false,
+                             LI->getType(), LI->getAlign());
+  } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+    if (!ClInstrumentWrites || ignoreAccess(SI->getPointerOperand()))
+      return;
+    Interesting.emplace_back(I, SI->getPointerOperandIndex(), true,
+                             SI->getValueOperand()->getType(), SI->getAlign());
+  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
+    if (!ClInstrumentAtomics || ignoreAccess(RMW->getPointerOperand()))
+      return;
+    Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true,
+                             RMW->getValOperand()->getType(), None);
+  } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
+    if (!ClInstrumentAtomics || ignoreAccess(XCHG->getPointerOperand()))
+      return;
+    Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true,
+                             XCHG->getCompareOperand()->getType(), None);
+  } else if (auto CI = dyn_cast<CallInst>(I)) {
+    auto *F = CI->getCalledFunction();
+    if (F && (F->getName().startswith("llvm.masked.load.") ||
+              F->getName().startswith("llvm.masked.store."))) {
+      bool IsWrite = F->getName().startswith("llvm.masked.store.");
+      // Masked store has an initial operand for the value.
+      unsigned OpOffset = IsWrite ? 1 : 0;
+      if (IsWrite ? !ClInstrumentWrites : !ClInstrumentReads)
+        return;
+
+      auto BasePtr = CI->getOperand(OpOffset);
+      if (ignoreAccess(BasePtr))
+        return;
+      auto Ty = cast<PointerType>(BasePtr->getType())->getElementType();
+      MaybeAlign Alignment = Align(1);
+      // Otherwise no alignment guarantees. We probably got Undef.
+      if (auto *Op = dyn_cast<ConstantInt>(CI->getOperand(1 + OpOffset)))
+        Alignment = Op->getMaybeAlignValue();
+      Value *Mask = CI->getOperand(2 + OpOffset);
+      Interesting.emplace_back(I, OpOffset, IsWrite, Ty, Alignment, Mask);
+    } else {
+      for (unsigned ArgNo = 0; ArgNo < CI->getNumArgOperands(); ArgNo++) {
+        if (!ClInstrumentByval || !CI->isByValArgument(ArgNo) ||
+            ignoreAccess(CI->getArgOperand(ArgNo)))
+          continue;
+        Type *Ty = CI->getParamByValType(ArgNo);
+        Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
+      }
+    }
+  }
+}
+
+static bool isPointerOperand(Value *V) {
+  return V->getType()->isPointerTy() || isa<PtrToIntInst>(V);
+}
+
+// This is a rough heuristic; it may cause both false positives and
+// false negatives. The proper implementation requires cooperation with
+// the frontend.
+static bool isInterestingPointerComparison(Instruction *I) {
+  if (ICmpInst *Cmp = dyn_cast<ICmpInst>(I)) {
+    if (!Cmp->isRelational())
+      return false;
+  } else {
+    return false;
+  }
+  return isPointerOperand(I->getOperand(0)) &&
+         isPointerOperand(I->getOperand(1));
+}
+
+// This is a rough heuristic; it may cause both false positives and
+// false negatives. The proper implementation requires cooperation with
+// the frontend.
+static bool isInterestingPointerSubtraction(Instruction *I) {
+  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+    if (BO->getOpcode() != Instruction::Sub)
+      return false;
+  } else {
+    return false;
+  }
+  return isPointerOperand(I->getOperand(0)) &&
+         isPointerOperand(I->getOperand(1));
+}
+
+bool AddressSanitizer::GlobalIsLinkerInitialized(GlobalVariable *G) {
+  // If a global variable does not have dynamic initialization we don't
+  // have to instrument it.  However, if a global does not have initializer
+  // at all, we assume it has dynamic initializer (in other TU).
+  //
+  // FIXME: Metadata should be attached directly to the global instead
+  // of being added to llvm.asan.globals.
+  return G->hasInitializer() && !GlobalsMD.get(G).IsDynInit;
+}
+
+void AddressSanitizer::instrumentPointerComparisonOrSubtraction(
+    Instruction *I) {
+  IRBuilder<> IRB(I);
+  FunctionCallee F = isa<ICmpInst>(I) ? AsanPtrCmpFunction : AsanPtrSubFunction;
+  Value *Param[2] = {I->getOperand(0), I->getOperand(1)};
+  for (Value *&i : Param) {
+    if (i->getType()->isPointerTy())
+      i = IRB.CreatePointerCast(i, IntptrTy);
+  }
+  IRB.CreateCall(F, Param);
+}
+
+static void doInstrumentAddress(AddressSanitizer *Pass, Instruction *I,
+                                Instruction *InsertBefore, Value *Addr,
+                                MaybeAlign Alignment, unsigned Granularity,
+                                uint32_t TypeSize, bool IsWrite,
+                                Value *SizeArgument, bool UseCalls,
+                                uint32_t Exp) {
+  // Instrument a 1-, 2-, 4-, 8-, or 16- byte access with one check
+  // if the data is properly aligned.
+  if ((TypeSize == 8 || TypeSize == 16 || TypeSize == 32 || TypeSize == 64 ||
+       TypeSize == 128) &&
+      (!Alignment || *Alignment >= Granularity || *Alignment >= TypeSize / 8))
+    return Pass->instrumentAddress(I, InsertBefore, Addr, TypeSize, IsWrite,
+                                   nullptr, UseCalls, Exp);
+  Pass->instrumentUnusualSizeOrAlignment(I, InsertBefore, Addr, TypeSize,
+                                         IsWrite, nullptr, UseCalls, Exp);
+}
+
+static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass,
+                                        const DataLayout &DL, Type *IntptrTy,
+                                        Value *Mask, Instruction *I,
+                                        Value *Addr, MaybeAlign Alignment,
+                                        unsigned Granularity, uint32_t TypeSize,
+                                        bool IsWrite, Value *SizeArgument,
+                                        bool UseCalls, uint32_t Exp) {
+  auto *VTy = cast<FixedVectorType>(
+      cast<PointerType>(Addr->getType())->getElementType());
+  uint64_t ElemTypeSize = DL.getTypeStoreSizeInBits(VTy->getScalarType());
+  unsigned Num = VTy->getNumElements();
+  auto Zero = ConstantInt::get(IntptrTy, 0);
+  for (unsigned Idx = 0; Idx < Num; ++Idx) {
+    Value *InstrumentedAddress = nullptr;
+    Instruction *InsertBefore = I;
+    if (auto *Vector = dyn_cast<ConstantVector>(Mask)) {
+      // dyn_cast as we might get UndefValue
+      if (auto *Masked = dyn_cast<ConstantInt>(Vector->getOperand(Idx))) {
+        if (Masked->isZero())
+          // Mask is constant false, so no instrumentation needed.
+          continue;
+        // If we have a true or undef value, fall through to doInstrumentAddress
+        // with InsertBefore == I
+      }
+    } else {
+      IRBuilder<> IRB(I);
+      Value *MaskElem = IRB.CreateExtractElement(Mask, Idx);
+      Instruction *ThenTerm = SplitBlockAndInsertIfThen(MaskElem, I, false);
+      InsertBefore = ThenTerm;
+    }
+
+    IRBuilder<> IRB(InsertBefore);
+    InstrumentedAddress =
+        IRB.CreateGEP(VTy, Addr, {Zero, ConstantInt::get(IntptrTy, Idx)});
+    doInstrumentAddress(Pass, I, InsertBefore, InstrumentedAddress, Alignment,
+                        Granularity, ElemTypeSize, IsWrite, SizeArgument,
+                        UseCalls, Exp);
+  }
+}
+
+void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis,
+                                     InterestingMemoryOperand &O, bool UseCalls,
+                                     const DataLayout &DL) {
+  Value *Addr = O.getPtr();
+
+  // Optimization experiments.
+  // The experiments can be used to evaluate potential optimizations that remove
+  // instrumentation (assess false negatives). Instead of completely removing
+  // some instrumentation, you set Exp to a non-zero value (mask of optimization
+  // experiments that want to remove instrumentation of this instruction).
+  // If Exp is non-zero, this pass will emit special calls into runtime
+  // (e.g. __asan_report_exp_load1 instead of __asan_report_load1). These calls
+  // make runtime terminate the program in a special way (with a different
+  // exit status). Then you run the new compiler on a buggy corpus, collect
+  // the special terminations (ideally, you don't see them at all -- no false
+  // negatives) and make the decision on the optimization.
+  uint32_t Exp = ClForceExperiment;
+
+  if (ClOpt && ClOptGlobals) {
+    // If initialization order checking is disabled, a simple access to a
+    // dynamically initialized global is always valid.
     GlobalVariable *G = dyn_cast<GlobalVariable>(getUnderlyingObject(Addr));
-    if (G && (!ClInitializers || GlobalIsLinkerInitialized(G)) && 
-        isSafeAccess(ObjSizeVis, Addr, O.TypeSize)) { 
-      NumOptimizedAccessesToGlobalVar++; 
-      return; 
-    } 
-  } 
- 
-  if (ClOpt && ClOptStack) { 
-    // A direct inbounds access to a stack variable is always valid. 
+    if (G && (!ClInitializers || GlobalIsLinkerInitialized(G)) &&
+        isSafeAccess(ObjSizeVis, Addr, O.TypeSize)) {
+      NumOptimizedAccessesToGlobalVar++;
+      return;
+    }
+  }
+
+  if (ClOpt && ClOptStack) {
+    // A direct inbounds access to a stack variable is always valid.
     if (isa<AllocaInst>(getUnderlyingObject(Addr)) &&
-        isSafeAccess(ObjSizeVis, Addr, O.TypeSize)) { 
-      NumOptimizedAccessesToStackVar++; 
-      return; 
-    } 
-  } 
- 
-  if (O.IsWrite) 
-    NumInstrumentedWrites++; 
-  else 
-    NumInstrumentedReads++; 
- 
-  unsigned Granularity = 1 << Mapping.Scale; 
-  if (O.MaybeMask) { 
-    instrumentMaskedLoadOrStore(this, DL, IntptrTy, O.MaybeMask, O.getInsn(), 
-                                Addr, O.Alignment, Granularity, O.TypeSize, 
-                                O.IsWrite, nullptr, UseCalls, Exp); 
-  } else { 
-    doInstrumentAddress(this, O.getInsn(), O.getInsn(), Addr, O.Alignment, 
-                        Granularity, O.TypeSize, O.IsWrite, nullptr, UseCalls, 
-                        Exp); 
-  } 
-} 
- 
-Instruction *AddressSanitizer::generateCrashCode(Instruction *InsertBefore, 
-                                                 Value *Addr, bool IsWrite, 
-                                                 size_t AccessSizeIndex, 
-                                                 Value *SizeArgument, 
-                                                 uint32_t Exp) { 
-  IRBuilder<> IRB(InsertBefore); 
-  Value *ExpVal = Exp == 0 ? nullptr : ConstantInt::get(IRB.getInt32Ty(), Exp); 
-  CallInst *Call = nullptr; 
-  if (SizeArgument) { 
-    if (Exp == 0) 
-      Call = IRB.CreateCall(AsanErrorCallbackSized[IsWrite][0], 
-                            {Addr, SizeArgument}); 
-    else 
-      Call = IRB.CreateCall(AsanErrorCallbackSized[IsWrite][1], 
-                            {Addr, SizeArgument, ExpVal}); 
-  } else { 
-    if (Exp == 0) 
-      Call = 
-          IRB.CreateCall(AsanErrorCallback[IsWrite][0][AccessSizeIndex], Addr); 
-    else 
-      Call = IRB.CreateCall(AsanErrorCallback[IsWrite][1][AccessSizeIndex], 
-                            {Addr, ExpVal}); 
-  } 
- 
-  Call->setCannotMerge(); 
-  return Call; 
-} 
- 
-Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong, 
-                                           Value *ShadowValue, 
-                                           uint32_t TypeSize) { 
-  size_t Granularity = static_cast<size_t>(1) << Mapping.Scale; 
-  // Addr & (Granularity - 1) 
-  Value *LastAccessedByte = 
-      IRB.CreateAnd(AddrLong, ConstantInt::get(IntptrTy, Granularity - 1)); 
-  // (Addr & (Granularity - 1)) + size - 1 
-  if (TypeSize / 8 > 1) 
-    LastAccessedByte = IRB.CreateAdd( 
-        LastAccessedByte, ConstantInt::get(IntptrTy, TypeSize / 8 - 1)); 
-  // (uint8_t) ((Addr & (Granularity-1)) + size - 1) 
-  LastAccessedByte = 
-      IRB.CreateIntCast(LastAccessedByte, ShadowValue->getType(), false); 
-  // ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue 
-  return IRB.CreateICmpSGE(LastAccessedByte, ShadowValue); 
-} 
- 
-void AddressSanitizer::instrumentAddress(Instruction *OrigIns, 
-                                         Instruction *InsertBefore, Value *Addr, 
-                                         uint32_t TypeSize, bool IsWrite, 
-                                         Value *SizeArgument, bool UseCalls, 
-                                         uint32_t Exp) { 
-  bool IsMyriad = TargetTriple.getVendor() == llvm::Triple::Myriad; 
- 
-  IRBuilder<> IRB(InsertBefore); 
-  Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); 
-  size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize); 
- 
-  if (UseCalls) { 
-    if (Exp == 0) 
-      IRB.CreateCall(AsanMemoryAccessCallback[IsWrite][0][AccessSizeIndex], 
-                     AddrLong); 
-    else 
-      IRB.CreateCall(AsanMemoryAccessCallback[IsWrite][1][AccessSizeIndex], 
-                     {AddrLong, ConstantInt::get(IRB.getInt32Ty(), Exp)}); 
-    return; 
-  } 
- 
-  if (IsMyriad) { 
-    // Strip the cache bit and do range check. 
-    // AddrLong &= ~kMyriadCacheBitMask32 
-    AddrLong = IRB.CreateAnd(AddrLong, ~kMyriadCacheBitMask32); 
-    // Tag = AddrLong >> kMyriadTagShift 
-    Value *Tag = IRB.CreateLShr(AddrLong, kMyriadTagShift); 
-    // Tag == kMyriadDDRTag 
-    Value *TagCheck = 
-        IRB.CreateICmpEQ(Tag, ConstantInt::get(IntptrTy, kMyriadDDRTag)); 
- 
-    Instruction *TagCheckTerm = 
-        SplitBlockAndInsertIfThen(TagCheck, InsertBefore, false, 
-                                  MDBuilder(*C).createBranchWeights(1, 100000)); 
-    assert(cast<BranchInst>(TagCheckTerm)->isUnconditional()); 
-    IRB.SetInsertPoint(TagCheckTerm); 
-    InsertBefore = TagCheckTerm; 
-  } 
- 
-  Type *ShadowTy = 
-      IntegerType::get(*C, std::max(8U, TypeSize >> Mapping.Scale)); 
-  Type *ShadowPtrTy = PointerType::get(ShadowTy, 0); 
-  Value *ShadowPtr = memToShadow(AddrLong, IRB); 
-  Value *CmpVal = Constant::getNullValue(ShadowTy); 
-  Value *ShadowValue = 
-      IRB.CreateLoad(ShadowTy, IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy)); 
- 
-  Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal); 
-  size_t Granularity = 1ULL << Mapping.Scale; 
-  Instruction *CrashTerm = nullptr; 
- 
-  if (ClAlwaysSlowPath || (TypeSize < 8 * Granularity)) { 
-    // We use branch weights for the slow path check, to indicate that the slow 
-    // path is rarely taken. This seems to be the case for SPEC benchmarks. 
-    Instruction *CheckTerm = SplitBlockAndInsertIfThen( 
-        Cmp, InsertBefore, false, MDBuilder(*C).createBranchWeights(1, 100000)); 
-    assert(cast<BranchInst>(CheckTerm)->isUnconditional()); 
-    BasicBlock *NextBB = CheckTerm->getSuccessor(0); 
-    IRB.SetInsertPoint(CheckTerm); 
-    Value *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeSize); 
-    if (Recover) { 
-      CrashTerm = SplitBlockAndInsertIfThen(Cmp2, CheckTerm, false); 
-    } else { 
-      BasicBlock *CrashBlock = 
-        BasicBlock::Create(*C, "", NextBB->getParent(), NextBB); 
-      CrashTerm = new UnreachableInst(*C, CrashBlock); 
-      BranchInst *NewTerm = BranchInst::Create(CrashBlock, NextBB, Cmp2); 
-      ReplaceInstWithInst(CheckTerm, NewTerm); 
-    } 
-  } else { 
-    CrashTerm = SplitBlockAndInsertIfThen(Cmp, InsertBefore, !Recover); 
-  } 
- 
-  Instruction *Crash = generateCrashCode(CrashTerm, AddrLong, IsWrite, 
-                                         AccessSizeIndex, SizeArgument, Exp); 
-  Crash->setDebugLoc(OrigIns->getDebugLoc()); 
-} 
- 
-// Instrument unusual size or unusual alignment. 
-// We can not do it with a single check, so we do 1-byte check for the first 
-// and the last bytes. We call __asan_report_*_n(addr, real_size) to be able 
-// to report the actual access size. 
-void AddressSanitizer::instrumentUnusualSizeOrAlignment( 
-    Instruction *I, Instruction *InsertBefore, Value *Addr, uint32_t TypeSize, 
-    bool IsWrite, Value *SizeArgument, bool UseCalls, uint32_t Exp) { 
-  IRBuilder<> IRB(InsertBefore); 
-  Value *Size = ConstantInt::get(IntptrTy, TypeSize / 8); 
-  Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); 
-  if (UseCalls) { 
-    if (Exp == 0) 
-      IRB.CreateCall(AsanMemoryAccessCallbackSized[IsWrite][0], 
-                     {AddrLong, Size}); 
-    else 
-      IRB.CreateCall(AsanMemoryAccessCallbackSized[IsWrite][1], 
-                     {AddrLong, Size, ConstantInt::get(IRB.getInt32Ty(), Exp)}); 
-  } else { 
-    Value *LastByte = IRB.CreateIntToPtr( 
-        IRB.CreateAdd(AddrLong, ConstantInt::get(IntptrTy, TypeSize / 8 - 1)), 
-        Addr->getType()); 
-    instrumentAddress(I, InsertBefore, Addr, 8, IsWrite, Size, false, Exp); 
-    instrumentAddress(I, InsertBefore, LastByte, 8, IsWrite, Size, false, Exp); 
-  } 
-} 
- 
-void ModuleAddressSanitizer::poisonOneInitializer(Function &GlobalInit, 
-                                                  GlobalValue *ModuleName) { 
-  // Set up the arguments to our poison/unpoison functions. 
-  IRBuilder<> IRB(&GlobalInit.front(), 
-                  GlobalInit.front().getFirstInsertionPt()); 
- 
-  // Add a call to poison all external globals before the given function starts. 
-  Value *ModuleNameAddr = ConstantExpr::getPointerCast(ModuleName, IntptrTy); 
-  IRB.CreateCall(AsanPoisonGlobals, ModuleNameAddr); 
- 
-  // Add calls to unpoison all globals before each return instruction. 
-  for (auto &BB : GlobalInit.getBasicBlockList()) 
-    if (ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator())) 
-      CallInst::Create(AsanUnpoisonGlobals, "", RI); 
-} 
- 
-void ModuleAddressSanitizer::createInitializerPoisonCalls( 
-    Module &M, GlobalValue *ModuleName) { 
-  GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors"); 
-  if (!GV) 
-    return; 
- 
-  ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer()); 
-  if (!CA) 
-    return; 
- 
-  for (Use &OP : CA->operands()) { 
-    if (isa<ConstantAggregateZero>(OP)) continue; 
-    ConstantStruct *CS = cast<ConstantStruct>(OP); 
- 
-    // Must have a function or null ptr. 
-    if (Function *F = dyn_cast<Function>(CS->getOperand(1))) { 
-      if (F->getName() == kAsanModuleCtorName) continue; 
-      auto *Priority = cast<ConstantInt>(CS->getOperand(0)); 
-      // Don't instrument CTORs that will run before asan.module_ctor. 
-      if (Priority->getLimitedValue() <= GetCtorAndDtorPriority(TargetTriple)) 
-        continue; 
-      poisonOneInitializer(*F, ModuleName); 
-    } 
-  } 
-} 
- 
-const GlobalVariable * 
-ModuleAddressSanitizer::getExcludedAliasedGlobal(const GlobalAlias &GA) const { 
-  // In case this function should be expanded to include rules that do not just 
-  // apply when CompileKernel is true, either guard all existing rules with an 
-  // 'if (CompileKernel) { ... }' or be absolutely sure that all these rules 
-  // should also apply to user space. 
-  assert(CompileKernel && "Only expecting to be called when compiling kernel"); 
- 
-  const Constant *C = GA.getAliasee(); 
- 
-  // When compiling the kernel, globals that are aliased by symbols prefixed 
-  // by "__" are special and cannot be padded with a redzone. 
-  if (GA.getName().startswith("__")) 
-    return dyn_cast<GlobalVariable>(C->stripPointerCastsAndAliases()); 
- 
-  return nullptr; 
-} 
- 
-bool ModuleAddressSanitizer::shouldInstrumentGlobal(GlobalVariable *G) const { 
-  Type *Ty = G->getValueType(); 
-  LLVM_DEBUG(dbgs() << "GLOBAL: " << *G << "\n"); 
- 
-  // FIXME: Metadata should be attched directly to the global directly instead 
-  // of being added to llvm.asan.globals. 
-  if (GlobalsMD.get(G).IsExcluded) return false; 
-  if (!Ty->isSized()) return false; 
-  if (!G->hasInitializer()) return false; 
-  // Only instrument globals of default address spaces 
-  if (G->getAddressSpace()) return false; 
-  if (GlobalWasGeneratedByCompiler(G)) return false; // Our own globals. 
-  // Two problems with thread-locals: 
-  //   - The address of the main thread's copy can't be computed at link-time. 
-  //   - Need to poison all copies, not just the main thread's one. 
-  if (G->isThreadLocal()) return false; 
-  // For now, just ignore this Global if the alignment is large. 
-  if (G->getAlignment() > getMinRedzoneSizeForGlobal()) return false; 
- 
-  // For non-COFF targets, only instrument globals known to be defined by this 
-  // TU. 
-  // FIXME: We can instrument comdat globals on ELF if we are using the 
-  // GC-friendly metadata scheme. 
-  if (!TargetTriple.isOSBinFormatCOFF()) { 
-    if (!G->hasExactDefinition() || G->hasComdat()) 
-      return false; 
-  } else { 
-    // On COFF, don't instrument non-ODR linkages. 
-    if (G->isInterposable()) 
-      return false; 
-  } 
- 
-  // If a comdat is present, it must have a selection kind that implies ODR 
-  // semantics: no duplicates, any, or exact match. 
-  if (Comdat *C = G->getComdat()) { 
-    switch (C->getSelectionKind()) { 
-    case Comdat::Any: 
-    case Comdat::ExactMatch: 
-    case Comdat::NoDuplicates: 
-      break; 
-    case Comdat::Largest: 
-    case Comdat::SameSize: 
-      return false; 
-    } 
-  } 
- 
-  if (G->hasSection()) { 
-    // The kernel uses explicit sections for mostly special global variables 
-    // that we should not instrument. E.g. the kernel may rely on their layout 
-    // without redzones, or remove them at link time ("discard.*"), etc. 
-    if (CompileKernel) 
-      return false; 
- 
-    StringRef Section = G->getSection(); 
- 
-    // Globals from llvm.metadata aren't emitted, do not instrument them. 
-    if (Section == "llvm.metadata") return false; 
-    // Do not instrument globals from special LLVM sections. 
-    if (Section.find("__llvm") != StringRef::npos || Section.find("__LLVM") != StringRef::npos) return false; 
- 
-    // Do not instrument function pointers to initialization and termination 
-    // routines: dynamic linker will not properly handle redzones. 
-    if (Section.startswith(".preinit_array") || 
-        Section.startswith(".init_array") || 
-        Section.startswith(".fini_array")) { 
-      return false; 
-    } 
- 
+        isSafeAccess(ObjSizeVis, Addr, O.TypeSize)) {
+      NumOptimizedAccessesToStackVar++;
+      return;
+    }
+  }
+
+  if (O.IsWrite)
+    NumInstrumentedWrites++;
+  else
+    NumInstrumentedReads++;
+
+  unsigned Granularity = 1 << Mapping.Scale;
+  if (O.MaybeMask) {
+    instrumentMaskedLoadOrStore(this, DL, IntptrTy, O.MaybeMask, O.getInsn(),
+                                Addr, O.Alignment, Granularity, O.TypeSize,
+                                O.IsWrite, nullptr, UseCalls, Exp);
+  } else {
+    doInstrumentAddress(this, O.getInsn(), O.getInsn(), Addr, O.Alignment,
+                        Granularity, O.TypeSize, O.IsWrite, nullptr, UseCalls,
+                        Exp);
+  }
+}
+
+Instruction *AddressSanitizer::generateCrashCode(Instruction *InsertBefore,
+                                                 Value *Addr, bool IsWrite,
+                                                 size_t AccessSizeIndex,
+                                                 Value *SizeArgument,
+                                                 uint32_t Exp) {
+  IRBuilder<> IRB(InsertBefore);
+  Value *ExpVal = Exp == 0 ? nullptr : ConstantInt::get(IRB.getInt32Ty(), Exp);
+  CallInst *Call = nullptr;
+  if (SizeArgument) {
+    if (Exp == 0)
+      Call = IRB.CreateCall(AsanErrorCallbackSized[IsWrite][0],
+                            {Addr, SizeArgument});
+    else
+      Call = IRB.CreateCall(AsanErrorCallbackSized[IsWrite][1],
+                            {Addr, SizeArgument, ExpVal});
+  } else {
+    if (Exp == 0)
+      Call =
+          IRB.CreateCall(AsanErrorCallback[IsWrite][0][AccessSizeIndex], Addr);
+    else
+      Call = IRB.CreateCall(AsanErrorCallback[IsWrite][1][AccessSizeIndex],
+                            {Addr, ExpVal});
+  }
+
+  Call->setCannotMerge();
+  return Call;
+}
+
+Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
+                                           Value *ShadowValue,
+                                           uint32_t TypeSize) {
+  size_t Granularity = static_cast<size_t>(1) << Mapping.Scale;
+  // Addr & (Granularity - 1)
+  Value *LastAccessedByte =
+      IRB.CreateAnd(AddrLong, ConstantInt::get(IntptrTy, Granularity - 1));
+  // (Addr & (Granularity - 1)) + size - 1
+  if (TypeSize / 8 > 1)
+    LastAccessedByte = IRB.CreateAdd(
+        LastAccessedByte, ConstantInt::get(IntptrTy, TypeSize / 8 - 1));
+  // (uint8_t) ((Addr & (Granularity-1)) + size - 1)
+  LastAccessedByte =
+      IRB.CreateIntCast(LastAccessedByte, ShadowValue->getType(), false);
+  // ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue
+  return IRB.CreateICmpSGE(LastAccessedByte, ShadowValue);
+}
+
+void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
+                                         Instruction *InsertBefore, Value *Addr,
+                                         uint32_t TypeSize, bool IsWrite,
+                                         Value *SizeArgument, bool UseCalls,
+                                         uint32_t Exp) {
+  bool IsMyriad = TargetTriple.getVendor() == llvm::Triple::Myriad;
+
+  IRBuilder<> IRB(InsertBefore);
+  Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
+  size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize);
+
+  if (UseCalls) {
+    if (Exp == 0)
+      IRB.CreateCall(AsanMemoryAccessCallback[IsWrite][0][AccessSizeIndex],
+                     AddrLong);
+    else
+      IRB.CreateCall(AsanMemoryAccessCallback[IsWrite][1][AccessSizeIndex],
+                     {AddrLong, ConstantInt::get(IRB.getInt32Ty(), Exp)});
+    return;
+  }
+
+  if (IsMyriad) {
+    // Strip the cache bit and do range check.
+    // AddrLong &= ~kMyriadCacheBitMask32
+    AddrLong = IRB.CreateAnd(AddrLong, ~kMyriadCacheBitMask32);
+    // Tag = AddrLong >> kMyriadTagShift
+    Value *Tag = IRB.CreateLShr(AddrLong, kMyriadTagShift);
+    // Tag == kMyriadDDRTag
+    Value *TagCheck =
+        IRB.CreateICmpEQ(Tag, ConstantInt::get(IntptrTy, kMyriadDDRTag));
+
+    Instruction *TagCheckTerm =
+        SplitBlockAndInsertIfThen(TagCheck, InsertBefore, false,
+                                  MDBuilder(*C).createBranchWeights(1, 100000));
+    assert(cast<BranchInst>(TagCheckTerm)->isUnconditional());
+    IRB.SetInsertPoint(TagCheckTerm);
+    InsertBefore = TagCheckTerm;
+  }
+
+  Type *ShadowTy =
+      IntegerType::get(*C, std::max(8U, TypeSize >> Mapping.Scale));
+  Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);
+  Value *ShadowPtr = memToShadow(AddrLong, IRB);
+  Value *CmpVal = Constant::getNullValue(ShadowTy);
+  Value *ShadowValue =
+      IRB.CreateLoad(ShadowTy, IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
+
+  Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal);
+  size_t Granularity = 1ULL << Mapping.Scale;
+  Instruction *CrashTerm = nullptr;
+
+  if (ClAlwaysSlowPath || (TypeSize < 8 * Granularity)) {
+    // We use branch weights for the slow path check, to indicate that the slow
+    // path is rarely taken. This seems to be the case for SPEC benchmarks.
+    Instruction *CheckTerm = SplitBlockAndInsertIfThen(
+        Cmp, InsertBefore, false, MDBuilder(*C).createBranchWeights(1, 100000));
+    assert(cast<BranchInst>(CheckTerm)->isUnconditional());
+    BasicBlock *NextBB = CheckTerm->getSuccessor(0);
+    IRB.SetInsertPoint(CheckTerm);
+    Value *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeSize);
+    if (Recover) {
+      CrashTerm = SplitBlockAndInsertIfThen(Cmp2, CheckTerm, false);
+    } else {
+      BasicBlock *CrashBlock =
+        BasicBlock::Create(*C, "", NextBB->getParent(), NextBB);
+      CrashTerm = new UnreachableInst(*C, CrashBlock);
+      BranchInst *NewTerm = BranchInst::Create(CrashBlock, NextBB, Cmp2);
+      ReplaceInstWithInst(CheckTerm, NewTerm);
+    }
+  } else {
+    CrashTerm = SplitBlockAndInsertIfThen(Cmp, InsertBefore, !Recover);
+  }
+
+  Instruction *Crash = generateCrashCode(CrashTerm, AddrLong, IsWrite,
+                                         AccessSizeIndex, SizeArgument, Exp);
+  Crash->setDebugLoc(OrigIns->getDebugLoc());
+}
+
+// Instrument unusual size or unusual alignment.
+// We can not do it with a single check, so we do 1-byte check for the first
+// and the last bytes. We call __asan_report_*_n(addr, real_size) to be able
+// to report the actual access size.
+void AddressSanitizer::instrumentUnusualSizeOrAlignment(
+    Instruction *I, Instruction *InsertBefore, Value *Addr, uint32_t TypeSize,
+    bool IsWrite, Value *SizeArgument, bool UseCalls, uint32_t Exp) {
+  IRBuilder<> IRB(InsertBefore);
+  Value *Size = ConstantInt::get(IntptrTy, TypeSize / 8);
+  Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
+  if (UseCalls) {
+    if (Exp == 0)
+      IRB.CreateCall(AsanMemoryAccessCallbackSized[IsWrite][0],
+                     {AddrLong, Size});
+    else
+      IRB.CreateCall(AsanMemoryAccessCallbackSized[IsWrite][1],
+                     {AddrLong, Size, ConstantInt::get(IRB.getInt32Ty(), Exp)});
+  } else {
+    Value *LastByte = IRB.CreateIntToPtr(
+        IRB.CreateAdd(AddrLong, ConstantInt::get(IntptrTy, TypeSize / 8 - 1)),
+        Addr->getType());
+    instrumentAddress(I, InsertBefore, Addr, 8, IsWrite, Size, false, Exp);
+    instrumentAddress(I, InsertBefore, LastByte, 8, IsWrite, Size, false, Exp);
+  }
+}
+
+void ModuleAddressSanitizer::poisonOneInitializer(Function &GlobalInit,
+                                                  GlobalValue *ModuleName) {
+  // Set up the arguments to our poison/unpoison functions.
+  IRBuilder<> IRB(&GlobalInit.front(),
+                  GlobalInit.front().getFirstInsertionPt());
+
+  // Add a call to poison all external globals before the given function starts.
+  Value *ModuleNameAddr = ConstantExpr::getPointerCast(ModuleName, IntptrTy);
+  IRB.CreateCall(AsanPoisonGlobals, ModuleNameAddr);
+
+  // Add calls to unpoison all globals before each return instruction.
+  for (auto &BB : GlobalInit.getBasicBlockList())
+    if (ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator()))
+      CallInst::Create(AsanUnpoisonGlobals, "", RI);
+}
+
+void ModuleAddressSanitizer::createInitializerPoisonCalls(
+    Module &M, GlobalValue *ModuleName) {
+  GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
+  if (!GV)
+    return;
+
+  ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer());
+  if (!CA)
+    return;
+
+  for (Use &OP : CA->operands()) {
+    if (isa<ConstantAggregateZero>(OP)) continue;
+    ConstantStruct *CS = cast<ConstantStruct>(OP);
+
+    // Must have a function or null ptr.
+    if (Function *F = dyn_cast<Function>(CS->getOperand(1))) {
+      if (F->getName() == kAsanModuleCtorName) continue;
+      auto *Priority = cast<ConstantInt>(CS->getOperand(0));
+      // Don't instrument CTORs that will run before asan.module_ctor.
+      if (Priority->getLimitedValue() <= GetCtorAndDtorPriority(TargetTriple))
+        continue;
+      poisonOneInitializer(*F, ModuleName);
+    }
+  }
+}
+
+const GlobalVariable *
+ModuleAddressSanitizer::getExcludedAliasedGlobal(const GlobalAlias &GA) const {
+  // In case this function should be expanded to include rules that do not just
+  // apply when CompileKernel is true, either guard all existing rules with an
+  // 'if (CompileKernel) { ... }' or be absolutely sure that all these rules
+  // should also apply to user space.
+  assert(CompileKernel && "Only expecting to be called when compiling kernel");
+
+  const Constant *C = GA.getAliasee();
+
+  // When compiling the kernel, globals that are aliased by symbols prefixed
+  // by "__" are special and cannot be padded with a redzone.
+  if (GA.getName().startswith("__"))
+    return dyn_cast<GlobalVariable>(C->stripPointerCastsAndAliases());
+
+  return nullptr;
+}
+
+bool ModuleAddressSanitizer::shouldInstrumentGlobal(GlobalVariable *G) const {
+  Type *Ty = G->getValueType();
+  LLVM_DEBUG(dbgs() << "GLOBAL: " << *G << "\n");
+
+  // FIXME: Metadata should be attched directly to the global directly instead
+  // of being added to llvm.asan.globals.
+  if (GlobalsMD.get(G).IsExcluded) return false;
+  if (!Ty->isSized()) return false;
+  if (!G->hasInitializer()) return false;
+  // Only instrument globals of default address spaces
+  if (G->getAddressSpace()) return false;
+  if (GlobalWasGeneratedByCompiler(G)) return false; // Our own globals.
+  // Two problems with thread-locals:
+  //   - The address of the main thread's copy can't be computed at link-time.
+  //   - Need to poison all copies, not just the main thread's one.
+  if (G->isThreadLocal()) return false;
+  // For now, just ignore this Global if the alignment is large.
+  if (G->getAlignment() > getMinRedzoneSizeForGlobal()) return false;
+
+  // For non-COFF targets, only instrument globals known to be defined by this
+  // TU.
+  // FIXME: We can instrument comdat globals on ELF if we are using the
+  // GC-friendly metadata scheme.
+  if (!TargetTriple.isOSBinFormatCOFF()) {
+    if (!G->hasExactDefinition() || G->hasComdat())
+      return false;
+  } else {
+    // On COFF, don't instrument non-ODR linkages.
+    if (G->isInterposable())
+      return false;
+  }
+
+  // If a comdat is present, it must have a selection kind that implies ODR
+  // semantics: no duplicates, any, or exact match.
+  if (Comdat *C = G->getComdat()) {
+    switch (C->getSelectionKind()) {
+    case Comdat::Any:
+    case Comdat::ExactMatch:
+    case Comdat::NoDuplicates:
+      break;
+    case Comdat::Largest:
+    case Comdat::SameSize:
+      return false;
+    }
+  }
+
+  if (G->hasSection()) {
+    // The kernel uses explicit sections for mostly special global variables
+    // that we should not instrument. E.g. the kernel may rely on their layout
+    // without redzones, or remove them at link time ("discard.*"), etc.
+    if (CompileKernel)
+      return false;
+
+    StringRef Section = G->getSection();
+
+    // Globals from llvm.metadata aren't emitted, do not instrument them.
+    if (Section == "llvm.metadata") return false;
+    // Do not instrument globals from special LLVM sections.
+    if (Section.find("__llvm") != StringRef::npos || Section.find("__LLVM") != StringRef::npos) return false;
+
+    // Do not instrument function pointers to initialization and termination
+    // routines: dynamic linker will not properly handle redzones.
+    if (Section.startswith(".preinit_array") ||
+        Section.startswith(".init_array") ||
+        Section.startswith(".fini_array")) {
+      return false;
+    }
+
     // Do not instrument user-defined sections (with names resembling
     // valid C identifiers)
     if (TargetTriple.isOSBinFormatELF()) {
@@ -1881,258 +1881,258 @@ bool ModuleAddressSanitizer::shouldInstrumentGlobal(GlobalVariable *G) const {
         return false;
     }
 
-    // On COFF, if the section name contains '$', it is highly likely that the 
-    // user is using section sorting to create an array of globals similar to 
-    // the way initialization callbacks are registered in .init_array and 
-    // .CRT$XCU. The ATL also registers things in .ATL$__[azm]. Adding redzones 
-    // to such globals is counterproductive, because the intent is that they 
-    // will form an array, and out-of-bounds accesses are expected. 
-    // See https://github.com/google/sanitizers/issues/305 
-    // and http://msdn.microsoft.com/en-US/en-en/library/bb918180(v=vs.120).aspx 
-    if (TargetTriple.isOSBinFormatCOFF() && Section.contains('$')) { 
-      LLVM_DEBUG(dbgs() << "Ignoring global in sorted section (contains '$'): " 
-                        << *G << "\n"); 
-      return false; 
-    } 
- 
-    if (TargetTriple.isOSBinFormatMachO()) { 
-      StringRef ParsedSegment, ParsedSection; 
-      unsigned TAA = 0, StubSize = 0; 
-      bool TAAParsed; 
-      std::string ErrorCode = MCSectionMachO::ParseSectionSpecifier( 
-          Section, ParsedSegment, ParsedSection, TAA, TAAParsed, StubSize); 
-      assert(ErrorCode.empty() && "Invalid section specifier."); 
- 
-      // Ignore the globals from the __OBJC section. The ObjC runtime assumes 
-      // those conform to /usr/lib/objc/runtime.h, so we can't add redzones to 
-      // them. 
-      if (ParsedSegment == "__OBJC" || 
-          (ParsedSegment == "__DATA" && ParsedSection.startswith("__objc_"))) { 
-        LLVM_DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G << "\n"); 
-        return false; 
-      } 
-      // See https://github.com/google/sanitizers/issues/32 
-      // Constant CFString instances are compiled in the following way: 
-      //  -- the string buffer is emitted into 
-      //     __TEXT,__cstring,cstring_literals 
-      //  -- the constant NSConstantString structure referencing that buffer 
-      //     is placed into __DATA,__cfstring 
-      // Therefore there's no point in placing redzones into __DATA,__cfstring. 
-      // Moreover, it causes the linker to crash on OS X 10.7 
-      if (ParsedSegment == "__DATA" && ParsedSection == "__cfstring") { 
-        LLVM_DEBUG(dbgs() << "Ignoring CFString: " << *G << "\n"); 
-        return false; 
-      } 
-      // The linker merges the contents of cstring_literals and removes the 
-      // trailing zeroes. 
-      if (ParsedSegment == "__TEXT" && (TAA & MachO::S_CSTRING_LITERALS)) { 
-        LLVM_DEBUG(dbgs() << "Ignoring a cstring literal: " << *G << "\n"); 
-        return false; 
-      } 
-    } 
-  } 
- 
-  if (CompileKernel) { 
-    // Globals that prefixed by "__" are special and cannot be padded with a 
-    // redzone. 
-    if (G->getName().startswith("__")) 
-      return false; 
-  } 
- 
-  return true; 
-} 
- 
-// On Mach-O platforms, we emit global metadata in a separate section of the 
-// binary in order to allow the linker to properly dead strip. This is only 
-// supported on recent versions of ld64. 
-bool ModuleAddressSanitizer::ShouldUseMachOGlobalsSection() const { 
-  if (!TargetTriple.isOSBinFormatMachO()) 
-    return false; 
- 
-  if (TargetTriple.isMacOSX() && !TargetTriple.isMacOSXVersionLT(10, 11)) 
-    return true; 
-  if (TargetTriple.isiOS() /* or tvOS */ && !TargetTriple.isOSVersionLT(9)) 
-    return true; 
-  if (TargetTriple.isWatchOS() && !TargetTriple.isOSVersionLT(2)) 
-    return true; 
- 
-  return false; 
-} 
- 
-StringRef ModuleAddressSanitizer::getGlobalMetadataSection() const { 
-  switch (TargetTriple.getObjectFormat()) { 
-  case Triple::COFF:  return ".ASAN$GL"; 
-  case Triple::ELF:   return "asan_globals"; 
-  case Triple::MachO: return "__DATA,__asan_globals,regular"; 
-  case Triple::Wasm: 
+    // On COFF, if the section name contains '$', it is highly likely that the
+    // user is using section sorting to create an array of globals similar to
+    // the way initialization callbacks are registered in .init_array and
+    // .CRT$XCU. The ATL also registers things in .ATL$__[azm]. Adding redzones
+    // to such globals is counterproductive, because the intent is that they
+    // will form an array, and out-of-bounds accesses are expected.
+    // See https://github.com/google/sanitizers/issues/305
+    // and http://msdn.microsoft.com/en-US/en-en/library/bb918180(v=vs.120).aspx
+    if (TargetTriple.isOSBinFormatCOFF() && Section.contains('$')) {
+      LLVM_DEBUG(dbgs() << "Ignoring global in sorted section (contains '$'): "
+                        << *G << "\n");
+      return false;
+    }
+
+    if (TargetTriple.isOSBinFormatMachO()) {
+      StringRef ParsedSegment, ParsedSection;
+      unsigned TAA = 0, StubSize = 0;
+      bool TAAParsed;
+      std::string ErrorCode = MCSectionMachO::ParseSectionSpecifier(
+          Section, ParsedSegment, ParsedSection, TAA, TAAParsed, StubSize);
+      assert(ErrorCode.empty() && "Invalid section specifier.");
+
+      // Ignore the globals from the __OBJC section. The ObjC runtime assumes
+      // those conform to /usr/lib/objc/runtime.h, so we can't add redzones to
+      // them.
+      if (ParsedSegment == "__OBJC" ||
+          (ParsedSegment == "__DATA" && ParsedSection.startswith("__objc_"))) {
+        LLVM_DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G << "\n");
+        return false;
+      }
+      // See https://github.com/google/sanitizers/issues/32
+      // Constant CFString instances are compiled in the following way:
+      //  -- the string buffer is emitted into
+      //     __TEXT,__cstring,cstring_literals
+      //  -- the constant NSConstantString structure referencing that buffer
+      //     is placed into __DATA,__cfstring
+      // Therefore there's no point in placing redzones into __DATA,__cfstring.
+      // Moreover, it causes the linker to crash on OS X 10.7
+      if (ParsedSegment == "__DATA" && ParsedSection == "__cfstring") {
+        LLVM_DEBUG(dbgs() << "Ignoring CFString: " << *G << "\n");
+        return false;
+      }
+      // The linker merges the contents of cstring_literals and removes the
+      // trailing zeroes.
+      if (ParsedSegment == "__TEXT" && (TAA & MachO::S_CSTRING_LITERALS)) {
+        LLVM_DEBUG(dbgs() << "Ignoring a cstring literal: " << *G << "\n");
+        return false;
+      }
+    }
+  }
+
+  if (CompileKernel) {
+    // Globals that prefixed by "__" are special and cannot be padded with a
+    // redzone.
+    if (G->getName().startswith("__"))
+      return false;
+  }
+
+  return true;
+}
+
+// On Mach-O platforms, we emit global metadata in a separate section of the
+// binary in order to allow the linker to properly dead strip. This is only
+// supported on recent versions of ld64.
+bool ModuleAddressSanitizer::ShouldUseMachOGlobalsSection() const {
+  if (!TargetTriple.isOSBinFormatMachO())
+    return false;
+
+  if (TargetTriple.isMacOSX() && !TargetTriple.isMacOSXVersionLT(10, 11))
+    return true;
+  if (TargetTriple.isiOS() /* or tvOS */ && !TargetTriple.isOSVersionLT(9))
+    return true;
+  if (TargetTriple.isWatchOS() && !TargetTriple.isOSVersionLT(2))
+    return true;
+
+  return false;
+}
+
+StringRef ModuleAddressSanitizer::getGlobalMetadataSection() const {
+  switch (TargetTriple.getObjectFormat()) {
+  case Triple::COFF:  return ".ASAN$GL";
+  case Triple::ELF:   return "asan_globals";
+  case Triple::MachO: return "__DATA,__asan_globals,regular";
+  case Triple::Wasm:
   case Triple::GOFF:
-  case Triple::XCOFF: 
-    report_fatal_error( 
+  case Triple::XCOFF:
+    report_fatal_error(
         "ModuleAddressSanitizer not implemented for object file format");
-  case Triple::UnknownObjectFormat: 
-    break; 
-  } 
-  llvm_unreachable("unsupported object format"); 
-} 
- 
-void ModuleAddressSanitizer::initializeCallbacks(Module &M) { 
-  IRBuilder<> IRB(*C); 
- 
-  // Declare our poisoning and unpoisoning functions. 
-  AsanPoisonGlobals = 
-      M.getOrInsertFunction(kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy); 
-  AsanUnpoisonGlobals = 
-      M.getOrInsertFunction(kAsanUnpoisonGlobalsName, IRB.getVoidTy()); 
- 
-  // Declare functions that register/unregister globals. 
-  AsanRegisterGlobals = M.getOrInsertFunction( 
-      kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy); 
-  AsanUnregisterGlobals = M.getOrInsertFunction( 
-      kAsanUnregisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy); 
- 
-  // Declare the functions that find globals in a shared object and then invoke 
-  // the (un)register function on them. 
-  AsanRegisterImageGlobals = M.getOrInsertFunction( 
-      kAsanRegisterImageGlobalsName, IRB.getVoidTy(), IntptrTy); 
-  AsanUnregisterImageGlobals = M.getOrInsertFunction( 
-      kAsanUnregisterImageGlobalsName, IRB.getVoidTy(), IntptrTy); 
- 
-  AsanRegisterElfGlobals = 
-      M.getOrInsertFunction(kAsanRegisterElfGlobalsName, IRB.getVoidTy(), 
-                            IntptrTy, IntptrTy, IntptrTy); 
-  AsanUnregisterElfGlobals = 
-      M.getOrInsertFunction(kAsanUnregisterElfGlobalsName, IRB.getVoidTy(), 
-                            IntptrTy, IntptrTy, IntptrTy); 
-} 
- 
-// Put the metadata and the instrumented global in the same group. This ensures 
-// that the metadata is discarded if the instrumented global is discarded. 
-void ModuleAddressSanitizer::SetComdatForGlobalMetadata( 
-    GlobalVariable *G, GlobalVariable *Metadata, StringRef InternalSuffix) { 
-  Module &M = *G->getParent(); 
-  Comdat *C = G->getComdat(); 
-  if (!C) { 
-    if (!G->hasName()) { 
-      // If G is unnamed, it must be internal. Give it an artificial name 
-      // so we can put it in a comdat. 
-      assert(G->hasLocalLinkage()); 
-      G->setName(Twine(kAsanGenPrefix) + "_anon_global"); 
-    } 
- 
-    if (!InternalSuffix.empty() && G->hasLocalLinkage()) { 
-      std::string Name = std::string(G->getName()); 
-      Name += InternalSuffix; 
-      C = M.getOrInsertComdat(Name); 
-    } else { 
-      C = M.getOrInsertComdat(G->getName()); 
-    } 
- 
-    // Make this IMAGE_COMDAT_SELECT_NODUPLICATES on COFF. Also upgrade private 
-    // linkage to internal linkage so that a symbol table entry is emitted. This 
-    // is necessary in order to create the comdat group. 
-    if (TargetTriple.isOSBinFormatCOFF()) { 
-      C->setSelectionKind(Comdat::NoDuplicates); 
-      if (G->hasPrivateLinkage()) 
-        G->setLinkage(GlobalValue::InternalLinkage); 
-    } 
-    G->setComdat(C); 
-  } 
- 
-  assert(G->hasComdat()); 
-  Metadata->setComdat(G->getComdat()); 
-} 
- 
-// Create a separate metadata global and put it in the appropriate ASan 
-// global registration section. 
-GlobalVariable * 
-ModuleAddressSanitizer::CreateMetadataGlobal(Module &M, Constant *Initializer, 
-                                             StringRef OriginalName) { 
-  auto Linkage = TargetTriple.isOSBinFormatMachO() 
-                     ? GlobalVariable::InternalLinkage 
-                     : GlobalVariable::PrivateLinkage; 
-  GlobalVariable *Metadata = new GlobalVariable( 
-      M, Initializer->getType(), false, Linkage, Initializer, 
-      Twine("__asan_global_") + GlobalValue::dropLLVMManglingEscape(OriginalName)); 
-  Metadata->setSection(getGlobalMetadataSection()); 
-  return Metadata; 
-} 
- 
-Instruction *ModuleAddressSanitizer::CreateAsanModuleDtor(Module &M) { 
-  AsanDtorFunction = 
-      Function::Create(FunctionType::get(Type::getVoidTy(*C), false), 
-                       GlobalValue::InternalLinkage, kAsanModuleDtorName, &M); 
-  BasicBlock *AsanDtorBB = BasicBlock::Create(*C, "", AsanDtorFunction); 
- 
-  return ReturnInst::Create(*C, AsanDtorBB); 
-} 
- 
-void ModuleAddressSanitizer::InstrumentGlobalsCOFF( 
-    IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals, 
-    ArrayRef<Constant *> MetadataInitializers) { 
-  assert(ExtendedGlobals.size() == MetadataInitializers.size()); 
-  auto &DL = M.getDataLayout(); 
- 
-  SmallVector<GlobalValue *, 16> MetadataGlobals(ExtendedGlobals.size()); 
-  for (size_t i = 0; i < ExtendedGlobals.size(); i++) { 
-    Constant *Initializer = MetadataInitializers[i]; 
-    GlobalVariable *G = ExtendedGlobals[i]; 
-    GlobalVariable *Metadata = 
-        CreateMetadataGlobal(M, Initializer, G->getName()); 
-    MDNode *MD = MDNode::get(M.getContext(), ValueAsMetadata::get(G)); 
-    Metadata->setMetadata(LLVMContext::MD_associated, MD); 
-    MetadataGlobals[i] = Metadata; 
- 
-    // The MSVC linker always inserts padding when linking incrementally. We 
-    // cope with that by aligning each struct to its size, which must be a power 
-    // of two. 
-    unsigned SizeOfGlobalStruct = DL.getTypeAllocSize(Initializer->getType()); 
-    assert(isPowerOf2_32(SizeOfGlobalStruct) && 
-           "global metadata will not be padded appropriately"); 
-    Metadata->setAlignment(assumeAligned(SizeOfGlobalStruct)); 
- 
-    SetComdatForGlobalMetadata(G, Metadata, ""); 
-  } 
- 
-  // Update llvm.compiler.used, adding the new metadata globals. This is 
-  // needed so that during LTO these variables stay alive. 
-  if (!MetadataGlobals.empty()) 
-    appendToCompilerUsed(M, MetadataGlobals); 
-} 
- 
-void ModuleAddressSanitizer::InstrumentGlobalsELF( 
-    IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals, 
-    ArrayRef<Constant *> MetadataInitializers, 
-    const std::string &UniqueModuleId) { 
-  assert(ExtendedGlobals.size() == MetadataInitializers.size()); 
- 
-  SmallVector<GlobalValue *, 16> MetadataGlobals(ExtendedGlobals.size()); 
-  for (size_t i = 0; i < ExtendedGlobals.size(); i++) { 
-    GlobalVariable *G = ExtendedGlobals[i]; 
-    GlobalVariable *Metadata = 
-        CreateMetadataGlobal(M, MetadataInitializers[i], G->getName()); 
-    MDNode *MD = MDNode::get(M.getContext(), ValueAsMetadata::get(G)); 
-    Metadata->setMetadata(LLVMContext::MD_associated, MD); 
-    MetadataGlobals[i] = Metadata; 
- 
-    SetComdatForGlobalMetadata(G, Metadata, UniqueModuleId); 
-  } 
- 
-  // Update llvm.compiler.used, adding the new metadata globals. This is 
-  // needed so that during LTO these variables stay alive. 
+  case Triple::UnknownObjectFormat:
+    break;
+  }
+  llvm_unreachable("unsupported object format");
+}
+
+void ModuleAddressSanitizer::initializeCallbacks(Module &M) {
+  IRBuilder<> IRB(*C);
+
+  // Declare our poisoning and unpoisoning functions.
+  AsanPoisonGlobals =
+      M.getOrInsertFunction(kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy);
+  AsanUnpoisonGlobals =
+      M.getOrInsertFunction(kAsanUnpoisonGlobalsName, IRB.getVoidTy());
+
+  // Declare functions that register/unregister globals.
+  AsanRegisterGlobals = M.getOrInsertFunction(
+      kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy);
+  AsanUnregisterGlobals = M.getOrInsertFunction(
+      kAsanUnregisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy);
+
+  // Declare the functions that find globals in a shared object and then invoke
+  // the (un)register function on them.
+  AsanRegisterImageGlobals = M.getOrInsertFunction(
+      kAsanRegisterImageGlobalsName, IRB.getVoidTy(), IntptrTy);
+  AsanUnregisterImageGlobals = M.getOrInsertFunction(
+      kAsanUnregisterImageGlobalsName, IRB.getVoidTy(), IntptrTy);
+
+  AsanRegisterElfGlobals =
+      M.getOrInsertFunction(kAsanRegisterElfGlobalsName, IRB.getVoidTy(),
+                            IntptrTy, IntptrTy, IntptrTy);
+  AsanUnregisterElfGlobals =
+      M.getOrInsertFunction(kAsanUnregisterElfGlobalsName, IRB.getVoidTy(),
+                            IntptrTy, IntptrTy, IntptrTy);
+}
+
+// Put the metadata and the instrumented global in the same group. This ensures
+// that the metadata is discarded if the instrumented global is discarded.
+void ModuleAddressSanitizer::SetComdatForGlobalMetadata(
+    GlobalVariable *G, GlobalVariable *Metadata, StringRef InternalSuffix) {
+  Module &M = *G->getParent();
+  Comdat *C = G->getComdat();
+  if (!C) {
+    if (!G->hasName()) {
+      // If G is unnamed, it must be internal. Give it an artificial name
+      // so we can put it in a comdat.
+      assert(G->hasLocalLinkage());
+      G->setName(Twine(kAsanGenPrefix) + "_anon_global");
+    }
+
+    if (!InternalSuffix.empty() && G->hasLocalLinkage()) {
+      std::string Name = std::string(G->getName());
+      Name += InternalSuffix;
+      C = M.getOrInsertComdat(Name);
+    } else {
+      C = M.getOrInsertComdat(G->getName());
+    }
+
+    // Make this IMAGE_COMDAT_SELECT_NODUPLICATES on COFF. Also upgrade private
+    // linkage to internal linkage so that a symbol table entry is emitted. This
+    // is necessary in order to create the comdat group.
+    if (TargetTriple.isOSBinFormatCOFF()) {
+      C->setSelectionKind(Comdat::NoDuplicates);
+      if (G->hasPrivateLinkage())
+        G->setLinkage(GlobalValue::InternalLinkage);
+    }
+    G->setComdat(C);
+  }
+
+  assert(G->hasComdat());
+  Metadata->setComdat(G->getComdat());
+}
+
+// Create a separate metadata global and put it in the appropriate ASan
+// global registration section.
+GlobalVariable *
+ModuleAddressSanitizer::CreateMetadataGlobal(Module &M, Constant *Initializer,
+                                             StringRef OriginalName) {
+  auto Linkage = TargetTriple.isOSBinFormatMachO()
+                     ? GlobalVariable::InternalLinkage
+                     : GlobalVariable::PrivateLinkage;
+  GlobalVariable *Metadata = new GlobalVariable(
+      M, Initializer->getType(), false, Linkage, Initializer,
+      Twine("__asan_global_") + GlobalValue::dropLLVMManglingEscape(OriginalName));
+  Metadata->setSection(getGlobalMetadataSection());
+  return Metadata;
+}
+
+Instruction *ModuleAddressSanitizer::CreateAsanModuleDtor(Module &M) {
+  AsanDtorFunction =
+      Function::Create(FunctionType::get(Type::getVoidTy(*C), false),
+                       GlobalValue::InternalLinkage, kAsanModuleDtorName, &M);
+  BasicBlock *AsanDtorBB = BasicBlock::Create(*C, "", AsanDtorFunction);
+
+  return ReturnInst::Create(*C, AsanDtorBB);
+}
+
+void ModuleAddressSanitizer::InstrumentGlobalsCOFF(
+    IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals,
+    ArrayRef<Constant *> MetadataInitializers) {
+  assert(ExtendedGlobals.size() == MetadataInitializers.size());
+  auto &DL = M.getDataLayout();
+
+  SmallVector<GlobalValue *, 16> MetadataGlobals(ExtendedGlobals.size());
+  for (size_t i = 0; i < ExtendedGlobals.size(); i++) {
+    Constant *Initializer = MetadataInitializers[i];
+    GlobalVariable *G = ExtendedGlobals[i];
+    GlobalVariable *Metadata =
+        CreateMetadataGlobal(M, Initializer, G->getName());
+    MDNode *MD = MDNode::get(M.getContext(), ValueAsMetadata::get(G));
+    Metadata->setMetadata(LLVMContext::MD_associated, MD);
+    MetadataGlobals[i] = Metadata;
+
+    // The MSVC linker always inserts padding when linking incrementally. We
+    // cope with that by aligning each struct to its size, which must be a power
+    // of two.
+    unsigned SizeOfGlobalStruct = DL.getTypeAllocSize(Initializer->getType());
+    assert(isPowerOf2_32(SizeOfGlobalStruct) &&
+           "global metadata will not be padded appropriately");
+    Metadata->setAlignment(assumeAligned(SizeOfGlobalStruct));
+
+    SetComdatForGlobalMetadata(G, Metadata, "");
+  }
+
+  // Update llvm.compiler.used, adding the new metadata globals. This is
+  // needed so that during LTO these variables stay alive.
   if (!MetadataGlobals.empty())
     appendToCompilerUsed(M, MetadataGlobals);
- 
-  // RegisteredFlag serves two purposes. First, we can pass it to dladdr() 
-  // to look up the loaded image that contains it. Second, we can store in it 
-  // whether registration has already occurred, to prevent duplicate 
-  // registration. 
-  // 
-  // Common linkage ensures that there is only one global per shared library. 
-  GlobalVariable *RegisteredFlag = new GlobalVariable( 
-      M, IntptrTy, false, GlobalVariable::CommonLinkage, 
-      ConstantInt::get(IntptrTy, 0), kAsanGlobalsRegisteredFlagName); 
-  RegisteredFlag->setVisibility(GlobalVariable::HiddenVisibility); 
- 
+}
+
+void ModuleAddressSanitizer::InstrumentGlobalsELF(
+    IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals,
+    ArrayRef<Constant *> MetadataInitializers,
+    const std::string &UniqueModuleId) {
+  assert(ExtendedGlobals.size() == MetadataInitializers.size());
+
+  SmallVector<GlobalValue *, 16> MetadataGlobals(ExtendedGlobals.size());
+  for (size_t i = 0; i < ExtendedGlobals.size(); i++) {
+    GlobalVariable *G = ExtendedGlobals[i];
+    GlobalVariable *Metadata =
+        CreateMetadataGlobal(M, MetadataInitializers[i], G->getName());
+    MDNode *MD = MDNode::get(M.getContext(), ValueAsMetadata::get(G));
+    Metadata->setMetadata(LLVMContext::MD_associated, MD);
+    MetadataGlobals[i] = Metadata;
+
+    SetComdatForGlobalMetadata(G, Metadata, UniqueModuleId);
+  }
+
+  // Update llvm.compiler.used, adding the new metadata globals. This is
+  // needed so that during LTO these variables stay alive.
+  if (!MetadataGlobals.empty())
+    appendToCompilerUsed(M, MetadataGlobals);
+
+  // RegisteredFlag serves two purposes. First, we can pass it to dladdr()
+  // to look up the loaded image that contains it. Second, we can store in it
+  // whether registration has already occurred, to prevent duplicate
+  // registration.
+  //
+  // Common linkage ensures that there is only one global per shared library.
+  GlobalVariable *RegisteredFlag = new GlobalVariable(
+      M, IntptrTy, false, GlobalVariable::CommonLinkage,
+      ConstantInt::get(IntptrTy, 0), kAsanGlobalsRegisteredFlagName);
+  RegisteredFlag->setVisibility(GlobalVariable::HiddenVisibility);
+
   // Create start and stop symbols.
   GlobalVariable *StartELFMetadata = new GlobalVariable(
       M, IntptrTy, false, GlobalVariable::ExternalWeakLinkage, nullptr,
@@ -2142,1326 +2142,1326 @@ void ModuleAddressSanitizer::InstrumentGlobalsELF(
       M, IntptrTy, false, GlobalVariable::ExternalWeakLinkage, nullptr,
       "__stop_" + getGlobalMetadataSection());
   StopELFMetadata->setVisibility(GlobalVariable::HiddenVisibility);
- 
-  // Create a call to register the globals with the runtime. 
-  IRB.CreateCall(AsanRegisterElfGlobals, 
-                 {IRB.CreatePointerCast(RegisteredFlag, IntptrTy), 
-                  IRB.CreatePointerCast(StartELFMetadata, IntptrTy), 
-                  IRB.CreatePointerCast(StopELFMetadata, IntptrTy)}); 
- 
-  // We also need to unregister globals at the end, e.g., when a shared library 
-  // gets closed. 
-  IRBuilder<> IRB_Dtor(CreateAsanModuleDtor(M)); 
-  IRB_Dtor.CreateCall(AsanUnregisterElfGlobals, 
-                      {IRB.CreatePointerCast(RegisteredFlag, IntptrTy), 
-                       IRB.CreatePointerCast(StartELFMetadata, IntptrTy), 
-                       IRB.CreatePointerCast(StopELFMetadata, IntptrTy)}); 
-} 
- 
-void ModuleAddressSanitizer::InstrumentGlobalsMachO( 
-    IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals, 
-    ArrayRef<Constant *> MetadataInitializers) { 
-  assert(ExtendedGlobals.size() == MetadataInitializers.size()); 
- 
-  // On recent Mach-O platforms, use a structure which binds the liveness of 
-  // the global variable to the metadata struct. Keep the list of "Liveness" GV 
-  // created to be added to llvm.compiler.used 
-  StructType *LivenessTy = StructType::get(IntptrTy, IntptrTy); 
-  SmallVector<GlobalValue *, 16> LivenessGlobals(ExtendedGlobals.size()); 
- 
-  for (size_t i = 0; i < ExtendedGlobals.size(); i++) { 
-    Constant *Initializer = MetadataInitializers[i]; 
-    GlobalVariable *G = ExtendedGlobals[i]; 
-    GlobalVariable *Metadata = 
-        CreateMetadataGlobal(M, Initializer, G->getName()); 
- 
-    // On recent Mach-O platforms, we emit the global metadata in a way that 
-    // allows the linker to properly strip dead globals. 
-    auto LivenessBinder = 
-        ConstantStruct::get(LivenessTy, Initializer->getAggregateElement(0u), 
-                            ConstantExpr::getPointerCast(Metadata, IntptrTy)); 
-    GlobalVariable *Liveness = new GlobalVariable( 
-        M, LivenessTy, false, GlobalVariable::InternalLinkage, LivenessBinder, 
-        Twine("__asan_binder_") + G->getName()); 
-    Liveness->setSection("__DATA,__asan_liveness,regular,live_support"); 
-    LivenessGlobals[i] = Liveness; 
-  } 
- 
-  // Update llvm.compiler.used, adding the new liveness globals. This is 
-  // needed so that during LTO these variables stay alive. The alternative 
-  // would be to have the linker handling the LTO symbols, but libLTO 
-  // current API does not expose access to the section for each symbol. 
-  if (!LivenessGlobals.empty()) 
-    appendToCompilerUsed(M, LivenessGlobals); 
- 
-  // RegisteredFlag serves two purposes. First, we can pass it to dladdr() 
-  // to look up the loaded image that contains it. Second, we can store in it 
-  // whether registration has already occurred, to prevent duplicate 
-  // registration. 
-  // 
-  // common linkage ensures that there is only one global per shared library. 
-  GlobalVariable *RegisteredFlag = new GlobalVariable( 
-      M, IntptrTy, false, GlobalVariable::CommonLinkage, 
-      ConstantInt::get(IntptrTy, 0), kAsanGlobalsRegisteredFlagName); 
-  RegisteredFlag->setVisibility(GlobalVariable::HiddenVisibility); 
- 
-  IRB.CreateCall(AsanRegisterImageGlobals, 
-                 {IRB.CreatePointerCast(RegisteredFlag, IntptrTy)}); 
- 
-  // We also need to unregister globals at the end, e.g., when a shared library 
-  // gets closed. 
-  IRBuilder<> IRB_Dtor(CreateAsanModuleDtor(M)); 
-  IRB_Dtor.CreateCall(AsanUnregisterImageGlobals, 
-                      {IRB.CreatePointerCast(RegisteredFlag, IntptrTy)}); 
-} 
- 
-void ModuleAddressSanitizer::InstrumentGlobalsWithMetadataArray( 
-    IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals, 
-    ArrayRef<Constant *> MetadataInitializers) { 
-  assert(ExtendedGlobals.size() == MetadataInitializers.size()); 
-  unsigned N = ExtendedGlobals.size(); 
-  assert(N > 0); 
- 
-  // On platforms that don't have a custom metadata section, we emit an array 
-  // of global metadata structures. 
-  ArrayType *ArrayOfGlobalStructTy = 
-      ArrayType::get(MetadataInitializers[0]->getType(), N); 
-  auto AllGlobals = new GlobalVariable( 
-      M, ArrayOfGlobalStructTy, false, GlobalVariable::InternalLinkage, 
-      ConstantArray::get(ArrayOfGlobalStructTy, MetadataInitializers), ""); 
-  if (Mapping.Scale > 3) 
-    AllGlobals->setAlignment(Align(1ULL << Mapping.Scale)); 
- 
-  IRB.CreateCall(AsanRegisterGlobals, 
-                 {IRB.CreatePointerCast(AllGlobals, IntptrTy), 
-                  ConstantInt::get(IntptrTy, N)}); 
- 
-  // We also need to unregister globals at the end, e.g., when a shared library 
-  // gets closed. 
-  IRBuilder<> IRB_Dtor(CreateAsanModuleDtor(M)); 
-  IRB_Dtor.CreateCall(AsanUnregisterGlobals, 
-                      {IRB.CreatePointerCast(AllGlobals, IntptrTy), 
-                       ConstantInt::get(IntptrTy, N)}); 
-} 
- 
-// This function replaces all global variables with new variables that have 
-// trailing redzones. It also creates a function that poisons 
-// redzones and inserts this function into llvm.global_ctors. 
-// Sets *CtorComdat to true if the global registration code emitted into the 
-// asan constructor is comdat-compatible. 
-bool ModuleAddressSanitizer::InstrumentGlobals(IRBuilder<> &IRB, Module &M, 
-                                               bool *CtorComdat) { 
-  *CtorComdat = false; 
- 
-  // Build set of globals that are aliased by some GA, where 
-  // getExcludedAliasedGlobal(GA) returns the relevant GlobalVariable. 
-  SmallPtrSet<const GlobalVariable *, 16> AliasedGlobalExclusions; 
-  if (CompileKernel) { 
-    for (auto &GA : M.aliases()) { 
-      if (const GlobalVariable *GV = getExcludedAliasedGlobal(GA)) 
-        AliasedGlobalExclusions.insert(GV); 
-    } 
-  } 
- 
-  SmallVector<GlobalVariable *, 16> GlobalsToChange; 
-  for (auto &G : M.globals()) { 
-    if (!AliasedGlobalExclusions.count(&G) && shouldInstrumentGlobal(&G)) 
-      GlobalsToChange.push_back(&G); 
-  } 
- 
-  size_t n = GlobalsToChange.size(); 
-  if (n == 0) { 
-    *CtorComdat = true; 
-    return false; 
-  } 
- 
-  auto &DL = M.getDataLayout(); 
- 
-  // A global is described by a structure 
-  //   size_t beg; 
-  //   size_t size; 
-  //   size_t size_with_redzone; 
-  //   const char *name; 
-  //   const char *module_name; 
-  //   size_t has_dynamic_init; 
-  //   void *source_location; 
-  //   size_t odr_indicator; 
-  // We initialize an array of such structures and pass it to a run-time call. 
-  StructType *GlobalStructTy = 
-      StructType::get(IntptrTy, IntptrTy, IntptrTy, IntptrTy, IntptrTy, 
-                      IntptrTy, IntptrTy, IntptrTy); 
-  SmallVector<GlobalVariable *, 16> NewGlobals(n); 
-  SmallVector<Constant *, 16> Initializers(n); 
- 
-  bool HasDynamicallyInitializedGlobals = false; 
- 
-  // We shouldn't merge same module names, as this string serves as unique 
-  // module ID in runtime. 
-  GlobalVariable *ModuleName = createPrivateGlobalForString( 
-      M, M.getModuleIdentifier(), /*AllowMerging*/ false, kAsanGenPrefix); 
- 
-  for (size_t i = 0; i < n; i++) { 
-    GlobalVariable *G = GlobalsToChange[i]; 
- 
-    // FIXME: Metadata should be attched directly to the global directly instead 
-    // of being added to llvm.asan.globals. 
-    auto MD = GlobalsMD.get(G); 
-    StringRef NameForGlobal = G->getName(); 
-    // Create string holding the global name (use global name from metadata 
-    // if it's available, otherwise just write the name of global variable). 
-    GlobalVariable *Name = createPrivateGlobalForString( 
-        M, MD.Name.empty() ? NameForGlobal : MD.Name, 
-        /*AllowMerging*/ true, kAsanGenPrefix); 
- 
-    Type *Ty = G->getValueType(); 
-    const uint64_t SizeInBytes = DL.getTypeAllocSize(Ty); 
-    const uint64_t RightRedzoneSize = getRedzoneSizeForGlobal(SizeInBytes); 
-    Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize); 
- 
-    StructType *NewTy = StructType::get(Ty, RightRedZoneTy); 
-    Constant *NewInitializer = ConstantStruct::get( 
-        NewTy, G->getInitializer(), Constant::getNullValue(RightRedZoneTy)); 
- 
-    // Create a new global variable with enough space for a redzone. 
-    GlobalValue::LinkageTypes Linkage = G->getLinkage(); 
-    if (G->isConstant() && Linkage == GlobalValue::PrivateLinkage) 
-      Linkage = GlobalValue::InternalLinkage; 
-    GlobalVariable *NewGlobal = 
-        new GlobalVariable(M, NewTy, G->isConstant(), Linkage, NewInitializer, 
-                           "", G, G->getThreadLocalMode()); 
-    NewGlobal->copyAttributesFrom(G); 
-    NewGlobal->setComdat(G->getComdat()); 
-    NewGlobal->setAlignment(MaybeAlign(getMinRedzoneSizeForGlobal())); 
-    // Don't fold globals with redzones. ODR violation detector and redzone 
-    // poisoning implicitly creates a dependence on the global's address, so it 
-    // is no longer valid for it to be marked unnamed_addr. 
-    NewGlobal->setUnnamedAddr(GlobalValue::UnnamedAddr::None); 
- 
-    // Move null-terminated C strings to "__asan_cstring" section on Darwin. 
-    if (TargetTriple.isOSBinFormatMachO() && !G->hasSection() && 
-        G->isConstant()) { 
-      auto Seq = dyn_cast<ConstantDataSequential>(G->getInitializer()); 
-      if (Seq && Seq->isCString()) 
-        NewGlobal->setSection("__TEXT,__asan_cstring,regular"); 
-    } 
- 
+
+  // Create a call to register the globals with the runtime.
+  IRB.CreateCall(AsanRegisterElfGlobals,
+                 {IRB.CreatePointerCast(RegisteredFlag, IntptrTy),
+                  IRB.CreatePointerCast(StartELFMetadata, IntptrTy),
+                  IRB.CreatePointerCast(StopELFMetadata, IntptrTy)});
+
+  // We also need to unregister globals at the end, e.g., when a shared library
+  // gets closed.
+  IRBuilder<> IRB_Dtor(CreateAsanModuleDtor(M));
+  IRB_Dtor.CreateCall(AsanUnregisterElfGlobals,
+                      {IRB.CreatePointerCast(RegisteredFlag, IntptrTy),
+                       IRB.CreatePointerCast(StartELFMetadata, IntptrTy),
+                       IRB.CreatePointerCast(StopELFMetadata, IntptrTy)});
+}
+
+void ModuleAddressSanitizer::InstrumentGlobalsMachO(
+    IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals,
+    ArrayRef<Constant *> MetadataInitializers) {
+  assert(ExtendedGlobals.size() == MetadataInitializers.size());
+
+  // On recent Mach-O platforms, use a structure which binds the liveness of
+  // the global variable to the metadata struct. Keep the list of "Liveness" GV
+  // created to be added to llvm.compiler.used
+  StructType *LivenessTy = StructType::get(IntptrTy, IntptrTy);
+  SmallVector<GlobalValue *, 16> LivenessGlobals(ExtendedGlobals.size());
+
+  for (size_t i = 0; i < ExtendedGlobals.size(); i++) {
+    Constant *Initializer = MetadataInitializers[i];
+    GlobalVariable *G = ExtendedGlobals[i];
+    GlobalVariable *Metadata =
+        CreateMetadataGlobal(M, Initializer, G->getName());
+
+    // On recent Mach-O platforms, we emit the global metadata in a way that
+    // allows the linker to properly strip dead globals.
+    auto LivenessBinder =
+        ConstantStruct::get(LivenessTy, Initializer->getAggregateElement(0u),
+                            ConstantExpr::getPointerCast(Metadata, IntptrTy));
+    GlobalVariable *Liveness = new GlobalVariable(
+        M, LivenessTy, false, GlobalVariable::InternalLinkage, LivenessBinder,
+        Twine("__asan_binder_") + G->getName());
+    Liveness->setSection("__DATA,__asan_liveness,regular,live_support");
+    LivenessGlobals[i] = Liveness;
+  }
+
+  // Update llvm.compiler.used, adding the new liveness globals. This is
+  // needed so that during LTO these variables stay alive. The alternative
+  // would be to have the linker handling the LTO symbols, but libLTO
+  // current API does not expose access to the section for each symbol.
+  if (!LivenessGlobals.empty())
+    appendToCompilerUsed(M, LivenessGlobals);
+
+  // RegisteredFlag serves two purposes. First, we can pass it to dladdr()
+  // to look up the loaded image that contains it. Second, we can store in it
+  // whether registration has already occurred, to prevent duplicate
+  // registration.
+  //
+  // common linkage ensures that there is only one global per shared library.
+  GlobalVariable *RegisteredFlag = new GlobalVariable(
+      M, IntptrTy, false, GlobalVariable::CommonLinkage,
+      ConstantInt::get(IntptrTy, 0), kAsanGlobalsRegisteredFlagName);
+  RegisteredFlag->setVisibility(GlobalVariable::HiddenVisibility);
+
+  IRB.CreateCall(AsanRegisterImageGlobals,
+                 {IRB.CreatePointerCast(RegisteredFlag, IntptrTy)});
+
+  // We also need to unregister globals at the end, e.g., when a shared library
+  // gets closed.
+  IRBuilder<> IRB_Dtor(CreateAsanModuleDtor(M));
+  IRB_Dtor.CreateCall(AsanUnregisterImageGlobals,
+                      {IRB.CreatePointerCast(RegisteredFlag, IntptrTy)});
+}
+
+void ModuleAddressSanitizer::InstrumentGlobalsWithMetadataArray(
+    IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals,
+    ArrayRef<Constant *> MetadataInitializers) {
+  assert(ExtendedGlobals.size() == MetadataInitializers.size());
+  unsigned N = ExtendedGlobals.size();
+  assert(N > 0);
+
+  // On platforms that don't have a custom metadata section, we emit an array
+  // of global metadata structures.
+  ArrayType *ArrayOfGlobalStructTy =
+      ArrayType::get(MetadataInitializers[0]->getType(), N);
+  auto AllGlobals = new GlobalVariable(
+      M, ArrayOfGlobalStructTy, false, GlobalVariable::InternalLinkage,
+      ConstantArray::get(ArrayOfGlobalStructTy, MetadataInitializers), "");
+  if (Mapping.Scale > 3)
+    AllGlobals->setAlignment(Align(1ULL << Mapping.Scale));
+
+  IRB.CreateCall(AsanRegisterGlobals,
+                 {IRB.CreatePointerCast(AllGlobals, IntptrTy),
+                  ConstantInt::get(IntptrTy, N)});
+
+  // We also need to unregister globals at the end, e.g., when a shared library
+  // gets closed.
+  IRBuilder<> IRB_Dtor(CreateAsanModuleDtor(M));
+  IRB_Dtor.CreateCall(AsanUnregisterGlobals,
+                      {IRB.CreatePointerCast(AllGlobals, IntptrTy),
+                       ConstantInt::get(IntptrTy, N)});
+}
+
+// This function replaces all global variables with new variables that have
+// trailing redzones. It also creates a function that poisons
+// redzones and inserts this function into llvm.global_ctors.
+// Sets *CtorComdat to true if the global registration code emitted into the
+// asan constructor is comdat-compatible.
+bool ModuleAddressSanitizer::InstrumentGlobals(IRBuilder<> &IRB, Module &M,
+                                               bool *CtorComdat) {
+  *CtorComdat = false;
+
+  // Build set of globals that are aliased by some GA, where
+  // getExcludedAliasedGlobal(GA) returns the relevant GlobalVariable.
+  SmallPtrSet<const GlobalVariable *, 16> AliasedGlobalExclusions;
+  if (CompileKernel) {
+    for (auto &GA : M.aliases()) {
+      if (const GlobalVariable *GV = getExcludedAliasedGlobal(GA))
+        AliasedGlobalExclusions.insert(GV);
+    }
+  }
+
+  SmallVector<GlobalVariable *, 16> GlobalsToChange;
+  for (auto &G : M.globals()) {
+    if (!AliasedGlobalExclusions.count(&G) && shouldInstrumentGlobal(&G))
+      GlobalsToChange.push_back(&G);
+  }
+
+  size_t n = GlobalsToChange.size();
+  if (n == 0) {
+    *CtorComdat = true;
+    return false;
+  }
+
+  auto &DL = M.getDataLayout();
+
+  // A global is described by a structure
+  //   size_t beg;
+  //   size_t size;
+  //   size_t size_with_redzone;
+  //   const char *name;
+  //   const char *module_name;
+  //   size_t has_dynamic_init;
+  //   void *source_location;
+  //   size_t odr_indicator;
+  // We initialize an array of such structures and pass it to a run-time call.
+  StructType *GlobalStructTy =
+      StructType::get(IntptrTy, IntptrTy, IntptrTy, IntptrTy, IntptrTy,
+                      IntptrTy, IntptrTy, IntptrTy);
+  SmallVector<GlobalVariable *, 16> NewGlobals(n);
+  SmallVector<Constant *, 16> Initializers(n);
+
+  bool HasDynamicallyInitializedGlobals = false;
+
+  // We shouldn't merge same module names, as this string serves as unique
+  // module ID in runtime.
+  GlobalVariable *ModuleName = createPrivateGlobalForString(
+      M, M.getModuleIdentifier(), /*AllowMerging*/ false, kAsanGenPrefix);
+
+  for (size_t i = 0; i < n; i++) {
+    GlobalVariable *G = GlobalsToChange[i];
+
+    // FIXME: Metadata should be attched directly to the global directly instead
+    // of being added to llvm.asan.globals.
+    auto MD = GlobalsMD.get(G);
+    StringRef NameForGlobal = G->getName();
+    // Create string holding the global name (use global name from metadata
+    // if it's available, otherwise just write the name of global variable).
+    GlobalVariable *Name = createPrivateGlobalForString(
+        M, MD.Name.empty() ? NameForGlobal : MD.Name,
+        /*AllowMerging*/ true, kAsanGenPrefix);
+
+    Type *Ty = G->getValueType();
+    const uint64_t SizeInBytes = DL.getTypeAllocSize(Ty);
+    const uint64_t RightRedzoneSize = getRedzoneSizeForGlobal(SizeInBytes);
+    Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize);
+
+    StructType *NewTy = StructType::get(Ty, RightRedZoneTy);
+    Constant *NewInitializer = ConstantStruct::get(
+        NewTy, G->getInitializer(), Constant::getNullValue(RightRedZoneTy));
+
+    // Create a new global variable with enough space for a redzone.
+    GlobalValue::LinkageTypes Linkage = G->getLinkage();
+    if (G->isConstant() && Linkage == GlobalValue::PrivateLinkage)
+      Linkage = GlobalValue::InternalLinkage;
+    GlobalVariable *NewGlobal =
+        new GlobalVariable(M, NewTy, G->isConstant(), Linkage, NewInitializer,
+                           "", G, G->getThreadLocalMode());
+    NewGlobal->copyAttributesFrom(G);
+    NewGlobal->setComdat(G->getComdat());
+    NewGlobal->setAlignment(MaybeAlign(getMinRedzoneSizeForGlobal()));
+    // Don't fold globals with redzones. ODR violation detector and redzone
+    // poisoning implicitly creates a dependence on the global's address, so it
+    // is no longer valid for it to be marked unnamed_addr.
+    NewGlobal->setUnnamedAddr(GlobalValue::UnnamedAddr::None);
+
+    // Move null-terminated C strings to "__asan_cstring" section on Darwin.
+    if (TargetTriple.isOSBinFormatMachO() && !G->hasSection() &&
+        G->isConstant()) {
+      auto Seq = dyn_cast<ConstantDataSequential>(G->getInitializer());
+      if (Seq && Seq->isCString())
+        NewGlobal->setSection("__TEXT,__asan_cstring,regular");
+    }
+
     // Transfer the debug info and type metadata.  The payload starts at offset
     // zero so we can copy the metadata over as is.
     NewGlobal->copyMetadata(G, 0);
- 
-    Value *Indices2[2]; 
-    Indices2[0] = IRB.getInt32(0); 
-    Indices2[1] = IRB.getInt32(0); 
- 
-    G->replaceAllUsesWith( 
-        ConstantExpr::getGetElementPtr(NewTy, NewGlobal, Indices2, true)); 
-    NewGlobal->takeName(G); 
-    G->eraseFromParent(); 
-    NewGlobals[i] = NewGlobal; 
- 
-    Constant *SourceLoc; 
-    if (!MD.SourceLoc.empty()) { 
-      auto SourceLocGlobal = createPrivateGlobalForSourceLoc(M, MD.SourceLoc); 
-      SourceLoc = ConstantExpr::getPointerCast(SourceLocGlobal, IntptrTy); 
-    } else { 
-      SourceLoc = ConstantInt::get(IntptrTy, 0); 
-    } 
- 
-    Constant *ODRIndicator = ConstantExpr::getNullValue(IRB.getInt8PtrTy()); 
-    GlobalValue *InstrumentedGlobal = NewGlobal; 
- 
-    bool CanUsePrivateAliases = 
-        TargetTriple.isOSBinFormatELF() || TargetTriple.isOSBinFormatMachO() || 
-        TargetTriple.isOSBinFormatWasm(); 
-    if (CanUsePrivateAliases && UsePrivateAlias) { 
-      // Create local alias for NewGlobal to avoid crash on ODR between 
-      // instrumented and non-instrumented libraries. 
-      InstrumentedGlobal = 
-          GlobalAlias::create(GlobalValue::PrivateLinkage, "", NewGlobal); 
-    } 
- 
-    // ODR should not happen for local linkage. 
-    if (NewGlobal->hasLocalLinkage()) { 
-      ODRIndicator = ConstantExpr::getIntToPtr(ConstantInt::get(IntptrTy, -1), 
-                                               IRB.getInt8PtrTy()); 
-    } else if (UseOdrIndicator) { 
-      // With local aliases, we need to provide another externally visible 
-      // symbol __odr_asan_XXX to detect ODR violation. 
-      auto *ODRIndicatorSym = 
-          new GlobalVariable(M, IRB.getInt8Ty(), false, Linkage, 
-                             Constant::getNullValue(IRB.getInt8Ty()), 
-                             kODRGenPrefix + NameForGlobal, nullptr, 
-                             NewGlobal->getThreadLocalMode()); 
- 
-      // Set meaningful attributes for indicator symbol. 
-      ODRIndicatorSym->setVisibility(NewGlobal->getVisibility()); 
-      ODRIndicatorSym->setDLLStorageClass(NewGlobal->getDLLStorageClass()); 
-      ODRIndicatorSym->setAlignment(Align(1)); 
-      ODRIndicator = ODRIndicatorSym; 
-    } 
- 
-    Constant *Initializer = ConstantStruct::get( 
-        GlobalStructTy, 
-        ConstantExpr::getPointerCast(InstrumentedGlobal, IntptrTy), 
-        ConstantInt::get(IntptrTy, SizeInBytes), 
-        ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize), 
-        ConstantExpr::getPointerCast(Name, IntptrTy), 
-        ConstantExpr::getPointerCast(ModuleName, IntptrTy), 
-        ConstantInt::get(IntptrTy, MD.IsDynInit), SourceLoc, 
-        ConstantExpr::getPointerCast(ODRIndicator, IntptrTy)); 
- 
-    if (ClInitializers && MD.IsDynInit) HasDynamicallyInitializedGlobals = true; 
- 
-    LLVM_DEBUG(dbgs() << "NEW GLOBAL: " << *NewGlobal << "\n"); 
- 
-    Initializers[i] = Initializer; 
-  } 
- 
-  // Add instrumented globals to llvm.compiler.used list to avoid LTO from 
-  // ConstantMerge'ing them. 
-  SmallVector<GlobalValue *, 16> GlobalsToAddToUsedList; 
-  for (size_t i = 0; i < n; i++) { 
-    GlobalVariable *G = NewGlobals[i]; 
-    if (G->getName().empty()) continue; 
-    GlobalsToAddToUsedList.push_back(G); 
-  } 
-  appendToCompilerUsed(M, ArrayRef<GlobalValue *>(GlobalsToAddToUsedList)); 
- 
-  std::string ELFUniqueModuleId = 
-      (UseGlobalsGC && TargetTriple.isOSBinFormatELF()) ? getUniqueModuleId(&M) 
-                                                        : ""; 
- 
-  if (!ELFUniqueModuleId.empty()) { 
-    InstrumentGlobalsELF(IRB, M, NewGlobals, Initializers, ELFUniqueModuleId); 
-    *CtorComdat = true; 
-  } else if (UseGlobalsGC && TargetTriple.isOSBinFormatCOFF()) { 
-    InstrumentGlobalsCOFF(IRB, M, NewGlobals, Initializers); 
-  } else if (UseGlobalsGC && ShouldUseMachOGlobalsSection()) { 
-    InstrumentGlobalsMachO(IRB, M, NewGlobals, Initializers); 
-  } else { 
-    InstrumentGlobalsWithMetadataArray(IRB, M, NewGlobals, Initializers); 
-  } 
- 
-  // Create calls for poisoning before initializers run and unpoisoning after. 
-  if (HasDynamicallyInitializedGlobals) 
-    createInitializerPoisonCalls(M, ModuleName); 
- 
-  LLVM_DEBUG(dbgs() << M); 
-  return true; 
-} 
- 
-uint64_t 
-ModuleAddressSanitizer::getRedzoneSizeForGlobal(uint64_t SizeInBytes) const { 
-  constexpr uint64_t kMaxRZ = 1 << 18; 
-  const uint64_t MinRZ = getMinRedzoneSizeForGlobal(); 
- 
-  // Calculate RZ, where MinRZ <= RZ <= MaxRZ, and RZ ~ 1/4 * SizeInBytes. 
-  uint64_t RZ = 
-      std::max(MinRZ, std::min(kMaxRZ, (SizeInBytes / MinRZ / 4) * MinRZ)); 
- 
-  // Round up to multiple of MinRZ. 
-  if (SizeInBytes % MinRZ) 
-    RZ += MinRZ - (SizeInBytes % MinRZ); 
-  assert((RZ + SizeInBytes) % MinRZ == 0); 
- 
-  return RZ; 
-} 
- 
-int ModuleAddressSanitizer::GetAsanVersion(const Module &M) const { 
-  int LongSize = M.getDataLayout().getPointerSizeInBits(); 
-  bool isAndroid = Triple(M.getTargetTriple()).isAndroid(); 
-  int Version = 8; 
-  // 32-bit Android is one version ahead because of the switch to dynamic 
-  // shadow. 
-  Version += (LongSize == 32 && isAndroid); 
-  return Version; 
-} 
- 
-bool ModuleAddressSanitizer::instrumentModule(Module &M) { 
-  initializeCallbacks(M); 
- 
-  // Create a module constructor. A destructor is created lazily because not all 
-  // platforms, and not all modules need it. 
-  if (CompileKernel) { 
-    // The kernel always builds with its own runtime, and therefore does not 
-    // need the init and version check calls. 
-    AsanCtorFunction = createSanitizerCtor(M, kAsanModuleCtorName); 
-  } else { 
-    std::string AsanVersion = std::to_string(GetAsanVersion(M)); 
-    std::string VersionCheckName = 
-        ClInsertVersionCheck ? (kAsanVersionCheckNamePrefix + AsanVersion) : ""; 
-    std::tie(AsanCtorFunction, std::ignore) = 
-        createSanitizerCtorAndInitFunctions(M, kAsanModuleCtorName, 
-                                            kAsanInitName, /*InitArgTypes=*/{}, 
-                                            /*InitArgs=*/{}, VersionCheckName); 
-  } 
- 
-  bool CtorComdat = true; 
-  if (ClGlobals) { 
-    IRBuilder<> IRB(AsanCtorFunction->getEntryBlock().getTerminator()); 
-    InstrumentGlobals(IRB, M, &CtorComdat); 
-  } 
- 
-  const uint64_t Priority = GetCtorAndDtorPriority(TargetTriple); 
- 
-  // Put the constructor and destructor in comdat if both 
-  // (1) global instrumentation is not TU-specific 
-  // (2) target is ELF. 
-  if (UseCtorComdat && TargetTriple.isOSBinFormatELF() && CtorComdat) { 
-    AsanCtorFunction->setComdat(M.getOrInsertComdat(kAsanModuleCtorName)); 
-    appendToGlobalCtors(M, AsanCtorFunction, Priority, AsanCtorFunction); 
-    if (AsanDtorFunction) { 
-      AsanDtorFunction->setComdat(M.getOrInsertComdat(kAsanModuleDtorName)); 
-      appendToGlobalDtors(M, AsanDtorFunction, Priority, AsanDtorFunction); 
-    } 
-  } else { 
-    appendToGlobalCtors(M, AsanCtorFunction, Priority); 
-    if (AsanDtorFunction) 
-      appendToGlobalDtors(M, AsanDtorFunction, Priority); 
-  } 
- 
-  return true; 
-} 
- 
-void AddressSanitizer::initializeCallbacks(Module &M) { 
-  IRBuilder<> IRB(*C); 
-  // Create __asan_report* callbacks. 
-  // IsWrite, TypeSize and Exp are encoded in the function name. 
-  for (int Exp = 0; Exp < 2; Exp++) { 
-    for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) { 
-      const std::string TypeStr = AccessIsWrite ? "store" : "load"; 
-      const std::string ExpStr = Exp ? "exp_" : ""; 
-      const std::string EndingStr = Recover ? "_noabort" : ""; 
- 
-      SmallVector<Type *, 3> Args2 = {IntptrTy, IntptrTy}; 
-      SmallVector<Type *, 2> Args1{1, IntptrTy}; 
-      if (Exp) { 
-        Type *ExpType = Type::getInt32Ty(*C); 
-        Args2.push_back(ExpType); 
-        Args1.push_back(ExpType); 
-      } 
-      AsanErrorCallbackSized[AccessIsWrite][Exp] = M.getOrInsertFunction( 
-          kAsanReportErrorTemplate + ExpStr + TypeStr + "_n" + EndingStr, 
-          FunctionType::get(IRB.getVoidTy(), Args2, false)); 
- 
-      AsanMemoryAccessCallbackSized[AccessIsWrite][Exp] = M.getOrInsertFunction( 
-          ClMemoryAccessCallbackPrefix + ExpStr + TypeStr + "N" + EndingStr, 
-          FunctionType::get(IRB.getVoidTy(), Args2, false)); 
- 
-      for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes; 
-           AccessSizeIndex++) { 
-        const std::string Suffix = TypeStr + itostr(1ULL << AccessSizeIndex); 
-        AsanErrorCallback[AccessIsWrite][Exp][AccessSizeIndex] = 
-            M.getOrInsertFunction( 
-                kAsanReportErrorTemplate + ExpStr + Suffix + EndingStr, 
-                FunctionType::get(IRB.getVoidTy(), Args1, false)); 
- 
-        AsanMemoryAccessCallback[AccessIsWrite][Exp][AccessSizeIndex] = 
-            M.getOrInsertFunction( 
-                ClMemoryAccessCallbackPrefix + ExpStr + Suffix + EndingStr, 
-                FunctionType::get(IRB.getVoidTy(), Args1, false)); 
-      } 
-    } 
-  } 
- 
-  const std::string MemIntrinCallbackPrefix = 
-      CompileKernel ? std::string("") : ClMemoryAccessCallbackPrefix; 
-  AsanMemmove = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memmove", 
-                                      IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), 
-                                      IRB.getInt8PtrTy(), IntptrTy); 
-  AsanMemcpy = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memcpy", 
-                                     IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), 
-                                     IRB.getInt8PtrTy(), IntptrTy); 
-  AsanMemset = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memset", 
-                                     IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), 
-                                     IRB.getInt32Ty(), IntptrTy); 
- 
-  AsanHandleNoReturnFunc = 
-      M.getOrInsertFunction(kAsanHandleNoReturnName, IRB.getVoidTy()); 
- 
-  AsanPtrCmpFunction = 
-      M.getOrInsertFunction(kAsanPtrCmp, IRB.getVoidTy(), IntptrTy, IntptrTy); 
-  AsanPtrSubFunction = 
-      M.getOrInsertFunction(kAsanPtrSub, IRB.getVoidTy(), IntptrTy, IntptrTy); 
-  if (Mapping.InGlobal) 
-    AsanShadowGlobal = M.getOrInsertGlobal("__asan_shadow", 
-                                           ArrayType::get(IRB.getInt8Ty(), 0)); 
-} 
- 
-bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) { 
-  // For each NSObject descendant having a +load method, this method is invoked 
-  // by the ObjC runtime before any of the static constructors is called. 
-  // Therefore we need to instrument such methods with a call to __asan_init 
-  // at the beginning in order to initialize our runtime before any access to 
-  // the shadow memory. 
-  // We cannot just ignore these methods, because they may call other 
-  // instrumented functions. 
-  if (F.getName().find(" load]") != std::string::npos) { 
-    FunctionCallee AsanInitFunction = 
-        declareSanitizerInitFunction(*F.getParent(), kAsanInitName, {}); 
-    IRBuilder<> IRB(&F.front(), F.front().begin()); 
-    IRB.CreateCall(AsanInitFunction, {}); 
-    return true; 
-  } 
-  return false; 
-} 
- 
-bool AddressSanitizer::maybeInsertDynamicShadowAtFunctionEntry(Function &F) { 
-  // Generate code only when dynamic addressing is needed. 
-  if (Mapping.Offset != kDynamicShadowSentinel) 
-    return false; 
- 
-  IRBuilder<> IRB(&F.front().front()); 
-  if (Mapping.InGlobal) { 
-    if (ClWithIfuncSuppressRemat) { 
-      // An empty inline asm with input reg == output reg. 
-      // An opaque pointer-to-int cast, basically. 
-      InlineAsm *Asm = InlineAsm::get( 
-          FunctionType::get(IntptrTy, {AsanShadowGlobal->getType()}, false), 
-          StringRef(""), StringRef("=r,0"), 
-          /*hasSideEffects=*/false); 
-      LocalDynamicShadow = 
-          IRB.CreateCall(Asm, {AsanShadowGlobal}, ".asan.shadow"); 
-    } else { 
-      LocalDynamicShadow = 
-          IRB.CreatePointerCast(AsanShadowGlobal, IntptrTy, ".asan.shadow"); 
-    } 
-  } else { 
-    Value *GlobalDynamicAddress = F.getParent()->getOrInsertGlobal( 
-        kAsanShadowMemoryDynamicAddress, IntptrTy); 
-    LocalDynamicShadow = IRB.CreateLoad(IntptrTy, GlobalDynamicAddress); 
-  } 
-  return true; 
-} 
- 
-void AddressSanitizer::markEscapedLocalAllocas(Function &F) { 
-  // Find the one possible call to llvm.localescape and pre-mark allocas passed 
-  // to it as uninteresting. This assumes we haven't started processing allocas 
-  // yet. This check is done up front because iterating the use list in 
-  // isInterestingAlloca would be algorithmically slower. 
-  assert(ProcessedAllocas.empty() && "must process localescape before allocas"); 
- 
-  // Try to get the declaration of llvm.localescape. If it's not in the module, 
-  // we can exit early. 
-  if (!F.getParent()->getFunction("llvm.localescape")) return; 
- 
-  // Look for a call to llvm.localescape call in the entry block. It can't be in 
-  // any other block. 
-  for (Instruction &I : F.getEntryBlock()) { 
-    IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I); 
-    if (II && II->getIntrinsicID() == Intrinsic::localescape) { 
-      // We found a call. Mark all the allocas passed in as uninteresting. 
-      for (Value *Arg : II->arg_operands()) { 
-        AllocaInst *AI = dyn_cast<AllocaInst>(Arg->stripPointerCasts()); 
-        assert(AI && AI->isStaticAlloca() && 
-               "non-static alloca arg to localescape"); 
-        ProcessedAllocas[AI] = false; 
-      } 
-      break; 
-    } 
-  } 
-} 
- 
-bool AddressSanitizer::suppressInstrumentationSiteForDebug(int &Instrumented) { 
-  bool ShouldInstrument = 
-      ClDebugMin < 0 || ClDebugMax < 0 || 
-      (Instrumented >= ClDebugMin && Instrumented <= ClDebugMax); 
-  Instrumented++; 
-  return !ShouldInstrument; 
-} 
- 
-bool AddressSanitizer::instrumentFunction(Function &F, 
-                                          const TargetLibraryInfo *TLI) { 
-  if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false; 
-  if (!ClDebugFunc.empty() && ClDebugFunc == F.getName()) return false; 
-  if (F.getName().startswith("__asan_")) return false; 
- 
-  bool FunctionModified = false; 
- 
-  // If needed, insert __asan_init before checking for SanitizeAddress attr. 
-  // This function needs to be called even if the function body is not 
-  // instrumented. 
-  if (maybeInsertAsanInitAtFunctionEntry(F)) 
-    FunctionModified = true; 
- 
-  // Leave if the function doesn't need instrumentation. 
-  if (!F.hasFnAttribute(Attribute::SanitizeAddress)) return FunctionModified; 
- 
-  LLVM_DEBUG(dbgs() << "ASAN instrumenting:\n" << F << "\n"); 
- 
-  initializeCallbacks(*F.getParent()); 
- 
-  FunctionStateRAII CleanupObj(this); 
- 
-  FunctionModified |= maybeInsertDynamicShadowAtFunctionEntry(F); 
- 
-  // We can't instrument allocas used with llvm.localescape. Only static allocas 
-  // can be passed to that intrinsic. 
-  markEscapedLocalAllocas(F); 
- 
-  // We want to instrument every address only once per basic block (unless there 
-  // are calls between uses). 
-  SmallPtrSet<Value *, 16> TempsToInstrument; 
-  SmallVector<InterestingMemoryOperand, 16> OperandsToInstrument; 
-  SmallVector<MemIntrinsic *, 16> IntrinToInstrument; 
-  SmallVector<Instruction *, 8> NoReturnCalls; 
-  SmallVector<BasicBlock *, 16> AllBlocks; 
-  SmallVector<Instruction *, 16> PointerComparisonsOrSubtracts; 
-  int NumAllocas = 0; 
- 
-  // Fill the set of memory operations to instrument. 
-  for (auto &BB : F) { 
-    AllBlocks.push_back(&BB); 
-    TempsToInstrument.clear(); 
-    int NumInsnsPerBB = 0; 
-    for (auto &Inst : BB) { 
-      if (LooksLikeCodeInBug11395(&Inst)) return false; 
-      SmallVector<InterestingMemoryOperand, 1> InterestingOperands; 
-      getInterestingMemoryOperands(&Inst, InterestingOperands); 
- 
-      if (!InterestingOperands.empty()) { 
-        for (auto &Operand : InterestingOperands) { 
-          if (ClOpt && ClOptSameTemp) { 
-            Value *Ptr = Operand.getPtr(); 
-            // If we have a mask, skip instrumentation if we've already 
-            // instrumented the full object. But don't add to TempsToInstrument 
-            // because we might get another load/store with a different mask. 
-            if (Operand.MaybeMask) { 
-              if (TempsToInstrument.count(Ptr)) 
-                continue; // We've seen this (whole) temp in the current BB. 
-            } else { 
-              if (!TempsToInstrument.insert(Ptr).second) 
-                continue; // We've seen this temp in the current BB. 
-            } 
-          } 
-          OperandsToInstrument.push_back(Operand); 
-          NumInsnsPerBB++; 
-        } 
-      } else if (((ClInvalidPointerPairs || ClInvalidPointerCmp) && 
-                  isInterestingPointerComparison(&Inst)) || 
-                 ((ClInvalidPointerPairs || ClInvalidPointerSub) && 
-                  isInterestingPointerSubtraction(&Inst))) { 
-        PointerComparisonsOrSubtracts.push_back(&Inst); 
-      } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&Inst)) { 
-        // ok, take it. 
-        IntrinToInstrument.push_back(MI); 
-        NumInsnsPerBB++; 
-      } else { 
-        if (isa<AllocaInst>(Inst)) NumAllocas++; 
-        if (auto *CB = dyn_cast<CallBase>(&Inst)) { 
-          // A call inside BB. 
-          TempsToInstrument.clear(); 
-          if (CB->doesNotReturn() && !CB->hasMetadata("nosanitize")) 
-            NoReturnCalls.push_back(CB); 
-        } 
-        if (CallInst *CI = dyn_cast<CallInst>(&Inst)) 
-          maybeMarkSanitizerLibraryCallNoBuiltin(CI, TLI); 
-      } 
-      if (NumInsnsPerBB >= ClMaxInsnsToInstrumentPerBB) break; 
-    } 
-  } 
- 
-  bool UseCalls = (ClInstrumentationWithCallsThreshold >= 0 && 
-                   OperandsToInstrument.size() + IntrinToInstrument.size() > 
-                       (unsigned)ClInstrumentationWithCallsThreshold); 
-  const DataLayout &DL = F.getParent()->getDataLayout(); 
-  ObjectSizeOpts ObjSizeOpts; 
-  ObjSizeOpts.RoundToAlign = true; 
-  ObjectSizeOffsetVisitor ObjSizeVis(DL, TLI, F.getContext(), ObjSizeOpts); 
- 
-  // Instrument. 
-  int NumInstrumented = 0; 
-  for (auto &Operand : OperandsToInstrument) { 
-    if (!suppressInstrumentationSiteForDebug(NumInstrumented)) 
-      instrumentMop(ObjSizeVis, Operand, UseCalls, 
-                    F.getParent()->getDataLayout()); 
-    FunctionModified = true; 
-  } 
-  for (auto Inst : IntrinToInstrument) { 
-    if (!suppressInstrumentationSiteForDebug(NumInstrumented)) 
-      instrumentMemIntrinsic(Inst); 
-    FunctionModified = true; 
-  } 
- 
-  FunctionStackPoisoner FSP(F, *this); 
-  bool ChangedStack = FSP.runOnFunction(); 
- 
-  // We must unpoison the stack before NoReturn calls (throw, _exit, etc). 
-  // See e.g. https://github.com/google/sanitizers/issues/37 
-  for (auto CI : NoReturnCalls) { 
-    IRBuilder<> IRB(CI); 
-    IRB.CreateCall(AsanHandleNoReturnFunc, {}); 
-  } 
- 
-  for (auto Inst : PointerComparisonsOrSubtracts) { 
-    instrumentPointerComparisonOrSubtraction(Inst); 
-    FunctionModified = true; 
-  } 
- 
-  if (ChangedStack || !NoReturnCalls.empty()) 
-    FunctionModified = true; 
- 
-  LLVM_DEBUG(dbgs() << "ASAN done instrumenting: " << FunctionModified << " " 
-                    << F << "\n"); 
- 
-  return FunctionModified; 
-} 
- 
-// Workaround for bug 11395: we don't want to instrument stack in functions 
-// with large assembly blobs (32-bit only), otherwise reg alloc may crash. 
-// FIXME: remove once the bug 11395 is fixed. 
-bool AddressSanitizer::LooksLikeCodeInBug11395(Instruction *I) { 
-  if (LongSize != 32) return false; 
-  CallInst *CI = dyn_cast<CallInst>(I); 
-  if (!CI || !CI->isInlineAsm()) return false; 
-  if (CI->getNumArgOperands() <= 5) return false; 
-  // We have inline assembly with quite a few arguments. 
-  return true; 
-} 
- 
-void FunctionStackPoisoner::initializeCallbacks(Module &M) { 
-  IRBuilder<> IRB(*C); 
-  for (int i = 0; i <= kMaxAsanStackMallocSizeClass; i++) { 
-    std::string Suffix = itostr(i); 
-    AsanStackMallocFunc[i] = M.getOrInsertFunction( 
-        kAsanStackMallocNameTemplate + Suffix, IntptrTy, IntptrTy); 
-    AsanStackFreeFunc[i] = 
-        M.getOrInsertFunction(kAsanStackFreeNameTemplate + Suffix, 
-                              IRB.getVoidTy(), IntptrTy, IntptrTy); 
-  } 
-  if (ASan.UseAfterScope) { 
-    AsanPoisonStackMemoryFunc = M.getOrInsertFunction( 
-        kAsanPoisonStackMemoryName, IRB.getVoidTy(), IntptrTy, IntptrTy); 
-    AsanUnpoisonStackMemoryFunc = M.getOrInsertFunction( 
-        kAsanUnpoisonStackMemoryName, IRB.getVoidTy(), IntptrTy, IntptrTy); 
-  } 
- 
-  for (size_t Val : {0x00, 0xf1, 0xf2, 0xf3, 0xf5, 0xf8}) { 
-    std::ostringstream Name; 
-    Name << kAsanSetShadowPrefix; 
-    Name << std::setw(2) << std::setfill('0') << std::hex << Val; 
-    AsanSetShadowFunc[Val] = 
-        M.getOrInsertFunction(Name.str(), IRB.getVoidTy(), IntptrTy, IntptrTy); 
-  } 
- 
-  AsanAllocaPoisonFunc = M.getOrInsertFunction( 
-      kAsanAllocaPoison, IRB.getVoidTy(), IntptrTy, IntptrTy); 
-  AsanAllocasUnpoisonFunc = M.getOrInsertFunction( 
-      kAsanAllocasUnpoison, IRB.getVoidTy(), IntptrTy, IntptrTy); 
-} 
- 
-void FunctionStackPoisoner::copyToShadowInline(ArrayRef<uint8_t> ShadowMask, 
-                                               ArrayRef<uint8_t> ShadowBytes, 
-                                               size_t Begin, size_t End, 
-                                               IRBuilder<> &IRB, 
-                                               Value *ShadowBase) { 
-  if (Begin >= End) 
-    return; 
- 
-  const size_t LargestStoreSizeInBytes = 
-      std::min<size_t>(sizeof(uint64_t), ASan.LongSize / 8); 
- 
-  const bool IsLittleEndian = F.getParent()->getDataLayout().isLittleEndian(); 
- 
-  // Poison given range in shadow using larges store size with out leading and 
-  // trailing zeros in ShadowMask. Zeros never change, so they need neither 
-  // poisoning nor up-poisoning. Still we don't mind if some of them get into a 
-  // middle of a store. 
-  for (size_t i = Begin; i < End;) { 
-    if (!ShadowMask[i]) { 
-      assert(!ShadowBytes[i]); 
-      ++i; 
-      continue; 
-    } 
- 
-    size_t StoreSizeInBytes = LargestStoreSizeInBytes; 
-    // Fit store size into the range. 
-    while (StoreSizeInBytes > End - i) 
-      StoreSizeInBytes /= 2; 
- 
-    // Minimize store size by trimming trailing zeros. 
-    for (size_t j = StoreSizeInBytes - 1; j && !ShadowMask[i + j]; --j) { 
-      while (j <= StoreSizeInBytes / 2) 
-        StoreSizeInBytes /= 2; 
-    } 
- 
-    uint64_t Val = 0; 
-    for (size_t j = 0; j < StoreSizeInBytes; j++) { 
-      if (IsLittleEndian) 
-        Val |= (uint64_t)ShadowBytes[i + j] << (8 * j); 
-      else 
-        Val = (Val << 8) | ShadowBytes[i + j]; 
-    } 
- 
-    Value *Ptr = IRB.CreateAdd(ShadowBase, ConstantInt::get(IntptrTy, i)); 
-    Value *Poison = IRB.getIntN(StoreSizeInBytes * 8, Val); 
-    IRB.CreateAlignedStore( 
-        Poison, IRB.CreateIntToPtr(Ptr, Poison->getType()->getPointerTo()), 
-        Align(1)); 
- 
-    i += StoreSizeInBytes; 
-  } 
-} 
- 
-void FunctionStackPoisoner::copyToShadow(ArrayRef<uint8_t> ShadowMask, 
-                                         ArrayRef<uint8_t> ShadowBytes, 
-                                         IRBuilder<> &IRB, Value *ShadowBase) { 
-  copyToShadow(ShadowMask, ShadowBytes, 0, ShadowMask.size(), IRB, ShadowBase); 
-} 
- 
-void FunctionStackPoisoner::copyToShadow(ArrayRef<uint8_t> ShadowMask, 
-                                         ArrayRef<uint8_t> ShadowBytes, 
-                                         size_t Begin, size_t End, 
-                                         IRBuilder<> &IRB, Value *ShadowBase) { 
-  assert(ShadowMask.size() == ShadowBytes.size()); 
-  size_t Done = Begin; 
-  for (size_t i = Begin, j = Begin + 1; i < End; i = j++) { 
-    if (!ShadowMask[i]) { 
-      assert(!ShadowBytes[i]); 
-      continue; 
-    } 
-    uint8_t Val = ShadowBytes[i]; 
-    if (!AsanSetShadowFunc[Val]) 
-      continue; 
- 
-    // Skip same values. 
-    for (; j < End && ShadowMask[j] && Val == ShadowBytes[j]; ++j) { 
-    } 
- 
-    if (j - i >= ClMaxInlinePoisoningSize) { 
-      copyToShadowInline(ShadowMask, ShadowBytes, Done, i, IRB, ShadowBase); 
-      IRB.CreateCall(AsanSetShadowFunc[Val], 
-                     {IRB.CreateAdd(ShadowBase, ConstantInt::get(IntptrTy, i)), 
-                      ConstantInt::get(IntptrTy, j - i)}); 
-      Done = j; 
-    } 
-  } 
- 
-  copyToShadowInline(ShadowMask, ShadowBytes, Done, End, IRB, ShadowBase); 
-} 
- 
-// Fake stack allocator (asan_fake_stack.h) has 11 size classes 
-// for every power of 2 from kMinStackMallocSize to kMaxAsanStackMallocSizeClass 
-static int StackMallocSizeClass(uint64_t LocalStackSize) { 
-  assert(LocalStackSize <= kMaxStackMallocSize); 
-  uint64_t MaxSize = kMinStackMallocSize; 
-  for (int i = 0;; i++, MaxSize *= 2) 
-    if (LocalStackSize <= MaxSize) return i; 
-  llvm_unreachable("impossible LocalStackSize"); 
-} 
- 
-void FunctionStackPoisoner::copyArgsPassedByValToAllocas() { 
-  Instruction *CopyInsertPoint = &F.front().front(); 
-  if (CopyInsertPoint == ASan.LocalDynamicShadow) { 
-    // Insert after the dynamic shadow location is determined 
-    CopyInsertPoint = CopyInsertPoint->getNextNode(); 
-    assert(CopyInsertPoint); 
-  } 
-  IRBuilder<> IRB(CopyInsertPoint); 
-  const DataLayout &DL = F.getParent()->getDataLayout(); 
-  for (Argument &Arg : F.args()) { 
-    if (Arg.hasByValAttr()) { 
-      Type *Ty = Arg.getParamByValType(); 
-      const Align Alignment = 
-          DL.getValueOrABITypeAlignment(Arg.getParamAlign(), Ty); 
- 
-      AllocaInst *AI = IRB.CreateAlloca( 
-          Ty, nullptr, 
-          (Arg.hasName() ? Arg.getName() : "Arg" + Twine(Arg.getArgNo())) + 
-              ".byval"); 
-      AI->setAlignment(Alignment); 
-      Arg.replaceAllUsesWith(AI); 
- 
-      uint64_t AllocSize = DL.getTypeAllocSize(Ty); 
-      IRB.CreateMemCpy(AI, Alignment, &Arg, Alignment, AllocSize); 
-    } 
-  } 
-} 
- 
-PHINode *FunctionStackPoisoner::createPHI(IRBuilder<> &IRB, Value *Cond, 
-                                          Value *ValueIfTrue, 
-                                          Instruction *ThenTerm, 
-                                          Value *ValueIfFalse) { 
-  PHINode *PHI = IRB.CreatePHI(IntptrTy, 2); 
-  BasicBlock *CondBlock = cast<Instruction>(Cond)->getParent(); 
-  PHI->addIncoming(ValueIfFalse, CondBlock); 
-  BasicBlock *ThenBlock = ThenTerm->getParent(); 
-  PHI->addIncoming(ValueIfTrue, ThenBlock); 
-  return PHI; 
-} 
- 
-Value *FunctionStackPoisoner::createAllocaForLayout( 
-    IRBuilder<> &IRB, const ASanStackFrameLayout &L, bool Dynamic) { 
-  AllocaInst *Alloca; 
-  if (Dynamic) { 
-    Alloca = IRB.CreateAlloca(IRB.getInt8Ty(), 
-                              ConstantInt::get(IRB.getInt64Ty(), L.FrameSize), 
-                              "MyAlloca"); 
-  } else { 
-    Alloca = IRB.CreateAlloca(ArrayType::get(IRB.getInt8Ty(), L.FrameSize), 
-                              nullptr, "MyAlloca"); 
-    assert(Alloca->isStaticAlloca()); 
-  } 
-  assert((ClRealignStack & (ClRealignStack - 1)) == 0); 
-  size_t FrameAlignment = std::max(L.FrameAlignment, (size_t)ClRealignStack); 
-  Alloca->setAlignment(Align(FrameAlignment)); 
-  return IRB.CreatePointerCast(Alloca, IntptrTy); 
-} 
- 
-void FunctionStackPoisoner::createDynamicAllocasInitStorage() { 
-  BasicBlock &FirstBB = *F.begin(); 
-  IRBuilder<> IRB(dyn_cast<Instruction>(FirstBB.begin())); 
-  DynamicAllocaLayout = IRB.CreateAlloca(IntptrTy, nullptr); 
-  IRB.CreateStore(Constant::getNullValue(IntptrTy), DynamicAllocaLayout); 
-  DynamicAllocaLayout->setAlignment(Align(32)); 
-} 
- 
-void FunctionStackPoisoner::processDynamicAllocas() { 
-  if (!ClInstrumentDynamicAllocas || DynamicAllocaVec.empty()) { 
-    assert(DynamicAllocaPoisonCallVec.empty()); 
-    return; 
-  } 
- 
-  // Insert poison calls for lifetime intrinsics for dynamic allocas. 
-  for (const auto &APC : DynamicAllocaPoisonCallVec) { 
-    assert(APC.InsBefore); 
-    assert(APC.AI); 
-    assert(ASan.isInterestingAlloca(*APC.AI)); 
-    assert(!APC.AI->isStaticAlloca()); 
- 
-    IRBuilder<> IRB(APC.InsBefore); 
-    poisonAlloca(APC.AI, APC.Size, IRB, APC.DoPoison); 
-    // Dynamic allocas will be unpoisoned unconditionally below in 
-    // unpoisonDynamicAllocas. 
-    // Flag that we need unpoison static allocas. 
-  } 
- 
-  // Handle dynamic allocas. 
-  createDynamicAllocasInitStorage(); 
-  for (auto &AI : DynamicAllocaVec) 
-    handleDynamicAllocaCall(AI); 
-  unpoisonDynamicAllocas(); 
-} 
- 
-/// Collect instructions in the entry block after \p InsBefore which initialize 
-/// permanent storage for a function argument. These instructions must remain in 
-/// the entry block so that uninitialized values do not appear in backtraces. An 
-/// added benefit is that this conserves spill slots. This does not move stores 
-/// before instrumented / "interesting" allocas. 
-static void findStoresToUninstrumentedArgAllocas( 
-    AddressSanitizer &ASan, Instruction &InsBefore, 
-    SmallVectorImpl<Instruction *> &InitInsts) { 
-  Instruction *Start = InsBefore.getNextNonDebugInstruction(); 
-  for (Instruction *It = Start; It; It = It->getNextNonDebugInstruction()) { 
-    // Argument initialization looks like: 
-    // 1) store <Argument>, <Alloca> OR 
-    // 2) <CastArgument> = cast <Argument> to ... 
-    //    store <CastArgument> to <Alloca> 
-    // Do not consider any other kind of instruction. 
-    // 
-    // Note: This covers all known cases, but may not be exhaustive. An 
-    // alternative to pattern-matching stores is to DFS over all Argument uses: 
-    // this might be more general, but is probably much more complicated. 
-    if (isa<AllocaInst>(It) || isa<CastInst>(It)) 
-      continue; 
-    if (auto *Store = dyn_cast<StoreInst>(It)) { 
-      // The store destination must be an alloca that isn't interesting for 
-      // ASan to instrument. These are moved up before InsBefore, and they're 
-      // not interesting because allocas for arguments can be mem2reg'd. 
-      auto *Alloca = dyn_cast<AllocaInst>(Store->getPointerOperand()); 
-      if (!Alloca || ASan.isInterestingAlloca(*Alloca)) 
-        continue; 
- 
-      Value *Val = Store->getValueOperand(); 
-      bool IsDirectArgInit = isa<Argument>(Val); 
-      bool IsArgInitViaCast = 
-          isa<CastInst>(Val) && 
-          isa<Argument>(cast<CastInst>(Val)->getOperand(0)) && 
-          // Check that the cast appears directly before the store. Otherwise 
-          // moving the cast before InsBefore may break the IR. 
-          Val == It->getPrevNonDebugInstruction(); 
-      bool IsArgInit = IsDirectArgInit || IsArgInitViaCast; 
-      if (!IsArgInit) 
-        continue; 
- 
-      if (IsArgInitViaCast) 
-        InitInsts.push_back(cast<Instruction>(Val)); 
-      InitInsts.push_back(Store); 
-      continue; 
-    } 
- 
-    // Do not reorder past unknown instructions: argument initialization should 
-    // only involve casts and stores. 
-    return; 
-  } 
-} 
- 
-void FunctionStackPoisoner::processStaticAllocas() { 
-  if (AllocaVec.empty()) { 
-    assert(StaticAllocaPoisonCallVec.empty()); 
-    return; 
-  } 
- 
-  int StackMallocIdx = -1; 
-  DebugLoc EntryDebugLocation; 
-  if (auto SP = F.getSubprogram()) 
+
+    Value *Indices2[2];
+    Indices2[0] = IRB.getInt32(0);
+    Indices2[1] = IRB.getInt32(0);
+
+    G->replaceAllUsesWith(
+        ConstantExpr::getGetElementPtr(NewTy, NewGlobal, Indices2, true));
+    NewGlobal->takeName(G);
+    G->eraseFromParent();
+    NewGlobals[i] = NewGlobal;
+
+    Constant *SourceLoc;
+    if (!MD.SourceLoc.empty()) {
+      auto SourceLocGlobal = createPrivateGlobalForSourceLoc(M, MD.SourceLoc);
+      SourceLoc = ConstantExpr::getPointerCast(SourceLocGlobal, IntptrTy);
+    } else {
+      SourceLoc = ConstantInt::get(IntptrTy, 0);
+    }
+
+    Constant *ODRIndicator = ConstantExpr::getNullValue(IRB.getInt8PtrTy());
+    GlobalValue *InstrumentedGlobal = NewGlobal;
+
+    bool CanUsePrivateAliases =
+        TargetTriple.isOSBinFormatELF() || TargetTriple.isOSBinFormatMachO() ||
+        TargetTriple.isOSBinFormatWasm();
+    if (CanUsePrivateAliases && UsePrivateAlias) {
+      // Create local alias for NewGlobal to avoid crash on ODR between
+      // instrumented and non-instrumented libraries.
+      InstrumentedGlobal =
+          GlobalAlias::create(GlobalValue::PrivateLinkage, "", NewGlobal);
+    }
+
+    // ODR should not happen for local linkage.
+    if (NewGlobal->hasLocalLinkage()) {
+      ODRIndicator = ConstantExpr::getIntToPtr(ConstantInt::get(IntptrTy, -1),
+                                               IRB.getInt8PtrTy());
+    } else if (UseOdrIndicator) {
+      // With local aliases, we need to provide another externally visible
+      // symbol __odr_asan_XXX to detect ODR violation.
+      auto *ODRIndicatorSym =
+          new GlobalVariable(M, IRB.getInt8Ty(), false, Linkage,
+                             Constant::getNullValue(IRB.getInt8Ty()),
+                             kODRGenPrefix + NameForGlobal, nullptr,
+                             NewGlobal->getThreadLocalMode());
+
+      // Set meaningful attributes for indicator symbol.
+      ODRIndicatorSym->setVisibility(NewGlobal->getVisibility());
+      ODRIndicatorSym->setDLLStorageClass(NewGlobal->getDLLStorageClass());
+      ODRIndicatorSym->setAlignment(Align(1));
+      ODRIndicator = ODRIndicatorSym;
+    }
+
+    Constant *Initializer = ConstantStruct::get(
+        GlobalStructTy,
+        ConstantExpr::getPointerCast(InstrumentedGlobal, IntptrTy),
+        ConstantInt::get(IntptrTy, SizeInBytes),
+        ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize),
+        ConstantExpr::getPointerCast(Name, IntptrTy),
+        ConstantExpr::getPointerCast(ModuleName, IntptrTy),
+        ConstantInt::get(IntptrTy, MD.IsDynInit), SourceLoc,
+        ConstantExpr::getPointerCast(ODRIndicator, IntptrTy));
+
+    if (ClInitializers && MD.IsDynInit) HasDynamicallyInitializedGlobals = true;
+
+    LLVM_DEBUG(dbgs() << "NEW GLOBAL: " << *NewGlobal << "\n");
+
+    Initializers[i] = Initializer;
+  }
+
+  // Add instrumented globals to the llvm.compiler.used list to prevent LTO
+  // from ConstantMerge'ing them.
+  SmallVector<GlobalValue *, 16> GlobalsToAddToUsedList;
+  for (size_t i = 0; i < n; i++) {
+    GlobalVariable *G = NewGlobals[i];
+    if (G->getName().empty()) continue;
+    GlobalsToAddToUsedList.push_back(G);
+  }
+  appendToCompilerUsed(M, ArrayRef<GlobalValue *>(GlobalsToAddToUsedList));
+
+  std::string ELFUniqueModuleId =
+      (UseGlobalsGC && TargetTriple.isOSBinFormatELF()) ? getUniqueModuleId(&M)
+                                                        : "";
+
+  if (!ELFUniqueModuleId.empty()) {
+    InstrumentGlobalsELF(IRB, M, NewGlobals, Initializers, ELFUniqueModuleId);
+    *CtorComdat = true;
+  } else if (UseGlobalsGC && TargetTriple.isOSBinFormatCOFF()) {
+    InstrumentGlobalsCOFF(IRB, M, NewGlobals, Initializers);
+  } else if (UseGlobalsGC && ShouldUseMachOGlobalsSection()) {
+    InstrumentGlobalsMachO(IRB, M, NewGlobals, Initializers);
+  } else {
+    InstrumentGlobalsWithMetadataArray(IRB, M, NewGlobals, Initializers);
+  }
+
+  // Create calls for poisoning before initializers run and unpoisoning after.
+  if (HasDynamicallyInitializedGlobals)
+    createInitializerPoisonCalls(M, ModuleName);
+
+  LLVM_DEBUG(dbgs() << M);
+  return true;
+}
+
+uint64_t
+ModuleAddressSanitizer::getRedzoneSizeForGlobal(uint64_t SizeInBytes) const {
+  constexpr uint64_t kMaxRZ = 1 << 18;
+  const uint64_t MinRZ = getMinRedzoneSizeForGlobal();
+
+  // Calculate RZ, where MinRZ <= RZ <= kMaxRZ, and RZ ~ 1/4 * SizeInBytes.
+  uint64_t RZ =
+      std::max(MinRZ, std::min(kMaxRZ, (SizeInBytes / MinRZ / 4) * MinRZ));
+
+  // Pad RZ so that SizeInBytes + RZ is a multiple of MinRZ.
+  if (SizeInBytes % MinRZ)
+    RZ += MinRZ - (SizeInBytes % MinRZ);
+  assert((RZ + SizeInBytes) % MinRZ == 0);
+
+  return RZ;
+}
+
+int ModuleAddressSanitizer::GetAsanVersion(const Module &M) const {
+  int LongSize = M.getDataLayout().getPointerSizeInBits();
+  bool isAndroid = Triple(M.getTargetTriple()).isAndroid();
+  int Version = 8;
+  // 32-bit Android is one version ahead because of the switch to dynamic
+  // shadow.
+  Version += (LongSize == 32 && isAndroid);
+  return Version;
+}
+
+bool ModuleAddressSanitizer::instrumentModule(Module &M) {
+  initializeCallbacks(M);
+
+  // Create a module constructor. A destructor is created lazily because not all
+  // platforms, and not all modules need it.
+  if (CompileKernel) {
+    // The kernel always builds with its own runtime, and therefore does not
+    // need the init and version check calls.
+    AsanCtorFunction = createSanitizerCtor(M, kAsanModuleCtorName);
+  } else {
+    std::string AsanVersion = std::to_string(GetAsanVersion(M));
+    std::string VersionCheckName =
+        ClInsertVersionCheck ? (kAsanVersionCheckNamePrefix + AsanVersion) : "";
+    std::tie(AsanCtorFunction, std::ignore) =
+        createSanitizerCtorAndInitFunctions(M, kAsanModuleCtorName,
+                                            kAsanInitName, /*InitArgTypes=*/{},
+                                            /*InitArgs=*/{}, VersionCheckName);
+  }
+
+  bool CtorComdat = true;
+  if (ClGlobals) {
+    IRBuilder<> IRB(AsanCtorFunction->getEntryBlock().getTerminator());
+    InstrumentGlobals(IRB, M, &CtorComdat);
+  }
+
+  const uint64_t Priority = GetCtorAndDtorPriority(TargetTriple);
+
+  // Put the constructor and destructor in comdat if both
+  // (1) global instrumentation is not TU-specific
+  // (2) target is ELF.
+  if (UseCtorComdat && TargetTriple.isOSBinFormatELF() && CtorComdat) {
+    AsanCtorFunction->setComdat(M.getOrInsertComdat(kAsanModuleCtorName));
+    appendToGlobalCtors(M, AsanCtorFunction, Priority, AsanCtorFunction);
+    if (AsanDtorFunction) {
+      AsanDtorFunction->setComdat(M.getOrInsertComdat(kAsanModuleDtorName));
+      appendToGlobalDtors(M, AsanDtorFunction, Priority, AsanDtorFunction);
+    }
+  } else {
+    appendToGlobalCtors(M, AsanCtorFunction, Priority);
+    if (AsanDtorFunction)
+      appendToGlobalDtors(M, AsanDtorFunction, Priority);
+  }
+
+  return true;
+}
+
+void AddressSanitizer::initializeCallbacks(Module &M) {
+  IRBuilder<> IRB(*C);
+  // Create __asan_report* callbacks.
+  // IsWrite, TypeSize and Exp are encoded in the function name.
+  for (int Exp = 0; Exp < 2; Exp++) {
+    for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
+      const std::string TypeStr = AccessIsWrite ? "store" : "load";
+      const std::string ExpStr = Exp ? "exp_" : "";
+      const std::string EndingStr = Recover ? "_noabort" : "";
+
+      SmallVector<Type *, 3> Args2 = {IntptrTy, IntptrTy};
+      SmallVector<Type *, 2> Args1{1, IntptrTy};
+      if (Exp) {
+        Type *ExpType = Type::getInt32Ty(*C);
+        Args2.push_back(ExpType);
+        Args1.push_back(ExpType);
+      }
+      AsanErrorCallbackSized[AccessIsWrite][Exp] = M.getOrInsertFunction(
+          kAsanReportErrorTemplate + ExpStr + TypeStr + "_n" + EndingStr,
+          FunctionType::get(IRB.getVoidTy(), Args2, false));
+
+      AsanMemoryAccessCallbackSized[AccessIsWrite][Exp] = M.getOrInsertFunction(
+          ClMemoryAccessCallbackPrefix + ExpStr + TypeStr + "N" + EndingStr,
+          FunctionType::get(IRB.getVoidTy(), Args2, false));
+
+      for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
+           AccessSizeIndex++) {
+        const std::string Suffix = TypeStr + itostr(1ULL << AccessSizeIndex);
+        AsanErrorCallback[AccessIsWrite][Exp][AccessSizeIndex] =
+            M.getOrInsertFunction(
+                kAsanReportErrorTemplate + ExpStr + Suffix + EndingStr,
+                FunctionType::get(IRB.getVoidTy(), Args1, false));
+
+        AsanMemoryAccessCallback[AccessIsWrite][Exp][AccessSizeIndex] =
+            M.getOrInsertFunction(
+                ClMemoryAccessCallbackPrefix + ExpStr + Suffix + EndingStr,
+                FunctionType::get(IRB.getVoidTy(), Args1, false));
+      }
+    }
+  }
+
+  const std::string MemIntrinCallbackPrefix =
+      CompileKernel ? std::string("") : ClMemoryAccessCallbackPrefix;
+  AsanMemmove = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memmove",
+                                      IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+                                      IRB.getInt8PtrTy(), IntptrTy);
+  AsanMemcpy = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memcpy",
+                                     IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+                                     IRB.getInt8PtrTy(), IntptrTy);
+  AsanMemset = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memset",
+                                     IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+                                     IRB.getInt32Ty(), IntptrTy);
+
+  AsanHandleNoReturnFunc =
+      M.getOrInsertFunction(kAsanHandleNoReturnName, IRB.getVoidTy());
+
+  AsanPtrCmpFunction =
+      M.getOrInsertFunction(kAsanPtrCmp, IRB.getVoidTy(), IntptrTy, IntptrTy);
+  AsanPtrSubFunction =
+      M.getOrInsertFunction(kAsanPtrSub, IRB.getVoidTy(), IntptrTy, IntptrTy);
+  if (Mapping.InGlobal)
+    AsanShadowGlobal = M.getOrInsertGlobal("__asan_shadow",
+                                           ArrayType::get(IRB.getInt8Ty(), 0));
+}
+
+bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
+  // For each NSObject descendant having a +load method, this method is invoked
+  // by the ObjC runtime before any of the static constructors is called.
+  // Therefore we need to instrument such methods with a call to __asan_init
+  // at the beginning in order to initialize our runtime before any access to
+  // the shadow memory.
+  // We cannot just ignore these methods, because they may call other
+  // instrumented functions.
+  if (F.getName().find(" load]") != std::string::npos) {
+    FunctionCallee AsanInitFunction =
+        declareSanitizerInitFunction(*F.getParent(), kAsanInitName, {});
+    IRBuilder<> IRB(&F.front(), F.front().begin());
+    IRB.CreateCall(AsanInitFunction, {});
+    return true;
+  }
+  return false;
+}
+
+bool AddressSanitizer::maybeInsertDynamicShadowAtFunctionEntry(Function &F) {
+  // Generate code only when dynamic addressing is needed.
+  if (Mapping.Offset != kDynamicShadowSentinel)
+    return false;
+
+  IRBuilder<> IRB(&F.front().front());
+  if (Mapping.InGlobal) {
+    if (ClWithIfuncSuppressRemat) {
+      // An empty inline asm with input reg == output reg.
+      // An opaque pointer-to-int cast, basically.
+      InlineAsm *Asm = InlineAsm::get(
+          FunctionType::get(IntptrTy, {AsanShadowGlobal->getType()}, false),
+          StringRef(""), StringRef("=r,0"),
+          /*hasSideEffects=*/false);
+      LocalDynamicShadow =
+          IRB.CreateCall(Asm, {AsanShadowGlobal}, ".asan.shadow");
+    } else {
+      LocalDynamicShadow =
+          IRB.CreatePointerCast(AsanShadowGlobal, IntptrTy, ".asan.shadow");
+    }
+  } else {
+    Value *GlobalDynamicAddress = F.getParent()->getOrInsertGlobal(
+        kAsanShadowMemoryDynamicAddress, IntptrTy);
+    LocalDynamicShadow = IRB.CreateLoad(IntptrTy, GlobalDynamicAddress);
+  }
+  return true;
+}
+
+void AddressSanitizer::markEscapedLocalAllocas(Function &F) {
+  // Find the one possible call to llvm.localescape and pre-mark allocas passed
+  // to it as uninteresting. This assumes we haven't started processing allocas
+  // yet. This check is done up front because iterating the use list in
+  // isInterestingAlloca would be algorithmically slower.
+  assert(ProcessedAllocas.empty() && "must process localescape before allocas");
+
+  // Try to get the declaration of llvm.localescape. If it's not in the module,
+  // we can exit early.
+  if (!F.getParent()->getFunction("llvm.localescape")) return;
+
+  // Look for a call to llvm.localescape in the entry block. It can't be in any
+  // other block.
+  for (Instruction &I : F.getEntryBlock()) {
+    IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
+    if (II && II->getIntrinsicID() == Intrinsic::localescape) {
+      // We found a call. Mark all the allocas passed in as uninteresting.
+      for (Value *Arg : II->arg_operands()) {
+        AllocaInst *AI = dyn_cast<AllocaInst>(Arg->stripPointerCasts());
+        assert(AI && AI->isStaticAlloca() &&
+               "non-static alloca arg to localescape");
+        ProcessedAllocas[AI] = false;
+      }
+      break;
+    }
+  }
+}
+
+bool AddressSanitizer::suppressInstrumentationSiteForDebug(int &Instrumented) {
+  bool ShouldInstrument =
+      ClDebugMin < 0 || ClDebugMax < 0 ||
+      (Instrumented >= ClDebugMin && Instrumented <= ClDebugMax);
+  Instrumented++;
+  return !ShouldInstrument;
+}
+
+bool AddressSanitizer::instrumentFunction(Function &F,
+                                          const TargetLibraryInfo *TLI) {
+  if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false;
+  if (!ClDebugFunc.empty() && ClDebugFunc == F.getName()) return false;
+  if (F.getName().startswith("__asan_")) return false;
+
+  bool FunctionModified = false;
+
+  // If needed, insert __asan_init before checking for SanitizeAddress attr.
+  // This function needs to be called even if the function body is not
+  // instrumented.
+  if (maybeInsertAsanInitAtFunctionEntry(F))
+    FunctionModified = true;
+
+  // Leave if the function doesn't need instrumentation.
+  if (!F.hasFnAttribute(Attribute::SanitizeAddress)) return FunctionModified;
+
+  LLVM_DEBUG(dbgs() << "ASAN instrumenting:\n" << F << "\n");
+
+  initializeCallbacks(*F.getParent());
+
+  FunctionStateRAII CleanupObj(this);
+
+  FunctionModified |= maybeInsertDynamicShadowAtFunctionEntry(F);
+
+  // We can't instrument allocas used with llvm.localescape. Only static allocas
+  // can be passed to that intrinsic.
+  markEscapedLocalAllocas(F);
+
+  // We want to instrument every address only once per basic block (unless there
+  // are calls between uses).
+  SmallPtrSet<Value *, 16> TempsToInstrument;
+  SmallVector<InterestingMemoryOperand, 16> OperandsToInstrument;
+  SmallVector<MemIntrinsic *, 16> IntrinToInstrument;
+  SmallVector<Instruction *, 8> NoReturnCalls;
+  SmallVector<BasicBlock *, 16> AllBlocks;
+  SmallVector<Instruction *, 16> PointerComparisonsOrSubtracts;
+  int NumAllocas = 0;
+
+  // Fill the set of memory operations to instrument.
+  for (auto &BB : F) {
+    AllBlocks.push_back(&BB);
+    TempsToInstrument.clear();
+    int NumInsnsPerBB = 0;
+    for (auto &Inst : BB) {
+      if (LooksLikeCodeInBug11395(&Inst)) return false;
+      SmallVector<InterestingMemoryOperand, 1> InterestingOperands;
+      getInterestingMemoryOperands(&Inst, InterestingOperands);
+
+      if (!InterestingOperands.empty()) {
+        for (auto &Operand : InterestingOperands) {
+          if (ClOpt && ClOptSameTemp) {
+            Value *Ptr = Operand.getPtr();
+            // If we have a mask, skip instrumentation if we've already
+            // instrumented the full object. But don't add to TempsToInstrument
+            // because we might get another load/store with a different mask.
+            if (Operand.MaybeMask) {
+              if (TempsToInstrument.count(Ptr))
+                continue; // We've seen this (whole) temp in the current BB.
+            } else {
+              if (!TempsToInstrument.insert(Ptr).second)
+                continue; // We've seen this temp in the current BB.
+            }
+          }
+          OperandsToInstrument.push_back(Operand);
+          NumInsnsPerBB++;
+        }
+      } else if (((ClInvalidPointerPairs || ClInvalidPointerCmp) &&
+                  isInterestingPointerComparison(&Inst)) ||
+                 ((ClInvalidPointerPairs || ClInvalidPointerSub) &&
+                  isInterestingPointerSubtraction(&Inst))) {
+        PointerComparisonsOrSubtracts.push_back(&Inst);
+      } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&Inst)) {
+        // ok, take it.
+        IntrinToInstrument.push_back(MI);
+        NumInsnsPerBB++;
+      } else {
+        if (isa<AllocaInst>(Inst)) NumAllocas++;
+        if (auto *CB = dyn_cast<CallBase>(&Inst)) {
+          // A call inside BB.
+          TempsToInstrument.clear();
+          if (CB->doesNotReturn() && !CB->hasMetadata("nosanitize"))
+            NoReturnCalls.push_back(CB);
+        }
+        if (CallInst *CI = dyn_cast<CallInst>(&Inst))
+          maybeMarkSanitizerLibraryCallNoBuiltin(CI, TLI);
+      }
+      if (NumInsnsPerBB >= ClMaxInsnsToInstrumentPerBB) break;
+    }
+  }
+
+  bool UseCalls = (ClInstrumentationWithCallsThreshold >= 0 &&
+                   OperandsToInstrument.size() + IntrinToInstrument.size() >
+                       (unsigned)ClInstrumentationWithCallsThreshold);
+  const DataLayout &DL = F.getParent()->getDataLayout();
+  ObjectSizeOpts ObjSizeOpts;
+  ObjSizeOpts.RoundToAlign = true;
+  ObjectSizeOffsetVisitor ObjSizeVis(DL, TLI, F.getContext(), ObjSizeOpts);
+
+  // Instrument.
+  int NumInstrumented = 0;
+  for (auto &Operand : OperandsToInstrument) {
+    if (!suppressInstrumentationSiteForDebug(NumInstrumented))
+      instrumentMop(ObjSizeVis, Operand, UseCalls,
+                    F.getParent()->getDataLayout());
+    FunctionModified = true;
+  }
+  for (auto Inst : IntrinToInstrument) {
+    if (!suppressInstrumentationSiteForDebug(NumInstrumented))
+      instrumentMemIntrinsic(Inst);
+    FunctionModified = true;
+  }
+
+  FunctionStackPoisoner FSP(F, *this);
+  bool ChangedStack = FSP.runOnFunction();
+
+  // We must unpoison the stack before NoReturn calls (throw, _exit, etc).
+  // See e.g. https://github.com/google/sanitizers/issues/37
+  for (auto CI : NoReturnCalls) {
+    IRBuilder<> IRB(CI);
+    IRB.CreateCall(AsanHandleNoReturnFunc, {});
+  }
+
+  for (auto Inst : PointerComparisonsOrSubtracts) {
+    instrumentPointerComparisonOrSubtraction(Inst);
+    FunctionModified = true;
+  }
+
+  if (ChangedStack || !NoReturnCalls.empty())
+    FunctionModified = true;
+
+  LLVM_DEBUG(dbgs() << "ASAN done instrumenting: " << FunctionModified << " "
+                    << F << "\n");
+
+  return FunctionModified;
+}
+
+// Workaround for bug 11395: we don't want to instrument stack in functions
+// with large assembly blobs (32-bit only), otherwise reg alloc may crash.
+// FIXME: remove once the bug 11395 is fixed.
+bool AddressSanitizer::LooksLikeCodeInBug11395(Instruction *I) {
+  if (LongSize != 32) return false;
+  CallInst *CI = dyn_cast<CallInst>(I);
+  if (!CI || !CI->isInlineAsm()) return false;
+  if (CI->getNumArgOperands() <= 5) return false;
+  // We have inline assembly with quite a few arguments.
+  return true;
+}
+
+void FunctionStackPoisoner::initializeCallbacks(Module &M) {
+  IRBuilder<> IRB(*C);
+  for (int i = 0; i <= kMaxAsanStackMallocSizeClass; i++) {
+    std::string Suffix = itostr(i);
+    AsanStackMallocFunc[i] = M.getOrInsertFunction(
+        kAsanStackMallocNameTemplate + Suffix, IntptrTy, IntptrTy);
+    AsanStackFreeFunc[i] =
+        M.getOrInsertFunction(kAsanStackFreeNameTemplate + Suffix,
+                              IRB.getVoidTy(), IntptrTy, IntptrTy);
+  }
+  if (ASan.UseAfterScope) {
+    AsanPoisonStackMemoryFunc = M.getOrInsertFunction(
+        kAsanPoisonStackMemoryName, IRB.getVoidTy(), IntptrTy, IntptrTy);
+    AsanUnpoisonStackMemoryFunc = M.getOrInsertFunction(
+        kAsanUnpoisonStackMemoryName, IRB.getVoidTy(), IntptrTy, IntptrTy);
+  }
+
+  for (size_t Val : {0x00, 0xf1, 0xf2, 0xf3, 0xf5, 0xf8}) {
+    std::ostringstream Name;
+    Name << kAsanSetShadowPrefix;
+    Name << std::setw(2) << std::setfill('0') << std::hex << Val;
+    AsanSetShadowFunc[Val] =
+        M.getOrInsertFunction(Name.str(), IRB.getVoidTy(), IntptrTy, IntptrTy);
+  }
+
+  AsanAllocaPoisonFunc = M.getOrInsertFunction(
+      kAsanAllocaPoison, IRB.getVoidTy(), IntptrTy, IntptrTy);
+  AsanAllocasUnpoisonFunc = M.getOrInsertFunction(
+      kAsanAllocasUnpoison, IRB.getVoidTy(), IntptrTy, IntptrTy);
+}
+
+void FunctionStackPoisoner::copyToShadowInline(ArrayRef<uint8_t> ShadowMask,
+                                               ArrayRef<uint8_t> ShadowBytes,
+                                               size_t Begin, size_t End,
+                                               IRBuilder<> &IRB,
+                                               Value *ShadowBase) {
+  if (Begin >= End)
+    return;
+
+  const size_t LargestStoreSizeInBytes =
+      std::min<size_t>(sizeof(uint64_t), ASan.LongSize / 8);
+
+  const bool IsLittleEndian = F.getParent()->getDataLayout().isLittleEndian();
+
+  // Poison the given range in shadow using the largest store size without
+  // leading and trailing zeros in ShadowMask. Zeros never change, so they need
+  // neither poisoning nor unpoisoning. Still, we don't mind if some of them end
+  // up in the middle of a store.
+  for (size_t i = Begin; i < End;) {
+    if (!ShadowMask[i]) {
+      assert(!ShadowBytes[i]);
+      ++i;
+      continue;
+    }
+
+    size_t StoreSizeInBytes = LargestStoreSizeInBytes;
+    // Fit store size into the range.
+    while (StoreSizeInBytes > End - i)
+      StoreSizeInBytes /= 2;
+
+    // Minimize store size by trimming trailing zeros.
+    for (size_t j = StoreSizeInBytes - 1; j && !ShadowMask[i + j]; --j) {
+      while (j <= StoreSizeInBytes / 2)
+        StoreSizeInBytes /= 2;
+    }
+
+    uint64_t Val = 0;
+    for (size_t j = 0; j < StoreSizeInBytes; j++) {
+      if (IsLittleEndian)
+        Val |= (uint64_t)ShadowBytes[i + j] << (8 * j);
+      else
+        Val = (Val << 8) | ShadowBytes[i + j];
+    }
+
+    Value *Ptr = IRB.CreateAdd(ShadowBase, ConstantInt::get(IntptrTy, i));
+    Value *Poison = IRB.getIntN(StoreSizeInBytes * 8, Val);
+    IRB.CreateAlignedStore(
+        Poison, IRB.CreateIntToPtr(Ptr, Poison->getType()->getPointerTo()),
+        Align(1));
+
+    i += StoreSizeInBytes;
+  }
+}
+
+void FunctionStackPoisoner::copyToShadow(ArrayRef<uint8_t> ShadowMask,
+                                         ArrayRef<uint8_t> ShadowBytes,
+                                         IRBuilder<> &IRB, Value *ShadowBase) {
+  copyToShadow(ShadowMask, ShadowBytes, 0, ShadowMask.size(), IRB, ShadowBase);
+}
+
+void FunctionStackPoisoner::copyToShadow(ArrayRef<uint8_t> ShadowMask,
+                                         ArrayRef<uint8_t> ShadowBytes,
+                                         size_t Begin, size_t End,
+                                         IRBuilder<> &IRB, Value *ShadowBase) {
+  assert(ShadowMask.size() == ShadowBytes.size());
+  size_t Done = Begin; // Everything in [Begin, Done) is already emitted.
+  for (size_t i = Begin, j = Begin + 1; i < End; i = j++) {
+    if (!ShadowMask[i]) {
+      assert(!ShadowBytes[i]);
+      continue; // Masked-out byte; no call is started here.
+    }
+    uint8_t Val = ShadowBytes[i];
+    if (!AsanSetShadowFunc[Val])
+      continue; // No set-shadow function for Val; left to the inline path.
+
+    // Grow [i, j) over following masked bytes that carry the same value.
+    for (; j < End && ShadowMask[j] && Val == ShadowBytes[j]; ++j) {
+    }
+
+    if (j - i >= ClMaxInlinePoisoningSize) { // Run is long enough for a call.
+      copyToShadowInline(ShadowMask, ShadowBytes, Done, i, IRB, ShadowBase);
+      IRB.CreateCall(AsanSetShadowFunc[Val],
+                     {IRB.CreateAdd(ShadowBase, ConstantInt::get(IntptrTy, i)),
+                      ConstantInt::get(IntptrTy, j - i)});
+      Done = j; // [Done, i) was stored inline, [i, j) by the call.
+    }
+  }
+
+  copyToShadowInline(ShadowMask, ShadowBytes, Done, End, IRB, ShadowBase);
+}
+
+// Fake stack allocator (asan_fake_stack.h) has 11 size classes, one per power
+// of 2 starting at kMinStackMallocSize; returns the class that fits the size.
+static int StackMallocSizeClass(uint64_t LocalStackSize) {
+  assert(LocalStackSize <= kMaxStackMallocSize);
+  uint64_t MaxSize = kMinStackMallocSize;
+  for (int i = 0;; i++, MaxSize *= 2) // MaxSize == kMinStackMallocSize << i.
+    if (LocalStackSize <= MaxSize) return i;
+  llvm_unreachable("impossible LocalStackSize"); // Loop only exits by return.
+}
+
+void FunctionStackPoisoner::copyArgsPassedByValToAllocas() {
+  Instruction *CopyInsertPoint = &F.front().front(); // First instruction of F.
+  if (CopyInsertPoint == ASan.LocalDynamicShadow) {
+    // Insert after the dynamic shadow location has been determined.
+    CopyInsertPoint = CopyInsertPoint->getNextNode();
+    assert(CopyInsertPoint);
+  }
+  IRBuilder<> IRB(CopyInsertPoint);
+  const DataLayout &DL = F.getParent()->getDataLayout();
+  for (Argument &Arg : F.args()) {
+    if (Arg.hasByValAttr()) { // Only byval arguments are copied.
+      Type *Ty = Arg.getParamByValType();
+      const Align Alignment =
+          DL.getValueOrABITypeAlignment(Arg.getParamAlign(), Ty);
+
+      AllocaInst *AI = IRB.CreateAlloca(
+          Ty, nullptr,
+          (Arg.hasName() ? Arg.getName() : "Arg" + Twine(Arg.getArgNo())) +
+              ".byval"); // Named "<arg>.byval" or "Arg<N>.byval".
+      AI->setAlignment(Alignment);
+      Arg.replaceAllUsesWith(AI); // Existing uses only; the memcpy reads Arg.
+
+      uint64_t AllocSize = DL.getTypeAllocSize(Ty);
+      IRB.CreateMemCpy(AI, Alignment, &Arg, Alignment, AllocSize);
+    }
+  }
+}
+
+PHINode *FunctionStackPoisoner::createPHI(IRBuilder<> &IRB, Value *Cond,
+                                          Value *ValueIfTrue,
+                                          Instruction *ThenTerm,
+                                          Value *ValueIfFalse) {
+  PHINode *PHI = IRB.CreatePHI(IntptrTy, 2); // Two incoming edges reserved.
+  BasicBlock *CondBlock = cast<Instruction>(Cond)->getParent();
+  PHI->addIncoming(ValueIfFalse, CondBlock); // From the block computing Cond.
+  BasicBlock *ThenBlock = ThenTerm->getParent();
+  PHI->addIncoming(ValueIfTrue, ThenBlock); // From the block ending in ThenTerm.
+  return PHI;
+}
+
+Value *FunctionStackPoisoner::createAllocaForLayout(
+    IRBuilder<> &IRB, const ASanStackFrameLayout &L, bool Dynamic) {
+  AllocaInst *Alloca;
+  if (Dynamic) { // i8 alloca with FrameSize passed as the array-size operand.
+    Alloca = IRB.CreateAlloca(IRB.getInt8Ty(),
+                              ConstantInt::get(IRB.getInt64Ty(), L.FrameSize),
+                              "MyAlloca");
+  } else { // Single alloca of type [FrameSize x i8].
+    Alloca = IRB.CreateAlloca(ArrayType::get(IRB.getInt8Ty(), L.FrameSize),
+                              nullptr, "MyAlloca");
+    assert(Alloca->isStaticAlloca());
+  }
+  assert((ClRealignStack & (ClRealignStack - 1)) == 0); // Zero or power of 2.
+  size_t FrameAlignment = std::max(L.FrameAlignment, (size_t)ClRealignStack);
+  Alloca->setAlignment(Align(FrameAlignment));
+  return IRB.CreatePointerCast(Alloca, IntptrTy); // Frame base cast to IntptrTy.
+}
+
+void FunctionStackPoisoner::createDynamicAllocasInitStorage() {
+  BasicBlock &FirstBB = *F.begin(); // First basic block of F.
+  IRBuilder<> IRB(dyn_cast<Instruction>(FirstBB.begin())); // Insert at its top.
+  DynamicAllocaLayout = IRB.CreateAlloca(IntptrTy, nullptr); // One IntptrTy slot.
+  IRB.CreateStore(Constant::getNullValue(IntptrTy), DynamicAllocaLayout);
+  DynamicAllocaLayout->setAlignment(Align(32));
+}
+
+void FunctionStackPoisoner::processDynamicAllocas() {
+  if (!ClInstrumentDynamicAllocas || DynamicAllocaVec.empty()) {
+    assert(DynamicAllocaPoisonCallVec.empty());
+    return; // Feature disabled or no dynamic allocas were collected.
+  }
+
+  // Insert (un)poison calls for lifetime intrinsics of dynamic allocas.
+  for (const auto &APC : DynamicAllocaPoisonCallVec) {
+    assert(APC.InsBefore);
+    assert(APC.AI);
+    assert(ASan.isInterestingAlloca(*APC.AI));
+    assert(!APC.AI->isStaticAlloca());
+
+    IRBuilder<> IRB(APC.InsBefore);
+    poisonAlloca(APC.AI, APC.Size, IRB, APC.DoPoison);
+    // Dynamic allocas will be unpoisoned unconditionally below in
+    // unpoisonDynamicAllocas, so nothing is recorded per call here.
+    // NOTE(review): no flag for static allocas is set in this loop.
+  }
+
+  // Handle dynamic allocas: set up DynamicAllocaLayout, then process each one.
+  createDynamicAllocasInitStorage();
+  for (auto &AI : DynamicAllocaVec)
+    handleDynamicAllocaCall(AI);
+  unpoisonDynamicAllocas();
+}
+
+/// Collect into \p InitInsts the instructions in the entry block after
+/// \p InsBefore which initialize permanent storage for a function argument.
+/// These must remain in the entry block so that uninitialized values do not
+/// appear in backtraces; an added benefit is that this conserves spill slots.
+/// Stores to instrumented ("interesting") allocas are not collected.
+static void findStoresToUninstrumentedArgAllocas(
+    AddressSanitizer &ASan, Instruction &InsBefore,
+    SmallVectorImpl<Instruction *> &InitInsts) {
+  Instruction *Start = InsBefore.getNextNonDebugInstruction();
+  for (Instruction *It = Start; It; It = It->getNextNonDebugInstruction()) {
+    // Argument initialization looks like:
+    // 1) store <Argument>, <Alloca> OR
+    // 2) <CastArgument> = cast <Argument> to ...
+    //    store <CastArgument> to <Alloca>
+    // Do not consider any other kind of instruction.
+    //
+    // Note: This covers all known cases, but may not be exhaustive. An
+    // alternative to pattern-matching stores is to DFS over all Argument uses:
+    // this might be more general, but is probably much more complicated.
+    if (isa<AllocaInst>(It) || isa<CastInst>(It))
+      continue; // Stepped over; these do not end the scan.
+    if (auto *Store = dyn_cast<StoreInst>(It)) {
+      // The store destination must be an alloca that isn't interesting for
+      // ASan to instrument. These are moved up before InsBefore, and they're
+      // not interesting because allocas for arguments can be mem2reg'd.
+      auto *Alloca = dyn_cast<AllocaInst>(Store->getPointerOperand());
+      if (!Alloca || ASan.isInterestingAlloca(*Alloca))
+        continue; // Not collected, but the scan goes on.
+
+      Value *Val = Store->getValueOperand();
+      bool IsDirectArgInit = isa<Argument>(Val);
+      bool IsArgInitViaCast =
+          isa<CastInst>(Val) &&
+          isa<Argument>(cast<CastInst>(Val)->getOperand(0)) &&
+          // Check that the cast appears directly before the store. Otherwise
+          // moving the cast before InsBefore may break the IR.
+          Val == It->getPrevNonDebugInstruction();
+      bool IsArgInit = IsDirectArgInit || IsArgInitViaCast;
+      if (!IsArgInit)
+        continue;
+
+      if (IsArgInitViaCast)
+        InitInsts.push_back(cast<Instruction>(Val)); // Cast before its store.
+      InitInsts.push_back(Store);
+      continue;
+    }
+
+    // Do not reorder past unknown instructions: argument initialization should
+    // only involve casts and stores.
+    return;
+  }
+}
+
+void FunctionStackPoisoner::processStaticAllocas() {
+  if (AllocaVec.empty()) {
+    assert(StaticAllocaPoisonCallVec.empty());
+    return;
+  }
+
+  int StackMallocIdx = -1;
+  DebugLoc EntryDebugLocation;
+  if (auto SP = F.getSubprogram())
     EntryDebugLocation =
         DILocation::get(SP->getContext(), SP->getScopeLine(), 0, SP);
- 
-  Instruction *InsBefore = AllocaVec[0]; 
-  IRBuilder<> IRB(InsBefore); 
- 
-  // Make sure non-instrumented allocas stay in the entry block. Otherwise, 
-  // debug info is broken, because only entry-block allocas are treated as 
-  // regular stack slots. 
-  auto InsBeforeB = InsBefore->getParent(); 
-  assert(InsBeforeB == &F.getEntryBlock()); 
-  for (auto *AI : StaticAllocasToMoveUp) 
-    if (AI->getParent() == InsBeforeB) 
-      AI->moveBefore(InsBefore); 
- 
-  // Move stores of arguments into entry-block allocas as well. This prevents 
-  // extra stack slots from being generated (to house the argument values until 
-  // they can be stored into the allocas). This also prevents uninitialized 
-  // values from being shown in backtraces. 
-  SmallVector<Instruction *, 8> ArgInitInsts; 
-  findStoresToUninstrumentedArgAllocas(ASan, *InsBefore, ArgInitInsts); 
-  for (Instruction *ArgInitInst : ArgInitInsts) 
-    ArgInitInst->moveBefore(InsBefore); 
- 
-  // If we have a call to llvm.localescape, keep it in the entry block. 
-  if (LocalEscapeCall) LocalEscapeCall->moveBefore(InsBefore); 
- 
-  SmallVector<ASanStackVariableDescription, 16> SVD; 
-  SVD.reserve(AllocaVec.size()); 
-  for (AllocaInst *AI : AllocaVec) { 
-    ASanStackVariableDescription D = {AI->getName().data(), 
-                                      ASan.getAllocaSizeInBytes(*AI), 
-                                      0, 
-                                      AI->getAlignment(), 
-                                      AI, 
-                                      0, 
-                                      0}; 
-    SVD.push_back(D); 
-  } 
- 
-  // Minimal header size (left redzone) is 4 pointers, 
-  // i.e. 32 bytes on 64-bit platforms and 16 bytes in 32-bit platforms. 
-  size_t Granularity = 1ULL << Mapping.Scale; 
-  size_t MinHeaderSize = std::max((size_t)ASan.LongSize / 2, Granularity); 
-  const ASanStackFrameLayout &L = 
-      ComputeASanStackFrameLayout(SVD, Granularity, MinHeaderSize); 
- 
-  // Build AllocaToSVDMap for ASanStackVariableDescription lookup. 
-  DenseMap<const AllocaInst *, ASanStackVariableDescription *> AllocaToSVDMap; 
-  for (auto &Desc : SVD) 
-    AllocaToSVDMap[Desc.AI] = &Desc; 
- 
-  // Update SVD with information from lifetime intrinsics. 
-  for (const auto &APC : StaticAllocaPoisonCallVec) { 
-    assert(APC.InsBefore); 
-    assert(APC.AI); 
-    assert(ASan.isInterestingAlloca(*APC.AI)); 
-    assert(APC.AI->isStaticAlloca()); 
- 
-    ASanStackVariableDescription &Desc = *AllocaToSVDMap[APC.AI]; 
-    Desc.LifetimeSize = Desc.Size; 
-    if (const DILocation *FnLoc = EntryDebugLocation.get()) { 
-      if (const DILocation *LifetimeLoc = APC.InsBefore->getDebugLoc().get()) { 
-        if (LifetimeLoc->getFile() == FnLoc->getFile()) 
-          if (unsigned Line = LifetimeLoc->getLine()) 
-            Desc.Line = std::min(Desc.Line ? Desc.Line : Line, Line); 
-      } 
-    } 
-  } 
- 
-  auto DescriptionString = ComputeASanStackFrameDescription(SVD); 
-  LLVM_DEBUG(dbgs() << DescriptionString << " --- " << L.FrameSize << "\n"); 
-  uint64_t LocalStackSize = L.FrameSize; 
-  bool DoStackMalloc = ClUseAfterReturn && !ASan.CompileKernel && 
-                       LocalStackSize <= kMaxStackMallocSize; 
-  bool DoDynamicAlloca = ClDynamicAllocaStack; 
-  // Don't do dynamic alloca or stack malloc if: 
-  // 1) There is inline asm: too often it makes assumptions on which registers 
-  //    are available. 
-  // 2) There is a returns_twice call (typically setjmp), which is 
-  //    optimization-hostile, and doesn't play well with introduced indirect 
-  //    register-relative calculation of local variable addresses. 
-  DoDynamicAlloca &= !HasInlineAsm && !HasReturnsTwiceCall; 
-  DoStackMalloc &= !HasInlineAsm && !HasReturnsTwiceCall; 
- 
-  Value *StaticAlloca = 
-      DoDynamicAlloca ? nullptr : createAllocaForLayout(IRB, L, false); 
- 
-  Value *FakeStack; 
-  Value *LocalStackBase; 
-  Value *LocalStackBaseAlloca; 
-  uint8_t DIExprFlags = DIExpression::ApplyOffset; 
- 
-  if (DoStackMalloc) { 
-    LocalStackBaseAlloca = 
-        IRB.CreateAlloca(IntptrTy, nullptr, "asan_local_stack_base"); 
-    // void *FakeStack = __asan_option_detect_stack_use_after_return 
-    //     ? __asan_stack_malloc_N(LocalStackSize) 
-    //     : nullptr; 
-    // void *LocalStackBase = (FakeStack) ? FakeStack : alloca(LocalStackSize); 
-    Constant *OptionDetectUseAfterReturn = F.getParent()->getOrInsertGlobal( 
-        kAsanOptionDetectUseAfterReturn, IRB.getInt32Ty()); 
-    Value *UseAfterReturnIsEnabled = IRB.CreateICmpNE( 
-        IRB.CreateLoad(IRB.getInt32Ty(), OptionDetectUseAfterReturn), 
-        Constant::getNullValue(IRB.getInt32Ty())); 
-    Instruction *Term = 
-        SplitBlockAndInsertIfThen(UseAfterReturnIsEnabled, InsBefore, false); 
-    IRBuilder<> IRBIf(Term); 
-    StackMallocIdx = StackMallocSizeClass(LocalStackSize); 
-    assert(StackMallocIdx <= kMaxAsanStackMallocSizeClass); 
-    Value *FakeStackValue = 
-        IRBIf.CreateCall(AsanStackMallocFunc[StackMallocIdx], 
-                         ConstantInt::get(IntptrTy, LocalStackSize)); 
-    IRB.SetInsertPoint(InsBefore); 
-    FakeStack = createPHI(IRB, UseAfterReturnIsEnabled, FakeStackValue, Term, 
-                          ConstantInt::get(IntptrTy, 0)); 
- 
-    Value *NoFakeStack = 
-        IRB.CreateICmpEQ(FakeStack, Constant::getNullValue(IntptrTy)); 
-    Term = SplitBlockAndInsertIfThen(NoFakeStack, InsBefore, false); 
-    IRBIf.SetInsertPoint(Term); 
-    Value *AllocaValue = 
-        DoDynamicAlloca ? createAllocaForLayout(IRBIf, L, true) : StaticAlloca; 
- 
-    IRB.SetInsertPoint(InsBefore); 
-    LocalStackBase = createPHI(IRB, NoFakeStack, AllocaValue, Term, FakeStack); 
-    IRB.CreateStore(LocalStackBase, LocalStackBaseAlloca); 
-    DIExprFlags |= DIExpression::DerefBefore; 
-  } else { 
-    // void *FakeStack = nullptr; 
-    // void *LocalStackBase = alloca(LocalStackSize); 
-    FakeStack = ConstantInt::get(IntptrTy, 0); 
-    LocalStackBase = 
-        DoDynamicAlloca ? createAllocaForLayout(IRB, L, true) : StaticAlloca; 
-    LocalStackBaseAlloca = LocalStackBase; 
-  } 
- 
-  // It shouldn't matter whether we pass an `alloca` or a `ptrtoint` as the 
-  // dbg.declare address opereand, but passing a `ptrtoint` seems to confuse 
-  // later passes and can result in dropped variable coverage in debug info. 
-  Value *LocalStackBaseAllocaPtr = 
-      isa<PtrToIntInst>(LocalStackBaseAlloca) 
-          ? cast<PtrToIntInst>(LocalStackBaseAlloca)->getPointerOperand() 
-          : LocalStackBaseAlloca; 
-  assert(isa<AllocaInst>(LocalStackBaseAllocaPtr) && 
-         "Variable descriptions relative to ASan stack base will be dropped"); 
- 
-  // Replace Alloca instructions with base+offset. 
-  for (const auto &Desc : SVD) { 
-    AllocaInst *AI = Desc.AI; 
-    replaceDbgDeclare(AI, LocalStackBaseAllocaPtr, DIB, DIExprFlags, 
-                      Desc.Offset); 
-    Value *NewAllocaPtr = IRB.CreateIntToPtr( 
-        IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Desc.Offset)), 
-        AI->getType()); 
-    AI->replaceAllUsesWith(NewAllocaPtr); 
-  } 
- 
-  // The left-most redzone has enough space for at least 4 pointers. 
-  // Write the Magic value to redzone[0]. 
-  Value *BasePlus0 = IRB.CreateIntToPtr(LocalStackBase, IntptrPtrTy); 
-  IRB.CreateStore(ConstantInt::get(IntptrTy, kCurrentStackFrameMagic), 
-                  BasePlus0); 
-  // Write the frame description constant to redzone[1]. 
-  Value *BasePlus1 = IRB.CreateIntToPtr( 
-      IRB.CreateAdd(LocalStackBase, 
-                    ConstantInt::get(IntptrTy, ASan.LongSize / 8)), 
-      IntptrPtrTy); 
-  GlobalVariable *StackDescriptionGlobal = 
-      createPrivateGlobalForString(*F.getParent(), DescriptionString, 
-                                   /*AllowMerging*/ true, kAsanGenPrefix); 
-  Value *Description = IRB.CreatePointerCast(StackDescriptionGlobal, IntptrTy); 
-  IRB.CreateStore(Description, BasePlus1); 
-  // Write the PC to redzone[2]. 
-  Value *BasePlus2 = IRB.CreateIntToPtr( 
-      IRB.CreateAdd(LocalStackBase, 
-                    ConstantInt::get(IntptrTy, 2 * ASan.LongSize / 8)), 
-      IntptrPtrTy); 
-  IRB.CreateStore(IRB.CreatePointerCast(&F, IntptrTy), BasePlus2); 
- 
-  const auto &ShadowAfterScope = GetShadowBytesAfterScope(SVD, L); 
- 
-  // Poison the stack red zones at the entry. 
-  Value *ShadowBase = ASan.memToShadow(LocalStackBase, IRB); 
-  // As mask we must use most poisoned case: red zones and after scope. 
-  // As bytes we can use either the same or just red zones only. 
-  copyToShadow(ShadowAfterScope, ShadowAfterScope, IRB, ShadowBase); 
- 
-  if (!StaticAllocaPoisonCallVec.empty()) { 
-    const auto &ShadowInScope = GetShadowBytes(SVD, L); 
- 
-    // Poison static allocas near lifetime intrinsics. 
-    for (const auto &APC : StaticAllocaPoisonCallVec) { 
-      const ASanStackVariableDescription &Desc = *AllocaToSVDMap[APC.AI]; 
-      assert(Desc.Offset % L.Granularity == 0); 
-      size_t Begin = Desc.Offset / L.Granularity; 
-      size_t End = Begin + (APC.Size + L.Granularity - 1) / L.Granularity; 
- 
-      IRBuilder<> IRB(APC.InsBefore); 
-      copyToShadow(ShadowAfterScope, 
-                   APC.DoPoison ? ShadowAfterScope : ShadowInScope, Begin, End, 
-                   IRB, ShadowBase); 
-    } 
-  } 
- 
-  SmallVector<uint8_t, 64> ShadowClean(ShadowAfterScope.size(), 0); 
-  SmallVector<uint8_t, 64> ShadowAfterReturn; 
- 
-  // (Un)poison the stack before all ret instructions. 
+
+  Instruction *InsBefore = AllocaVec[0];
+  IRBuilder<> IRB(InsBefore);
+
+  // Make sure non-instrumented allocas stay in the entry block. Otherwise,
+  // debug info is broken, because only entry-block allocas are treated as
+  // regular stack slots.
+  auto InsBeforeB = InsBefore->getParent();
+  assert(InsBeforeB == &F.getEntryBlock());
+  for (auto *AI : StaticAllocasToMoveUp)
+    if (AI->getParent() == InsBeforeB)
+      AI->moveBefore(InsBefore);
+
+  // Move stores of arguments into entry-block allocas as well. This prevents
+  // extra stack slots from being generated (to house the argument values until
+  // they can be stored into the allocas). This also prevents uninitialized
+  // values from being shown in backtraces.
+  SmallVector<Instruction *, 8> ArgInitInsts;
+  findStoresToUninstrumentedArgAllocas(ASan, *InsBefore, ArgInitInsts);
+  for (Instruction *ArgInitInst : ArgInitInsts)
+    ArgInitInst->moveBefore(InsBefore);
+
+  // If we have a call to llvm.localescape, keep it in the entry block.
+  if (LocalEscapeCall) LocalEscapeCall->moveBefore(InsBefore);
+
+  SmallVector<ASanStackVariableDescription, 16> SVD;
+  SVD.reserve(AllocaVec.size());
+  for (AllocaInst *AI : AllocaVec) {
+    ASanStackVariableDescription D = {AI->getName().data(),
+                                      ASan.getAllocaSizeInBytes(*AI),
+                                      0,
+                                      AI->getAlignment(),
+                                      AI,
+                                      0,
+                                      0};
+    SVD.push_back(D);
+  }
+
+  // Minimal header size (left redzone) is 4 pointers,
+  // i.e. 32 bytes on 64-bit platforms and 16 bytes in 32-bit platforms.
+  size_t Granularity = 1ULL << Mapping.Scale;
+  size_t MinHeaderSize = std::max((size_t)ASan.LongSize / 2, Granularity);
+  const ASanStackFrameLayout &L =
+      ComputeASanStackFrameLayout(SVD, Granularity, MinHeaderSize);
+
+  // Build AllocaToSVDMap for ASanStackVariableDescription lookup.
+  DenseMap<const AllocaInst *, ASanStackVariableDescription *> AllocaToSVDMap;
+  for (auto &Desc : SVD)
+    AllocaToSVDMap[Desc.AI] = &Desc;
+
+  // Update SVD with information from lifetime intrinsics.
+  for (const auto &APC : StaticAllocaPoisonCallVec) {
+    assert(APC.InsBefore);
+    assert(APC.AI);
+    assert(ASan.isInterestingAlloca(*APC.AI));
+    assert(APC.AI->isStaticAlloca());
+
+    ASanStackVariableDescription &Desc = *AllocaToSVDMap[APC.AI];
+    Desc.LifetimeSize = Desc.Size;
+    if (const DILocation *FnLoc = EntryDebugLocation.get()) {
+      if (const DILocation *LifetimeLoc = APC.InsBefore->getDebugLoc().get()) {
+        if (LifetimeLoc->getFile() == FnLoc->getFile())
+          if (unsigned Line = LifetimeLoc->getLine())
+            Desc.Line = std::min(Desc.Line ? Desc.Line : Line, Line);
+      }
+    }
+  }
+
+  auto DescriptionString = ComputeASanStackFrameDescription(SVD);
+  LLVM_DEBUG(dbgs() << DescriptionString << " --- " << L.FrameSize << "\n");
+  uint64_t LocalStackSize = L.FrameSize;
+  bool DoStackMalloc = ClUseAfterReturn && !ASan.CompileKernel &&
+                       LocalStackSize <= kMaxStackMallocSize;
+  bool DoDynamicAlloca = ClDynamicAllocaStack;
+  // Don't do dynamic alloca or stack malloc if:
+  // 1) There is inline asm: too often it makes assumptions on which registers
+  //    are available.
+  // 2) There is a returns_twice call (typically setjmp), which is
+  //    optimization-hostile, and doesn't play well with introduced indirect
+  //    register-relative calculation of local variable addresses.
+  DoDynamicAlloca &= !HasInlineAsm && !HasReturnsTwiceCall;
+  DoStackMalloc &= !HasInlineAsm && !HasReturnsTwiceCall;
+
+  Value *StaticAlloca =
+      DoDynamicAlloca ? nullptr : createAllocaForLayout(IRB, L, false);
+
+  Value *FakeStack;
+  Value *LocalStackBase;
+  Value *LocalStackBaseAlloca;
+  uint8_t DIExprFlags = DIExpression::ApplyOffset;
+
+  if (DoStackMalloc) {
+    LocalStackBaseAlloca =
+        IRB.CreateAlloca(IntptrTy, nullptr, "asan_local_stack_base");
+    // void *FakeStack = __asan_option_detect_stack_use_after_return
+    //     ? __asan_stack_malloc_N(LocalStackSize)
+    //     : nullptr;
+    // void *LocalStackBase = (FakeStack) ? FakeStack : alloca(LocalStackSize);
+    Constant *OptionDetectUseAfterReturn = F.getParent()->getOrInsertGlobal(
+        kAsanOptionDetectUseAfterReturn, IRB.getInt32Ty());
+    Value *UseAfterReturnIsEnabled = IRB.CreateICmpNE(
+        IRB.CreateLoad(IRB.getInt32Ty(), OptionDetectUseAfterReturn),
+        Constant::getNullValue(IRB.getInt32Ty()));
+    Instruction *Term =
+        SplitBlockAndInsertIfThen(UseAfterReturnIsEnabled, InsBefore, false);
+    IRBuilder<> IRBIf(Term);
+    StackMallocIdx = StackMallocSizeClass(LocalStackSize);
+    assert(StackMallocIdx <= kMaxAsanStackMallocSizeClass);
+    Value *FakeStackValue =
+        IRBIf.CreateCall(AsanStackMallocFunc[StackMallocIdx],
+                         ConstantInt::get(IntptrTy, LocalStackSize));
+    IRB.SetInsertPoint(InsBefore);
+    FakeStack = createPHI(IRB, UseAfterReturnIsEnabled, FakeStackValue, Term,
+                          ConstantInt::get(IntptrTy, 0));
+
+    Value *NoFakeStack =
+        IRB.CreateICmpEQ(FakeStack, Constant::getNullValue(IntptrTy));
+    Term = SplitBlockAndInsertIfThen(NoFakeStack, InsBefore, false);
+    IRBIf.SetInsertPoint(Term);
+    Value *AllocaValue =
+        DoDynamicAlloca ? createAllocaForLayout(IRBIf, L, true) : StaticAlloca;
+
+    IRB.SetInsertPoint(InsBefore);
+    LocalStackBase = createPHI(IRB, NoFakeStack, AllocaValue, Term, FakeStack);
+    IRB.CreateStore(LocalStackBase, LocalStackBaseAlloca);
+    DIExprFlags |= DIExpression::DerefBefore;
+  } else {
+    // void *FakeStack = nullptr;
+    // void *LocalStackBase = alloca(LocalStackSize);
+    FakeStack = ConstantInt::get(IntptrTy, 0);
+    LocalStackBase =
+        DoDynamicAlloca ? createAllocaForLayout(IRB, L, true) : StaticAlloca;
+    LocalStackBaseAlloca = LocalStackBase;
+  }
+
+  // It shouldn't matter whether we pass an `alloca` or a `ptrtoint` as the
+  // dbg.declare address operand, but passing a `ptrtoint` seems to confuse
+  // later passes and can result in dropped variable coverage in debug info.
+  Value *LocalStackBaseAllocaPtr =
+      isa<PtrToIntInst>(LocalStackBaseAlloca)
+          ? cast<PtrToIntInst>(LocalStackBaseAlloca)->getPointerOperand()
+          : LocalStackBaseAlloca;
+  assert(isa<AllocaInst>(LocalStackBaseAllocaPtr) &&
+         "Variable descriptions relative to ASan stack base will be dropped");
+
+  // Replace Alloca instructions with base+offset.
+  for (const auto &Desc : SVD) {
+    AllocaInst *AI = Desc.AI;
+    replaceDbgDeclare(AI, LocalStackBaseAllocaPtr, DIB, DIExprFlags,
+                      Desc.Offset);
+    Value *NewAllocaPtr = IRB.CreateIntToPtr(
+        IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Desc.Offset)),
+        AI->getType());
+    AI->replaceAllUsesWith(NewAllocaPtr);
+  }
+
+  // The left-most redzone has enough space for at least 4 pointers.
+  // Write the Magic value to redzone[0].
+  Value *BasePlus0 = IRB.CreateIntToPtr(LocalStackBase, IntptrPtrTy);
+  IRB.CreateStore(ConstantInt::get(IntptrTy, kCurrentStackFrameMagic),
+                  BasePlus0);
+  // Write the frame description constant to redzone[1].
+  Value *BasePlus1 = IRB.CreateIntToPtr(
+      IRB.CreateAdd(LocalStackBase,
+                    ConstantInt::get(IntptrTy, ASan.LongSize / 8)),
+      IntptrPtrTy);
+  GlobalVariable *StackDescriptionGlobal =
+      createPrivateGlobalForString(*F.getParent(), DescriptionString,
+                                   /*AllowMerging*/ true, kAsanGenPrefix);
+  Value *Description = IRB.CreatePointerCast(StackDescriptionGlobal, IntptrTy);
+  IRB.CreateStore(Description, BasePlus1);
+  // Write the PC to redzone[2].
+  Value *BasePlus2 = IRB.CreateIntToPtr(
+      IRB.CreateAdd(LocalStackBase,
+                    ConstantInt::get(IntptrTy, 2 * ASan.LongSize / 8)),
+      IntptrPtrTy);
+  IRB.CreateStore(IRB.CreatePointerCast(&F, IntptrTy), BasePlus2);
+
+  const auto &ShadowAfterScope = GetShadowBytesAfterScope(SVD, L);
+
+  // Poison the stack red zones at the entry.
+  Value *ShadowBase = ASan.memToShadow(LocalStackBase, IRB);
+  // As mask we must use most poisoned case: red zones and after scope.
+  // As bytes we can use either the same or just red zones only.
+  copyToShadow(ShadowAfterScope, ShadowAfterScope, IRB, ShadowBase);
+
+  if (!StaticAllocaPoisonCallVec.empty()) {
+    const auto &ShadowInScope = GetShadowBytes(SVD, L);
+
+    // Poison static allocas near lifetime intrinsics.
+    for (const auto &APC : StaticAllocaPoisonCallVec) {
+      const ASanStackVariableDescription &Desc = *AllocaToSVDMap[APC.AI];
+      assert(Desc.Offset % L.Granularity == 0);
+      size_t Begin = Desc.Offset / L.Granularity;
+      size_t End = Begin + (APC.Size + L.Granularity - 1) / L.Granularity;
+
+      IRBuilder<> IRB(APC.InsBefore);
+      copyToShadow(ShadowAfterScope,
+                   APC.DoPoison ? ShadowAfterScope : ShadowInScope, Begin, End,
+                   IRB, ShadowBase);
+    }
+  }
+
+  SmallVector<uint8_t, 64> ShadowClean(ShadowAfterScope.size(), 0);
+  SmallVector<uint8_t, 64> ShadowAfterReturn;
+
+  // (Un)poison the stack before all ret instructions.
   for (Instruction *Ret : RetVec) {
-    IRBuilder<> IRBRet(Ret); 
-    // Mark the current frame as retired. 
-    IRBRet.CreateStore(ConstantInt::get(IntptrTy, kRetiredStackFrameMagic), 
-                       BasePlus0); 
-    if (DoStackMalloc) { 
-      assert(StackMallocIdx >= 0); 
-      // if FakeStack != 0  // LocalStackBase == FakeStack 
-      //     // In use-after-return mode, poison the whole stack frame. 
-      //     if StackMallocIdx <= 4 
-      //         // For small sizes inline the whole thing: 
-      //         memset(ShadowBase, kAsanStackAfterReturnMagic, ShadowSize); 
-      //         **SavedFlagPtr(FakeStack) = 0 
-      //     else 
-      //         __asan_stack_free_N(FakeStack, LocalStackSize) 
-      // else 
-      //     <This is not a fake stack; unpoison the redzones> 
-      Value *Cmp = 
-          IRBRet.CreateICmpNE(FakeStack, Constant::getNullValue(IntptrTy)); 
-      Instruction *ThenTerm, *ElseTerm; 
-      SplitBlockAndInsertIfThenElse(Cmp, Ret, &ThenTerm, &ElseTerm); 
- 
-      IRBuilder<> IRBPoison(ThenTerm); 
-      if (StackMallocIdx <= 4) { 
-        int ClassSize = kMinStackMallocSize << StackMallocIdx; 
-        ShadowAfterReturn.resize(ClassSize / L.Granularity, 
-                                 kAsanStackUseAfterReturnMagic); 
-        copyToShadow(ShadowAfterReturn, ShadowAfterReturn, IRBPoison, 
-                     ShadowBase); 
-        Value *SavedFlagPtrPtr = IRBPoison.CreateAdd( 
-            FakeStack, 
-            ConstantInt::get(IntptrTy, ClassSize - ASan.LongSize / 8)); 
-        Value *SavedFlagPtr = IRBPoison.CreateLoad( 
-            IntptrTy, IRBPoison.CreateIntToPtr(SavedFlagPtrPtr, IntptrPtrTy)); 
-        IRBPoison.CreateStore( 
-            Constant::getNullValue(IRBPoison.getInt8Ty()), 
-            IRBPoison.CreateIntToPtr(SavedFlagPtr, IRBPoison.getInt8PtrTy())); 
-      } else { 
-        // For larger frames call __asan_stack_free_*. 
-        IRBPoison.CreateCall( 
-            AsanStackFreeFunc[StackMallocIdx], 
-            {FakeStack, ConstantInt::get(IntptrTy, LocalStackSize)}); 
-      } 
- 
-      IRBuilder<> IRBElse(ElseTerm); 
-      copyToShadow(ShadowAfterScope, ShadowClean, IRBElse, ShadowBase); 
-    } else { 
-      copyToShadow(ShadowAfterScope, ShadowClean, IRBRet, ShadowBase); 
-    } 
-  } 
- 
-  // We are done. Remove the old unused alloca instructions. 
-  for (auto AI : AllocaVec) AI->eraseFromParent(); 
-} 
- 
-void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size, 
-                                         IRBuilder<> &IRB, bool DoPoison) { 
-  // For now just insert the call to ASan runtime. 
-  Value *AddrArg = IRB.CreatePointerCast(V, IntptrTy); 
-  Value *SizeArg = ConstantInt::get(IntptrTy, Size); 
-  IRB.CreateCall( 
-      DoPoison ? AsanPoisonStackMemoryFunc : AsanUnpoisonStackMemoryFunc, 
-      {AddrArg, SizeArg}); 
-} 
- 
-// Handling llvm.lifetime intrinsics for a given %alloca: 
-// (1) collect all llvm.lifetime.xxx(%size, %value) describing the alloca. 
-// (2) if %size is constant, poison memory for llvm.lifetime.end (to detect 
-//     invalid accesses) and unpoison it for llvm.lifetime.start (the memory 
-//     could be poisoned by previous llvm.lifetime.end instruction, as the 
-//     variable may go in and out of scope several times, e.g. in loops). 
-// (3) if we poisoned at least one %alloca in a function, 
-//     unpoison the whole stack frame at function exit. 
-void FunctionStackPoisoner::handleDynamicAllocaCall(AllocaInst *AI) { 
-  IRBuilder<> IRB(AI); 
- 
-  const unsigned Alignment = std::max(kAllocaRzSize, AI->getAlignment()); 
-  const uint64_t AllocaRedzoneMask = kAllocaRzSize - 1; 
- 
-  Value *Zero = Constant::getNullValue(IntptrTy); 
-  Value *AllocaRzSize = ConstantInt::get(IntptrTy, kAllocaRzSize); 
-  Value *AllocaRzMask = ConstantInt::get(IntptrTy, AllocaRedzoneMask); 
- 
-  // Since we need to extend alloca with additional memory to locate 
-  // redzones, and OldSize is number of allocated blocks with 
-  // ElementSize size, get allocated memory size in bytes by 
-  // OldSize * ElementSize. 
-  const unsigned ElementSize = 
-      F.getParent()->getDataLayout().getTypeAllocSize(AI->getAllocatedType()); 
-  Value *OldSize = 
-      IRB.CreateMul(IRB.CreateIntCast(AI->getArraySize(), IntptrTy, false), 
-                    ConstantInt::get(IntptrTy, ElementSize)); 
- 
-  // PartialSize = OldSize % 32 
-  Value *PartialSize = IRB.CreateAnd(OldSize, AllocaRzMask); 
- 
-  // Misalign = kAllocaRzSize - PartialSize; 
-  Value *Misalign = IRB.CreateSub(AllocaRzSize, PartialSize); 
- 
-  // PartialPadding = Misalign != kAllocaRzSize ? Misalign : 0; 
-  Value *Cond = IRB.CreateICmpNE(Misalign, AllocaRzSize); 
-  Value *PartialPadding = IRB.CreateSelect(Cond, Misalign, Zero); 
- 
-  // AdditionalChunkSize = Alignment + PartialPadding + kAllocaRzSize 
-  // Alignment is added to locate left redzone, PartialPadding for possible 
-  // partial redzone and kAllocaRzSize for right redzone respectively. 
-  Value *AdditionalChunkSize = IRB.CreateAdd( 
-      ConstantInt::get(IntptrTy, Alignment + kAllocaRzSize), PartialPadding); 
- 
-  Value *NewSize = IRB.CreateAdd(OldSize, AdditionalChunkSize); 
- 
-  // Insert new alloca with new NewSize and Alignment params. 
-  AllocaInst *NewAlloca = IRB.CreateAlloca(IRB.getInt8Ty(), NewSize); 
-  NewAlloca->setAlignment(Align(Alignment)); 
- 
-  // NewAddress = Address + Alignment 
-  Value *NewAddress = IRB.CreateAdd(IRB.CreatePtrToInt(NewAlloca, IntptrTy), 
-                                    ConstantInt::get(IntptrTy, Alignment)); 
- 
-  // Insert __asan_alloca_poison call for new created alloca. 
-  IRB.CreateCall(AsanAllocaPoisonFunc, {NewAddress, OldSize}); 
- 
-  // Store the last alloca's address to DynamicAllocaLayout. We'll need this 
-  // for unpoisoning stuff. 
-  IRB.CreateStore(IRB.CreatePtrToInt(NewAlloca, IntptrTy), DynamicAllocaLayout); 
- 
-  Value *NewAddressPtr = IRB.CreateIntToPtr(NewAddress, AI->getType()); 
- 
-  // Replace all uses of AddessReturnedByAlloca with NewAddressPtr. 
-  AI->replaceAllUsesWith(NewAddressPtr); 
- 
-  // We are done. Erase old alloca from parent. 
-  AI->eraseFromParent(); 
-} 
- 
-// isSafeAccess returns true if Addr is always inbounds with respect to its 
-// base object. For example, it is a field access or an array access with 
-// constant inbounds index. 
-bool AddressSanitizer::isSafeAccess(ObjectSizeOffsetVisitor &ObjSizeVis, 
-                                    Value *Addr, uint64_t TypeSize) const { 
-  SizeOffsetType SizeOffset = ObjSizeVis.compute(Addr); 
-  if (!ObjSizeVis.bothKnown(SizeOffset)) return false; 
-  uint64_t Size = SizeOffset.first.getZExtValue(); 
-  int64_t Offset = SizeOffset.second.getSExtValue(); 
-  // Three checks are required to ensure safety: 
-  // . Offset >= 0  (since the offset is given from the base ptr) 
-  // . Size >= Offset  (unsigned) 
-  // . Size - Offset >= NeededSize  (unsigned) 
-  return Offset >= 0 && Size >= uint64_t(Offset) && 
-         Size - uint64_t(Offset) >= TypeSize / 8; 
-} 
+    IRBuilder<> IRBRet(Ret);
+    // Mark the current frame as retired.
+    IRBRet.CreateStore(ConstantInt::get(IntptrTy, kRetiredStackFrameMagic),
+                       BasePlus0);
+    if (DoStackMalloc) {
+      assert(StackMallocIdx >= 0);
+      // if FakeStack != 0  // LocalStackBase == FakeStack
+      //     // In use-after-return mode, poison the whole stack frame.
+      //     if StackMallocIdx <= 4
+      //         // For small sizes inline the whole thing:
+      //         memset(ShadowBase, kAsanStackAfterReturnMagic, ShadowSize);
+      //         **SavedFlagPtr(FakeStack) = 0
+      //     else
+      //         __asan_stack_free_N(FakeStack, LocalStackSize)
+      // else
+      //     <This is not a fake stack; unpoison the redzones>
+      Value *Cmp =
+          IRBRet.CreateICmpNE(FakeStack, Constant::getNullValue(IntptrTy));
+      Instruction *ThenTerm, *ElseTerm;
+      SplitBlockAndInsertIfThenElse(Cmp, Ret, &ThenTerm, &ElseTerm);
+
+      IRBuilder<> IRBPoison(ThenTerm);
+      if (StackMallocIdx <= 4) {
+        int ClassSize = kMinStackMallocSize << StackMallocIdx;
+        ShadowAfterReturn.resize(ClassSize / L.Granularity,
+                                 kAsanStackUseAfterReturnMagic);
+        copyToShadow(ShadowAfterReturn, ShadowAfterReturn, IRBPoison,
+                     ShadowBase);
+        Value *SavedFlagPtrPtr = IRBPoison.CreateAdd(
+            FakeStack,
+            ConstantInt::get(IntptrTy, ClassSize - ASan.LongSize / 8));
+        Value *SavedFlagPtr = IRBPoison.CreateLoad(
+            IntptrTy, IRBPoison.CreateIntToPtr(SavedFlagPtrPtr, IntptrPtrTy));
+        IRBPoison.CreateStore(
+            Constant::getNullValue(IRBPoison.getInt8Ty()),
+            IRBPoison.CreateIntToPtr(SavedFlagPtr, IRBPoison.getInt8PtrTy()));
+      } else {
+        // For larger frames call __asan_stack_free_*.
+        IRBPoison.CreateCall(
+            AsanStackFreeFunc[StackMallocIdx],
+            {FakeStack, ConstantInt::get(IntptrTy, LocalStackSize)});
+      }
+
+      IRBuilder<> IRBElse(ElseTerm);
+      copyToShadow(ShadowAfterScope, ShadowClean, IRBElse, ShadowBase);
+    } else {
+      copyToShadow(ShadowAfterScope, ShadowClean, IRBRet, ShadowBase);
+    }
+  }
+
+  // We are done. Remove the old unused alloca instructions.
+  for (auto AI : AllocaVec) AI->eraseFromParent();
+}
+
+void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size,
+                                         IRBuilder<> &IRB, bool DoPoison) {
+  // Emit a runtime call that poisons (DoPoison) or unpoisons Size at V.
+  Value *AddrArg = IRB.CreatePointerCast(V, IntptrTy); // address as IntptrTy
+  Value *SizeArg = ConstantInt::get(IntptrTy, Size); // size as IntptrTy
+  IRB.CreateCall(
+      DoPoison ? AsanPoisonStackMemoryFunc : AsanUnpoisonStackMemoryFunc,
+      {AddrArg, SizeArg});
+}
+
+// Handling llvm.lifetime intrinsics for a given %alloca:
+// (1) collect all llvm.lifetime.xxx(%size, %value) describing the alloca.
+// (2) if %size is constant, poison memory for llvm.lifetime.end (to detect
+//     invalid accesses) and unpoison it for llvm.lifetime.start (the memory
+//     could be poisoned by previous llvm.lifetime.end instruction, as the
+//     variable may go in and out of scope several times, e.g. in loops).
+// (3) if we poisoned at least one %alloca in a function,
+//     unpoison the whole stack frame at function exit.
+void FunctionStackPoisoner::handleDynamicAllocaCall(AllocaInst *AI) {
+  IRBuilder<> IRB(AI); // new instructions are inserted before AI
+
+  const unsigned Alignment = std::max(kAllocaRzSize, AI->getAlignment());
+  const uint64_t AllocaRedzoneMask = kAllocaRzSize - 1;
+
+  Value *Zero = Constant::getNullValue(IntptrTy);
+  Value *AllocaRzSize = ConstantInt::get(IntptrTy, kAllocaRzSize);
+  Value *AllocaRzMask = ConstantInt::get(IntptrTy, AllocaRedzoneMask);
+
+  // We need to extend the alloca with additional memory for redzones.
+  // AI->getArraySize() is the number of allocated elements of ElementSize
+  // each, so the size of the originally requested memory is
+  // OldSize = ArraySize * ElementSize.
+  const unsigned ElementSize =
+      F.getParent()->getDataLayout().getTypeAllocSize(AI->getAllocatedType());
+  Value *OldSize =
+      IRB.CreateMul(IRB.CreateIntCast(AI->getArraySize(), IntptrTy, false),
+                    ConstantInt::get(IntptrTy, ElementSize));
+
+  // PartialSize = OldSize & (kAllocaRzSize - 1)
+  Value *PartialSize = IRB.CreateAnd(OldSize, AllocaRzMask);
+
+  // Misalign = kAllocaRzSize - PartialSize;
+  Value *Misalign = IRB.CreateSub(AllocaRzSize, PartialSize);
+
+  // PartialPadding = Misalign != kAllocaRzSize ? Misalign : 0;
+  Value *Cond = IRB.CreateICmpNE(Misalign, AllocaRzSize);
+  Value *PartialPadding = IRB.CreateSelect(Cond, Misalign, Zero);
+
+  // AdditionalChunkSize = Alignment + PartialPadding + kAllocaRzSize
+  // Alignment is added to locate left redzone, PartialPadding for possible
+  // partial redzone and kAllocaRzSize for right redzone respectively.
+  Value *AdditionalChunkSize = IRB.CreateAdd(
+      ConstantInt::get(IntptrTy, Alignment + kAllocaRzSize), PartialPadding);
+
+  Value *NewSize = IRB.CreateAdd(OldSize, AdditionalChunkSize);
+
+  // Insert a new i8 alloca of NewSize elements with the Alignment computed above.
+  AllocaInst *NewAlloca = IRB.CreateAlloca(IRB.getInt8Ty(), NewSize);
+  NewAlloca->setAlignment(Align(Alignment));
+
+  // NewAddress = Address + Alignment
+  Value *NewAddress = IRB.CreateAdd(IRB.CreatePtrToInt(NewAlloca, IntptrTy),
+                                    ConstantInt::get(IntptrTy, Alignment));
+
+  // Insert __asan_alloca_poison call for the newly created alloca.
+  IRB.CreateCall(AsanAllocaPoisonFunc, {NewAddress, OldSize});
+
+  // Store the last alloca's address to DynamicAllocaLayout. We'll need this
+  // for unpoisoning stuff.
+  IRB.CreateStore(IRB.CreatePtrToInt(NewAlloca, IntptrTy), DynamicAllocaLayout);
+
+  Value *NewAddressPtr = IRB.CreateIntToPtr(NewAddress, AI->getType());
+
+  // Replace all uses of the old alloca's address with NewAddressPtr.
+  AI->replaceAllUsesWith(NewAddressPtr);
+
+  // We are done. Erase old alloca from parent.
+  AI->eraseFromParent();
+}
+
+// isSafeAccess returns true if Addr is always inbounds with respect to its
+// base object. For example, it is a field access or an array access with
+// constant inbounds index.
+bool AddressSanitizer::isSafeAccess(ObjectSizeOffsetVisitor &ObjSizeVis,
+                                    Value *Addr, uint64_t TypeSize) const {
+  SizeOffsetType SizeOffset = ObjSizeVis.compute(Addr);
+  if (!ObjSizeVis.bothKnown(SizeOffset)) return false;
+  uint64_t Size = SizeOffset.first.getZExtValue();
+  int64_t Offset = SizeOffset.second.getSExtValue();
+  // Three checks are required to ensure safety:
+  // . Offset >= 0  (since the offset is given from the base ptr)
+  // . Size >= Offset  (unsigned)
+  // . Size - Offset >= NeededSize  (unsigned), NeededSize = TypeSize / 8
+  return Offset >= 0 && Size >= uint64_t(Offset) &&
+         Size - uint64_t(Offset) >= TypeSize / 8;
+}
diff --git a/contrib/libs/llvm12/lib/Transforms/Instrumentation/BoundsChecking.cpp b/contrib/libs/llvm12/lib/Transforms/Instrumentation/BoundsChecking.cpp
index c2d9964ecc..efb11b68a1 100644
--- a/contrib/libs/llvm12/lib/Transforms/Instrumentation/BoundsChecking.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -1,254 +1,254 @@
-//===- BoundsChecking.cpp - Instrumentation for run-time bounds checking --===// 
-// 
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 
-// 
-//===----------------------------------------------------------------------===// 
- 
-#include "llvm/Transforms/Instrumentation/BoundsChecking.h" 
-#include "llvm/ADT/Statistic.h" 
-#include "llvm/ADT/Twine.h" 
-#include "llvm/Analysis/MemoryBuiltins.h" 
-#include "llvm/Analysis/ScalarEvolution.h" 
-#include "llvm/Analysis/TargetFolder.h" 
-#include "llvm/Analysis/TargetLibraryInfo.h" 
-#include "llvm/IR/BasicBlock.h" 
-#include "llvm/IR/Constants.h" 
-#include "llvm/IR/DataLayout.h" 
-#include "llvm/IR/Function.h" 
-#include "llvm/IR/IRBuilder.h" 
-#include "llvm/IR/InstIterator.h" 
-#include "llvm/IR/InstrTypes.h" 
-#include "llvm/IR/Instruction.h" 
-#include "llvm/IR/Instructions.h" 
-#include "llvm/IR/Intrinsics.h" 
-#include "llvm/IR/Value.h" 
-#include "llvm/InitializePasses.h" 
-#include "llvm/Pass.h" 
-#include "llvm/Support/Casting.h" 
-#include "llvm/Support/CommandLine.h" 
-#include "llvm/Support/Debug.h" 
-#include "llvm/Support/ErrorHandling.h" 
-#include "llvm/Support/raw_ostream.h" 
-#include <cstdint> 
-#include <vector> 
- 
-using namespace llvm; 
- 
-#define DEBUG_TYPE "bounds-checking" 
- 
-static cl::opt<bool> SingleTrapBB("bounds-checking-single-trap", 
-                                  cl::desc("Use one trap block per function")); 
- 
-STATISTIC(ChecksAdded, "Bounds checks added"); 
-STATISTIC(ChecksSkipped, "Bounds checks skipped"); 
-STATISTIC(ChecksUnable, "Bounds checks unable to add"); 
- 
-using BuilderTy = IRBuilder<TargetFolder>; 
- 
-/// Gets the conditions under which memory accessing instructions will overflow. 
-/// 
-/// \p Ptr is the pointer that will be read/written, and \p InstVal is either 
-/// the result from the load or the value being stored. It is used to determine 
-/// the size of memory block that is touched. 
-/// 
-/// Returns the condition under which the access will overflow. 
-static Value *getBoundsCheckCond(Value *Ptr, Value *InstVal, 
-                                 const DataLayout &DL, TargetLibraryInfo &TLI, 
-                                 ObjectSizeOffsetEvaluator &ObjSizeEval, 
-                                 BuilderTy &IRB, ScalarEvolution &SE) { 
-  uint64_t NeededSize = DL.getTypeStoreSize(InstVal->getType()); 
-  LLVM_DEBUG(dbgs() << "Instrument " << *Ptr << " for " << Twine(NeededSize) 
-                    << " bytes\n"); 
- 
-  SizeOffsetEvalType SizeOffset = ObjSizeEval.compute(Ptr); 
- 
-  if (!ObjSizeEval.bothKnown(SizeOffset)) { 
-    ++ChecksUnable; 
-    return nullptr; 
-  } 
- 
-  Value *Size   = SizeOffset.first; 
-  Value *Offset = SizeOffset.second; 
-  ConstantInt *SizeCI = dyn_cast<ConstantInt>(Size); 
- 
-  Type *IntTy = DL.getIntPtrType(Ptr->getType()); 
-  Value *NeededSizeVal = ConstantInt::get(IntTy, NeededSize); 
- 
-  auto SizeRange = SE.getUnsignedRange(SE.getSCEV(Size)); 
-  auto OffsetRange = SE.getUnsignedRange(SE.getSCEV(Offset)); 
-  auto NeededSizeRange = SE.getUnsignedRange(SE.getSCEV(NeededSizeVal)); 
- 
-  // three checks are required to ensure safety: 
-  // . Offset >= 0  (since the offset is given from the base ptr) 
-  // . Size >= Offset  (unsigned) 
-  // . Size - Offset >= NeededSize  (unsigned) 
-  // 
-  // optimization: if Size >= 0 (signed), skip 1st check 
-  // FIXME: add NSW/NUW here?  -- we dont care if the subtraction overflows 
-  Value *ObjSize = IRB.CreateSub(Size, Offset); 
-  Value *Cmp2 = SizeRange.getUnsignedMin().uge(OffsetRange.getUnsignedMax()) 
-                    ? ConstantInt::getFalse(Ptr->getContext()) 
-                    : IRB.CreateICmpULT(Size, Offset); 
-  Value *Cmp3 = SizeRange.sub(OffsetRange) 
-                        .getUnsignedMin() 
-                        .uge(NeededSizeRange.getUnsignedMax()) 
-                    ? ConstantInt::getFalse(Ptr->getContext()) 
-                    : IRB.CreateICmpULT(ObjSize, NeededSizeVal); 
-  Value *Or = IRB.CreateOr(Cmp2, Cmp3); 
-  if ((!SizeCI || SizeCI->getValue().slt(0)) && 
-      !SizeRange.getSignedMin().isNonNegative()) { 
-    Value *Cmp1 = IRB.CreateICmpSLT(Offset, ConstantInt::get(IntTy, 0)); 
-    Or = IRB.CreateOr(Cmp1, Or); 
-  } 
- 
-  return Or; 
-} 
- 
-/// Adds run-time bounds checks to memory accessing instructions. 
-/// 
-/// \p Or is the condition that should guard the trap. 
-/// 
-/// \p GetTrapBB is a callable that returns the trap BB to use on failure. 
-template <typename GetTrapBBT> 
-static void insertBoundsCheck(Value *Or, BuilderTy &IRB, GetTrapBBT GetTrapBB) { 
-  // check if the comparison is always false 
-  ConstantInt *C = dyn_cast_or_null<ConstantInt>(Or); 
-  if (C) { 
-    ++ChecksSkipped; 
-    // If non-zero, nothing to do. 
-    if (!C->getZExtValue()) 
-      return; 
-  } 
-  ++ChecksAdded; 
- 
-  BasicBlock::iterator SplitI = IRB.GetInsertPoint(); 
-  BasicBlock *OldBB = SplitI->getParent(); 
-  BasicBlock *Cont = OldBB->splitBasicBlock(SplitI); 
-  OldBB->getTerminator()->eraseFromParent(); 
- 
-  if (C) { 
-    // If we have a constant zero, unconditionally branch. 
-    // FIXME: We should really handle this differently to bypass the splitting 
-    // the block. 
-    BranchInst::Create(GetTrapBB(IRB), OldBB); 
-    return; 
-  } 
- 
-  // Create the conditional branch. 
-  BranchInst::Create(GetTrapBB(IRB), Cont, Or, OldBB); 
-} 
- 
-static bool addBoundsChecking(Function &F, TargetLibraryInfo &TLI, 
-                              ScalarEvolution &SE) { 
-  const DataLayout &DL = F.getParent()->getDataLayout(); 
-  ObjectSizeOpts EvalOpts; 
-  EvalOpts.RoundToAlign = true; 
-  ObjectSizeOffsetEvaluator ObjSizeEval(DL, &TLI, F.getContext(), EvalOpts); 
- 
-  // check HANDLE_MEMORY_INST in include/llvm/Instruction.def for memory 
-  // touching instructions 
-  SmallVector<std::pair<Instruction *, Value *>, 4> TrapInfo; 
-  for (Instruction &I : instructions(F)) { 
-    Value *Or = nullptr; 
-    BuilderTy IRB(I.getParent(), BasicBlock::iterator(&I), TargetFolder(DL)); 
-    if (LoadInst *LI = dyn_cast<LoadInst>(&I)) { 
-      if (!LI->isVolatile()) 
-        Or = getBoundsCheckCond(LI->getPointerOperand(), LI, DL, TLI, 
-                                ObjSizeEval, IRB, SE); 
-    } else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) { 
-      if (!SI->isVolatile()) 
-        Or = getBoundsCheckCond(SI->getPointerOperand(), SI->getValueOperand(), 
-                                DL, TLI, ObjSizeEval, IRB, SE); 
-    } else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(&I)) { 
-      if (!AI->isVolatile()) 
-        Or = 
-            getBoundsCheckCond(AI->getPointerOperand(), AI->getCompareOperand(), 
-                               DL, TLI, ObjSizeEval, IRB, SE); 
-    } else if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(&I)) { 
-      if (!AI->isVolatile()) 
-        Or = getBoundsCheckCond(AI->getPointerOperand(), AI->getValOperand(), 
-                                DL, TLI, ObjSizeEval, IRB, SE); 
-    } 
-    if (Or) 
-      TrapInfo.push_back(std::make_pair(&I, Or)); 
-  } 
- 
-  // Create a trapping basic block on demand using a callback. Depending on 
-  // flags, this will either create a single block for the entire function or 
-  // will create a fresh block every time it is called. 
-  BasicBlock *TrapBB = nullptr; 
-  auto GetTrapBB = [&TrapBB](BuilderTy &IRB) { 
-    if (TrapBB && SingleTrapBB) 
-      return TrapBB; 
- 
-    Function *Fn = IRB.GetInsertBlock()->getParent(); 
-    // FIXME: This debug location doesn't make a lot of sense in the 
-    // `SingleTrapBB` case. 
-    auto DebugLoc = IRB.getCurrentDebugLocation(); 
-    IRBuilder<>::InsertPointGuard Guard(IRB); 
-    TrapBB = BasicBlock::Create(Fn->getContext(), "trap", Fn); 
-    IRB.SetInsertPoint(TrapBB); 
- 
-    auto *F = Intrinsic::getDeclaration(Fn->getParent(), Intrinsic::trap); 
-    CallInst *TrapCall = IRB.CreateCall(F, {}); 
-    TrapCall->setDoesNotReturn(); 
-    TrapCall->setDoesNotThrow(); 
-    TrapCall->setDebugLoc(DebugLoc); 
-    IRB.CreateUnreachable(); 
- 
-    return TrapBB; 
-  }; 
- 
-  // Add the checks. 
-  for (const auto &Entry : TrapInfo) { 
-    Instruction *Inst = Entry.first; 
-    BuilderTy IRB(Inst->getParent(), BasicBlock::iterator(Inst), TargetFolder(DL)); 
-    insertBoundsCheck(Entry.second, IRB, GetTrapBB); 
-  } 
- 
-  return !TrapInfo.empty(); 
-} 
- 
-PreservedAnalyses BoundsCheckingPass::run(Function &F, FunctionAnalysisManager &AM) { 
-  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); 
-  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F); 
- 
-  if (!addBoundsChecking(F, TLI, SE)) 
-    return PreservedAnalyses::all(); 
- 
-  return PreservedAnalyses::none(); 
-} 
- 
-namespace { 
-struct BoundsCheckingLegacyPass : public FunctionPass { 
-  static char ID; 
- 
-  BoundsCheckingLegacyPass() : FunctionPass(ID) { 
-    initializeBoundsCheckingLegacyPassPass(*PassRegistry::getPassRegistry()); 
-  } 
- 
-  bool runOnFunction(Function &F) override { 
-    auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 
-    auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 
-    return addBoundsChecking(F, TLI, SE); 
-  } 
- 
-  void getAnalysisUsage(AnalysisUsage &AU) const override { 
-    AU.addRequired<TargetLibraryInfoWrapperPass>(); 
-    AU.addRequired<ScalarEvolutionWrapperPass>(); 
-  } 
-}; 
-} // namespace 
- 
-char BoundsCheckingLegacyPass::ID = 0; 
-INITIALIZE_PASS_BEGIN(BoundsCheckingLegacyPass, "bounds-checking", 
-                      "Run-time bounds checking", false, false) 
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 
-INITIALIZE_PASS_END(BoundsCheckingLegacyPass, "bounds-checking", 
-                    "Run-time bounds checking", false, false) 
- 
-FunctionPass *llvm::createBoundsCheckingLegacyPass() { 
-  return new BoundsCheckingLegacyPass(); 
-} 
+//===- BoundsChecking.cpp - Instrumentation for run-time bounds checking --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/BoundsChecking.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetFolder.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdint>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "bounds-checking"
+
+static cl::opt<bool> SingleTrapBB("bounds-checking-single-trap",
+                                  cl::desc("Use one trap block per function"));
+
+STATISTIC(ChecksAdded, "Bounds checks added");
+STATISTIC(ChecksSkipped, "Bounds checks skipped");
+STATISTIC(ChecksUnable, "Bounds checks unable to add");
+
+using BuilderTy = IRBuilder<TargetFolder>;
+
+/// Gets the conditions under which memory accessing instructions will overflow.
+///
+/// \p Ptr is the pointer that will be read/written, and \p InstVal is either
+/// the result from the load or the value being stored. It is used to determine
+/// the size of memory block that is touched.
+///
+/// Returns the overflow condition, or nullptr if size/offset are unknown.
+static Value *getBoundsCheckCond(Value *Ptr, Value *InstVal,
+                                 const DataLayout &DL, TargetLibraryInfo &TLI,
+                                 ObjectSizeOffsetEvaluator &ObjSizeEval,
+                                 BuilderTy &IRB, ScalarEvolution &SE) {
+  uint64_t NeededSize = DL.getTypeStoreSize(InstVal->getType());
+  LLVM_DEBUG(dbgs() << "Instrument " << *Ptr << " for " << Twine(NeededSize)
+                    << " bytes\n");
+
+  SizeOffsetEvalType SizeOffset = ObjSizeEval.compute(Ptr);
+
+  if (!ObjSizeEval.bothKnown(SizeOffset)) {
+    ++ChecksUnable;
+    return nullptr;
+  }
+
+  Value *Size   = SizeOffset.first;
+  Value *Offset = SizeOffset.second;
+  ConstantInt *SizeCI = dyn_cast<ConstantInt>(Size);
+
+  Type *IntTy = DL.getIntPtrType(Ptr->getType());
+  Value *NeededSizeVal = ConstantInt::get(IntTy, NeededSize);
+
+  auto SizeRange = SE.getUnsignedRange(SE.getSCEV(Size));
+  auto OffsetRange = SE.getUnsignedRange(SE.getSCEV(Offset));
+  auto NeededSizeRange = SE.getUnsignedRange(SE.getSCEV(NeededSizeVal));
+
+  // Three checks are required to ensure safety:
+  // . Offset >= 0  (since the offset is given from the base ptr)
+  // . Size >= Offset  (unsigned)
+  // . Size - Offset >= NeededSize  (unsigned)
+  //
+  // optimization: if Size >= 0 (signed), skip 1st check
+  // FIXME: add NSW/NUW here?  -- we don't care if the subtraction overflows
+  Value *ObjSize = IRB.CreateSub(Size, Offset);
+  Value *Cmp2 = SizeRange.getUnsignedMin().uge(OffsetRange.getUnsignedMax())
+                    ? ConstantInt::getFalse(Ptr->getContext())
+                    : IRB.CreateICmpULT(Size, Offset);
+  Value *Cmp3 = SizeRange.sub(OffsetRange)
+                        .getUnsignedMin()
+                        .uge(NeededSizeRange.getUnsignedMax())
+                    ? ConstantInt::getFalse(Ptr->getContext())
+                    : IRB.CreateICmpULT(ObjSize, NeededSizeVal);
+  Value *Or = IRB.CreateOr(Cmp2, Cmp3);
+  if ((!SizeCI || SizeCI->getValue().slt(0)) &&
+      !SizeRange.getSignedMin().isNonNegative()) {
+    Value *Cmp1 = IRB.CreateICmpSLT(Offset, ConstantInt::get(IntTy, 0));
+    Or = IRB.CreateOr(Cmp1, Or);
+  }
+
+  return Or;
+}
+
+/// Adds run-time bounds checks to memory accessing instructions.
+///
+/// \p Or is the condition that should guard the trap.
+///
+/// \p GetTrapBB is a callable that returns the trap BB to use on failure.
+template <typename GetTrapBBT>
+static void insertBoundsCheck(Value *Or, BuilderTy &IRB, GetTrapBBT GetTrapBB) {
+  // Check whether the condition folded to a constant.
+  ConstantInt *C = dyn_cast_or_null<ConstantInt>(Or);
+  if (C) {
+    ++ChecksSkipped;
+    // If zero (the check can never fail), nothing to do.
+    if (!C->getZExtValue())
+      return;
+  }
+  ++ChecksAdded;
+
+  BasicBlock::iterator SplitI = IRB.GetInsertPoint();
+  BasicBlock *OldBB = SplitI->getParent();
+  BasicBlock *Cont = OldBB->splitBasicBlock(SplitI);
+  OldBB->getTerminator()->eraseFromParent();
+
+  if (C) {
+    // If we have a constant non-zero, unconditionally branch to the trap.
+    // FIXME: We should really handle this differently to bypass the splitting
+    // the block.
+    BranchInst::Create(GetTrapBB(IRB), OldBB);
+    return;
+  }
+
+  // Create the conditional branch.
+  BranchInst::Create(GetTrapBB(IRB), Cont, Or, OldBB);
+}
+
+static bool addBoundsChecking(Function &F, TargetLibraryInfo &TLI,
+                              ScalarEvolution &SE) {
+  const DataLayout &DL = F.getParent()->getDataLayout();
+  ObjectSizeOpts EvalOpts;
+  EvalOpts.RoundToAlign = true;
+  ObjectSizeOffsetEvaluator ObjSizeEval(DL, &TLI, F.getContext(), EvalOpts);
+
+  // Collect check conditions for non-volatile loads, stores, cmpxchg and
+  // atomicrmw; see HANDLE_MEMORY_INST in include/llvm/Instruction.def.
+  SmallVector<std::pair<Instruction *, Value *>, 4> TrapInfo;
+  for (Instruction &I : instructions(F)) {
+    Value *Or = nullptr;
+    BuilderTy IRB(I.getParent(), BasicBlock::iterator(&I), TargetFolder(DL));
+    if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
+      if (!LI->isVolatile())
+        Or = getBoundsCheckCond(LI->getPointerOperand(), LI, DL, TLI,
+                                ObjSizeEval, IRB, SE);
+    } else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+      if (!SI->isVolatile())
+        Or = getBoundsCheckCond(SI->getPointerOperand(), SI->getValueOperand(),
+                                DL, TLI, ObjSizeEval, IRB, SE);
+    } else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(&I)) {
+      if (!AI->isVolatile())
+        Or =
+            getBoundsCheckCond(AI->getPointerOperand(), AI->getCompareOperand(),
+                               DL, TLI, ObjSizeEval, IRB, SE);
+    } else if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(&I)) {
+      if (!AI->isVolatile())
+        Or = getBoundsCheckCond(AI->getPointerOperand(), AI->getValOperand(),
+                                DL, TLI, ObjSizeEval, IRB, SE);
+    }
+    if (Or)
+      TrapInfo.push_back(std::make_pair(&I, Or));
+  }
+
+  // Create a trapping basic block on demand using a callback. Depending on
+  // flags, this will either create a single block for the entire function or
+  // will create a fresh block every time it is called.
+  BasicBlock *TrapBB = nullptr;
+  auto GetTrapBB = [&TrapBB](BuilderTy &IRB) {
+    if (TrapBB && SingleTrapBB)
+      return TrapBB;
+
+    Function *Fn = IRB.GetInsertBlock()->getParent();
+    // FIXME: This debug location doesn't make a lot of sense in the
+    // `SingleTrapBB` case.
+    auto DebugLoc = IRB.getCurrentDebugLocation();
+    IRBuilder<>::InsertPointGuard Guard(IRB);
+    TrapBB = BasicBlock::Create(Fn->getContext(), "trap", Fn);
+    IRB.SetInsertPoint(TrapBB);
+
+    auto *F = Intrinsic::getDeclaration(Fn->getParent(), Intrinsic::trap);
+    CallInst *TrapCall = IRB.CreateCall(F, {});
+    TrapCall->setDoesNotReturn();
+    TrapCall->setDoesNotThrow();
+    TrapCall->setDebugLoc(DebugLoc);
+    IRB.CreateUnreachable();
+
+    return TrapBB;
+  };
+
+  // Add the checks, one per collected (instruction, condition) pair.
+  for (const auto &Entry : TrapInfo) {
+    Instruction *Inst = Entry.first;
+    BuilderTy IRB(Inst->getParent(), BasicBlock::iterator(Inst), TargetFolder(DL));
+    insertBoundsCheck(Entry.second, IRB, GetTrapBB);
+  }
+
+  return !TrapInfo.empty(); // true iff at least one condition was collected
+}
+
+PreservedAnalyses BoundsCheckingPass::run(Function &F, FunctionAnalysisManager &AM) {
+  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
+
+  if (!addBoundsChecking(F, TLI, SE))
+    return PreservedAnalyses::all(); // no check condition was collected
+
+  return PreservedAnalyses::none(); // checks may have been inserted
+}
+
+namespace {
+struct BoundsCheckingLegacyPass : public FunctionPass { // FunctionPass wrapper
+  static char ID;
+
+  BoundsCheckingLegacyPass() : FunctionPass(ID) {
+    initializeBoundsCheckingLegacyPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &F) override {
+    auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+    auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+    return addBoundsChecking(F, TLI, SE); // forwards its result unchanged
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<TargetLibraryInfoWrapperPass>(); // used by runOnFunction
+    AU.addRequired<ScalarEvolutionWrapperPass>(); // used by runOnFunction
+  }
+};
+} // namespace
+
+char BoundsCheckingLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(BoundsCheckingLegacyPass, "bounds-checking",
+                      "Run-time bounds checking", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(BoundsCheckingLegacyPass, "bounds-checking",
+                    "Run-time bounds checking", false, false)
+
+FunctionPass *llvm::createBoundsCheckingLegacyPass() {
+  return new BoundsCheckingLegacyPass(); // heap-allocated, returned as raw ptr
+}
diff --git a/contrib/libs/llvm12/lib/Transforms/Instrumentation/CFGMST.h b/contrib/libs/llvm12/lib/Transforms/Instrumentation/CFGMST.h
index 9de6edaadf..6580b6d7d7 100644
--- a/contrib/libs/llvm12/lib/Transforms/Instrumentation/CFGMST.h
+++ b/contrib/libs/llvm12/lib/Transforms/Instrumentation/CFGMST.h
@@ -1,303 +1,303 @@
-//===-- CFGMST.h - Minimum Spanning Tree for CFG ----------------*- C++ -*-===// 
-// 
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 
-// 
-//===----------------------------------------------------------------------===// 
-// 
-// This file implements a Union-find algorithm to compute Minimum Spanning Tree 
-// for a given CFG. 
-// 
-//===----------------------------------------------------------------------===// 
- 
-#ifndef LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H 
-#define LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H 
- 
-#include "llvm/ADT/DenseMap.h" 
-#include "llvm/ADT/STLExtras.h" 
-#include "llvm/Analysis/BlockFrequencyInfo.h" 
-#include "llvm/Analysis/BranchProbabilityInfo.h" 
-#include "llvm/Analysis/CFG.h" 
-#include "llvm/Support/BranchProbability.h" 
-#include "llvm/Support/Debug.h" 
-#include "llvm/Support/raw_ostream.h" 
-#include "llvm/Transforms/Utils/BasicBlockUtils.h" 
-#include <utility> 
-#include <vector> 
- 
-#define DEBUG_TYPE "cfgmst" 
- 
-using namespace llvm; 
- 
-namespace llvm { 
- 
-/// An union-find based Minimum Spanning Tree for CFG 
-/// 
-/// Implements a Union-find algorithm to compute Minimum Spanning Tree 
-/// for a given CFG. 
-template <class Edge, class BBInfo> class CFGMST { 
-public: 
-  Function &F; 
- 
-  // Store all the edges in CFG. It may contain some stale edges 
-  // when Removed is set. 
-  std::vector<std::unique_ptr<Edge>> AllEdges; 
- 
-  // This map records the auxiliary information for each BB. 
-  DenseMap<const BasicBlock *, std::unique_ptr<BBInfo>> BBInfos; 
- 
-  // Whehter the function has an exit block with no successors. 
-  // (For function with an infinite loop, this block may be absent) 
-  bool ExitBlockFound = false; 
- 
-  // Find the root group of the G and compress the path from G to the root. 
-  BBInfo *findAndCompressGroup(BBInfo *G) { 
-    if (G->Group != G) 
-      G->Group = findAndCompressGroup(static_cast<BBInfo *>(G->Group)); 
-    return static_cast<BBInfo *>(G->Group); 
-  } 
- 
-  // Union BB1 and BB2 into the same group and return true. 
-  // Returns false if BB1 and BB2 are already in the same group. 
-  bool unionGroups(const BasicBlock *BB1, const BasicBlock *BB2) { 
-    BBInfo *BB1G = findAndCompressGroup(&getBBInfo(BB1)); 
-    BBInfo *BB2G = findAndCompressGroup(&getBBInfo(BB2)); 
- 
-    if (BB1G == BB2G) 
-      return false; 
- 
-    // Make the smaller rank tree a direct child or the root of high rank tree. 
-    if (BB1G->Rank < BB2G->Rank) 
-      BB1G->Group = BB2G; 
-    else { 
-      BB2G->Group = BB1G; 
-      // If the ranks are the same, increment root of one tree by one. 
-      if (BB1G->Rank == BB2G->Rank) 
-        BB1G->Rank++; 
-    } 
-    return true; 
-  } 
- 
-  // Give BB, return the auxiliary information. 
-  BBInfo &getBBInfo(const BasicBlock *BB) const { 
-    auto It = BBInfos.find(BB); 
-    assert(It->second.get() != nullptr); 
-    return *It->second.get(); 
-  } 
- 
-  // Give BB, return the auxiliary information if it's available. 
-  BBInfo *findBBInfo(const BasicBlock *BB) const { 
-    auto It = BBInfos.find(BB); 
-    if (It == BBInfos.end()) 
-      return nullptr; 
-    return It->second.get(); 
-  } 
- 
-  // Traverse the CFG using a stack. Find all the edges and assign the weight. 
-  // Edges with large weight will be put into MST first so they are less likely 
-  // to be instrumented. 
-  void buildEdges() { 
-    LLVM_DEBUG(dbgs() << "Build Edge on " << F.getName() << "\n"); 
- 
-    const BasicBlock *Entry = &(F.getEntryBlock()); 
-    uint64_t EntryWeight = (BFI != nullptr ? BFI->getEntryFreq() : 2); 
-    // If we want to instrument the entry count, lower the weight to 0. 
+//===-- CFGMST.h - Minimum Spanning Tree for CFG ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a Union-find algorithm to compute Minimum Spanning Tree
+// for a given CFG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H
+#define LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <utility>
+#include <vector>
+
+#define DEBUG_TYPE "cfgmst"
+
+using namespace llvm;
+
+namespace llvm {
+
+/// An union-find based Minimum Spanning Tree for CFG
+///
+/// Implements a Union-find algorithm to compute Minimum Spanning Tree
+/// for a given CFG.
+template <class Edge, class BBInfo> class CFGMST {
+public:
+  Function &F;
+
+  // Store all the edges in CFG. It may contain some stale edges
+  // when Removed is set.
+  std::vector<std::unique_ptr<Edge>> AllEdges;
+
+  // This map records the auxiliary information for each BB.
+  DenseMap<const BasicBlock *, std::unique_ptr<BBInfo>> BBInfos;
+
+  // Whehter the function has an exit block with no successors.
+  // (For function with an infinite loop, this block may be absent)
+  bool ExitBlockFound = false;
+
+  // Find the root group of the G and compress the path from G to the root.
+  BBInfo *findAndCompressGroup(BBInfo *G) {
+    if (G->Group != G)
+      G->Group = findAndCompressGroup(static_cast<BBInfo *>(G->Group));
+    return static_cast<BBInfo *>(G->Group);
+  }
+
+  // Union BB1 and BB2 into the same group and return true.
+  // Returns false if BB1 and BB2 are already in the same group.
+  bool unionGroups(const BasicBlock *BB1, const BasicBlock *BB2) {
+    BBInfo *BB1G = findAndCompressGroup(&getBBInfo(BB1));
+    BBInfo *BB2G = findAndCompressGroup(&getBBInfo(BB2));
+
+    if (BB1G == BB2G)
+      return false;
+
+    // Make the smaller rank tree a direct child or the root of high rank tree.
+    if (BB1G->Rank < BB2G->Rank)
+      BB1G->Group = BB2G;
+    else {
+      BB2G->Group = BB1G;
+      // If the ranks are the same, increment root of one tree by one.
+      if (BB1G->Rank == BB2G->Rank)
+        BB1G->Rank++;
+    }
+    return true;
+  }
+
+  // Give BB, return the auxiliary information.
+  BBInfo &getBBInfo(const BasicBlock *BB) const {
+    auto It = BBInfos.find(BB);
+    assert(It->second.get() != nullptr);
+    return *It->second.get();
+  }
+
+  // Give BB, return the auxiliary information if it's available.
+  BBInfo *findBBInfo(const BasicBlock *BB) const {
+    auto It = BBInfos.find(BB);
+    if (It == BBInfos.end())
+      return nullptr;
+    return It->second.get();
+  }
+
+  // Traverse the CFG using a stack. Find all the edges and assign the weight.
+  // Edges with large weight will be put into MST first so they are less likely
+  // to be instrumented.
+  void buildEdges() {
+    LLVM_DEBUG(dbgs() << "Build Edge on " << F.getName() << "\n");
+
+    const BasicBlock *Entry = &(F.getEntryBlock());
+    uint64_t EntryWeight = (BFI != nullptr ? BFI->getEntryFreq() : 2);
+    // If we want to instrument the entry count, lower the weight to 0.
     if (InstrumentFuncEntry)
-      EntryWeight = 0; 
-    Edge *EntryIncoming = nullptr, *EntryOutgoing = nullptr, 
-         *ExitOutgoing = nullptr, *ExitIncoming = nullptr; 
-    uint64_t MaxEntryOutWeight = 0, MaxExitOutWeight = 0, MaxExitInWeight = 0; 
- 
-    // Add a fake edge to the entry. 
-    EntryIncoming = &addEdge(nullptr, Entry, EntryWeight); 
-    LLVM_DEBUG(dbgs() << "  Edge: from fake node to " << Entry->getName() 
-                      << " w = " << EntryWeight << "\n"); 
- 
-    // Special handling for single BB functions. 
-    if (succ_empty(Entry)) { 
-      addEdge(Entry, nullptr, EntryWeight); 
-      return; 
-    } 
- 
-    static const uint32_t CriticalEdgeMultiplier = 1000; 
- 
-    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { 
-      Instruction *TI = BB->getTerminator(); 
-      uint64_t BBWeight = 
-          (BFI != nullptr ? BFI->getBlockFreq(&*BB).getFrequency() : 2); 
-      uint64_t Weight = 2; 
-      if (int successors = TI->getNumSuccessors()) { 
-        for (int i = 0; i != successors; ++i) { 
-          BasicBlock *TargetBB = TI->getSuccessor(i); 
-          bool Critical = isCriticalEdge(TI, i); 
-          uint64_t scaleFactor = BBWeight; 
-          if (Critical) { 
-            if (scaleFactor < UINT64_MAX / CriticalEdgeMultiplier) 
-              scaleFactor *= CriticalEdgeMultiplier; 
-            else 
-              scaleFactor = UINT64_MAX; 
-          } 
-          if (BPI != nullptr) 
-            Weight = BPI->getEdgeProbability(&*BB, TargetBB).scale(scaleFactor); 
-          if (Weight == 0) 
-            Weight++; 
-          auto *E = &addEdge(&*BB, TargetBB, Weight); 
-          E->IsCritical = Critical; 
-          LLVM_DEBUG(dbgs() << "  Edge: from " << BB->getName() << " to " 
-                            << TargetBB->getName() << "  w=" << Weight << "\n"); 
- 
-          // Keep track of entry/exit edges: 
-          if (&*BB == Entry) { 
-            if (Weight > MaxEntryOutWeight) { 
-              MaxEntryOutWeight = Weight; 
-              EntryOutgoing = E; 
-            } 
-          } 
- 
-          auto *TargetTI = TargetBB->getTerminator(); 
-          if (TargetTI && !TargetTI->getNumSuccessors()) { 
-            if (Weight > MaxExitInWeight) { 
-              MaxExitInWeight = Weight; 
-              ExitIncoming = E; 
-            } 
-          } 
-        } 
-      } else { 
-        ExitBlockFound = true; 
-        Edge *ExitO = &addEdge(&*BB, nullptr, BBWeight); 
-        if (BBWeight > MaxExitOutWeight) { 
-          MaxExitOutWeight = BBWeight; 
-          ExitOutgoing = ExitO; 
-        } 
-        LLVM_DEBUG(dbgs() << "  Edge: from " << BB->getName() << " to fake exit" 
-                          << " w = " << BBWeight << "\n"); 
-      } 
-    } 
- 
-    // Entry/exit edge adjustment heurisitic: 
-    // prefer instrumenting entry edge over exit edge 
-    // if possible. Those exit edges may never have a chance to be 
-    // executed (for instance the program is an event handling loop) 
-    // before the profile is asynchronously dumped. 
-    // 
-    // If EntryIncoming and ExitOutgoing has similar weight, make sure 
-    // ExitOutging is selected as the min-edge. Similarly, if EntryOutgoing 
-    // and ExitIncoming has similar weight, make sure ExitIncoming becomes 
-    // the min-edge. 
-    uint64_t EntryInWeight = EntryWeight; 
- 
-    if (EntryInWeight >= MaxExitOutWeight && 
-        EntryInWeight * 2 < MaxExitOutWeight * 3) { 
-      EntryIncoming->Weight = MaxExitOutWeight; 
-      ExitOutgoing->Weight = EntryInWeight + 1; 
-    } 
- 
-    if (MaxEntryOutWeight >= MaxExitInWeight && 
-        MaxEntryOutWeight * 2 < MaxExitInWeight * 3) { 
-      EntryOutgoing->Weight = MaxExitInWeight; 
-      ExitIncoming->Weight = MaxEntryOutWeight + 1; 
-    } 
-  } 
- 
-  // Sort CFG edges based on its weight. 
-  void sortEdgesByWeight() { 
-    llvm::stable_sort(AllEdges, [](const std::unique_ptr<Edge> &Edge1, 
-                                   const std::unique_ptr<Edge> &Edge2) { 
-      return Edge1->Weight > Edge2->Weight; 
-    }); 
-  } 
- 
-  // Traverse all the edges and compute the Minimum Weight Spanning Tree 
-  // using union-find algorithm. 
-  void computeMinimumSpanningTree() { 
-    // First, put all the critical edge with landing-pad as the Dest to MST. 
-    // This works around the insufficient support of critical edges split 
-    // when destination BB is a landing pad. 
-    for (auto &Ei : AllEdges) { 
-      if (Ei->Removed) 
-        continue; 
-      if (Ei->IsCritical) { 
-        if (Ei->DestBB && Ei->DestBB->isLandingPad()) { 
-          if (unionGroups(Ei->SrcBB, Ei->DestBB)) 
-            Ei->InMST = true; 
-        } 
-      } 
-    } 
- 
-    for (auto &Ei : AllEdges) { 
-      if (Ei->Removed) 
-        continue; 
-      // If we detect infinite loops, force 
-      // instrumenting the entry edge: 
-      if (!ExitBlockFound && Ei->SrcBB == nullptr) 
-        continue; 
-      if (unionGroups(Ei->SrcBB, Ei->DestBB)) 
-        Ei->InMST = true; 
-    } 
-  } 
- 
-  // Dump the Debug information about the instrumentation. 
-  void dumpEdges(raw_ostream &OS, const Twine &Message) const { 
-    if (!Message.str().empty()) 
-      OS << Message << "\n"; 
-    OS << "  Number of Basic Blocks: " << BBInfos.size() << "\n"; 
-    for (auto &BI : BBInfos) { 
-      const BasicBlock *BB = BI.first; 
-      OS << "  BB: " << (BB == nullptr ? "FakeNode" : BB->getName()) << "  " 
-         << BI.second->infoString() << "\n"; 
-    } 
- 
-    OS << "  Number of Edges: " << AllEdges.size() 
-       << " (*: Instrument, C: CriticalEdge, -: Removed)\n"; 
-    uint32_t Count = 0; 
-    for (auto &EI : AllEdges) 
-      OS << "  Edge " << Count++ << ": " << getBBInfo(EI->SrcBB).Index << "-->" 
-         << getBBInfo(EI->DestBB).Index << EI->infoString() << "\n"; 
-  } 
- 
-  // Add an edge to AllEdges with weight W. 
-  Edge &addEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W) { 
-    uint32_t Index = BBInfos.size(); 
-    auto Iter = BBInfos.end(); 
-    bool Inserted; 
-    std::tie(Iter, Inserted) = BBInfos.insert(std::make_pair(Src, nullptr)); 
-    if (Inserted) { 
-      // Newly inserted, update the real info. 
-      Iter->second = std::move(std::make_unique<BBInfo>(Index)); 
-      Index++; 
-    } 
-    std::tie(Iter, Inserted) = BBInfos.insert(std::make_pair(Dest, nullptr)); 
-    if (Inserted) 
-      // Newly inserted, update the real info. 
-      Iter->second = std::move(std::make_unique<BBInfo>(Index)); 
-    AllEdges.emplace_back(new Edge(Src, Dest, W)); 
-    return *AllEdges.back(); 
-  } 
- 
-  BranchProbabilityInfo *BPI; 
-  BlockFrequencyInfo *BFI; 
- 
+      EntryWeight = 0;
+    Edge *EntryIncoming = nullptr, *EntryOutgoing = nullptr,
+         *ExitOutgoing = nullptr, *ExitIncoming = nullptr;
+    uint64_t MaxEntryOutWeight = 0, MaxExitOutWeight = 0, MaxExitInWeight = 0;
+
+    // Add a fake edge to the entry.
+    EntryIncoming = &addEdge(nullptr, Entry, EntryWeight);
+    LLVM_DEBUG(dbgs() << "  Edge: from fake node to " << Entry->getName()
+                      << " w = " << EntryWeight << "\n");
+
+    // Special handling for single BB functions.
+    if (succ_empty(Entry)) {
+      addEdge(Entry, nullptr, EntryWeight);
+      return;
+    }
+
+    static const uint32_t CriticalEdgeMultiplier = 1000;
+
+    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+      Instruction *TI = BB->getTerminator();
+      uint64_t BBWeight =
+          (BFI != nullptr ? BFI->getBlockFreq(&*BB).getFrequency() : 2);
+      uint64_t Weight = 2;
+      if (int successors = TI->getNumSuccessors()) {
+        for (int i = 0; i != successors; ++i) {
+          BasicBlock *TargetBB = TI->getSuccessor(i);
+          bool Critical = isCriticalEdge(TI, i);
+          uint64_t scaleFactor = BBWeight;
+          if (Critical) {
+            if (scaleFactor < UINT64_MAX / CriticalEdgeMultiplier)
+              scaleFactor *= CriticalEdgeMultiplier;
+            else
+              scaleFactor = UINT64_MAX;
+          }
+          if (BPI != nullptr)
+            Weight = BPI->getEdgeProbability(&*BB, TargetBB).scale(scaleFactor);
+          if (Weight == 0)
+            Weight++;
+          auto *E = &addEdge(&*BB, TargetBB, Weight);
+          E->IsCritical = Critical;
+          LLVM_DEBUG(dbgs() << "  Edge: from " << BB->getName() << " to "
+                            << TargetBB->getName() << "  w=" << Weight << "\n");
+
+          // Keep track of entry/exit edges:
+          if (&*BB == Entry) {
+            if (Weight > MaxEntryOutWeight) {
+              MaxEntryOutWeight = Weight;
+              EntryOutgoing = E;
+            }
+          }
+
+          auto *TargetTI = TargetBB->getTerminator();
+          if (TargetTI && !TargetTI->getNumSuccessors()) {
+            if (Weight > MaxExitInWeight) {
+              MaxExitInWeight = Weight;
+              ExitIncoming = E;
+            }
+          }
+        }
+      } else {
+        ExitBlockFound = true;
+        Edge *ExitO = &addEdge(&*BB, nullptr, BBWeight);
+        if (BBWeight > MaxExitOutWeight) {
+          MaxExitOutWeight = BBWeight;
+          ExitOutgoing = ExitO;
+        }
+        LLVM_DEBUG(dbgs() << "  Edge: from " << BB->getName() << " to fake exit"
+                          << " w = " << BBWeight << "\n");
+      }
+    }
+
+    // Entry/exit edge adjustment heurisitic:
+    // prefer instrumenting entry edge over exit edge
+    // if possible. Those exit edges may never have a chance to be
+    // executed (for instance the program is an event handling loop)
+    // before the profile is asynchronously dumped.
+    //
+    // If EntryIncoming and ExitOutgoing has similar weight, make sure
+    // ExitOutging is selected as the min-edge. Similarly, if EntryOutgoing
+    // and ExitIncoming has similar weight, make sure ExitIncoming becomes
+    // the min-edge.
+    uint64_t EntryInWeight = EntryWeight;
+
+    if (EntryInWeight >= MaxExitOutWeight &&
+        EntryInWeight * 2 < MaxExitOutWeight * 3) {
+      EntryIncoming->Weight = MaxExitOutWeight;
+      ExitOutgoing->Weight = EntryInWeight + 1;
+    }
+
+    if (MaxEntryOutWeight >= MaxExitInWeight &&
+        MaxEntryOutWeight * 2 < MaxExitInWeight * 3) {
+      EntryOutgoing->Weight = MaxExitInWeight;
+      ExitIncoming->Weight = MaxEntryOutWeight + 1;
+    }
+  }
+
+  // Sort CFG edges based on its weight.
+  void sortEdgesByWeight() {
+    llvm::stable_sort(AllEdges, [](const std::unique_ptr<Edge> &Edge1,
+                                   const std::unique_ptr<Edge> &Edge2) {
+      return Edge1->Weight > Edge2->Weight;
+    });
+  }
+
+  // Traverse all the edges and compute the Minimum Weight Spanning Tree
+  // using union-find algorithm.
+  void computeMinimumSpanningTree() {
+    // First, put all the critical edge with landing-pad as the Dest to MST.
+    // This works around the insufficient support of critical edges split
+    // when destination BB is a landing pad.
+    for (auto &Ei : AllEdges) {
+      if (Ei->Removed)
+        continue;
+      if (Ei->IsCritical) {
+        if (Ei->DestBB && Ei->DestBB->isLandingPad()) {
+          if (unionGroups(Ei->SrcBB, Ei->DestBB))
+            Ei->InMST = true;
+        }
+      }
+    }
+
+    for (auto &Ei : AllEdges) {
+      if (Ei->Removed)
+        continue;
+      // If we detect infinite loops, force
+      // instrumenting the entry edge:
+      if (!ExitBlockFound && Ei->SrcBB == nullptr)
+        continue;
+      if (unionGroups(Ei->SrcBB, Ei->DestBB))
+        Ei->InMST = true;
+    }
+  }
+
+  // Dump the Debug information about the instrumentation.
+  void dumpEdges(raw_ostream &OS, const Twine &Message) const {
+    if (!Message.str().empty())
+      OS << Message << "\n";
+    OS << "  Number of Basic Blocks: " << BBInfos.size() << "\n";
+    for (auto &BI : BBInfos) {
+      const BasicBlock *BB = BI.first;
+      OS << "  BB: " << (BB == nullptr ? "FakeNode" : BB->getName()) << "  "
+         << BI.second->infoString() << "\n";
+    }
+
+    OS << "  Number of Edges: " << AllEdges.size()
+       << " (*: Instrument, C: CriticalEdge, -: Removed)\n";
+    uint32_t Count = 0;
+    for (auto &EI : AllEdges)
+      OS << "  Edge " << Count++ << ": " << getBBInfo(EI->SrcBB).Index << "-->"
+         << getBBInfo(EI->DestBB).Index << EI->infoString() << "\n";
+  }
+
+  // Add an edge to AllEdges with weight W.
+  Edge &addEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W) {
+    uint32_t Index = BBInfos.size();
+    auto Iter = BBInfos.end();
+    bool Inserted;
+    std::tie(Iter, Inserted) = BBInfos.insert(std::make_pair(Src, nullptr));
+    if (Inserted) {
+      // Newly inserted, update the real info.
+      Iter->second = std::move(std::make_unique<BBInfo>(Index));
+      Index++;
+    }
+    std::tie(Iter, Inserted) = BBInfos.insert(std::make_pair(Dest, nullptr));
+    if (Inserted)
+      // Newly inserted, update the real info.
+      Iter->second = std::move(std::make_unique<BBInfo>(Index));
+    AllEdges.emplace_back(new Edge(Src, Dest, W));
+    return *AllEdges.back();
+  }
+
+  BranchProbabilityInfo *BPI;
+  BlockFrequencyInfo *BFI;
+
   // If function entry will be always instrumented.
   bool InstrumentFuncEntry;
 
-public: 
+public:
   CFGMST(Function &Func, bool InstrumentFuncEntry_,
          BranchProbabilityInfo *BPI_ = nullptr,
-         BlockFrequencyInfo *BFI_ = nullptr) 
+         BlockFrequencyInfo *BFI_ = nullptr)
       : F(Func), BPI(BPI_), BFI(BFI_),
         InstrumentFuncEntry(InstrumentFuncEntry_) {
-    buildEdges(); 
-    sortEdgesByWeight(); 
-    computeMinimumSpanningTree(); 
+    buildEdges();
+    sortEdgesByWeight();
+    computeMinimumSpanningTree();
     if (AllEdges.size() > 1 && InstrumentFuncEntry)
-      std::iter_swap(std::move(AllEdges.begin()), 
-                     std::move(AllEdges.begin() + AllEdges.size() - 1)); 
-  } 
-}; 
- 
-} // end namespace llvm 
- 
-#undef DEBUG_TYPE // "cfgmst" 
- 
-#endif // LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H 
+      std::iter_swap(std::move(AllEdges.begin()),
+                     std::move(AllEdges.begin() + AllEdges.size() - 1));
+  }
+};
+
+} // end namespace llvm
+
+#undef DEBUG_TYPE // "cfgmst"
+
+#endif // LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H
diff --git a/contrib/libs/llvm12/lib/Transforms/Instrumentation/CGProfile.cpp b/contrib/libs/llvm12/lib/Transforms/Instrumentation/CGProfile.cpp
index 7f658fa68f..9acd82c005 100644
--- a/contrib/libs/llvm12/lib/Transforms/Instrumentation/CGProfile.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Instrumentation/CGProfile.cpp
@@ -1,153 +1,153 @@
-//===-- CGProfile.cpp -----------------------------------------------------===// 
-// 
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 
-// 
-//===----------------------------------------------------------------------===// 
- 
-#include "llvm/Transforms/Instrumentation/CGProfile.h" 
- 
-#include "llvm/ADT/MapVector.h" 
-#include "llvm/Analysis/BlockFrequencyInfo.h" 
-#include "llvm/Analysis/LazyBlockFrequencyInfo.h" 
-#include "llvm/Analysis/TargetTransformInfo.h" 
-#include "llvm/IR/Constants.h" 
-#include "llvm/IR/Instructions.h" 
-#include "llvm/IR/MDBuilder.h" 
-#include "llvm/IR/PassManager.h" 
-#include "llvm/InitializePasses.h" 
-#include "llvm/ProfileData/InstrProf.h" 
-#include "llvm/Transforms/Instrumentation.h" 
- 
-#include <array> 
- 
-using namespace llvm; 
- 
-static bool 
-addModuleFlags(Module &M, 
-               MapVector<std::pair<Function *, Function *>, uint64_t> &Counts) { 
-  if (Counts.empty()) 
-    return false; 
- 
-  LLVMContext &Context = M.getContext(); 
-  MDBuilder MDB(Context); 
-  std::vector<Metadata *> Nodes; 
- 
-  for (auto E : Counts) { 
-    Metadata *Vals[] = {ValueAsMetadata::get(E.first.first), 
-                        ValueAsMetadata::get(E.first.second), 
-                        MDB.createConstant(ConstantInt::get( 
-                            Type::getInt64Ty(Context), E.second))}; 
-    Nodes.push_back(MDNode::get(Context, Vals)); 
-  } 
- 
-  M.addModuleFlag(Module::Append, "CG Profile", MDNode::get(Context, Nodes)); 
-  return true; 
-} 
- 
-static bool runCGProfilePass( 
-    Module &M, function_ref<BlockFrequencyInfo &(Function &)> GetBFI, 
-    function_ref<TargetTransformInfo &(Function &)> GetTTI, bool LazyBFI) { 
-  MapVector<std::pair<Function *, Function *>, uint64_t> Counts; 
-  InstrProfSymtab Symtab; 
-  auto UpdateCounts = [&](TargetTransformInfo &TTI, Function *F, 
-                          Function *CalledF, uint64_t NewCount) { 
+//===-- CGProfile.cpp -----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/CGProfile.h"
+
+#include "llvm/ADT/MapVector.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Transforms/Instrumentation.h"
+
+#include <array>
+
+using namespace llvm;
+
+static bool
+addModuleFlags(Module &M,
+               MapVector<std::pair<Function *, Function *>, uint64_t> &Counts) {
+  if (Counts.empty())
+    return false;
+
+  LLVMContext &Context = M.getContext();
+  MDBuilder MDB(Context);
+  std::vector<Metadata *> Nodes;
+
+  for (auto E : Counts) {
+    Metadata *Vals[] = {ValueAsMetadata::get(E.first.first),
+                        ValueAsMetadata::get(E.first.second),
+                        MDB.createConstant(ConstantInt::get(
+                            Type::getInt64Ty(Context), E.second))};
+    Nodes.push_back(MDNode::get(Context, Vals));
+  }
+
+  M.addModuleFlag(Module::Append, "CG Profile", MDNode::get(Context, Nodes));
+  return true;
+}
+
+static bool runCGProfilePass(
+    Module &M, function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
+    function_ref<TargetTransformInfo &(Function &)> GetTTI, bool LazyBFI) {
+  MapVector<std::pair<Function *, Function *>, uint64_t> Counts;
+  InstrProfSymtab Symtab;
+  auto UpdateCounts = [&](TargetTransformInfo &TTI, Function *F,
+                          Function *CalledF, uint64_t NewCount) {
     if (!CalledF || !TTI.isLoweredToCall(CalledF) ||
         CalledF->hasDLLImportStorageClass())
-      return; 
-    uint64_t &Count = Counts[std::make_pair(F, CalledF)]; 
-    Count = SaturatingAdd(Count, NewCount); 
-  }; 
-  // Ignore error here.  Indirect calls are ignored if this fails. 
-  (void)(bool) Symtab.create(M); 
-  for (auto &F : M) { 
-    // Avoid extra cost of running passes for BFI when the function doesn't have 
-    // entry count. Since LazyBlockFrequencyInfoPass only exists in LPM, check 
-    // if using LazyBlockFrequencyInfoPass. 
-    // TODO: Remove LazyBFI when LazyBlockFrequencyInfoPass is available in NPM. 
-    if (F.isDeclaration() || (LazyBFI && !F.getEntryCount())) 
-      continue; 
-    auto &BFI = GetBFI(F); 
-    if (BFI.getEntryFreq() == 0) 
-      continue; 
-    TargetTransformInfo &TTI = GetTTI(F); 
-    for (auto &BB : F) { 
-      Optional<uint64_t> BBCount = BFI.getBlockProfileCount(&BB); 
-      if (!BBCount) 
-        continue; 
-      for (auto &I : BB) { 
-        CallBase *CB = dyn_cast<CallBase>(&I); 
-        if (!CB) 
-          continue; 
-        if (CB->isIndirectCall()) { 
-          InstrProfValueData ValueData[8]; 
-          uint32_t ActualNumValueData; 
-          uint64_t TotalC; 
-          if (!getValueProfDataFromInst(*CB, IPVK_IndirectCallTarget, 8, 
-                                        ValueData, ActualNumValueData, TotalC)) 
-            continue; 
-          for (const auto &VD : 
-               ArrayRef<InstrProfValueData>(ValueData, ActualNumValueData)) { 
-            UpdateCounts(TTI, &F, Symtab.getFunction(VD.Value), VD.Count); 
-          } 
-          continue; 
-        } 
-        UpdateCounts(TTI, &F, CB->getCalledFunction(), *BBCount); 
-      } 
-    } 
-  } 
- 
-  return addModuleFlags(M, Counts); 
-} 
- 
-namespace { 
-struct CGProfileLegacyPass final : public ModulePass { 
-  static char ID; 
-  CGProfileLegacyPass() : ModulePass(ID) { 
-    initializeCGProfileLegacyPassPass(*PassRegistry::getPassRegistry()); 
-  } 
- 
-  void getAnalysisUsage(AnalysisUsage &AU) const override { 
-    AU.setPreservesCFG(); 
-    AU.addRequired<LazyBlockFrequencyInfoPass>(); 
-    AU.addRequired<TargetTransformInfoWrapperPass>(); 
-  } 
- 
-  bool runOnModule(Module &M) override { 
-    auto GetBFI = [this](Function &F) -> BlockFrequencyInfo & { 
-      return this->getAnalysis<LazyBlockFrequencyInfoPass>(F).getBFI(); 
-    }; 
-    auto GetTTI = [this](Function &F) -> TargetTransformInfo & { 
-      return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 
-    }; 
- 
-    return runCGProfilePass(M, GetBFI, GetTTI, true); 
-  } 
-}; 
- 
-} // namespace 
- 
-char CGProfileLegacyPass::ID = 0; 
- 
-INITIALIZE_PASS(CGProfileLegacyPass, "cg-profile", "Call Graph Profile", false, 
-                false) 
- 
-ModulePass *llvm::createCGProfileLegacyPass() { 
-  return new CGProfileLegacyPass(); 
-} 
- 
-PreservedAnalyses CGProfilePass::run(Module &M, ModuleAnalysisManager &MAM) { 
-  FunctionAnalysisManager &FAM = 
-      MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 
-  auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & { 
-    return FAM.getResult<BlockFrequencyAnalysis>(F); 
-  }; 
-  auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & { 
-    return FAM.getResult<TargetIRAnalysis>(F); 
-  }; 
- 
-  runCGProfilePass(M, GetBFI, GetTTI, false); 
- 
-  return PreservedAnalyses::all(); 
-} 
+      return;
+    uint64_t &Count = Counts[std::make_pair(F, CalledF)];
+    Count = SaturatingAdd(Count, NewCount);
+  };
+  // Ignore error here.  Indirect calls are ignored if this fails.
+  (void)(bool) Symtab.create(M);
+  for (auto &F : M) {
+    // Avoid extra cost of running passes for BFI when the function doesn't have
+    // entry count. Since LazyBlockFrequencyInfoPass only exists in LPM, check
+    // if using LazyBlockFrequencyInfoPass.
+    // TODO: Remove LazyBFI when LazyBlockFrequencyInfoPass is available in NPM.
+    if (F.isDeclaration() || (LazyBFI && !F.getEntryCount()))
+      continue;
+    auto &BFI = GetBFI(F);
+    if (BFI.getEntryFreq() == 0)
+      continue;
+    TargetTransformInfo &TTI = GetTTI(F);
+    for (auto &BB : F) {
+      Optional<uint64_t> BBCount = BFI.getBlockProfileCount(&BB);
+      if (!BBCount)
+        continue;
+      for (auto &I : BB) {
+        CallBase *CB = dyn_cast<CallBase>(&I);
+        if (!CB)
+          continue;
+        if (CB->isIndirectCall()) {
+          InstrProfValueData ValueData[8];
+          uint32_t ActualNumValueData;
+          uint64_t TotalC;
+          if (!getValueProfDataFromInst(*CB, IPVK_IndirectCallTarget, 8,
+                                        ValueData, ActualNumValueData, TotalC))
+            continue;
+          for (const auto &VD :
+               ArrayRef<InstrProfValueData>(ValueData, ActualNumValueData)) {
+            UpdateCounts(TTI, &F, Symtab.getFunction(VD.Value), VD.Count);
+          }
+          continue;
+        }
+        UpdateCounts(TTI, &F, CB->getCalledFunction(), *BBCount);
+      }
+    }
+  }
+
+  return addModuleFlags(M, Counts);
+}
+
+namespace {
+struct CGProfileLegacyPass final : public ModulePass {
+  static char ID;
+  CGProfileLegacyPass() : ModulePass(ID) {
+    initializeCGProfileLegacyPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<LazyBlockFrequencyInfoPass>();
+    AU.addRequired<TargetTransformInfoWrapperPass>();
+  }
+
+  bool runOnModule(Module &M) override {
+    auto GetBFI = [this](Function &F) -> BlockFrequencyInfo & {
+      return this->getAnalysis<LazyBlockFrequencyInfoPass>(F).getBFI();
+    };
+    auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
+      return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+    };
+
+    return runCGProfilePass(M, GetBFI, GetTTI, true);
+  }
+};
+
+} // namespace
+
+char CGProfileLegacyPass::ID = 0;
+
+INITIALIZE_PASS(CGProfileLegacyPass, "cg-profile", "Call Graph Profile", false,
+                false)
+
+ModulePass *llvm::createCGProfileLegacyPass() {
+  return new CGProfileLegacyPass();
+}
+
+PreservedAnalyses CGProfilePass::run(Module &M, ModuleAnalysisManager &MAM) {
+  FunctionAnalysisManager &FAM =
+      MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+  auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & {
+    return FAM.getResult<BlockFrequencyAnalysis>(F);
+  };
+  auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
+    return FAM.getResult<TargetIRAnalysis>(F);
+  };
+
+  runCGProfilePass(M, GetBFI, GetTTI, false);
+
+  return PreservedAnalyses::all();
+}
diff --git a/contrib/libs/llvm12/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/contrib/libs/llvm12/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
index 6fdeb88658..927c34180d 100644
--- a/contrib/libs/llvm12/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
@@ -1,2103 +1,2103 @@
-//===-- ControlHeightReduction.cpp - Control Height Reduction -------------===// 
-// 
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 
-// 
-//===----------------------------------------------------------------------===// 
-// 
-// This pass merges conditional blocks of code and reduces the number of 
-// conditional branches in the hot paths based on profiles. 
-// 
-//===----------------------------------------------------------------------===// 
- 
-#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h" 
-#include "llvm/ADT/DenseMap.h" 
-#include "llvm/ADT/DenseSet.h" 
-#include "llvm/ADT/SmallVector.h" 
-#include "llvm/ADT/StringSet.h" 
-#include "llvm/Analysis/BlockFrequencyInfo.h" 
-#include "llvm/Analysis/GlobalsModRef.h" 
-#include "llvm/Analysis/OptimizationRemarkEmitter.h" 
-#include "llvm/Analysis/ProfileSummaryInfo.h" 
-#include "llvm/Analysis/RegionInfo.h" 
-#include "llvm/Analysis/RegionIterator.h" 
-#include "llvm/Analysis/ValueTracking.h" 
-#include "llvm/IR/CFG.h" 
-#include "llvm/IR/Dominators.h" 
-#include "llvm/IR/IRBuilder.h" 
-#include "llvm/IR/MDBuilder.h" 
-#include "llvm/InitializePasses.h" 
-#include "llvm/Support/BranchProbability.h" 
-#include "llvm/Support/CommandLine.h" 
-#include "llvm/Support/MemoryBuffer.h" 
-#include "llvm/Transforms/Utils.h" 
-#include "llvm/Transforms/Utils/BasicBlockUtils.h" 
-#include "llvm/Transforms/Utils/Cloning.h" 
-#include "llvm/Transforms/Utils/ValueMapper.h" 
- 
-#include <set> 
-#include <sstream> 
- 
-using namespace llvm; 
- 
-#define DEBUG_TYPE "chr" 
- 
-#define CHR_DEBUG(X) LLVM_DEBUG(X) 
- 
-static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden, 
-                              cl::desc("Apply CHR for all functions")); 
- 
-static cl::opt<double> CHRBiasThreshold( 
-    "chr-bias-threshold", cl::init(0.99), cl::Hidden, 
-    cl::desc("CHR considers a branch bias greater than this ratio as biased")); 
- 
-static cl::opt<unsigned> CHRMergeThreshold( 
-    "chr-merge-threshold", cl::init(2), cl::Hidden, 
-    cl::desc("CHR merges a group of N branches/selects where N >= this value")); 
- 
-static cl::opt<std::string> CHRModuleList( 
-    "chr-module-list", cl::init(""), cl::Hidden, 
-    cl::desc("Specify file to retrieve the list of modules to apply CHR to")); 
- 
-static cl::opt<std::string> CHRFunctionList( 
-    "chr-function-list", cl::init(""), cl::Hidden, 
-    cl::desc("Specify file to retrieve the list of functions to apply CHR to")); 
- 
-static StringSet<> CHRModules; 
-static StringSet<> CHRFunctions; 
- 
-static void parseCHRFilterFiles() { 
-  if (!CHRModuleList.empty()) { 
-    auto FileOrErr = MemoryBuffer::getFile(CHRModuleList); 
-    if (!FileOrErr) { 
-      errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n"; 
-      std::exit(1); 
-    } 
-    StringRef Buf = FileOrErr->get()->getBuffer(); 
-    SmallVector<StringRef, 0> Lines; 
-    Buf.split(Lines, '\n'); 
-    for (StringRef Line : Lines) { 
-      Line = Line.trim(); 
-      if (!Line.empty()) 
-        CHRModules.insert(Line); 
-    } 
-  } 
-  if (!CHRFunctionList.empty()) { 
-    auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList); 
-    if (!FileOrErr) { 
-      errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n"; 
-      std::exit(1); 
-    } 
-    StringRef Buf = FileOrErr->get()->getBuffer(); 
-    SmallVector<StringRef, 0> Lines; 
-    Buf.split(Lines, '\n'); 
-    for (StringRef Line : Lines) { 
-      Line = Line.trim(); 
-      if (!Line.empty()) 
-        CHRFunctions.insert(Line); 
-    } 
-  } 
-} 
- 
-namespace { 
-class ControlHeightReductionLegacyPass : public FunctionPass { 
-public: 
-  static char ID; 
- 
-  ControlHeightReductionLegacyPass() : FunctionPass(ID) { 
-    initializeControlHeightReductionLegacyPassPass( 
-        *PassRegistry::getPassRegistry()); 
-    parseCHRFilterFiles(); 
-  } 
- 
-  bool runOnFunction(Function &F) override; 
-  void getAnalysisUsage(AnalysisUsage &AU) const override { 
-    AU.addRequired<BlockFrequencyInfoWrapperPass>(); 
-    AU.addRequired<DominatorTreeWrapperPass>(); 
-    AU.addRequired<ProfileSummaryInfoWrapperPass>(); 
-    AU.addRequired<RegionInfoPass>(); 
-    AU.addPreserved<GlobalsAAWrapperPass>(); 
-  } 
-}; 
-} // end anonymous namespace 
- 
-char ControlHeightReductionLegacyPass::ID = 0; 
- 
-INITIALIZE_PASS_BEGIN(ControlHeightReductionLegacyPass, 
-                      "chr", 
-                      "Reduce control height in the hot paths", 
-                      false, false) 
-INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 
-INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) 
-INITIALIZE_PASS_DEPENDENCY(RegionInfoPass) 
-INITIALIZE_PASS_END(ControlHeightReductionLegacyPass, 
-                    "chr", 
-                    "Reduce control height in the hot paths", 
-                    false, false) 
- 
-FunctionPass *llvm::createControlHeightReductionLegacyPass() { 
-  return new ControlHeightReductionLegacyPass(); 
-} 
- 
-namespace { 
- 
-struct CHRStats { 
-  CHRStats() : NumBranches(0), NumBranchesDelta(0), 
-               WeightedNumBranchesDelta(0) {} 
-  void print(raw_ostream &OS) const { 
-    OS << "CHRStats: NumBranches " << NumBranches 
-       << " NumBranchesDelta " << NumBranchesDelta 
-       << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta; 
-  } 
-  uint64_t NumBranches;       // The original number of conditional branches / 
-                              // selects 
-  uint64_t NumBranchesDelta;  // The decrease of the number of conditional 
-                              // branches / selects in the hot paths due to CHR. 
-  uint64_t WeightedNumBranchesDelta; // NumBranchesDelta weighted by the profile 
-                                     // count at the scope entry. 
-}; 
- 
-// RegInfo - some properties of a Region. 
-struct RegInfo { 
-  RegInfo() : R(nullptr), HasBranch(false) {} 
-  RegInfo(Region *RegionIn) : R(RegionIn), HasBranch(false) {} 
-  Region *R; 
-  bool HasBranch; 
-  SmallVector<SelectInst *, 8> Selects; 
-}; 
- 
-typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy; 
- 
-// CHRScope - a sequence of regions to CHR together. It corresponds to a 
-// sequence of conditional blocks. It can have subscopes which correspond to 
-// nested conditional blocks. Nested CHRScopes form a tree. 
-class CHRScope { 
- public: 
-  CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) { 
-    assert(RI.R && "Null RegionIn"); 
-    RegInfos.push_back(RI); 
-  } 
- 
-  Region *getParentRegion() { 
-    assert(RegInfos.size() > 0 && "Empty CHRScope"); 
-    Region *Parent = RegInfos[0].R->getParent(); 
-    assert(Parent && "Unexpected to call this on the top-level region"); 
-    return Parent; 
-  } 
- 
-  BasicBlock *getEntryBlock() { 
-    assert(RegInfos.size() > 0 && "Empty CHRScope"); 
-    return RegInfos.front().R->getEntry(); 
-  } 
- 
-  BasicBlock *getExitBlock() { 
-    assert(RegInfos.size() > 0 && "Empty CHRScope"); 
-    return RegInfos.back().R->getExit(); 
-  } 
- 
-  bool appendable(CHRScope *Next) { 
-    // The next scope is appendable only if this scope is directly connected to 
-    // it (which implies it post-dominates this scope) and this scope dominates 
-    // it (no edge to the next scope outside this scope). 
-    BasicBlock *NextEntry = Next->getEntryBlock(); 
-    if (getExitBlock() != NextEntry) 
-      // Not directly connected. 
-      return false; 
-    Region *LastRegion = RegInfos.back().R; 
-    for (BasicBlock *Pred : predecessors(NextEntry)) 
-      if (!LastRegion->contains(Pred)) 
-        // There's an edge going into the entry of the next scope from outside 
-        // of this scope. 
-        return false; 
-    return true; 
-  } 
- 
-  void append(CHRScope *Next) { 
-    assert(RegInfos.size() > 0 && "Empty CHRScope"); 
-    assert(Next->RegInfos.size() > 0 && "Empty CHRScope"); 
-    assert(getParentRegion() == Next->getParentRegion() && 
-           "Must be siblings"); 
-    assert(getExitBlock() == Next->getEntryBlock() && 
-           "Must be adjacent"); 
-    RegInfos.append(Next->RegInfos.begin(), Next->RegInfos.end()); 
-    Subs.append(Next->Subs.begin(), Next->Subs.end()); 
-  } 
- 
-  void addSub(CHRScope *SubIn) { 
-#ifndef NDEBUG 
-    bool IsChild = false; 
-    for (RegInfo &RI : RegInfos) 
-      if (RI.R == SubIn->getParentRegion()) { 
-        IsChild = true; 
-        break; 
-      } 
-    assert(IsChild && "Must be a child"); 
-#endif 
-    Subs.push_back(SubIn); 
-  } 
- 
-  // Split this scope at the boundary region into two, which will belong to the 
-  // tail and returns the tail. 
-  CHRScope *split(Region *Boundary) { 
-    assert(Boundary && "Boundary null"); 
-    assert(RegInfos.begin()->R != Boundary && 
-           "Can't be split at beginning"); 
-    auto BoundaryIt = llvm::find_if( 
-        RegInfos, [&Boundary](const RegInfo &RI) { return Boundary == RI.R; }); 
-    if (BoundaryIt == RegInfos.end()) 
-      return nullptr; 
-    ArrayRef<RegInfo> TailRegInfos(BoundaryIt, RegInfos.end()); 
-    DenseSet<Region *> TailRegionSet; 
-    for (const RegInfo &RI : TailRegInfos) 
-      TailRegionSet.insert(RI.R); 
- 
-    auto TailIt = 
-        std::stable_partition(Subs.begin(), Subs.end(), [&](CHRScope *Sub) { 
-          assert(Sub && "null Sub"); 
-          Region *Parent = Sub->getParentRegion(); 
-          if (TailRegionSet.count(Parent)) 
-            return false; 
- 
+//===-- ControlHeightReduction.cpp - Control Height Reduction -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass merges conditional blocks of code and reduces the number of
+// conditional branches in the hot paths based on profiles.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/RegionInfo.h"
+#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+
+#include <set>
+#include <sstream>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "chr"
+
+#define CHR_DEBUG(X) LLVM_DEBUG(X)
+
+static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden,
+                              cl::desc("Apply CHR for all functions"));
+
+static cl::opt<double> CHRBiasThreshold(
+    "chr-bias-threshold", cl::init(0.99), cl::Hidden,
+    cl::desc("CHR considers a branch bias greater than this ratio as biased"));
+
+static cl::opt<unsigned> CHRMergeThreshold(
+    "chr-merge-threshold", cl::init(2), cl::Hidden,
+    cl::desc("CHR merges a group of N branches/selects where N >= this value"));
+
+static cl::opt<std::string> CHRModuleList(
+    "chr-module-list", cl::init(""), cl::Hidden,
+    cl::desc("Specify file to retrieve the list of modules to apply CHR to"));
+
+static cl::opt<std::string> CHRFunctionList(
+    "chr-function-list", cl::init(""), cl::Hidden,
+    cl::desc("Specify file to retrieve the list of functions to apply CHR to"));
+
+static StringSet<> CHRModules;
+static StringSet<> CHRFunctions;
+
+static void parseCHRFilterFiles() {
+  if (!CHRModuleList.empty()) {
+    auto FileOrErr = MemoryBuffer::getFile(CHRModuleList);
+    if (!FileOrErr) {
+      errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n";
+      std::exit(1);
+    }
+    StringRef Buf = FileOrErr->get()->getBuffer();
+    SmallVector<StringRef, 0> Lines;
+    Buf.split(Lines, '\n');
+    for (StringRef Line : Lines) {
+      Line = Line.trim();
+      if (!Line.empty())
+        CHRModules.insert(Line);
+    }
+  }
+  if (!CHRFunctionList.empty()) {
+    auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList);
+    if (!FileOrErr) {
+      errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n";
+      std::exit(1);
+    }
+    StringRef Buf = FileOrErr->get()->getBuffer();
+    SmallVector<StringRef, 0> Lines;
+    Buf.split(Lines, '\n');
+    for (StringRef Line : Lines) {
+      Line = Line.trim();
+      if (!Line.empty())
+        CHRFunctions.insert(Line);
+    }
+  }
+}
+
+namespace {
+class ControlHeightReductionLegacyPass : public FunctionPass {
+public:
+  static char ID;
+
+  ControlHeightReductionLegacyPass() : FunctionPass(ID) {
+    initializeControlHeightReductionLegacyPassPass(
+        *PassRegistry::getPassRegistry());
+    parseCHRFilterFiles();
+  }
+
+  bool runOnFunction(Function &F) override;
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<BlockFrequencyInfoWrapperPass>();
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addRequired<ProfileSummaryInfoWrapperPass>();
+    AU.addRequired<RegionInfoPass>();
+    AU.addPreserved<GlobalsAAWrapperPass>();
+  }
+};
+} // end anonymous namespace
+
+char ControlHeightReductionLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(ControlHeightReductionLegacyPass,
+                      "chr",
+                      "Reduce control height in the hot paths",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
+INITIALIZE_PASS_END(ControlHeightReductionLegacyPass,
+                    "chr",
+                    "Reduce control height in the hot paths",
+                    false, false)
+
+FunctionPass *llvm::createControlHeightReductionLegacyPass() {
+  return new ControlHeightReductionLegacyPass();
+}
+
+namespace {
+
+struct CHRStats {
+  CHRStats() : NumBranches(0), NumBranchesDelta(0),
+               WeightedNumBranchesDelta(0) {}
+  void print(raw_ostream &OS) const {
+    OS << "CHRStats: NumBranches " << NumBranches
+       << " NumBranchesDelta " << NumBranchesDelta
+       << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta;
+  }
+  uint64_t NumBranches;       // The original number of conditional branches /
+                              // selects
+  uint64_t NumBranchesDelta;  // The decrease of the number of conditional
+                              // branches / selects in the hot paths due to CHR.
+  uint64_t WeightedNumBranchesDelta; // NumBranchesDelta weighted by the profile
+                                     // count at the scope entry.
+};
+
+// RegInfo - some properties of a Region.
+struct RegInfo {
+  RegInfo() : R(nullptr), HasBranch(false) {}
+  RegInfo(Region *RegionIn) : R(RegionIn), HasBranch(false) {}
+  Region *R;
+  bool HasBranch;
+  SmallVector<SelectInst *, 8> Selects;
+};
+
+typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy;
+
+// CHRScope - a sequence of regions to CHR together. It corresponds to a
+// sequence of conditional blocks. It can have subscopes which correspond to
+// nested conditional blocks. Nested CHRScopes form a tree.
+class CHRScope {
+ public:
+  CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) {
+    assert(RI.R && "Null RegionIn");
+    RegInfos.push_back(RI);
+  }
+
+  Region *getParentRegion() {
+    assert(RegInfos.size() > 0 && "Empty CHRScope");
+    Region *Parent = RegInfos[0].R->getParent();
+    assert(Parent && "Unexpected to call this on the top-level region");
+    return Parent;
+  }
+
+  BasicBlock *getEntryBlock() {
+    assert(RegInfos.size() > 0 && "Empty CHRScope");
+    return RegInfos.front().R->getEntry();
+  }
+
+  BasicBlock *getExitBlock() {
+    assert(RegInfos.size() > 0 && "Empty CHRScope");
+    return RegInfos.back().R->getExit();
+  }
+
+  bool appendable(CHRScope *Next) {
+    // The next scope is appendable only if this scope is directly connected to
+    // it (which implies it post-dominates this scope) and this scope dominates
+    // it (no edge to the next scope outside this scope).
+    BasicBlock *NextEntry = Next->getEntryBlock();
+    if (getExitBlock() != NextEntry)
+      // Not directly connected.
+      return false;
+    Region *LastRegion = RegInfos.back().R;
+    for (BasicBlock *Pred : predecessors(NextEntry))
+      if (!LastRegion->contains(Pred))
+        // There's an edge going into the entry of the next scope from outside
+        // of this scope.
+        return false;
+    return true;
+  }
+
+  void append(CHRScope *Next) {
+    assert(RegInfos.size() > 0 && "Empty CHRScope");
+    assert(Next->RegInfos.size() > 0 && "Empty CHRScope");
+    assert(getParentRegion() == Next->getParentRegion() &&
+           "Must be siblings");
+    assert(getExitBlock() == Next->getEntryBlock() &&
+           "Must be adjacent");
+    RegInfos.append(Next->RegInfos.begin(), Next->RegInfos.end());
+    Subs.append(Next->Subs.begin(), Next->Subs.end());
+  }
+
+  void addSub(CHRScope *SubIn) {
+#ifndef NDEBUG
+    bool IsChild = false;
+    for (RegInfo &RI : RegInfos)
+      if (RI.R == SubIn->getParentRegion()) {
+        IsChild = true;
+        break;
+      }
+    assert(IsChild && "Must be a child");
+#endif
+    Subs.push_back(SubIn);
+  }
+
+  // Split this scope at the boundary region into two, which will belong to the
+  // tail and returns the tail.
+  CHRScope *split(Region *Boundary) {
+    assert(Boundary && "Boundary null");
+    assert(RegInfos.begin()->R != Boundary &&
+           "Can't be split at beginning");
+    auto BoundaryIt = llvm::find_if(
+        RegInfos, [&Boundary](const RegInfo &RI) { return Boundary == RI.R; });
+    if (BoundaryIt == RegInfos.end())
+      return nullptr;
+    ArrayRef<RegInfo> TailRegInfos(BoundaryIt, RegInfos.end());
+    DenseSet<Region *> TailRegionSet;
+    for (const RegInfo &RI : TailRegInfos)
+      TailRegionSet.insert(RI.R);
+
+    auto TailIt =
+        std::stable_partition(Subs.begin(), Subs.end(), [&](CHRScope *Sub) {
+          assert(Sub && "null Sub");
+          Region *Parent = Sub->getParentRegion();
+          if (TailRegionSet.count(Parent))
+            return false;
+
           assert(llvm::any_of(
                      RegInfos,
                      [&Parent](const RegInfo &RI) { return Parent == RI.R; }) &&
-                 "Must be in head"); 
-          return true; 
-        }); 
-    ArrayRef<CHRScope *> TailSubs(TailIt, Subs.end()); 
- 
-    assert(HoistStopMap.empty() && "MapHoistStops must be empty"); 
-    auto *Scope = new CHRScope(TailRegInfos, TailSubs); 
-    RegInfos.erase(BoundaryIt, RegInfos.end()); 
-    Subs.erase(TailIt, Subs.end()); 
-    return Scope; 
-  } 
- 
-  bool contains(Instruction *I) const { 
-    BasicBlock *Parent = I->getParent(); 
-    for (const RegInfo &RI : RegInfos) 
-      if (RI.R->contains(Parent)) 
-        return true; 
-    return false; 
-  } 
- 
-  void print(raw_ostream &OS) const; 
- 
-  SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope 
-  SmallVector<CHRScope *, 8> Subs;  // Subscopes. 
- 
-  // The instruction at which to insert the CHR conditional branch (and hoist 
-  // the dependent condition values). 
-  Instruction *BranchInsertPoint; 
- 
-  // True-biased and false-biased regions (conditional blocks), 
-  // respectively. Used only for the outermost scope and includes regions in 
-  // subscopes. The rest are unbiased. 
-  DenseSet<Region *> TrueBiasedRegions; 
-  DenseSet<Region *> FalseBiasedRegions; 
-  // Among the biased regions, the regions that get CHRed. 
-  SmallVector<RegInfo, 8> CHRRegions; 
- 
-  // True-biased and false-biased selects, respectively. Used only for the 
-  // outermost scope and includes ones in subscopes. 
-  DenseSet<SelectInst *> TrueBiasedSelects; 
-  DenseSet<SelectInst *> FalseBiasedSelects; 
- 
-  // Map from one of the above regions to the instructions to stop 
-  // hoisting instructions at through use-def chains. 
-  HoistStopMapTy HoistStopMap; 
- 
- private: 
-   CHRScope(ArrayRef<RegInfo> RegInfosIn, ArrayRef<CHRScope *> SubsIn) 
-       : RegInfos(RegInfosIn.begin(), RegInfosIn.end()), 
-         Subs(SubsIn.begin(), SubsIn.end()), BranchInsertPoint(nullptr) {} 
-}; 
- 
-class CHR { 
- public: 
-  CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin, 
-      ProfileSummaryInfo &PSIin, RegionInfo &RIin, 
-      OptimizationRemarkEmitter &OREin) 
-      : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin), ORE(OREin) {} 
- 
-  ~CHR() { 
-    for (CHRScope *Scope : Scopes) { 
-      delete Scope; 
-    } 
-  } 
- 
-  bool run(); 
- 
- private: 
-  // See the comments in CHR::run() for the high level flow of the algorithm and 
-  // what the following functions do. 
- 
-  void findScopes(SmallVectorImpl<CHRScope *> &Output) { 
-    Region *R = RI.getTopLevelRegion(); 
-    if (CHRScope *Scope = findScopes(R, nullptr, nullptr, Output)) { 
-      Output.push_back(Scope); 
-    } 
-  } 
-  CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion, 
-                        SmallVectorImpl<CHRScope *> &Scopes); 
-  CHRScope *findScope(Region *R); 
-  void checkScopeHoistable(CHRScope *Scope); 
- 
-  void splitScopes(SmallVectorImpl<CHRScope *> &Input, 
-                   SmallVectorImpl<CHRScope *> &Output); 
-  SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope, 
-                                        CHRScope *Outer, 
-                                        DenseSet<Value *> *OuterConditionValues, 
-                                        Instruction *OuterInsertPoint, 
-                                        SmallVectorImpl<CHRScope *> &Output, 
-                                        DenseSet<Instruction *> &Unhoistables); 
- 
-  void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes); 
-  void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope); 
- 
-  void filterScopes(SmallVectorImpl<CHRScope *> &Input, 
-                    SmallVectorImpl<CHRScope *> &Output); 
- 
-  void setCHRRegions(SmallVectorImpl<CHRScope *> &Input, 
-                     SmallVectorImpl<CHRScope *> &Output); 
-  void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope); 
- 
-  void sortScopes(SmallVectorImpl<CHRScope *> &Input, 
-                  SmallVectorImpl<CHRScope *> &Output); 
- 
-  void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes); 
-  void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs); 
-  void cloneScopeBlocks(CHRScope *Scope, 
-                        BasicBlock *PreEntryBlock, 
-                        BasicBlock *ExitBlock, 
-                        Region *LastRegion, 
-                        ValueToValueMapTy &VMap); 
-  BranchInst *createMergedBranch(BasicBlock *PreEntryBlock, 
-                                 BasicBlock *EntryBlock, 
-                                 BasicBlock *NewEntryBlock, 
-                                 ValueToValueMapTy &VMap); 
-  void fixupBranchesAndSelects(CHRScope *Scope, 
-                               BasicBlock *PreEntryBlock, 
-                               BranchInst *MergedBR, 
-                               uint64_t ProfileCount); 
-  void fixupBranch(Region *R, 
-                   CHRScope *Scope, 
-                   IRBuilder<> &IRB, 
-                   Value *&MergedCondition, BranchProbability &CHRBranchBias); 
-  void fixupSelect(SelectInst* SI, 
-                   CHRScope *Scope, 
-                   IRBuilder<> &IRB, 
-                   Value *&MergedCondition, BranchProbability &CHRBranchBias); 
-  void addToMergedCondition(bool IsTrueBiased, Value *Cond, 
-                            Instruction *BranchOrSelect, 
-                            CHRScope *Scope, 
-                            IRBuilder<> &IRB, 
-                            Value *&MergedCondition); 
- 
-  Function &F; 
-  BlockFrequencyInfo &BFI; 
-  DominatorTree &DT; 
-  ProfileSummaryInfo &PSI; 
-  RegionInfo &RI; 
-  OptimizationRemarkEmitter &ORE; 
-  CHRStats Stats; 
- 
-  // All the true-biased regions in the function 
-  DenseSet<Region *> TrueBiasedRegionsGlobal; 
-  // All the false-biased regions in the function 
-  DenseSet<Region *> FalseBiasedRegionsGlobal; 
-  // All the true-biased selects in the function 
-  DenseSet<SelectInst *> TrueBiasedSelectsGlobal; 
-  // All the false-biased selects in the function 
-  DenseSet<SelectInst *> FalseBiasedSelectsGlobal; 
-  // A map from biased regions to their branch bias 
-  DenseMap<Region *, BranchProbability> BranchBiasMap; 
-  // A map from biased selects to their branch bias 
-  DenseMap<SelectInst *, BranchProbability> SelectBiasMap; 
-  // All the scopes. 
-  DenseSet<CHRScope *> Scopes; 
-}; 
- 
-} // end anonymous namespace 
- 
-static inline 
-raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS, 
-                                              const CHRStats &Stats) { 
-  Stats.print(OS); 
-  return OS; 
-} 
- 
-static inline 
-raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) { 
-  Scope.print(OS); 
-  return OS; 
-} 
- 
-static bool shouldApply(Function &F, ProfileSummaryInfo& PSI) { 
-  if (ForceCHR) 
-    return true; 
- 
-  if (!CHRModuleList.empty() || !CHRFunctionList.empty()) { 
-    if (CHRModules.count(F.getParent()->getName())) 
-      return true; 
-    return CHRFunctions.count(F.getName()); 
-  } 
- 
-  assert(PSI.hasProfileSummary() && "Empty PSI?"); 
-  return PSI.isFunctionEntryHot(&F); 
-} 
- 
-static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label, 
-                                         CHRStats *Stats) { 
-  StringRef FuncName = F.getName(); 
-  StringRef ModuleName = F.getParent()->getName(); 
-  (void)(FuncName); // Unused in release build. 
-  (void)(ModuleName); // Unused in release build. 
-  CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " " 
-            << FuncName); 
-  if (Stats) 
-    CHR_DEBUG(dbgs() << " " << *Stats); 
-  CHR_DEBUG(dbgs() << "\n"); 
-  CHR_DEBUG(F.dump()); 
-} 
- 
-void CHRScope::print(raw_ostream &OS) const { 
-  assert(RegInfos.size() > 0 && "Empty CHRScope"); 
-  OS << "CHRScope["; 
-  OS << RegInfos.size() << ", Regions["; 
-  for (const RegInfo &RI : RegInfos) { 
-    OS << RI.R->getNameStr(); 
-    if (RI.HasBranch) 
-      OS << " B"; 
-    if (RI.Selects.size() > 0) 
-      OS << " S" << RI.Selects.size(); 
-    OS << ", "; 
-  } 
-  if (RegInfos[0].R->getParent()) { 
-    OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr(); 
-  } else { 
-    // top level region 
-    OS << "]"; 
-  } 
-  OS << ", Subs["; 
-  for (CHRScope *Sub : Subs) { 
-    OS << *Sub << ", "; 
-  } 
-  OS << "]]"; 
-} 
- 
-// Return true if the given instruction type can be hoisted by CHR. 
-static bool isHoistableInstructionType(Instruction *I) { 
-  return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) || 
-      isa<GetElementPtrInst>(I) || isa<CmpInst>(I) || 
-      isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) || 
-      isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) || 
-      isa<InsertValueInst>(I); 
-} 
- 
-// Return true if the given instruction can be hoisted by CHR. 
-static bool isHoistable(Instruction *I, DominatorTree &DT) { 
-  if (!isHoistableInstructionType(I)) 
-    return false; 
-  return isSafeToSpeculativelyExecute(I, nullptr, &DT); 
-} 
- 
-// Recursively traverse the use-def chains of the given value and return a set 
-// of the unhoistable base values defined within the scope (excluding the 
-// first-region entry block) or the (hoistable or unhoistable) base values that 
-// are defined outside (including the first-region entry block) of the 
-// scope. The returned set doesn't include constants. 
-static const std::set<Value *> & 
-getBaseValues(Value *V, DominatorTree &DT, 
-              DenseMap<Value *, std::set<Value *>> &Visited) { 
-  auto It = Visited.find(V); 
-  if (It != Visited.end()) { 
-    return It->second; 
-  } 
-  std::set<Value *> Result; 
-  if (auto *I = dyn_cast<Instruction>(V)) { 
-    // We don't stop at a block that's not in the Scope because we would miss 
-    // some instructions that are based on the same base values if we stop 
-    // there. 
-    if (!isHoistable(I, DT)) { 
-      Result.insert(I); 
-      return Visited.insert(std::make_pair(V, std::move(Result))).first->second; 
-    } 
-    // I is hoistable above the Scope. 
-    for (Value *Op : I->operands()) { 
-      const std::set<Value *> &OpResult = getBaseValues(Op, DT, Visited); 
-      Result.insert(OpResult.begin(), OpResult.end()); 
-    } 
-    return Visited.insert(std::make_pair(V, std::move(Result))).first->second; 
-  } 
-  if (isa<Argument>(V)) { 
-    Result.insert(V); 
-  } 
-  // We don't include others like constants because those won't lead to any 
-  // chance of folding of conditions (eg two bit checks merged into one check) 
-  // after CHR. 
-  return Visited.insert(std::make_pair(V, std::move(Result))).first->second; 
-} 
- 
-// Return true if V is already hoisted or can be hoisted (along with its 
-// operands) above the insert point. When it returns true and HoistStops is 
-// non-null, the instructions to stop hoisting at through the use-def chains are 
-// inserted into HoistStops. 
-static bool 
-checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT, 
-                DenseSet<Instruction *> &Unhoistables, 
-                DenseSet<Instruction *> *HoistStops, 
-                DenseMap<Instruction *, bool> &Visited) { 
-  assert(InsertPoint && "Null InsertPoint"); 
-  if (auto *I = dyn_cast<Instruction>(V)) { 
-    auto It = Visited.find(I); 
-    if (It != Visited.end()) { 
-      return It->second; 
-    } 
-    assert(DT.getNode(I->getParent()) && "DT must contain I's parent block"); 
-    assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination"); 
-    if (Unhoistables.count(I)) { 
-      // Don't hoist if they are not to be hoisted. 
-      Visited[I] = false; 
-      return false; 
-    } 
-    if (DT.dominates(I, InsertPoint)) { 
-      // We are already above the insert point. Stop here. 
-      if (HoistStops) 
-        HoistStops->insert(I); 
-      Visited[I] = true; 
-      return true; 
-    } 
-    // We aren't not above the insert point, check if we can hoist it above the 
-    // insert point. 
-    if (isHoistable(I, DT)) { 
-      // Check operands first. 
-      DenseSet<Instruction *> OpsHoistStops; 
-      bool AllOpsHoisted = true; 
-      for (Value *Op : I->operands()) { 
-        if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops, 
-                             Visited)) { 
-          AllOpsHoisted = false; 
-          break; 
-        } 
-      } 
-      if (AllOpsHoisted) { 
-        CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n"); 
-        if (HoistStops) 
-          HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end()); 
-        Visited[I] = true; 
-        return true; 
-      } 
-    } 
-    Visited[I] = false; 
-    return false; 
-  } 
-  // Non-instructions are considered hoistable. 
-  return true; 
-} 
- 
-// Returns true and sets the true probability and false probability of an 
-// MD_prof metadata if it's well-formed. 
-static bool checkMDProf(MDNode *MD, BranchProbability &TrueProb, 
-                        BranchProbability &FalseProb) { 
-  if (!MD) return false; 
-  MDString *MDName = cast<MDString>(MD->getOperand(0)); 
-  if (MDName->getString() != "branch_weights" || 
-      MD->getNumOperands() != 3) 
-    return false; 
-  ConstantInt *TrueWeight = mdconst::extract<ConstantInt>(MD->getOperand(1)); 
-  ConstantInt *FalseWeight = mdconst::extract<ConstantInt>(MD->getOperand(2)); 
-  if (!TrueWeight || !FalseWeight) 
-    return false; 
-  uint64_t TrueWt = TrueWeight->getValue().getZExtValue(); 
-  uint64_t FalseWt = FalseWeight->getValue().getZExtValue(); 
-  uint64_t SumWt = TrueWt + FalseWt; 
- 
-  assert(SumWt >= TrueWt && SumWt >= FalseWt && 
-         "Overflow calculating branch probabilities."); 
- 
-  // Guard against 0-to-0 branch weights to avoid a division-by-zero crash. 
-  if (SumWt == 0) 
-    return false; 
- 
-  TrueProb = BranchProbability::getBranchProbability(TrueWt, SumWt); 
-  FalseProb = BranchProbability::getBranchProbability(FalseWt, SumWt); 
-  return true; 
-} 
- 
-static BranchProbability getCHRBiasThreshold() { 
-  return BranchProbability::getBranchProbability( 
-      static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000); 
-} 
- 
-// A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >= 
-// CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >= 
-// CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return 
-// false. 
-template <typename K, typename S, typename M> 
-static bool checkBias(K *Key, BranchProbability TrueProb, 
-                      BranchProbability FalseProb, S &TrueSet, S &FalseSet, 
-                      M &BiasMap) { 
-  BranchProbability Threshold = getCHRBiasThreshold(); 
-  if (TrueProb >= Threshold) { 
-    TrueSet.insert(Key); 
-    BiasMap[Key] = TrueProb; 
-    return true; 
-  } else if (FalseProb >= Threshold) { 
-    FalseSet.insert(Key); 
-    BiasMap[Key] = FalseProb; 
-    return true; 
-  } 
-  return false; 
-} 
- 
-// Returns true and insert a region into the right biased set and the map if the 
-// branch of the region is biased. 
-static bool checkBiasedBranch(BranchInst *BI, Region *R, 
-                              DenseSet<Region *> &TrueBiasedRegionsGlobal, 
-                              DenseSet<Region *> &FalseBiasedRegionsGlobal, 
-                              DenseMap<Region *, BranchProbability> &BranchBiasMap) { 
-  if (!BI->isConditional()) 
-    return false; 
-  BranchProbability ThenProb, ElseProb; 
-  if (!checkMDProf(BI->getMetadata(LLVMContext::MD_prof), 
-                   ThenProb, ElseProb)) 
-    return false; 
-  BasicBlock *IfThen = BI->getSuccessor(0); 
-  BasicBlock *IfElse = BI->getSuccessor(1); 
-  assert((IfThen == R->getExit() || IfElse == R->getExit()) && 
-         IfThen != IfElse && 
-         "Invariant from findScopes"); 
-  if (IfThen == R->getExit()) { 
-    // Swap them so that IfThen/ThenProb means going into the conditional code 
-    // and IfElse/ElseProb means skipping it. 
-    std::swap(IfThen, IfElse); 
-    std::swap(ThenProb, ElseProb); 
-  } 
-  CHR_DEBUG(dbgs() << "BI " << *BI << " "); 
-  CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " "); 
-  CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n"); 
-  return checkBias(R, ThenProb, ElseProb, 
-                   TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, 
-                   BranchBiasMap); 
-} 
- 
-// Returns true and insert a select into the right biased set and the map if the 
-// select is biased. 
-static bool checkBiasedSelect( 
-    SelectInst *SI, Region *R, 
-    DenseSet<SelectInst *> &TrueBiasedSelectsGlobal, 
-    DenseSet<SelectInst *> &FalseBiasedSelectsGlobal, 
-    DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) { 
-  BranchProbability TrueProb, FalseProb; 
-  if (!checkMDProf(SI->getMetadata(LLVMContext::MD_prof), 
-                   TrueProb, FalseProb)) 
-    return false; 
-  CHR_DEBUG(dbgs() << "SI " << *SI << " "); 
-  CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " "); 
-  CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n"); 
-  return checkBias(SI, TrueProb, FalseProb, 
-                   TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal, 
-                   SelectBiasMap); 
-} 
- 
-// Returns the instruction at which to hoist the dependent condition values and 
-// insert the CHR branch for a region. This is the terminator branch in the 
-// entry block or the first select in the entry block, if any. 
-static Instruction* getBranchInsertPoint(RegInfo &RI) { 
-  Region *R = RI.R; 
-  BasicBlock *EntryBB = R->getEntry(); 
-  // The hoist point is by default the terminator of the entry block, which is 
-  // the same as the branch instruction if RI.HasBranch is true. 
-  Instruction *HoistPoint = EntryBB->getTerminator(); 
-  for (SelectInst *SI : RI.Selects) { 
-    if (SI->getParent() == EntryBB) { 
-      // Pick the first select in Selects in the entry block.  Note Selects is 
-      // sorted in the instruction order within a block (asserted below). 
-      HoistPoint = SI; 
-      break; 
-    } 
-  } 
-  assert(HoistPoint && "Null HoistPoint"); 
-#ifndef NDEBUG 
-  // Check that HoistPoint is the first one in Selects in the entry block, 
-  // if any. 
-  DenseSet<Instruction *> EntryBlockSelectSet; 
-  for (SelectInst *SI : RI.Selects) { 
-    if (SI->getParent() == EntryBB) { 
-      EntryBlockSelectSet.insert(SI); 
-    } 
-  } 
-  for (Instruction &I : *EntryBB) { 
+                 "Must be in head");
+          return true;
+        });
+    ArrayRef<CHRScope *> TailSubs(TailIt, Subs.end());
+
+    assert(HoistStopMap.empty() && "MapHoistStops must be empty");
+    auto *Scope = new CHRScope(TailRegInfos, TailSubs);
+    RegInfos.erase(BoundaryIt, RegInfos.end());
+    Subs.erase(TailIt, Subs.end());
+    return Scope;
+  }
+
+  bool contains(Instruction *I) const {
+    BasicBlock *Parent = I->getParent();
+    for (const RegInfo &RI : RegInfos)
+      if (RI.R->contains(Parent))
+        return true;
+    return false;
+  }
+
+  void print(raw_ostream &OS) const;
+
+  SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope
+  SmallVector<CHRScope *, 8> Subs;  // Subscopes.
+
+  // The instruction at which to insert the CHR conditional branch (and hoist
+  // the dependent condition values).
+  Instruction *BranchInsertPoint;
+
+  // True-biased and false-biased regions (conditional blocks),
+  // respectively. Used only for the outermost scope and includes regions in
+  // subscopes. The rest are unbiased.
+  DenseSet<Region *> TrueBiasedRegions;
+  DenseSet<Region *> FalseBiasedRegions;
+  // Among the biased regions, the regions that get CHRed.
+  SmallVector<RegInfo, 8> CHRRegions;
+
+  // True-biased and false-biased selects, respectively. Used only for the
+  // outermost scope and includes ones in subscopes.
+  DenseSet<SelectInst *> TrueBiasedSelects;
+  DenseSet<SelectInst *> FalseBiasedSelects;
+
+  // Map from one of the above regions to the instructions to stop
+  // hoisting instructions at through use-def chains.
+  HoistStopMapTy HoistStopMap;
+
+ private:
+   CHRScope(ArrayRef<RegInfo> RegInfosIn, ArrayRef<CHRScope *> SubsIn)
+       : RegInfos(RegInfosIn.begin(), RegInfosIn.end()),
+         Subs(SubsIn.begin(), SubsIn.end()), BranchInsertPoint(nullptr) {}
+};
+
+class CHR {
+ public:
+  CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin,
+      ProfileSummaryInfo &PSIin, RegionInfo &RIin,
+      OptimizationRemarkEmitter &OREin)
+      : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin), ORE(OREin) {}
+
+  ~CHR() {
+    for (CHRScope *Scope : Scopes) {
+      delete Scope;
+    }
+  }
+
+  bool run();
+
+ private:
+  // See the comments in CHR::run() for the high level flow of the algorithm and
+  // what the following functions do.
+
+  void findScopes(SmallVectorImpl<CHRScope *> &Output) {
+    Region *R = RI.getTopLevelRegion();
+    if (CHRScope *Scope = findScopes(R, nullptr, nullptr, Output)) {
+      Output.push_back(Scope);
+    }
+  }
+  CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
+                        SmallVectorImpl<CHRScope *> &Scopes);
+  CHRScope *findScope(Region *R);
+  void checkScopeHoistable(CHRScope *Scope);
+
+  void splitScopes(SmallVectorImpl<CHRScope *> &Input,
+                   SmallVectorImpl<CHRScope *> &Output);
+  SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope,
+                                        CHRScope *Outer,
+                                        DenseSet<Value *> *OuterConditionValues,
+                                        Instruction *OuterInsertPoint,
+                                        SmallVectorImpl<CHRScope *> &Output,
+                                        DenseSet<Instruction *> &Unhoistables);
+
+  void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes);
+  void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope);
+
+  void filterScopes(SmallVectorImpl<CHRScope *> &Input,
+                    SmallVectorImpl<CHRScope *> &Output);
+
+  void setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
+                     SmallVectorImpl<CHRScope *> &Output);
+  void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope);
+
+  void sortScopes(SmallVectorImpl<CHRScope *> &Input,
+                  SmallVectorImpl<CHRScope *> &Output);
+
+  void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes);
+  void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs);
+  void cloneScopeBlocks(CHRScope *Scope,
+                        BasicBlock *PreEntryBlock,
+                        BasicBlock *ExitBlock,
+                        Region *LastRegion,
+                        ValueToValueMapTy &VMap);
+  BranchInst *createMergedBranch(BasicBlock *PreEntryBlock,
+                                 BasicBlock *EntryBlock,
+                                 BasicBlock *NewEntryBlock,
+                                 ValueToValueMapTy &VMap);
+  void fixupBranchesAndSelects(CHRScope *Scope,
+                               BasicBlock *PreEntryBlock,
+                               BranchInst *MergedBR,
+                               uint64_t ProfileCount);
+  void fixupBranch(Region *R,
+                   CHRScope *Scope,
+                   IRBuilder<> &IRB,
+                   Value *&MergedCondition, BranchProbability &CHRBranchBias);
+  void fixupSelect(SelectInst* SI,
+                   CHRScope *Scope,
+                   IRBuilder<> &IRB,
+                   Value *&MergedCondition, BranchProbability &CHRBranchBias);
+  void addToMergedCondition(bool IsTrueBiased, Value *Cond,
+                            Instruction *BranchOrSelect,
+                            CHRScope *Scope,
+                            IRBuilder<> &IRB,
+                            Value *&MergedCondition);
+
+  Function &F;
+  BlockFrequencyInfo &BFI;
+  DominatorTree &DT;
+  ProfileSummaryInfo &PSI;
+  RegionInfo &RI;
+  OptimizationRemarkEmitter &ORE;
+  CHRStats Stats;
+
+  // All the true-biased regions in the function
+  DenseSet<Region *> TrueBiasedRegionsGlobal;
+  // All the false-biased regions in the function
+  DenseSet<Region *> FalseBiasedRegionsGlobal;
+  // All the true-biased selects in the function
+  DenseSet<SelectInst *> TrueBiasedSelectsGlobal;
+  // All the false-biased selects in the function
+  DenseSet<SelectInst *> FalseBiasedSelectsGlobal;
+  // A map from biased regions to their branch bias
+  DenseMap<Region *, BranchProbability> BranchBiasMap;
+  // A map from biased selects to their branch bias
+  DenseMap<SelectInst *, BranchProbability> SelectBiasMap;
+  // All the scopes.
+  DenseSet<CHRScope *> Scopes;
+};
+
+} // end anonymous namespace
+
+static inline
+raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS,
+                                              const CHRStats &Stats) {
+  Stats.print(OS);
+  return OS;
+}
+
+static inline
+raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) {
+  Scope.print(OS);
+  return OS;
+}
+
+static bool shouldApply(Function &F, ProfileSummaryInfo& PSI) {
+  if (ForceCHR)
+    return true;
+
+  if (!CHRModuleList.empty() || !CHRFunctionList.empty()) {
+    if (CHRModules.count(F.getParent()->getName()))
+      return true;
+    return CHRFunctions.count(F.getName());
+  }
+
+  assert(PSI.hasProfileSummary() && "Empty PSI?");
+  return PSI.isFunctionEntryHot(&F);
+}
+
+static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label,
+                                         CHRStats *Stats) {
+  StringRef FuncName = F.getName();
+  StringRef ModuleName = F.getParent()->getName();
+  (void)(FuncName); // Unused in release build.
+  (void)(ModuleName); // Unused in release build.
+  CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " "
+            << FuncName);
+  if (Stats)
+    CHR_DEBUG(dbgs() << " " << *Stats);
+  CHR_DEBUG(dbgs() << "\n");
+  CHR_DEBUG(F.dump());
+}
+
+void CHRScope::print(raw_ostream &OS) const {
+  assert(RegInfos.size() > 0 && "Empty CHRScope");
+  OS << "CHRScope[";
+  OS << RegInfos.size() << ", Regions[";
+  for (const RegInfo &RI : RegInfos) {
+    OS << RI.R->getNameStr();
+    if (RI.HasBranch)
+      OS << " B";
+    if (RI.Selects.size() > 0)
+      OS << " S" << RI.Selects.size();
+    OS << ", ";
+  }
+  if (RegInfos[0].R->getParent()) {
+    OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr();
+  } else {
+    // top level region
+    OS << "]";
+  }
+  OS << ", Subs[";
+  for (CHRScope *Sub : Subs) {
+    OS << *Sub << ", ";
+  }
+  OS << "]]";
+}
+
+// Return true if the given instruction type can be hoisted by CHR.
+static bool isHoistableInstructionType(Instruction *I) {
+  return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) ||
+      isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
+      isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
+      isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
+      isa<InsertValueInst>(I);
+}
+
+// Return true if the given instruction can be hoisted by CHR.
+static bool isHoistable(Instruction *I, DominatorTree &DT) {
+  if (!isHoistableInstructionType(I))
+    return false;
+  return isSafeToSpeculativelyExecute(I, nullptr, &DT);
+}
+
+// Recursively traverse the use-def chains of the given value and return a set
+// of the unhoistable base values defined within the scope (excluding the
+// first-region entry block) or the (hoistable or unhoistable) base values that
+// are defined outside (including the first-region entry block) of the
+// scope. The returned set doesn't include constants.
+static const std::set<Value *> &
+getBaseValues(Value *V, DominatorTree &DT,
+              DenseMap<Value *, std::set<Value *>> &Visited) {
+  auto It = Visited.find(V);
+  if (It != Visited.end()) {
+    return It->second;
+  }
+  std::set<Value *> Result;
+  if (auto *I = dyn_cast<Instruction>(V)) {
+    // We don't stop at a block that's not in the Scope because we would miss
+    // some instructions that are based on the same base values if we stop
+    // there.
+    if (!isHoistable(I, DT)) {
+      Result.insert(I);
+      return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
+    }
+    // I is hoistable above the Scope.
+    for (Value *Op : I->operands()) {
+      const std::set<Value *> &OpResult = getBaseValues(Op, DT, Visited);
+      Result.insert(OpResult.begin(), OpResult.end());
+    }
+    return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
+  }
+  if (isa<Argument>(V)) {
+    Result.insert(V);
+  }
+  // We don't include others like constants because those won't lead to any
+  // chance of folding of conditions (eg two bit checks merged into one check)
+  // after CHR.
+  return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
+}
+
+// Return true if V is already hoisted or can be hoisted (along with its
+// operands) above the insert point. When it returns true and HoistStops is
+// non-null, the instructions to stop hoisting at through the use-def chains are
+// inserted into HoistStops.
+static bool
+checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT,
+                DenseSet<Instruction *> &Unhoistables,
+                DenseSet<Instruction *> *HoistStops,
+                DenseMap<Instruction *, bool> &Visited) {
+  assert(InsertPoint && "Null InsertPoint");
+  if (auto *I = dyn_cast<Instruction>(V)) {
+    auto It = Visited.find(I);
+    if (It != Visited.end()) {
+      return It->second;
+    }
+    assert(DT.getNode(I->getParent()) && "DT must contain I's parent block");
+    assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination");
+    if (Unhoistables.count(I)) {
+      // Don't hoist if they are not to be hoisted.
+      Visited[I] = false;
+      return false;
+    }
+    if (DT.dominates(I, InsertPoint)) {
+      // We are already above the insert point. Stop here.
+      if (HoistStops)
+        HoistStops->insert(I);
+      Visited[I] = true;
+      return true;
+    }
+    // We aren't not above the insert point, check if we can hoist it above the
+    // insert point.
+    if (isHoistable(I, DT)) {
+      // Check operands first.
+      DenseSet<Instruction *> OpsHoistStops;
+      bool AllOpsHoisted = true;
+      for (Value *Op : I->operands()) {
+        if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops,
+                             Visited)) {
+          AllOpsHoisted = false;
+          break;
+        }
+      }
+      if (AllOpsHoisted) {
+        CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n");
+        if (HoistStops)
+          HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end());
+        Visited[I] = true;
+        return true;
+      }
+    }
+    Visited[I] = false;
+    return false;
+  }
+  // Non-instructions are considered hoistable.
+  return true;
+}
+
+// Returns true and sets the true probability and false probability of an
+// MD_prof metadata if it's well-formed.
+static bool checkMDProf(MDNode *MD, BranchProbability &TrueProb,
+                        BranchProbability &FalseProb) {
+  if (!MD) return false;
+  MDString *MDName = cast<MDString>(MD->getOperand(0));
+  if (MDName->getString() != "branch_weights" ||
+      MD->getNumOperands() != 3)
+    return false;
+  ConstantInt *TrueWeight = mdconst::extract<ConstantInt>(MD->getOperand(1));
+  ConstantInt *FalseWeight = mdconst::extract<ConstantInt>(MD->getOperand(2));
+  if (!TrueWeight || !FalseWeight)
+    return false;
+  uint64_t TrueWt = TrueWeight->getValue().getZExtValue();
+  uint64_t FalseWt = FalseWeight->getValue().getZExtValue();
+  uint64_t SumWt = TrueWt + FalseWt;
+
+  assert(SumWt >= TrueWt && SumWt >= FalseWt &&
+         "Overflow calculating branch probabilities.");
+
+  // Guard against 0-to-0 branch weights to avoid a division-by-zero crash.
+  if (SumWt == 0)
+    return false;
+
+  TrueProb = BranchProbability::getBranchProbability(TrueWt, SumWt);
+  FalseProb = BranchProbability::getBranchProbability(FalseWt, SumWt);
+  return true;
+}
+
+static BranchProbability getCHRBiasThreshold() {
+  return BranchProbability::getBranchProbability(
+      static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000);
+}
+
+// A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >=
+// CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >=
+// CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return
+// false.
+template <typename K, typename S, typename M>
+static bool checkBias(K *Key, BranchProbability TrueProb,
+                      BranchProbability FalseProb, S &TrueSet, S &FalseSet,
+                      M &BiasMap) {
+  BranchProbability Threshold = getCHRBiasThreshold();
+  if (TrueProb >= Threshold) {
+    TrueSet.insert(Key);
+    BiasMap[Key] = TrueProb;
+    return true;
+  } else if (FalseProb >= Threshold) {
+    FalseSet.insert(Key);
+    BiasMap[Key] = FalseProb;
+    return true;
+  }
+  return false;
+}
+
+// Returns true and insert a region into the right biased set and the map if the
+// branch of the region is biased.
+static bool checkBiasedBranch(BranchInst *BI, Region *R,
+                              DenseSet<Region *> &TrueBiasedRegionsGlobal,
+                              DenseSet<Region *> &FalseBiasedRegionsGlobal,
+                              DenseMap<Region *, BranchProbability> &BranchBiasMap) {
+  if (!BI->isConditional())
+    return false;
+  BranchProbability ThenProb, ElseProb;
+  if (!checkMDProf(BI->getMetadata(LLVMContext::MD_prof),
+                   ThenProb, ElseProb))
+    return false;
+  BasicBlock *IfThen = BI->getSuccessor(0);
+  BasicBlock *IfElse = BI->getSuccessor(1);
+  assert((IfThen == R->getExit() || IfElse == R->getExit()) &&
+         IfThen != IfElse &&
+         "Invariant from findScopes");
+  if (IfThen == R->getExit()) {
+    // Swap them so that IfThen/ThenProb means going into the conditional code
+    // and IfElse/ElseProb means skipping it.
+    std::swap(IfThen, IfElse);
+    std::swap(ThenProb, ElseProb);
+  }
+  CHR_DEBUG(dbgs() << "BI " << *BI << " ");
+  CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " ");
+  CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n");
+  return checkBias(R, ThenProb, ElseProb,
+                   TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
+                   BranchBiasMap);
+}
+
+// Returns true and insert a select into the right biased set and the map if the
+// select is biased.
+static bool checkBiasedSelect(
+    SelectInst *SI, Region *R,
+    DenseSet<SelectInst *> &TrueBiasedSelectsGlobal,
+    DenseSet<SelectInst *> &FalseBiasedSelectsGlobal,
+    DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) {
+  BranchProbability TrueProb, FalseProb;
+  if (!checkMDProf(SI->getMetadata(LLVMContext::MD_prof),
+                   TrueProb, FalseProb))
+    return false;
+  CHR_DEBUG(dbgs() << "SI " << *SI << " ");
+  CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " ");
+  CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n");
+  return checkBias(SI, TrueProb, FalseProb,
+                   TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal,
+                   SelectBiasMap);
+}
+
+// Returns the instruction at which to hoist the dependent condition values and
+// insert the CHR branch for a region. This is the terminator branch in the
+// entry block or the first select in the entry block, if any.
+static Instruction* getBranchInsertPoint(RegInfo &RI) {
+  Region *R = RI.R;
+  BasicBlock *EntryBB = R->getEntry();
+  // The hoist point is by default the terminator of the entry block, which is
+  // the same as the branch instruction if RI.HasBranch is true.
+  Instruction *HoistPoint = EntryBB->getTerminator();
+  for (SelectInst *SI : RI.Selects) {
+    if (SI->getParent() == EntryBB) {
+      // Pick the first select in Selects in the entry block.  Note Selects is
+      // sorted in the instruction order within a block (asserted below).
+      HoistPoint = SI;
+      break;
+    }
+  }
+  assert(HoistPoint && "Null HoistPoint");
+#ifndef NDEBUG
+  // Check that HoistPoint is the first one in Selects in the entry block,
+  // if any.
+  DenseSet<Instruction *> EntryBlockSelectSet;
+  for (SelectInst *SI : RI.Selects) {
+    if (SI->getParent() == EntryBB) {
+      EntryBlockSelectSet.insert(SI);
+    }
+  }
+  for (Instruction &I : *EntryBB) {
     if (EntryBlockSelectSet.contains(&I)) {
-      assert(&I == HoistPoint && 
-             "HoistPoint must be the first one in Selects"); 
-      break; 
-    } 
-  } 
-#endif 
-  return HoistPoint; 
-} 
- 
-// Find a CHR scope in the given region. 
-CHRScope * CHR::findScope(Region *R) { 
-  CHRScope *Result = nullptr; 
-  BasicBlock *Entry = R->getEntry(); 
-  BasicBlock *Exit = R->getExit();  // null if top level. 
-  assert(Entry && "Entry must not be null"); 
-  assert((Exit == nullptr) == (R->isTopLevelRegion()) && 
-         "Only top level region has a null exit"); 
-  if (Entry) 
-    CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n"); 
-  else 
-    CHR_DEBUG(dbgs() << "Entry null\n"); 
-  if (Exit) 
-    CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n"); 
-  else 
-    CHR_DEBUG(dbgs() << "Exit null\n"); 
-  // Exclude cases where Entry is part of a subregion (hence it doesn't belong 
-  // to this region). 
-  bool EntryInSubregion = RI.getRegionFor(Entry) != R; 
-  if (EntryInSubregion) 
-    return nullptr; 
-  // Exclude loops 
-  for (BasicBlock *Pred : predecessors(Entry)) 
-    if (R->contains(Pred)) 
-      return nullptr; 
-  if (Exit) { 
-    // Try to find an if-then block (check if R is an if-then). 
-    // if (cond) { 
-    //  ... 
-    // } 
-    auto *BI = dyn_cast<BranchInst>(Entry->getTerminator()); 
-    if (BI) 
-      CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n"); 
-    else 
-      CHR_DEBUG(dbgs() << "BI null\n"); 
-    if (BI && BI->isConditional()) { 
-      BasicBlock *S0 = BI->getSuccessor(0); 
-      BasicBlock *S1 = BI->getSuccessor(1); 
-      CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n"); 
-      CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n"); 
-      if (S0 != S1 && (S0 == Exit || S1 == Exit)) { 
-        RegInfo RI(R); 
-        RI.HasBranch = checkBiasedBranch( 
-            BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, 
-            BranchBiasMap); 
-        Result = new CHRScope(RI); 
-        Scopes.insert(Result); 
-        CHR_DEBUG(dbgs() << "Found a region with a branch\n"); 
-        ++Stats.NumBranches; 
-        if (!RI.HasBranch) { 
-          ORE.emit([&]() { 
-            return OptimizationRemarkMissed(DEBUG_TYPE, "BranchNotBiased", BI) 
-                << "Branch not biased"; 
-          }); 
-        } 
-      } 
-    } 
-  } 
-  { 
-    // Try to look for selects in the direct child blocks (as opposed to in 
-    // subregions) of R. 
-    // ... 
-    // if (..) { // Some subregion 
-    //   ... 
-    // } 
-    // if (..) { // Some subregion 
-    //   ... 
-    // } 
-    // ... 
-    // a = cond ? b : c; 
-    // ... 
-    SmallVector<SelectInst *, 8> Selects; 
-    for (RegionNode *E : R->elements()) { 
-      if (E->isSubRegion()) 
-        continue; 
-      // This returns the basic block of E if E is a direct child of R (not a 
-      // subregion.) 
-      BasicBlock *BB = E->getEntry(); 
-      // Need to push in the order to make it easier to find the first Select 
-      // later. 
-      for (Instruction &I : *BB) { 
-        if (auto *SI = dyn_cast<SelectInst>(&I)) { 
-          Selects.push_back(SI); 
-          ++Stats.NumBranches; 
-        } 
-      } 
-    } 
-    if (Selects.size() > 0) { 
-      auto AddSelects = [&](RegInfo &RI) { 
-        for (auto *SI : Selects) 
-          if (checkBiasedSelect(SI, RI.R, 
-                                TrueBiasedSelectsGlobal, 
-                                FalseBiasedSelectsGlobal, 
-                                SelectBiasMap)) 
-            RI.Selects.push_back(SI); 
-          else 
-            ORE.emit([&]() { 
-              return OptimizationRemarkMissed(DEBUG_TYPE, "SelectNotBiased", SI) 
-                  << "Select not biased"; 
-            }); 
-      }; 
-      if (!Result) { 
-        CHR_DEBUG(dbgs() << "Found a select-only region\n"); 
-        RegInfo RI(R); 
-        AddSelects(RI); 
-        Result = new CHRScope(RI); 
-        Scopes.insert(Result); 
-      } else { 
-        CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n"); 
-        AddSelects(Result->RegInfos[0]); 
-      } 
-    } 
-  } 
- 
-  if (Result) { 
-    checkScopeHoistable(Result); 
-  } 
-  return Result; 
-} 
- 
-// Check that any of the branch and the selects in the region could be 
-// hoisted above the the CHR branch insert point (the most dominating of 
-// them, either the branch (at the end of the first block) or the first 
-// select in the first block). If the branch can't be hoisted, drop the 
-// selects in the first blocks. 
-// 
-// For example, for the following scope/region with selects, we want to insert 
-// the merged branch right before the first select in the first/entry block by 
-// hoisting c1, c2, c3, and c4. 
-// 
-// // Branch insert point here. 
-// a = c1 ? b : c; // Select 1 
-// d = c2 ? e : f; // Select 2 
-// if (c3) { // Branch 
-//   ... 
-//   c4 = foo() // A call. 
-//   g = c4 ? h : i; // Select 3 
-// } 
-// 
-// But suppose we can't hoist c4 because it's dependent on the preceding 
-// call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop 
-// Select 2. If we can't hoist c3, we drop Selects 1 & 2. 
-void CHR::checkScopeHoistable(CHRScope *Scope) { 
-  RegInfo &RI = Scope->RegInfos[0]; 
-  Region *R = RI.R; 
-  BasicBlock *EntryBB = R->getEntry(); 
-  auto *Branch = RI.HasBranch ? 
-                 cast<BranchInst>(EntryBB->getTerminator()) : nullptr; 
-  SmallVector<SelectInst *, 8> &Selects = RI.Selects; 
-  if (RI.HasBranch || !Selects.empty()) { 
-    Instruction *InsertPoint = getBranchInsertPoint(RI); 
-    CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 
-    // Avoid a data dependence from a select or a branch to a(nother) 
-    // select. Note no instruction can't data-depend on a branch (a branch 
-    // instruction doesn't produce a value). 
-    DenseSet<Instruction *> Unhoistables; 
-    // Initialize Unhoistables with the selects. 
-    for (SelectInst *SI : Selects) { 
-      Unhoistables.insert(SI); 
-    } 
-    // Remove Selects that can't be hoisted. 
-    for (auto it = Selects.begin(); it != Selects.end(); ) { 
-      SelectInst *SI = *it; 
-      if (SI == InsertPoint) { 
-        ++it; 
-        continue; 
-      } 
-      DenseMap<Instruction *, bool> Visited; 
-      bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, 
-                                         DT, Unhoistables, nullptr, Visited); 
-      if (!IsHoistable) { 
-        CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n"); 
-        ORE.emit([&]() { 
-          return OptimizationRemarkMissed(DEBUG_TYPE, 
-                                          "DropUnhoistableSelect", SI) 
-              << "Dropped unhoistable select"; 
-        }); 
-        it = Selects.erase(it); 
-        // Since we are dropping the select here, we also drop it from 
-        // Unhoistables. 
-        Unhoistables.erase(SI); 
-      } else 
-        ++it; 
-    } 
-    // Update InsertPoint after potentially removing selects. 
-    InsertPoint = getBranchInsertPoint(RI); 
-    CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 
-    if (RI.HasBranch && InsertPoint != Branch) { 
-      DenseMap<Instruction *, bool> Visited; 
-      bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint, 
-                                         DT, Unhoistables, nullptr, Visited); 
-      if (!IsHoistable) { 
-        // If the branch isn't hoistable, drop the selects in the entry 
-        // block, preferring the branch, which makes the branch the hoist 
-        // point. 
-        assert(InsertPoint != Branch && "Branch must not be the hoist point"); 
-        CHR_DEBUG(dbgs() << "Dropping selects in entry block \n"); 
-        CHR_DEBUG( 
-            for (SelectInst *SI : Selects) { 
-              dbgs() << "SI " << *SI << "\n"; 
-            }); 
-        for (SelectInst *SI : Selects) { 
-          ORE.emit([&]() { 
-            return OptimizationRemarkMissed(DEBUG_TYPE, 
-                                            "DropSelectUnhoistableBranch", SI) 
-                << "Dropped select due to unhoistable branch"; 
-          }); 
-        } 
+      assert(&I == HoistPoint &&
+             "HoistPoint must be the first one in Selects");
+      break;
+    }
+  }
+#endif
+  return HoistPoint;
+}
+
+// Find a CHR scope in the given region.
+CHRScope * CHR::findScope(Region *R) {
+  CHRScope *Result = nullptr;
+  BasicBlock *Entry = R->getEntry();
+  BasicBlock *Exit = R->getExit();  // null if top level.
+  assert(Entry && "Entry must not be null");
+  assert((Exit == nullptr) == (R->isTopLevelRegion()) &&
+         "Only top level region has a null exit");
+  if (Entry)
+    CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n");
+  else
+    CHR_DEBUG(dbgs() << "Entry null\n");
+  if (Exit)
+    CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n");
+  else
+    CHR_DEBUG(dbgs() << "Exit null\n");
+  // Exclude cases where Entry is part of a subregion (hence it doesn't belong
+  // to this region).
+  bool EntryInSubregion = RI.getRegionFor(Entry) != R;
+  if (EntryInSubregion)
+    return nullptr;
+  // Exclude loops
+  for (BasicBlock *Pred : predecessors(Entry))
+    if (R->contains(Pred))
+      return nullptr;
+  if (Exit) {
+    // Try to find an if-then block (check if R is an if-then).
+    // if (cond) {
+    //  ...
+    // }
+    auto *BI = dyn_cast<BranchInst>(Entry->getTerminator());
+    if (BI)
+      CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n");
+    else
+      CHR_DEBUG(dbgs() << "BI null\n");
+    if (BI && BI->isConditional()) {
+      BasicBlock *S0 = BI->getSuccessor(0);
+      BasicBlock *S1 = BI->getSuccessor(1);
+      CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n");
+      CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n");
+      if (S0 != S1 && (S0 == Exit || S1 == Exit)) {
+        RegInfo RI(R);
+        RI.HasBranch = checkBiasedBranch(
+            BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
+            BranchBiasMap);
+        Result = new CHRScope(RI);
+        Scopes.insert(Result);
+        CHR_DEBUG(dbgs() << "Found a region with a branch\n");
+        ++Stats.NumBranches;
+        if (!RI.HasBranch) {
+          ORE.emit([&]() {
+            return OptimizationRemarkMissed(DEBUG_TYPE, "BranchNotBiased", BI)
+                << "Branch not biased";
+          });
+        }
+      }
+    }
+  }
+  {
+    // Try to look for selects in the direct child blocks (as opposed to in
+    // subregions) of R.
+    // ...
+    // if (..) { // Some subregion
+    //   ...
+    // }
+    // if (..) { // Some subregion
+    //   ...
+    // }
+    // ...
+    // a = cond ? b : c;
+    // ...
+    SmallVector<SelectInst *, 8> Selects;
+    for (RegionNode *E : R->elements()) {
+      if (E->isSubRegion())
+        continue;
+      // This returns the basic block of E if E is a direct child of R (not a
+      // subregion.)
+      BasicBlock *BB = E->getEntry();
+      // Need to push in the order to make it easier to find the first Select
+      // later.
+      for (Instruction &I : *BB) {
+        if (auto *SI = dyn_cast<SelectInst>(&I)) {
+          Selects.push_back(SI);
+          ++Stats.NumBranches;
+        }
+      }
+    }
+    if (Selects.size() > 0) {
+      auto AddSelects = [&](RegInfo &RI) {
+        for (auto *SI : Selects)
+          if (checkBiasedSelect(SI, RI.R,
+                                TrueBiasedSelectsGlobal,
+                                FalseBiasedSelectsGlobal,
+                                SelectBiasMap))
+            RI.Selects.push_back(SI);
+          else
+            ORE.emit([&]() {
+              return OptimizationRemarkMissed(DEBUG_TYPE, "SelectNotBiased", SI)
+                  << "Select not biased";
+            });
+      };
+      if (!Result) {
+        CHR_DEBUG(dbgs() << "Found a select-only region\n");
+        RegInfo RI(R);
+        AddSelects(RI);
+        Result = new CHRScope(RI);
+        Scopes.insert(Result);
+      } else {
+        CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n");
+        AddSelects(Result->RegInfos[0]);
+      }
+    }
+  }
+
+  if (Result) {
+    checkScopeHoistable(Result);
+  }
+  return Result;
+}
+
+// Check that any of the branch and the selects in the region could be
+// hoisted above the the CHR branch insert point (the most dominating of
+// them, either the branch (at the end of the first block) or the first
+// select in the first block). If the branch can't be hoisted, drop the
+// selects in the first blocks.
+//
+// For example, for the following scope/region with selects, we want to insert
+// the merged branch right before the first select in the first/entry block by
+// hoisting c1, c2, c3, and c4.
+//
+// // Branch insert point here.
+// a = c1 ? b : c; // Select 1
+// d = c2 ? e : f; // Select 2
+// if (c3) { // Branch
+//   ...
+//   c4 = foo() // A call.
+//   g = c4 ? h : i; // Select 3
+// }
+//
+// But suppose we can't hoist c4 because it's dependent on the preceding
+// call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop
+// Select 2. If we can't hoist c3, we drop Selects 1 & 2.
+void CHR::checkScopeHoistable(CHRScope *Scope) {
+  RegInfo &RI = Scope->RegInfos[0];
+  Region *R = RI.R;
+  BasicBlock *EntryBB = R->getEntry();
+  auto *Branch = RI.HasBranch ?
+                 cast<BranchInst>(EntryBB->getTerminator()) : nullptr;
+  SmallVector<SelectInst *, 8> &Selects = RI.Selects;
+  if (RI.HasBranch || !Selects.empty()) {
+    Instruction *InsertPoint = getBranchInsertPoint(RI);
+    CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
+    // Avoid a data dependence from a select or a branch to a(nother)
+    // select. Note no instruction can't data-depend on a branch (a branch
+    // instruction doesn't produce a value).
+    DenseSet<Instruction *> Unhoistables;
+    // Initialize Unhoistables with the selects.
+    for (SelectInst *SI : Selects) {
+      Unhoistables.insert(SI);
+    }
+    // Remove Selects that can't be hoisted.
+    for (auto it = Selects.begin(); it != Selects.end(); ) {
+      SelectInst *SI = *it;
+      if (SI == InsertPoint) {
+        ++it;
+        continue;
+      }
+      DenseMap<Instruction *, bool> Visited;
+      bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint,
+                                         DT, Unhoistables, nullptr, Visited);
+      if (!IsHoistable) {
+        CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n");
+        ORE.emit([&]() {
+          return OptimizationRemarkMissed(DEBUG_TYPE,
+                                          "DropUnhoistableSelect", SI)
+              << "Dropped unhoistable select";
+        });
+        it = Selects.erase(it);
+        // Since we are dropping the select here, we also drop it from
+        // Unhoistables.
+        Unhoistables.erase(SI);
+      } else
+        ++it;
+    }
+    // Update InsertPoint after potentially removing selects.
+    InsertPoint = getBranchInsertPoint(RI);
+    CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
+    if (RI.HasBranch && InsertPoint != Branch) {
+      DenseMap<Instruction *, bool> Visited;
+      bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint,
+                                         DT, Unhoistables, nullptr, Visited);
+      if (!IsHoistable) {
+        // If the branch isn't hoistable, drop the selects in the entry
+        // block, preferring the branch, which makes the branch the hoist
+        // point.
+        assert(InsertPoint != Branch && "Branch must not be the hoist point");
+        CHR_DEBUG(dbgs() << "Dropping selects in entry block \n");
+        CHR_DEBUG(
+            for (SelectInst *SI : Selects) {
+              dbgs() << "SI " << *SI << "\n";
+            });
+        for (SelectInst *SI : Selects) {
+          ORE.emit([&]() {
+            return OptimizationRemarkMissed(DEBUG_TYPE,
+                                            "DropSelectUnhoistableBranch", SI)
+                << "Dropped select due to unhoistable branch";
+          });
+        }
         llvm::erase_if(Selects, [EntryBB](SelectInst *SI) {
           return SI->getParent() == EntryBB;
         });
-        Unhoistables.clear(); 
-        InsertPoint = Branch; 
-      } 
-    } 
-    CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 
-#ifndef NDEBUG 
-    if (RI.HasBranch) { 
-      assert(!DT.dominates(Branch, InsertPoint) && 
-             "Branch can't be already above the hoist point"); 
-      DenseMap<Instruction *, bool> Visited; 
-      assert(checkHoistValue(Branch->getCondition(), InsertPoint, 
-                             DT, Unhoistables, nullptr, Visited) && 
-             "checkHoistValue for branch"); 
-    } 
-    for (auto *SI : Selects) { 
-      assert(!DT.dominates(SI, InsertPoint) && 
-             "SI can't be already above the hoist point"); 
-      DenseMap<Instruction *, bool> Visited; 
-      assert(checkHoistValue(SI->getCondition(), InsertPoint, DT, 
-                             Unhoistables, nullptr, Visited) && 
-             "checkHoistValue for selects"); 
-    } 
-    CHR_DEBUG(dbgs() << "Result\n"); 
-    if (RI.HasBranch) { 
-      CHR_DEBUG(dbgs() << "BI " << *Branch << "\n"); 
-    } 
-    for (auto *SI : Selects) { 
-      CHR_DEBUG(dbgs() << "SI " << *SI << "\n"); 
-    } 
-#endif 
-  } 
-} 
- 
-// Traverse the region tree, find all nested scopes and merge them if possible. 
-CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion, 
-                           SmallVectorImpl<CHRScope *> &Scopes) { 
-  CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n"); 
-  CHRScope *Result = findScope(R); 
-  // Visit subscopes. 
-  CHRScope *ConsecutiveSubscope = nullptr; 
-  SmallVector<CHRScope *, 8> Subscopes; 
-  for (auto It = R->begin(); It != R->end(); ++It) { 
-    const std::unique_ptr<Region> &SubR = *It; 
-    auto NextIt = std::next(It); 
-    Region *NextSubR = NextIt != R->end() ? NextIt->get() : nullptr; 
-    CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr() 
-              << "\n"); 
-    CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes); 
-    if (SubCHRScope) { 
-      CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n"); 
-    } else { 
-      CHR_DEBUG(dbgs() << "Subregion Scope null\n"); 
-    } 
-    if (SubCHRScope) { 
-      if (!ConsecutiveSubscope) 
-        ConsecutiveSubscope = SubCHRScope; 
-      else if (!ConsecutiveSubscope->appendable(SubCHRScope)) { 
-        Subscopes.push_back(ConsecutiveSubscope); 
-        ConsecutiveSubscope = SubCHRScope; 
-      } else 
-        ConsecutiveSubscope->append(SubCHRScope); 
-    } else { 
-      if (ConsecutiveSubscope) { 
-        Subscopes.push_back(ConsecutiveSubscope); 
-      } 
-      ConsecutiveSubscope = nullptr; 
-    } 
-  } 
-  if (ConsecutiveSubscope) { 
-    Subscopes.push_back(ConsecutiveSubscope); 
-  } 
-  for (CHRScope *Sub : Subscopes) { 
-    if (Result) { 
-      // Combine it with the parent. 
-      Result->addSub(Sub); 
-    } else { 
-      // Push Subscopes as they won't be combined with the parent. 
-      Scopes.push_back(Sub); 
-    } 
-  } 
-  return Result; 
-} 
- 
-static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) { 
-  DenseSet<Value *> ConditionValues; 
-  if (RI.HasBranch) { 
-    auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator()); 
-    ConditionValues.insert(BI->getCondition()); 
-  } 
-  for (SelectInst *SI : RI.Selects) { 
-    ConditionValues.insert(SI->getCondition()); 
-  } 
-  return ConditionValues; 
-} 
- 
- 
-// Determine whether to split a scope depending on the sets of the branch 
-// condition values of the previous region and the current region. We split 
-// (return true) it if 1) the condition values of the inner/lower scope can't be 
-// hoisted up to the outer/upper scope, or 2) the two sets of the condition 
-// values have an empty intersection (because the combined branch conditions 
-// won't probably lead to a simpler combined condition). 
-static bool shouldSplit(Instruction *InsertPoint, 
-                        DenseSet<Value *> &PrevConditionValues, 
-                        DenseSet<Value *> &ConditionValues, 
-                        DominatorTree &DT, 
-                        DenseSet<Instruction *> &Unhoistables) { 
-  assert(InsertPoint && "Null InsertPoint"); 
-  CHR_DEBUG( 
-      dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues "; 
-      for (Value *V : PrevConditionValues) { 
-        dbgs() << *V << ", "; 
-      } 
-      dbgs() << " ConditionValues "; 
-      for (Value *V : ConditionValues) { 
-        dbgs() << *V << ", "; 
-      } 
-      dbgs() << "\n"); 
-  // If any of Bases isn't hoistable to the hoist point, split. 
-  for (Value *V : ConditionValues) { 
-    DenseMap<Instruction *, bool> Visited; 
-    if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr, Visited)) { 
-      CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n"); 
-      return true; // Not hoistable, split. 
-    } 
-  } 
-  // If PrevConditionValues or ConditionValues is empty, don't split to avoid 
-  // unnecessary splits at scopes with no branch/selects.  If 
-  // PrevConditionValues and ConditionValues don't intersect at all, split. 
-  if (!PrevConditionValues.empty() && !ConditionValues.empty()) { 
-    // Use std::set as DenseSet doesn't work with set_intersection. 
-    std::set<Value *> PrevBases, Bases; 
-    DenseMap<Value *, std::set<Value *>> Visited; 
-    for (Value *V : PrevConditionValues) { 
-      const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited); 
-      PrevBases.insert(BaseValues.begin(), BaseValues.end()); 
-    } 
-    for (Value *V : ConditionValues) { 
-      const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited); 
-      Bases.insert(BaseValues.begin(), BaseValues.end()); 
-    } 
-    CHR_DEBUG( 
-        dbgs() << "PrevBases "; 
-        for (Value *V : PrevBases) { 
-          dbgs() << *V << ", "; 
-        } 
-        dbgs() << " Bases "; 
-        for (Value *V : Bases) { 
-          dbgs() << *V << ", "; 
-        } 
-        dbgs() << "\n"); 
-    std::vector<Value *> Intersection; 
-    std::set_intersection(PrevBases.begin(), PrevBases.end(), Bases.begin(), 
-                          Bases.end(), std::back_inserter(Intersection)); 
-    if (Intersection.empty()) { 
-      // Empty intersection, split. 
-      CHR_DEBUG(dbgs() << "Split. Intersection empty\n"); 
-      return true; 
-    } 
-  } 
-  CHR_DEBUG(dbgs() << "No split\n"); 
-  return false;  // Don't split. 
-} 
- 
-static void getSelectsInScope(CHRScope *Scope, 
-                              DenseSet<Instruction *> &Output) { 
-  for (RegInfo &RI : Scope->RegInfos) 
-    for (SelectInst *SI : RI.Selects) 
-      Output.insert(SI); 
-  for (CHRScope *Sub : Scope->Subs) 
-    getSelectsInScope(Sub, Output); 
-} 
- 
-void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input, 
-                      SmallVectorImpl<CHRScope *> &Output) { 
-  for (CHRScope *Scope : Input) { 
-    assert(!Scope->BranchInsertPoint && 
-           "BranchInsertPoint must not be set"); 
-    DenseSet<Instruction *> Unhoistables; 
-    getSelectsInScope(Scope, Unhoistables); 
-    splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables); 
-  } 
-#ifndef NDEBUG 
-  for (CHRScope *Scope : Output) { 
-    assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set"); 
-  } 
-#endif 
-} 
- 
-SmallVector<CHRScope *, 8> CHR::splitScope( 
-    CHRScope *Scope, 
-    CHRScope *Outer, 
-    DenseSet<Value *> *OuterConditionValues, 
-    Instruction *OuterInsertPoint, 
-    SmallVectorImpl<CHRScope *> &Output, 
-    DenseSet<Instruction *> &Unhoistables) { 
-  if (Outer) { 
-    assert(OuterConditionValues && "Null OuterConditionValues"); 
-    assert(OuterInsertPoint && "Null OuterInsertPoint"); 
-  } 
-  bool PrevSplitFromOuter = true; 
-  DenseSet<Value *> PrevConditionValues; 
-  Instruction *PrevInsertPoint = nullptr; 
-  SmallVector<CHRScope *, 8> Splits; 
-  SmallVector<bool, 8> SplitsSplitFromOuter; 
-  SmallVector<DenseSet<Value *>, 8> SplitsConditionValues; 
-  SmallVector<Instruction *, 8> SplitsInsertPoints; 
-  SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos);  // Copy 
-  for (RegInfo &RI : RegInfos) { 
-    Instruction *InsertPoint = getBranchInsertPoint(RI); 
-    DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI); 
-    CHR_DEBUG( 
-        dbgs() << "ConditionValues "; 
-        for (Value *V : ConditionValues) { 
-          dbgs() << *V << ", "; 
-        } 
-        dbgs() << "\n"); 
-    if (RI.R == RegInfos[0].R) { 
-      // First iteration. Check to see if we should split from the outer. 
-      if (Outer) { 
-        CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n"); 
-        CHR_DEBUG(dbgs() << "Should split from outer at " 
-                  << RI.R->getNameStr() << "\n"); 
-        if (shouldSplit(OuterInsertPoint, *OuterConditionValues, 
-                        ConditionValues, DT, Unhoistables)) { 
-          PrevConditionValues = ConditionValues; 
-          PrevInsertPoint = InsertPoint; 
-          ORE.emit([&]() { 
-            return OptimizationRemarkMissed(DEBUG_TYPE, 
-                                            "SplitScopeFromOuter", 
-                                            RI.R->getEntry()->getTerminator()) 
-                << "Split scope from outer due to unhoistable branch/select " 
-                << "and/or lack of common condition values"; 
-          }); 
-        } else { 
-          // Not splitting from the outer. Use the outer bases and insert 
-          // point. Union the bases. 
-          PrevSplitFromOuter = false; 
-          PrevConditionValues = *OuterConditionValues; 
-          PrevConditionValues.insert(ConditionValues.begin(), 
-                                     ConditionValues.end()); 
-          PrevInsertPoint = OuterInsertPoint; 
-        } 
-      } else { 
-        CHR_DEBUG(dbgs() << "Outer null\n"); 
-        PrevConditionValues = ConditionValues; 
-        PrevInsertPoint = InsertPoint; 
-      } 
-    } else { 
-      CHR_DEBUG(dbgs() << "Should split from prev at " 
-                << RI.R->getNameStr() << "\n"); 
-      if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues, 
-                      DT, Unhoistables)) { 
-        CHRScope *Tail = Scope->split(RI.R); 
-        Scopes.insert(Tail); 
-        Splits.push_back(Scope); 
-        SplitsSplitFromOuter.push_back(PrevSplitFromOuter); 
-        SplitsConditionValues.push_back(PrevConditionValues); 
-        SplitsInsertPoints.push_back(PrevInsertPoint); 
-        Scope = Tail; 
-        PrevConditionValues = ConditionValues; 
-        PrevInsertPoint = InsertPoint; 
-        PrevSplitFromOuter = true; 
-        ORE.emit([&]() { 
-          return OptimizationRemarkMissed(DEBUG_TYPE, 
-                                          "SplitScopeFromPrev", 
-                                          RI.R->getEntry()->getTerminator()) 
-              << "Split scope from previous due to unhoistable branch/select " 
-              << "and/or lack of common condition values"; 
-        }); 
-      } else { 
-        // Not splitting. Union the bases. Keep the hoist point. 
-        PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end()); 
-      } 
-    } 
-  } 
-  Splits.push_back(Scope); 
-  SplitsSplitFromOuter.push_back(PrevSplitFromOuter); 
-  SplitsConditionValues.push_back(PrevConditionValues); 
-  assert(PrevInsertPoint && "Null PrevInsertPoint"); 
-  SplitsInsertPoints.push_back(PrevInsertPoint); 
-  assert(Splits.size() == SplitsConditionValues.size() && 
-         Splits.size() == SplitsSplitFromOuter.size() && 
-         Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes"); 
-  for (size_t I = 0; I < Splits.size(); ++I) { 
-    CHRScope *Split = Splits[I]; 
-    DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I]; 
-    Instruction *SplitInsertPoint = SplitsInsertPoints[I]; 
-    SmallVector<CHRScope *, 8> NewSubs; 
-    DenseSet<Instruction *> SplitUnhoistables; 
-    getSelectsInScope(Split, SplitUnhoistables); 
-    for (CHRScope *Sub : Split->Subs) { 
-      SmallVector<CHRScope *, 8> SubSplits = splitScope( 
-          Sub, Split, &SplitConditionValues, SplitInsertPoint, Output, 
-          SplitUnhoistables); 
+        Unhoistables.clear();
+        InsertPoint = Branch;
+      }
+    }
+    CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
+#ifndef NDEBUG
+    if (RI.HasBranch) {
+      assert(!DT.dominates(Branch, InsertPoint) &&
+             "Branch can't be already above the hoist point");
+      DenseMap<Instruction *, bool> Visited;
+      assert(checkHoistValue(Branch->getCondition(), InsertPoint,
+                             DT, Unhoistables, nullptr, Visited) &&
+             "checkHoistValue for branch");
+    }
+    for (auto *SI : Selects) {
+      assert(!DT.dominates(SI, InsertPoint) &&
+             "SI can't be already above the hoist point");
+      DenseMap<Instruction *, bool> Visited;
+      assert(checkHoistValue(SI->getCondition(), InsertPoint, DT,
+                             Unhoistables, nullptr, Visited) &&
+             "checkHoistValue for selects");
+    }
+    CHR_DEBUG(dbgs() << "Result\n");
+    if (RI.HasBranch) {
+      CHR_DEBUG(dbgs() << "BI " << *Branch << "\n");
+    }
+    for (auto *SI : Selects) {
+      CHR_DEBUG(dbgs() << "SI " << *SI << "\n");
+    }
+#endif
+  }
+}
+
+// Traverse the region tree, find all nested scopes and merge them if possible.
+CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
+                           SmallVectorImpl<CHRScope *> &Scopes) {
+  CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n");
+  CHRScope *Result = findScope(R);
+  // Visit subscopes.
+  CHRScope *ConsecutiveSubscope = nullptr;
+  SmallVector<CHRScope *, 8> Subscopes;
+  for (auto It = R->begin(); It != R->end(); ++It) {
+    const std::unique_ptr<Region> &SubR = *It;
+    auto NextIt = std::next(It);
+    Region *NextSubR = NextIt != R->end() ? NextIt->get() : nullptr;
+    CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr()
+              << "\n");
+    CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes);
+    if (SubCHRScope) {
+      CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n");
+    } else {
+      CHR_DEBUG(dbgs() << "Subregion Scope null\n");
+    }
+    if (SubCHRScope) {
+      if (!ConsecutiveSubscope)
+        ConsecutiveSubscope = SubCHRScope;
+      else if (!ConsecutiveSubscope->appendable(SubCHRScope)) {
+        Subscopes.push_back(ConsecutiveSubscope);
+        ConsecutiveSubscope = SubCHRScope;
+      } else
+        ConsecutiveSubscope->append(SubCHRScope);
+    } else {
+      if (ConsecutiveSubscope) {
+        Subscopes.push_back(ConsecutiveSubscope);
+      }
+      ConsecutiveSubscope = nullptr;
+    }
+  }
+  if (ConsecutiveSubscope) {
+    Subscopes.push_back(ConsecutiveSubscope);
+  }
+  for (CHRScope *Sub : Subscopes) {
+    if (Result) {
+      // Combine it with the parent.
+      Result->addSub(Sub);
+    } else {
+      // Push Subscopes as they won't be combined with the parent.
+      Scopes.push_back(Sub);
+    }
+  }
+  return Result;
+}
+
+static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) {
+  DenseSet<Value *> ConditionValues;
+  if (RI.HasBranch) {
+    auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator());
+    ConditionValues.insert(BI->getCondition());
+  }
+  for (SelectInst *SI : RI.Selects) {
+    ConditionValues.insert(SI->getCondition());
+  }
+  return ConditionValues;
+}
+
+
+// Determine whether to split a scope depending on the sets of the branch
+// condition values of the previous region and the current region. We split
+// (return true) it if 1) the condition values of the inner/lower scope can't be
+// hoisted up to the outer/upper scope, or 2) the two sets of the condition
+// values have an empty intersection (because the combined branch conditions
+// won't probably lead to a simpler combined condition).
+static bool shouldSplit(Instruction *InsertPoint,
+                        DenseSet<Value *> &PrevConditionValues,
+                        DenseSet<Value *> &ConditionValues,
+                        DominatorTree &DT,
+                        DenseSet<Instruction *> &Unhoistables) {
+  assert(InsertPoint && "Null InsertPoint");
+  CHR_DEBUG(
+      dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues ";
+      for (Value *V : PrevConditionValues) {
+        dbgs() << *V << ", ";
+      }
+      dbgs() << " ConditionValues ";
+      for (Value *V : ConditionValues) {
+        dbgs() << *V << ", ";
+      }
+      dbgs() << "\n");
+  // If any of Bases isn't hoistable to the hoist point, split.
+  for (Value *V : ConditionValues) {
+    DenseMap<Instruction *, bool> Visited;
+    if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr, Visited)) {
+      CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n");
+      return true; // Not hoistable, split.
+    }
+  }
+  // If PrevConditionValues or ConditionValues is empty, don't split to avoid
+  // unnecessary splits at scopes with no branch/selects.  If
+  // PrevConditionValues and ConditionValues don't intersect at all, split.
+  if (!PrevConditionValues.empty() && !ConditionValues.empty()) {
+    // Use std::set as DenseSet doesn't work with set_intersection.
+    std::set<Value *> PrevBases, Bases;
+    DenseMap<Value *, std::set<Value *>> Visited;
+    for (Value *V : PrevConditionValues) {
+      const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited);
+      PrevBases.insert(BaseValues.begin(), BaseValues.end());
+    }
+    for (Value *V : ConditionValues) {
+      const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited);
+      Bases.insert(BaseValues.begin(), BaseValues.end());
+    }
+    CHR_DEBUG(
+        dbgs() << "PrevBases ";
+        for (Value *V : PrevBases) {
+          dbgs() << *V << ", ";
+        }
+        dbgs() << " Bases ";
+        for (Value *V : Bases) {
+          dbgs() << *V << ", ";
+        }
+        dbgs() << "\n");
+    std::vector<Value *> Intersection;
+    std::set_intersection(PrevBases.begin(), PrevBases.end(), Bases.begin(),
+                          Bases.end(), std::back_inserter(Intersection));
+    if (Intersection.empty()) {
+      // Empty intersection, split.
+      CHR_DEBUG(dbgs() << "Split. Intersection empty\n");
+      return true;
+    }
+  }
+  CHR_DEBUG(dbgs() << "No split\n");
+  return false;  // Don't split.
+}
+
+static void getSelectsInScope(CHRScope *Scope,
+                              DenseSet<Instruction *> &Output) {
+  for (RegInfo &RI : Scope->RegInfos)
+    for (SelectInst *SI : RI.Selects)
+      Output.insert(SI);
+  for (CHRScope *Sub : Scope->Subs)
+    getSelectsInScope(Sub, Output);
+}
+
+void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input,
+                      SmallVectorImpl<CHRScope *> &Output) {
+  for (CHRScope *Scope : Input) {
+    assert(!Scope->BranchInsertPoint &&
+           "BranchInsertPoint must not be set");
+    DenseSet<Instruction *> Unhoistables;
+    getSelectsInScope(Scope, Unhoistables);
+    splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables);
+  }
+#ifndef NDEBUG
+  for (CHRScope *Scope : Output) {
+    assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set");
+  }
+#endif
+}
+
+SmallVector<CHRScope *, 8> CHR::splitScope(
+    CHRScope *Scope,
+    CHRScope *Outer,
+    DenseSet<Value *> *OuterConditionValues,
+    Instruction *OuterInsertPoint,
+    SmallVectorImpl<CHRScope *> &Output,
+    DenseSet<Instruction *> &Unhoistables) {
+  if (Outer) {
+    assert(OuterConditionValues && "Null OuterConditionValues");
+    assert(OuterInsertPoint && "Null OuterInsertPoint");
+  }
+  bool PrevSplitFromOuter = true;
+  DenseSet<Value *> PrevConditionValues;
+  Instruction *PrevInsertPoint = nullptr;
+  SmallVector<CHRScope *, 8> Splits;
+  SmallVector<bool, 8> SplitsSplitFromOuter;
+  SmallVector<DenseSet<Value *>, 8> SplitsConditionValues;
+  SmallVector<Instruction *, 8> SplitsInsertPoints;
+  SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos);  // Copy
+  for (RegInfo &RI : RegInfos) {
+    Instruction *InsertPoint = getBranchInsertPoint(RI);
+    DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI);
+    CHR_DEBUG(
+        dbgs() << "ConditionValues ";
+        for (Value *V : ConditionValues) {
+          dbgs() << *V << ", ";
+        }
+        dbgs() << "\n");
+    if (RI.R == RegInfos[0].R) {
+      // First iteration. Check to see if we should split from the outer.
+      if (Outer) {
+        CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n");
+        CHR_DEBUG(dbgs() << "Should split from outer at "
+                  << RI.R->getNameStr() << "\n");
+        if (shouldSplit(OuterInsertPoint, *OuterConditionValues,
+                        ConditionValues, DT, Unhoistables)) {
+          PrevConditionValues = ConditionValues;
+          PrevInsertPoint = InsertPoint;
+          ORE.emit([&]() {
+            return OptimizationRemarkMissed(DEBUG_TYPE,
+                                            "SplitScopeFromOuter",
+                                            RI.R->getEntry()->getTerminator())
+                << "Split scope from outer due to unhoistable branch/select "
+                << "and/or lack of common condition values";
+          });
+        } else {
+          // Not splitting from the outer. Use the outer bases and insert
+          // point. Union the bases.
+          PrevSplitFromOuter = false;
+          PrevConditionValues = *OuterConditionValues;
+          PrevConditionValues.insert(ConditionValues.begin(),
+                                     ConditionValues.end());
+          PrevInsertPoint = OuterInsertPoint;
+        }
+      } else {
+        CHR_DEBUG(dbgs() << "Outer null\n");
+        PrevConditionValues = ConditionValues;
+        PrevInsertPoint = InsertPoint;
+      }
+    } else {
+      CHR_DEBUG(dbgs() << "Should split from prev at "
+                << RI.R->getNameStr() << "\n");
+      if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues,
+                      DT, Unhoistables)) {
+        CHRScope *Tail = Scope->split(RI.R);
+        Scopes.insert(Tail);
+        Splits.push_back(Scope);
+        SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
+        SplitsConditionValues.push_back(PrevConditionValues);
+        SplitsInsertPoints.push_back(PrevInsertPoint);
+        Scope = Tail;
+        PrevConditionValues = ConditionValues;
+        PrevInsertPoint = InsertPoint;
+        PrevSplitFromOuter = true;
+        ORE.emit([&]() {
+          return OptimizationRemarkMissed(DEBUG_TYPE,
+                                          "SplitScopeFromPrev",
+                                          RI.R->getEntry()->getTerminator())
+              << "Split scope from previous due to unhoistable branch/select "
+              << "and/or lack of common condition values";
+        });
+      } else {
+        // Not splitting. Union the bases. Keep the hoist point.
+        PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end());
+      }
+    }
+  }
+  Splits.push_back(Scope);
+  SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
+  SplitsConditionValues.push_back(PrevConditionValues);
+  assert(PrevInsertPoint && "Null PrevInsertPoint");
+  SplitsInsertPoints.push_back(PrevInsertPoint);
+  assert(Splits.size() == SplitsConditionValues.size() &&
+         Splits.size() == SplitsSplitFromOuter.size() &&
+         Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes");
+  for (size_t I = 0; I < Splits.size(); ++I) {
+    CHRScope *Split = Splits[I];
+    DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I];
+    Instruction *SplitInsertPoint = SplitsInsertPoints[I];
+    SmallVector<CHRScope *, 8> NewSubs;
+    DenseSet<Instruction *> SplitUnhoistables;
+    getSelectsInScope(Split, SplitUnhoistables);
+    for (CHRScope *Sub : Split->Subs) {
+      SmallVector<CHRScope *, 8> SubSplits = splitScope(
+          Sub, Split, &SplitConditionValues, SplitInsertPoint, Output,
+          SplitUnhoistables);
       llvm::append_range(NewSubs, SubSplits);
-    } 
-    Split->Subs = NewSubs; 
-  } 
-  SmallVector<CHRScope *, 8> Result; 
-  for (size_t I = 0; I < Splits.size(); ++I) { 
-    CHRScope *Split = Splits[I]; 
-    if (SplitsSplitFromOuter[I]) { 
-      // Split from the outer. 
-      Output.push_back(Split); 
-      Split->BranchInsertPoint = SplitsInsertPoints[I]; 
-      CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I] 
-                << "\n"); 
-    } else { 
-      // Connected to the outer. 
-      Result.push_back(Split); 
-    } 
-  } 
-  if (!Outer) 
-    assert(Result.empty() && 
-           "If no outer (top-level), must return no nested ones"); 
-  return Result; 
-} 
- 
-void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) { 
-  for (CHRScope *Scope : Scopes) { 
-    assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty"); 
-    classifyBiasedScopes(Scope, Scope); 
-    CHR_DEBUG( 
-        dbgs() << "classifyBiasedScopes " << *Scope << "\n"; 
-        dbgs() << "TrueBiasedRegions "; 
-        for (Region *R : Scope->TrueBiasedRegions) { 
-          dbgs() << R->getNameStr() << ", "; 
-        } 
-        dbgs() << "\n"; 
-        dbgs() << "FalseBiasedRegions "; 
-        for (Region *R : Scope->FalseBiasedRegions) { 
-          dbgs() << R->getNameStr() << ", "; 
-        } 
-        dbgs() << "\n"; 
-        dbgs() << "TrueBiasedSelects "; 
-        for (SelectInst *SI : Scope->TrueBiasedSelects) { 
-          dbgs() << *SI << ", "; 
-        } 
-        dbgs() << "\n"; 
-        dbgs() << "FalseBiasedSelects "; 
-        for (SelectInst *SI : Scope->FalseBiasedSelects) { 
-          dbgs() << *SI << ", "; 
-        } 
-        dbgs() << "\n";); 
-  } 
-} 
- 
-void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) { 
-  for (RegInfo &RI : Scope->RegInfos) { 
-    if (RI.HasBranch) { 
-      Region *R = RI.R; 
+    }
+    Split->Subs = NewSubs;
+  }
+  SmallVector<CHRScope *, 8> Result;
+  for (size_t I = 0; I < Splits.size(); ++I) {
+    CHRScope *Split = Splits[I];
+    if (SplitsSplitFromOuter[I]) {
+      // Split from the outer.
+      Output.push_back(Split);
+      Split->BranchInsertPoint = SplitsInsertPoints[I];
+      CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I]
+                << "\n");
+    } else {
+      // Connected to the outer.
+      Result.push_back(Split);
+    }
+  }
+  if (!Outer)
+    assert(Result.empty() &&
+           "If no outer (top-level), must return no nested ones");
+  return Result;
+}
+
+void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) {
+  for (CHRScope *Scope : Scopes) {
+    assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty");
+    classifyBiasedScopes(Scope, Scope);
+    CHR_DEBUG(
+        dbgs() << "classifyBiasedScopes " << *Scope << "\n";
+        dbgs() << "TrueBiasedRegions ";
+        for (Region *R : Scope->TrueBiasedRegions) {
+          dbgs() << R->getNameStr() << ", ";
+        }
+        dbgs() << "\n";
+        dbgs() << "FalseBiasedRegions ";
+        for (Region *R : Scope->FalseBiasedRegions) {
+          dbgs() << R->getNameStr() << ", ";
+        }
+        dbgs() << "\n";
+        dbgs() << "TrueBiasedSelects ";
+        for (SelectInst *SI : Scope->TrueBiasedSelects) {
+          dbgs() << *SI << ", ";
+        }
+        dbgs() << "\n";
+        dbgs() << "FalseBiasedSelects ";
+        for (SelectInst *SI : Scope->FalseBiasedSelects) {
+          dbgs() << *SI << ", ";
+        }
+        dbgs() << "\n";);
+  }
+}
+
+void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) {
+  for (RegInfo &RI : Scope->RegInfos) {
+    if (RI.HasBranch) {
+      Region *R = RI.R;
       if (TrueBiasedRegionsGlobal.contains(R))
-        OutermostScope->TrueBiasedRegions.insert(R); 
+        OutermostScope->TrueBiasedRegions.insert(R);
       else if (FalseBiasedRegionsGlobal.contains(R))
-        OutermostScope->FalseBiasedRegions.insert(R); 
-      else 
-        llvm_unreachable("Must be biased"); 
-    } 
-    for (SelectInst *SI : RI.Selects) { 
+        OutermostScope->FalseBiasedRegions.insert(R);
+      else
+        llvm_unreachable("Must be biased");
+    }
+    for (SelectInst *SI : RI.Selects) {
       if (TrueBiasedSelectsGlobal.contains(SI))
-        OutermostScope->TrueBiasedSelects.insert(SI); 
+        OutermostScope->TrueBiasedSelects.insert(SI);
       else if (FalseBiasedSelectsGlobal.contains(SI))
-        OutermostScope->FalseBiasedSelects.insert(SI); 
-      else 
-        llvm_unreachable("Must be biased"); 
-    } 
-  } 
-  for (CHRScope *Sub : Scope->Subs) { 
-    classifyBiasedScopes(Sub, OutermostScope); 
-  } 
-} 
- 
-static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) { 
-  unsigned NumBiased = Scope->TrueBiasedRegions.size() + 
-                       Scope->FalseBiasedRegions.size() + 
-                       Scope->TrueBiasedSelects.size() + 
-                       Scope->FalseBiasedSelects.size(); 
-  return NumBiased >= CHRMergeThreshold; 
-} 
- 
-void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input, 
-                       SmallVectorImpl<CHRScope *> &Output) { 
-  for (CHRScope *Scope : Input) { 
-    // Filter out the ones with only one region and no subs. 
-    if (!hasAtLeastTwoBiasedBranches(Scope)) { 
-      CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions " 
-                << Scope->TrueBiasedRegions.size() 
-                << " falsy-regions " << Scope->FalseBiasedRegions.size() 
-                << " true-selects " << Scope->TrueBiasedSelects.size() 
-                << " false-selects " << Scope->FalseBiasedSelects.size() << "\n"); 
-      ORE.emit([&]() { 
-        return OptimizationRemarkMissed( 
-            DEBUG_TYPE, 
-            "DropScopeWithOneBranchOrSelect", 
-            Scope->RegInfos[0].R->getEntry()->getTerminator()) 
-            << "Drop scope with < " 
-            << ore::NV("CHRMergeThreshold", CHRMergeThreshold) 
-            << " biased branch(es) or select(s)"; 
-      }); 
-      continue; 
-    } 
-    Output.push_back(Scope); 
-  } 
-} 
- 
-void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input, 
-                        SmallVectorImpl<CHRScope *> &Output) { 
-  for (CHRScope *Scope : Input) { 
-    assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() && 
-           "Empty"); 
-    setCHRRegions(Scope, Scope); 
-    Output.push_back(Scope); 
-    CHR_DEBUG( 
-        dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n"; 
-        for (auto pair : Scope->HoistStopMap) { 
-          Region *R = pair.first; 
-          dbgs() << "Region " << R->getNameStr() << "\n"; 
-          for (Instruction *I : pair.second) { 
-            dbgs() << "HoistStop " << *I << "\n"; 
-          } 
-        } 
-        dbgs() << "CHRRegions" << "\n"; 
-        for (RegInfo &RI : Scope->CHRRegions) { 
-          dbgs() << RI.R->getNameStr() << "\n"; 
-        }); 
-  } 
-} 
- 
-void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) { 
-  DenseSet<Instruction *> Unhoistables; 
-  // Put the biased selects in Unhoistables because they should stay where they 
-  // are and constant-folded after CHR (in case one biased select or a branch 
-  // can depend on another biased select.) 
-  for (RegInfo &RI : Scope->RegInfos) { 
-    for (SelectInst *SI : RI.Selects) { 
-      Unhoistables.insert(SI); 
-    } 
-  } 
-  Instruction *InsertPoint = OutermostScope->BranchInsertPoint; 
-  for (RegInfo &RI : Scope->RegInfos) { 
-    Region *R = RI.R; 
-    DenseSet<Instruction *> HoistStops; 
-    bool IsHoisted = false; 
-    if (RI.HasBranch) { 
+        OutermostScope->FalseBiasedSelects.insert(SI);
+      else
+        llvm_unreachable("Must be biased");
+    }
+  }
+  for (CHRScope *Sub : Scope->Subs) {
+    classifyBiasedScopes(Sub, OutermostScope);
+  }
+}
+
+static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) {
+  unsigned NumBiased = Scope->TrueBiasedRegions.size() +
+                       Scope->FalseBiasedRegions.size() +
+                       Scope->TrueBiasedSelects.size() +
+                       Scope->FalseBiasedSelects.size();
+  return NumBiased >= CHRMergeThreshold;
+}
+
+void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input,
+                       SmallVectorImpl<CHRScope *> &Output) {
+  for (CHRScope *Scope : Input) {
+    // Filter out the ones with only one region and no subs.
+    if (!hasAtLeastTwoBiasedBranches(Scope)) {
+      CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions "
+                << Scope->TrueBiasedRegions.size()
+                << " falsy-regions " << Scope->FalseBiasedRegions.size()
+                << " true-selects " << Scope->TrueBiasedSelects.size()
+                << " false-selects " << Scope->FalseBiasedSelects.size() << "\n");
+      ORE.emit([&]() {
+        return OptimizationRemarkMissed(
+            DEBUG_TYPE,
+            "DropScopeWithOneBranchOrSelect",
+            Scope->RegInfos[0].R->getEntry()->getTerminator())
+            << "Drop scope with < "
+            << ore::NV("CHRMergeThreshold", CHRMergeThreshold)
+            << " biased branch(es) or select(s)";
+      });
+      continue;
+    }
+    Output.push_back(Scope);
+  }
+}
+
+void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
+                        SmallVectorImpl<CHRScope *> &Output) {
+  for (CHRScope *Scope : Input) {
+    assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() &&
+           "Empty");
+    setCHRRegions(Scope, Scope);
+    Output.push_back(Scope);
+    CHR_DEBUG(
+        dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n";
+        for (auto pair : Scope->HoistStopMap) {
+          Region *R = pair.first;
+          dbgs() << "Region " << R->getNameStr() << "\n";
+          for (Instruction *I : pair.second) {
+            dbgs() << "HoistStop " << *I << "\n";
+          }
+        }
+        dbgs() << "CHRRegions" << "\n";
+        for (RegInfo &RI : Scope->CHRRegions) {
+          dbgs() << RI.R->getNameStr() << "\n";
+        });
+  }
+}
+
+void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) {
+  DenseSet<Instruction *> Unhoistables;
+  // Put the biased selects in Unhoistables because they should stay where they
+  // are and constant-folded after CHR (in case one biased select or a branch
+  // can depend on another biased select.)
+  for (RegInfo &RI : Scope->RegInfos) {
+    for (SelectInst *SI : RI.Selects) {
+      Unhoistables.insert(SI);
+    }
+  }
+  Instruction *InsertPoint = OutermostScope->BranchInsertPoint;
+  for (RegInfo &RI : Scope->RegInfos) {
+    Region *R = RI.R;
+    DenseSet<Instruction *> HoistStops;
+    bool IsHoisted = false;
+    if (RI.HasBranch) {
       assert((OutermostScope->TrueBiasedRegions.contains(R) ||
               OutermostScope->FalseBiasedRegions.contains(R)) &&
-             "Must be truthy or falsy"); 
-      auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 
-      // Note checkHoistValue fills in HoistStops. 
-      DenseMap<Instruction *, bool> Visited; 
-      bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT, 
-                                         Unhoistables, &HoistStops, Visited); 
-      assert(IsHoistable && "Must be hoistable"); 
-      (void)(IsHoistable);  // Unused in release build 
-      IsHoisted = true; 
-    } 
-    for (SelectInst *SI : RI.Selects) { 
+             "Must be truthy or falsy");
+      auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
+      // Note checkHoistValue fills in HoistStops.
+      DenseMap<Instruction *, bool> Visited;
+      bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT,
+                                         Unhoistables, &HoistStops, Visited);
+      assert(IsHoistable && "Must be hoistable");
+      (void)(IsHoistable);  // Unused in release build
+      IsHoisted = true;
+    }
+    for (SelectInst *SI : RI.Selects) {
       assert((OutermostScope->TrueBiasedSelects.contains(SI) ||
               OutermostScope->FalseBiasedSelects.contains(SI)) &&
-             "Must be true or false biased"); 
-      // Note checkHoistValue fills in HoistStops. 
-      DenseMap<Instruction *, bool> Visited; 
-      bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT, 
-                                         Unhoistables, &HoistStops, Visited); 
-      assert(IsHoistable && "Must be hoistable"); 
-      (void)(IsHoistable);  // Unused in release build 
-      IsHoisted = true; 
-    } 
-    if (IsHoisted) { 
-      OutermostScope->CHRRegions.push_back(RI); 
-      OutermostScope->HoistStopMap[R] = HoistStops; 
-    } 
-  } 
-  for (CHRScope *Sub : Scope->Subs) 
-    setCHRRegions(Sub, OutermostScope); 
-} 
- 
-static bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) { 
-  return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth(); 
-} 
- 
-void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input, 
-                     SmallVectorImpl<CHRScope *> &Output) { 
-  Output.resize(Input.size()); 
-  llvm::copy(Input, Output.begin()); 
-  llvm::stable_sort(Output, CHRScopeSorter); 
-} 
- 
-// Return true if V is already hoisted or was hoisted (along with its operands) 
-// to the insert point. 
-static void hoistValue(Value *V, Instruction *HoistPoint, Region *R, 
-                       HoistStopMapTy &HoistStopMap, 
-                       DenseSet<Instruction *> &HoistedSet, 
-                       DenseSet<PHINode *> &TrivialPHIs, 
-                       DominatorTree &DT) { 
-  auto IT = HoistStopMap.find(R); 
-  assert(IT != HoistStopMap.end() && "Region must be in hoist stop map"); 
-  DenseSet<Instruction *> &HoistStops = IT->second; 
-  if (auto *I = dyn_cast<Instruction>(V)) { 
-    if (I == HoistPoint) 
-      return; 
-    if (HoistStops.count(I)) 
-      return; 
-    if (auto *PN = dyn_cast<PHINode>(I)) 
-      if (TrivialPHIs.count(PN)) 
-        // The trivial phi inserted by the previous CHR scope could replace a 
-        // non-phi in HoistStops. Note that since this phi is at the exit of a 
-        // previous CHR scope, which dominates this scope, it's safe to stop 
-        // hoisting there. 
-        return; 
-    if (HoistedSet.count(I)) 
-      // Already hoisted, return. 
-      return; 
-    assert(isHoistableInstructionType(I) && "Unhoistable instruction type"); 
-    assert(DT.getNode(I->getParent()) && "DT must contain I's block"); 
-    assert(DT.getNode(HoistPoint->getParent()) && 
-           "DT must contain HoistPoint block"); 
-    if (DT.dominates(I, HoistPoint)) 
-      // We are already above the hoist point. Stop here. This may be necessary 
-      // when multiple scopes would independently hoist the same 
-      // instruction. Since an outer (dominating) scope would hoist it to its 
-      // entry before an inner (dominated) scope would to its entry, the inner 
-      // scope may see the instruction already hoisted, in which case it 
-      // potentially wrong for the inner scope to hoist it and could cause bad 
-      // IR (non-dominating def), but safe to skip hoisting it instead because 
-      // it's already in a block that dominates the inner scope. 
-      return; 
-    for (Value *Op : I->operands()) { 
-      hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs, DT); 
-    } 
-    I->moveBefore(HoistPoint); 
-    HoistedSet.insert(I); 
-    CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n"); 
-  } 
-} 
- 
-// Hoist the dependent condition values of the branches and the selects in the 
-// scope to the insert point. 
-static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint, 
-                                 DenseSet<PHINode *> &TrivialPHIs, 
-                                 DominatorTree &DT) { 
-  DenseSet<Instruction *> HoistedSet; 
-  for (const RegInfo &RI : Scope->CHRRegions) { 
-    Region *R = RI.R; 
-    bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 
-    bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); 
-    if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { 
-      auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 
-      hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap, 
-                 HoistedSet, TrivialPHIs, DT); 
-    } 
-    for (SelectInst *SI : RI.Selects) { 
-      bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 
-      bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); 
-      if (!(IsTrueBiased || IsFalseBiased)) 
-        continue; 
-      hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap, 
-                 HoistedSet, TrivialPHIs, DT); 
-    } 
-  } 
-} 
- 
-// Negate the predicate if an ICmp if it's used only by branches or selects by 
-// swapping the operands of the branches or the selects. Returns true if success. 
-static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp, 
-                                                 Instruction *ExcludedUser, 
-                                                 CHRScope *Scope) { 
-  for (User *U : ICmp->users()) { 
-    if (U == ExcludedUser) 
-      continue; 
-    if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional()) 
-      continue; 
-    if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp) 
-      continue; 
-    return false; 
-  } 
-  for (User *U : ICmp->users()) { 
-    if (U == ExcludedUser) 
-      continue; 
-    if (auto *BI = dyn_cast<BranchInst>(U)) { 
-      assert(BI->isConditional() && "Must be conditional"); 
-      BI->swapSuccessors(); 
-      // Don't need to swap this in terms of 
-      // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based 
-      // mean whehter the branch is likely go into the if-then rather than 
-      // successor0/successor1 and because we can tell which edge is the then or 
-      // the else one by comparing the destination to the region exit block. 
-      continue; 
-    } 
-    if (auto *SI = dyn_cast<SelectInst>(U)) { 
-      // Swap operands 
-      SI->swapValues(); 
-      SI->swapProfMetadata(); 
-      if (Scope->TrueBiasedSelects.count(SI)) { 
-        assert(Scope->FalseBiasedSelects.count(SI) == 0 && 
-               "Must not be already in"); 
-        Scope->FalseBiasedSelects.insert(SI); 
-      } else if (Scope->FalseBiasedSelects.count(SI)) { 
-        assert(Scope->TrueBiasedSelects.count(SI) == 0 && 
-               "Must not be already in"); 
-        Scope->TrueBiasedSelects.insert(SI); 
-      } 
-      continue; 
-    } 
-    llvm_unreachable("Must be a branch or a select"); 
-  } 
-  ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate())); 
-  return true; 
-} 
- 
-// A helper for transformScopes. Insert a trivial phi at the scope exit block 
-// for a value that's defined in the scope but used outside it (meaning it's 
-// alive at the exit block). 
-static void insertTrivialPHIs(CHRScope *Scope, 
-                              BasicBlock *EntryBlock, BasicBlock *ExitBlock, 
-                              DenseSet<PHINode *> &TrivialPHIs) { 
-  SmallSetVector<BasicBlock *, 8> BlocksInScope; 
-  for (RegInfo &RI : Scope->RegInfos) { 
-    for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the 
-                                            // sub-Scopes. 
-      BlocksInScope.insert(BB); 
-    } 
-  } 
-  CHR_DEBUG({ 
-    dbgs() << "Inserting redundant phis\n"; 
-    for (BasicBlock *BB : BlocksInScope) 
-      dbgs() << "BlockInScope " << BB->getName() << "\n"; 
-  }); 
-  for (BasicBlock *BB : BlocksInScope) { 
-    for (Instruction &I : *BB) { 
-      SmallVector<Instruction *, 8> Users; 
-      for (User *U : I.users()) { 
-        if (auto *UI = dyn_cast<Instruction>(U)) { 
-          if (BlocksInScope.count(UI->getParent()) == 0 && 
-              // Unless there's already a phi for I at the exit block. 
-              !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) { 
-            CHR_DEBUG(dbgs() << "V " << I << "\n"); 
-            CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n"); 
-            Users.push_back(UI); 
-          } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) { 
-            // There's a loop backedge from a block that's dominated by this 
-            // scope to the entry block. 
-            CHR_DEBUG(dbgs() << "V " << I << "\n"); 
-            CHR_DEBUG(dbgs() 
-                      << "Used at entry block (for a back edge) by a phi user " 
-                      << *UI << "\n"); 
-            Users.push_back(UI); 
-          } 
-        } 
-      } 
-      if (Users.size() > 0) { 
-        // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at 
-        // ExitBlock. Replace I with the new phi in UI unless UI is another 
-        // phi at ExitBlock. 
+             "Must be true or false biased");
+      // Note checkHoistValue fills in HoistStops.
+      DenseMap<Instruction *, bool> Visited;
+      bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT,
+                                         Unhoistables, &HoistStops, Visited);
+      assert(IsHoistable && "Must be hoistable");
+      (void)(IsHoistable);  // Unused in release build
+      IsHoisted = true;
+    }
+    if (IsHoisted) {
+      OutermostScope->CHRRegions.push_back(RI);
+      OutermostScope->HoistStopMap[R] = HoistStops;
+    }
+  }
+  for (CHRScope *Sub : Scope->Subs)
+    setCHRRegions(Sub, OutermostScope);
+}
+
+static bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) {
+  return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth();
+}
+
+void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input,
+                     SmallVectorImpl<CHRScope *> &Output) {
+  Output.resize(Input.size());
+  llvm::copy(Input, Output.begin());
+  llvm::stable_sort(Output, CHRScopeSorter);
+}
+
+// Return true if V is already hoisted or was hoisted (along with its operands)
+// to the insert point.
+static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
+                       HoistStopMapTy &HoistStopMap,
+                       DenseSet<Instruction *> &HoistedSet,
+                       DenseSet<PHINode *> &TrivialPHIs,
+                       DominatorTree &DT) {
+  auto IT = HoistStopMap.find(R);
+  assert(IT != HoistStopMap.end() && "Region must be in hoist stop map");
+  DenseSet<Instruction *> &HoistStops = IT->second;
+  if (auto *I = dyn_cast<Instruction>(V)) {
+    if (I == HoistPoint)
+      return;
+    if (HoistStops.count(I))
+      return;
+    if (auto *PN = dyn_cast<PHINode>(I))
+      if (TrivialPHIs.count(PN))
+        // The trivial phi inserted by the previous CHR scope could replace a
+        // non-phi in HoistStops. Note that since this phi is at the exit of a
+        // previous CHR scope, which dominates this scope, it's safe to stop
+        // hoisting there.
+        return;
+    if (HoistedSet.count(I))
+      // Already hoisted, return.
+      return;
+    assert(isHoistableInstructionType(I) && "Unhoistable instruction type");
+    assert(DT.getNode(I->getParent()) && "DT must contain I's block");
+    assert(DT.getNode(HoistPoint->getParent()) &&
+           "DT must contain HoistPoint block");
+    if (DT.dominates(I, HoistPoint))
+      // We are already above the hoist point. Stop here. This may be necessary
+      // when multiple scopes would independently hoist the same
+      // instruction. Since an outer (dominating) scope would hoist it to its
+      // entry before an inner (dominated) scope would to its entry, the inner
+      // scope may see the instruction already hoisted, in which case it
+      // potentially wrong for the inner scope to hoist it and could cause bad
+      // IR (non-dominating def), but safe to skip hoisting it instead because
+      // it's already in a block that dominates the inner scope.
+      return;
+    for (Value *Op : I->operands()) {
+      hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs, DT);
+    }
+    I->moveBefore(HoistPoint);
+    HoistedSet.insert(I);
+    CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n");
+  }
+}
+
+// Hoist the dependent condition values of the branches and the selects in the
+// scope to the insert point.
+static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
+                                 DenseSet<PHINode *> &TrivialPHIs,
+                                 DominatorTree &DT) {
+  DenseSet<Instruction *> HoistedSet;
+  for (const RegInfo &RI : Scope->CHRRegions) {
+    Region *R = RI.R;
+    bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
+    bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
+    if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
+      auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
+      hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
+                 HoistedSet, TrivialPHIs, DT);
+    }
+    for (SelectInst *SI : RI.Selects) {
+      bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
+      bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
+      if (!(IsTrueBiased || IsFalseBiased))
+        continue;
+      hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
+                 HoistedSet, TrivialPHIs, DT);
+    }
+  }
+}
+
+// Negate the predicate if an ICmp if it's used only by branches or selects by
+// swapping the operands of the branches or the selects. Returns true if success.
+static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp,
+                                                 Instruction *ExcludedUser,
+                                                 CHRScope *Scope) {
+  for (User *U : ICmp->users()) {
+    if (U == ExcludedUser)
+      continue;
+    if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional())
+      continue;
+    if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp)
+      continue;
+    return false;
+  }
+  for (User *U : ICmp->users()) {
+    if (U == ExcludedUser)
+      continue;
+    if (auto *BI = dyn_cast<BranchInst>(U)) {
+      assert(BI->isConditional() && "Must be conditional");
+      BI->swapSuccessors();
+      // Don't need to swap this in terms of
+      // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based
+      // mean whehter the branch is likely go into the if-then rather than
+      // successor0/successor1 and because we can tell which edge is the then or
+      // the else one by comparing the destination to the region exit block.
+      continue;
+    }
+    if (auto *SI = dyn_cast<SelectInst>(U)) {
+      // Swap operands
+      SI->swapValues();
+      SI->swapProfMetadata();
+      if (Scope->TrueBiasedSelects.count(SI)) {
+        assert(Scope->FalseBiasedSelects.count(SI) == 0 &&
+               "Must not be already in");
+        Scope->FalseBiasedSelects.insert(SI);
+      } else if (Scope->FalseBiasedSelects.count(SI)) {
+        assert(Scope->TrueBiasedSelects.count(SI) == 0 &&
+               "Must not be already in");
+        Scope->TrueBiasedSelects.insert(SI);
+      }
+      continue;
+    }
+    llvm_unreachable("Must be a branch or a select");
+  }
+  ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate()));
+  return true;
+}
+
+// A helper for transformScopes. Insert a trivial phi at the scope exit block
+// for a value that's defined in the scope but used outside it (meaning it's
+// alive at the exit block).
+static void insertTrivialPHIs(CHRScope *Scope,
+                              BasicBlock *EntryBlock, BasicBlock *ExitBlock,
+                              DenseSet<PHINode *> &TrivialPHIs) {
+  SmallSetVector<BasicBlock *, 8> BlocksInScope;
+  for (RegInfo &RI : Scope->RegInfos) {
+    for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
+                                            // sub-Scopes.
+      BlocksInScope.insert(BB);
+    }
+  }
+  CHR_DEBUG({
+    dbgs() << "Inserting redundant phis\n";
+    for (BasicBlock *BB : BlocksInScope)
+      dbgs() << "BlockInScope " << BB->getName() << "\n";
+  });
+  for (BasicBlock *BB : BlocksInScope) {
+    for (Instruction &I : *BB) {
+      SmallVector<Instruction *, 8> Users;
+      for (User *U : I.users()) {
+        if (auto *UI = dyn_cast<Instruction>(U)) {
+          if (BlocksInScope.count(UI->getParent()) == 0 &&
+              // Unless there's already a phi for I at the exit block.
+              !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) {
+            CHR_DEBUG(dbgs() << "V " << I << "\n");
+            CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n");
+            Users.push_back(UI);
+          } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) {
+            // There's a loop backedge from a block that's dominated by this
+            // scope to the entry block.
+            CHR_DEBUG(dbgs() << "V " << I << "\n");
+            CHR_DEBUG(dbgs()
+                      << "Used at entry block (for a back edge) by a phi user "
+                      << *UI << "\n");
+            Users.push_back(UI);
+          }
+        }
+      }
+      if (Users.size() > 0) {
+        // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at
+        // ExitBlock. Replace I with the new phi in UI unless UI is another
+        // phi at ExitBlock.
         PHINode *PN = PHINode::Create(I.getType(), pred_size(ExitBlock), "",
-                                      &ExitBlock->front()); 
-        for (BasicBlock *Pred : predecessors(ExitBlock)) { 
-          PN->addIncoming(&I, Pred); 
-        } 
-        TrivialPHIs.insert(PN); 
-        CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n"); 
-        for (Instruction *UI : Users) { 
-          for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) { 
-            if (UI->getOperand(J) == &I) { 
-              UI->setOperand(J, PN); 
-            } 
-          } 
-          CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n"); 
-        } 
-      } 
-    } 
-  } 
-} 
- 
-// Assert that all the CHR regions of the scope have a biased branch or select. 
-static void LLVM_ATTRIBUTE_UNUSED 
-assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) { 
-#ifndef NDEBUG 
-  auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) { 
-    if (Scope->TrueBiasedRegions.count(RI.R) || 
-        Scope->FalseBiasedRegions.count(RI.R)) 
-      return true; 
-    for (SelectInst *SI : RI.Selects) 
-      if (Scope->TrueBiasedSelects.count(SI) || 
-          Scope->FalseBiasedSelects.count(SI)) 
-        return true; 
-    return false; 
-  }; 
-  for (RegInfo &RI : Scope->CHRRegions) { 
-    assert(HasBiasedBranchOrSelect(RI, Scope) && 
-           "Must have biased branch or select"); 
-  } 
-#endif 
-} 
- 
-// Assert that all the condition values of the biased branches and selects have 
-// been hoisted to the pre-entry block or outside of the scope. 
-static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted( 
-    CHRScope *Scope, BasicBlock *PreEntryBlock) { 
-  CHR_DEBUG(dbgs() << "Biased regions condition values \n"); 
-  for (RegInfo &RI : Scope->CHRRegions) { 
-    Region *R = RI.R; 
-    bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 
-    bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); 
-    if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { 
-      auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 
-      Value *V = BI->getCondition(); 
-      CHR_DEBUG(dbgs() << *V << "\n"); 
-      if (auto *I = dyn_cast<Instruction>(V)) { 
-        (void)(I); // Unused in release build. 
-        assert((I->getParent() == PreEntryBlock || 
-                !Scope->contains(I)) && 
-               "Must have been hoisted to PreEntryBlock or outside the scope"); 
-      } 
-    } 
-    for (SelectInst *SI : RI.Selects) { 
-      bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 
-      bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); 
-      if (!(IsTrueBiased || IsFalseBiased)) 
-        continue; 
-      Value *V = SI->getCondition(); 
-      CHR_DEBUG(dbgs() << *V << "\n"); 
-      if (auto *I = dyn_cast<Instruction>(V)) { 
-        (void)(I); // Unused in release build. 
-        assert((I->getParent() == PreEntryBlock || 
-                !Scope->contains(I)) && 
-               "Must have been hoisted to PreEntryBlock or outside the scope"); 
-      } 
-    } 
-  } 
-} 
- 
-void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) { 
-  CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n"); 
- 
-  assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region"); 
-  Region *FirstRegion = Scope->RegInfos[0].R; 
-  BasicBlock *EntryBlock = FirstRegion->getEntry(); 
-  Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R; 
-  BasicBlock *ExitBlock = LastRegion->getExit(); 
-  Optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock); 
- 
-  if (ExitBlock) { 
-    // Insert a trivial phi at the exit block (where the CHR hot path and the 
-    // cold path merges) for a value that's defined in the scope but used 
-    // outside it (meaning it's alive at the exit block). We will add the 
-    // incoming values for the CHR cold paths to it below. Without this, we'd 
-    // miss updating phi's for such values unless there happens to already be a 
-    // phi for that value there. 
-    insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs); 
-  } 
- 
-  // Split the entry block of the first region. The new block becomes the new 
-  // entry block of the first region. The old entry block becomes the block to 
-  // insert the CHR branch into. Note DT gets updated. Since DT gets updated 
-  // through the split, we update the entry of the first region after the split, 
-  // and Region only points to the entry and the exit blocks, rather than 
-  // keeping everything in a list or set, the blocks membership and the 
-  // entry/exit blocks of the region are still valid after the split. 
-  CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName() 
-            << " at " << *Scope->BranchInsertPoint << "\n"); 
-  BasicBlock *NewEntryBlock = 
-      SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT); 
-  assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 
-         "NewEntryBlock's only pred must be EntryBlock"); 
-  FirstRegion->replaceEntryRecursive(NewEntryBlock); 
-  BasicBlock *PreEntryBlock = EntryBlock; 
- 
-  ValueToValueMapTy VMap; 
-  // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a 
-  // hot path (originals) and a cold path (clones) and update the PHIs at the 
-  // exit block. 
-  cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap); 
- 
-  // Replace the old (placeholder) branch with the new (merged) conditional 
-  // branch. 
-  BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock, 
-                                            NewEntryBlock, VMap); 
- 
-#ifndef NDEBUG 
-  assertCHRRegionsHaveBiasedBranchOrSelect(Scope); 
-#endif 
- 
-  // Hoist the conditional values of the branches/selects. 
-  hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs, DT); 
- 
-#ifndef NDEBUG 
-  assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock); 
-#endif 
- 
-  // Create the combined branch condition and constant-fold the branches/selects 
-  // in the hot path. 
-  fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr, 
-                          ProfileCount ? ProfileCount.getValue() : 0); 
-} 
- 
-// A helper for transformScopes. Clone the blocks in the scope (excluding the 
-// PreEntryBlock) to split into a hot path and a cold path and update the PHIs 
-// at the exit block. 
-void CHR::cloneScopeBlocks(CHRScope *Scope, 
-                           BasicBlock *PreEntryBlock, 
-                           BasicBlock *ExitBlock, 
-                           Region *LastRegion, 
-                           ValueToValueMapTy &VMap) { 
-  // Clone all the blocks. The original blocks will be the hot-path 
-  // CHR-optimized code and the cloned blocks will be the original unoptimized 
-  // code. This is so that the block pointers from the 
-  // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code 
-  // which CHR should apply to. 
-  SmallVector<BasicBlock*, 8> NewBlocks; 
-  for (RegInfo &RI : Scope->RegInfos) 
-    for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the 
-                                            // sub-Scopes. 
-      assert(BB != PreEntryBlock && "Don't copy the preetntry block"); 
-      BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F); 
-      NewBlocks.push_back(NewBB); 
-      VMap[BB] = NewBB; 
-    } 
- 
-  // Place the cloned blocks right after the original blocks (right before the 
-  // exit block of.) 
-  if (ExitBlock) 
-    F.getBasicBlockList().splice(ExitBlock->getIterator(), 
-                                 F.getBasicBlockList(), 
-                                 NewBlocks[0]->getIterator(), F.end()); 
- 
-  // Update the cloned blocks/instructions to refer to themselves. 
-  for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) 
-    for (Instruction &I : *NewBlocks[i]) 
-      RemapInstruction(&I, VMap, 
-                       RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); 
- 
-  // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for 
-  // the top-level region but we don't need to add PHIs. The trivial PHIs 
-  // inserted above will be updated here. 
-  if (ExitBlock) 
-    for (PHINode &PN : ExitBlock->phis()) 
-      for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps; 
-           ++I) { 
-        BasicBlock *Pred = PN.getIncomingBlock(I); 
-        if (LastRegion->contains(Pred)) { 
-          Value *V = PN.getIncomingValue(I); 
-          auto It = VMap.find(V); 
-          if (It != VMap.end()) V = It->second; 
-          assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned"); 
-          PN.addIncoming(V, cast<BasicBlock>(VMap[Pred])); 
-        } 
-      } 
-} 
- 
-// A helper for transformScope. Replace the old (placeholder) branch with the 
-// new (merged) conditional branch. 
-BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock, 
-                                    BasicBlock *EntryBlock, 
-                                    BasicBlock *NewEntryBlock, 
-                                    ValueToValueMapTy &VMap) { 
-  BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator()); 
-  assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock && 
-         "SplitBlock did not work correctly!"); 
-  assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 
-         "NewEntryBlock's only pred must be EntryBlock"); 
-  assert(VMap.find(NewEntryBlock) != VMap.end() && 
-         "NewEntryBlock must have been copied"); 
-  OldBR->dropAllReferences(); 
-  OldBR->eraseFromParent(); 
-  // The true predicate is a placeholder. It will be replaced later in 
-  // fixupBranchesAndSelects(). 
-  BranchInst *NewBR = BranchInst::Create(NewEntryBlock, 
-                                         cast<BasicBlock>(VMap[NewEntryBlock]), 
-                                         ConstantInt::getTrue(F.getContext())); 
-  PreEntryBlock->getInstList().push_back(NewBR); 
-  assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 
-         "NewEntryBlock's only pred must be EntryBlock"); 
-  return NewBR; 
-} 
- 
-// A helper for transformScopes. Create the combined branch condition and 
-// constant-fold the branches/selects in the hot path. 
-void CHR::fixupBranchesAndSelects(CHRScope *Scope, 
-                                  BasicBlock *PreEntryBlock, 
-                                  BranchInst *MergedBR, 
-                                  uint64_t ProfileCount) { 
-  Value *MergedCondition = ConstantInt::getTrue(F.getContext()); 
-  BranchProbability CHRBranchBias(1, 1); 
-  uint64_t NumCHRedBranches = 0; 
-  IRBuilder<> IRB(PreEntryBlock->getTerminator()); 
-  for (RegInfo &RI : Scope->CHRRegions) { 
-    Region *R = RI.R; 
-    if (RI.HasBranch) { 
-      fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias); 
-      ++NumCHRedBranches; 
-    } 
-    for (SelectInst *SI : RI.Selects) { 
-      fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias); 
-      ++NumCHRedBranches; 
-    } 
-  } 
-  Stats.NumBranchesDelta += NumCHRedBranches - 1; 
-  Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount; 
-  ORE.emit([&]() { 
-    return OptimizationRemark(DEBUG_TYPE, 
-                              "CHR", 
-                              // Refer to the hot (original) path 
-                              MergedBR->getSuccessor(0)->getTerminator()) 
-        << "Merged " << ore::NV("NumCHRedBranches", NumCHRedBranches) 
-        << " branches or selects"; 
-  }); 
-  MergedBR->setCondition(MergedCondition); 
-  uint32_t Weights[] = { 
-      static_cast<uint32_t>(CHRBranchBias.scale(1000)), 
-      static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000)), 
-  }; 
-  MDBuilder MDB(F.getContext()); 
-  MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); 
-  CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1] 
-            << "\n"); 
-} 
- 
-// A helper for fixupBranchesAndSelects. Add to the combined branch condition 
-// and constant-fold a branch in the hot path. 
-void CHR::fixupBranch(Region *R, CHRScope *Scope, 
-                      IRBuilder<> &IRB, 
-                      Value *&MergedCondition, 
-                      BranchProbability &CHRBranchBias) { 
-  bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 
-  assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) && 
-         "Must be truthy or falsy"); 
-  auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 
-  assert(BranchBiasMap.find(R) != BranchBiasMap.end() && 
-         "Must be in the bias map"); 
-  BranchProbability Bias = BranchBiasMap[R]; 
-  assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); 
-  // Take the min. 
-  if (CHRBranchBias > Bias) 
-    CHRBranchBias = Bias; 
-  BasicBlock *IfThen = BI->getSuccessor(1); 
-  BasicBlock *IfElse = BI->getSuccessor(0); 
-  BasicBlock *RegionExitBlock = R->getExit(); 
-  assert(RegionExitBlock && "Null ExitBlock"); 
-  assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) && 
-         IfThen != IfElse && "Invariant from findScopes"); 
-  if (IfThen == RegionExitBlock) { 
-    // Swap them so that IfThen means going into it and IfElse means skipping 
-    // it. 
-    std::swap(IfThen, IfElse); 
-  } 
-  CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName() 
-            << " IfElse " << IfElse->getName() << "\n"); 
-  Value *Cond = BI->getCondition(); 
-  BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse; 
-  bool ConditionTrue = HotTarget == BI->getSuccessor(0); 
-  addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB, 
-                       MergedCondition); 
-  // Constant-fold the branch at ClonedEntryBlock. 
-  assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) && 
-         "The successor shouldn't change"); 
-  Value *NewCondition = ConditionTrue ? 
-                        ConstantInt::getTrue(F.getContext()) : 
-                        ConstantInt::getFalse(F.getContext()); 
-  BI->setCondition(NewCondition); 
-} 
- 
-// A helper for fixupBranchesAndSelects. Add to the combined branch condition 
-// and constant-fold a select in the hot path. 
-void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope, 
-                      IRBuilder<> &IRB, 
-                      Value *&MergedCondition, 
-                      BranchProbability &CHRBranchBias) { 
-  bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 
-  assert((IsTrueBiased || 
-          Scope->FalseBiasedSelects.count(SI)) && "Must be biased"); 
-  assert(SelectBiasMap.find(SI) != SelectBiasMap.end() && 
-         "Must be in the bias map"); 
-  BranchProbability Bias = SelectBiasMap[SI]; 
-  assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); 
-  // Take the min. 
-  if (CHRBranchBias > Bias) 
-    CHRBranchBias = Bias; 
-  Value *Cond = SI->getCondition(); 
-  addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB, 
-                       MergedCondition); 
-  Value *NewCondition = IsTrueBiased ? 
-                        ConstantInt::getTrue(F.getContext()) : 
-                        ConstantInt::getFalse(F.getContext()); 
-  SI->setCondition(NewCondition); 
-} 
- 
-// A helper for fixupBranch/fixupSelect. Add a branch condition to the merged 
-// condition. 
-void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond, 
-                               Instruction *BranchOrSelect, 
-                               CHRScope *Scope, 
-                               IRBuilder<> &IRB, 
-                               Value *&MergedCondition) { 
-  if (IsTrueBiased) { 
-    MergedCondition = IRB.CreateAnd(MergedCondition, Cond); 
-  } else { 
-    // If Cond is an icmp and all users of V except for BranchOrSelect is a 
-    // branch, negate the icmp predicate and swap the branch targets and avoid 
-    // inserting an Xor to negate Cond. 
-    bool Done = false; 
-    if (auto *ICmp = dyn_cast<ICmpInst>(Cond)) 
-      if (negateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) { 
-        MergedCondition = IRB.CreateAnd(MergedCondition, Cond); 
-        Done = true; 
-      } 
-    if (!Done) { 
-      Value *Negate = IRB.CreateXor( 
-          ConstantInt::getTrue(F.getContext()), Cond); 
-      MergedCondition = IRB.CreateAnd(MergedCondition, Negate); 
-    } 
-  } 
-} 
- 
-void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) { 
-  unsigned I = 0; 
-  DenseSet<PHINode *> TrivialPHIs; 
-  for (CHRScope *Scope : CHRScopes) { 
-    transformScopes(Scope, TrivialPHIs); 
-    CHR_DEBUG( 
-        std::ostringstream oss; 
-        oss << " after transformScopes " << I++; 
-        dumpIR(F, oss.str().c_str(), nullptr)); 
-    (void)I; 
-  } 
-} 
- 
-static void LLVM_ATTRIBUTE_UNUSED 
-dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) { 
-  dbgs() << Label << " " << Scopes.size() << "\n"; 
-  for (CHRScope *Scope : Scopes) { 
-    dbgs() << *Scope << "\n"; 
-  } 
-} 
- 
-bool CHR::run() { 
-  if (!shouldApply(F, PSI)) 
-    return false; 
- 
-  CHR_DEBUG(dumpIR(F, "before", nullptr)); 
- 
-  bool Changed = false; 
-  { 
-    CHR_DEBUG( 
-        dbgs() << "RegionInfo:\n"; 
-        RI.print(dbgs())); 
- 
-    // Recursively traverse the region tree and find regions that have biased 
-    // branches and/or selects and create scopes. 
-    SmallVector<CHRScope *, 8> AllScopes; 
-    findScopes(AllScopes); 
-    CHR_DEBUG(dumpScopes(AllScopes, "All scopes")); 
- 
-    // Split the scopes if 1) the conditiona values of the biased 
-    // branches/selects of the inner/lower scope can't be hoisted up to the 
-    // outermost/uppermost scope entry, or 2) the condition values of the biased 
-    // branches/selects in a scope (including subscopes) don't share at least 
-    // one common value. 
-    SmallVector<CHRScope *, 8> SplitScopes; 
-    splitScopes(AllScopes, SplitScopes); 
-    CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes")); 
- 
-    // After splitting, set the biased regions and selects of a scope (a tree 
-    // root) that include those of the subscopes. 
-    classifyBiasedScopes(SplitScopes); 
-    CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n"); 
- 
-    // Filter out the scopes that has only one biased region or select (CHR 
-    // isn't useful in such a case). 
-    SmallVector<CHRScope *, 8> FilteredScopes; 
-    filterScopes(SplitScopes, FilteredScopes); 
-    CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes")); 
- 
-    // Set the regions to be CHR'ed and their hoist stops for each scope. 
-    SmallVector<CHRScope *, 8> SetScopes; 
-    setCHRRegions(FilteredScopes, SetScopes); 
-    CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions")); 
- 
-    // Sort CHRScopes by the depth so that outer CHRScopes comes before inner 
-    // ones. We need to apply CHR from outer to inner so that we apply CHR only 
-    // to the hot path, rather than both hot and cold paths. 
-    SmallVector<CHRScope *, 8> SortedScopes; 
-    sortScopes(SetScopes, SortedScopes); 
-    CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes")); 
- 
-    CHR_DEBUG( 
-        dbgs() << "RegionInfo:\n"; 
-        RI.print(dbgs())); 
- 
-    // Apply the CHR transformation. 
-    if (!SortedScopes.empty()) { 
-      transformScopes(SortedScopes); 
-      Changed = true; 
-    } 
-  } 
- 
-  if (Changed) { 
-    CHR_DEBUG(dumpIR(F, "after", &Stats)); 
-    ORE.emit([&]() { 
-      return OptimizationRemark(DEBUG_TYPE, "Stats", &F) 
-          << ore::NV("Function", &F) << " " 
-          << "Reduced the number of branches in hot paths by " 
-          << ore::NV("NumBranchesDelta", Stats.NumBranchesDelta) 
-          << " (static) and " 
-          << ore::NV("WeightedNumBranchesDelta", Stats.WeightedNumBranchesDelta) 
-          << " (weighted by PGO count)"; 
-    }); 
-  } 
- 
-  return Changed; 
-} 
- 
-bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) { 
-  BlockFrequencyInfo &BFI = 
-      getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(); 
-  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 
-  ProfileSummaryInfo &PSI = 
-      getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 
-  RegionInfo &RI = getAnalysis<RegionInfoPass>().getRegionInfo(); 
-  std::unique_ptr<OptimizationRemarkEmitter> OwnedORE = 
-      std::make_unique<OptimizationRemarkEmitter>(&F); 
-  return CHR(F, BFI, DT, PSI, RI, *OwnedORE.get()).run(); 
-} 
- 
-namespace llvm { 
- 
-ControlHeightReductionPass::ControlHeightReductionPass() { 
-  parseCHRFilterFiles(); 
-} 
- 
-PreservedAnalyses ControlHeightReductionPass::run( 
-    Function &F, 
-    FunctionAnalysisManager &FAM) { 
-  auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F); 
-  auto &DT = FAM.getResult<DominatorTreeAnalysis>(F); 
-  auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F); 
-  auto &PSI = *MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); 
-  auto &RI = FAM.getResult<RegionInfoAnalysis>(F); 
-  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); 
-  bool Changed = CHR(F, BFI, DT, PSI, RI, ORE).run(); 
-  if (!Changed) 
-    return PreservedAnalyses::all(); 
-  auto PA = PreservedAnalyses(); 
-  PA.preserve<GlobalsAA>(); 
-  return PA; 
-} 
- 
-} // namespace llvm 
+                                      &ExitBlock->front());
+        for (BasicBlock *Pred : predecessors(ExitBlock)) {
+          PN->addIncoming(&I, Pred);
+        }
+        TrivialPHIs.insert(PN);
+        CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n");
+        for (Instruction *UI : Users) {
+          for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) {
+            if (UI->getOperand(J) == &I) {
+              UI->setOperand(J, PN);
+            }
+          }
+          CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n");
+        }
+      }
+    }
+  }
+}
+
+// Assert that all the CHR regions of the scope have a biased branch or select.
+static void LLVM_ATTRIBUTE_UNUSED
+assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) {
+#ifndef NDEBUG
+  auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) {
+    if (Scope->TrueBiasedRegions.count(RI.R) ||
+        Scope->FalseBiasedRegions.count(RI.R))
+      return true;
+    for (SelectInst *SI : RI.Selects)
+      if (Scope->TrueBiasedSelects.count(SI) ||
+          Scope->FalseBiasedSelects.count(SI))
+        return true;
+    return false;
+  };
+  for (RegInfo &RI : Scope->CHRRegions) {
+    assert(HasBiasedBranchOrSelect(RI, Scope) &&
+           "Must have biased branch or select");
+  }
+#endif
+}
+
+// Assert that all the condition values of the biased branches and selects have
+// been hoisted to the pre-entry block or outside of the scope.
+static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted(
+    CHRScope *Scope, BasicBlock *PreEntryBlock) {
+  CHR_DEBUG(dbgs() << "Biased regions condition values \n");
+  for (RegInfo &RI : Scope->CHRRegions) {
+    Region *R = RI.R;
+    bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
+    bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
+    if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
+      auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
+      Value *V = BI->getCondition();
+      CHR_DEBUG(dbgs() << *V << "\n");
+      if (auto *I = dyn_cast<Instruction>(V)) {
+        (void)(I); // Unused in release build.
+        assert((I->getParent() == PreEntryBlock ||
+                !Scope->contains(I)) &&
+               "Must have been hoisted to PreEntryBlock or outside the scope");
+      }
+    }
+    for (SelectInst *SI : RI.Selects) {
+      bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
+      bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
+      if (!(IsTrueBiased || IsFalseBiased))
+        continue;
+      Value *V = SI->getCondition();
+      CHR_DEBUG(dbgs() << *V << "\n");
+      if (auto *I = dyn_cast<Instruction>(V)) {
+        (void)(I); // Unused in release build.
+        assert((I->getParent() == PreEntryBlock ||
+                !Scope->contains(I)) &&
+               "Must have been hoisted to PreEntryBlock or outside the scope");
+      }
+    }
+  }
+}
+
+void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
+  CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n");
+
+  assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region");
+  Region *FirstRegion = Scope->RegInfos[0].R;
+  BasicBlock *EntryBlock = FirstRegion->getEntry();
+  Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R;
+  BasicBlock *ExitBlock = LastRegion->getExit();
+  Optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock);
+
+  if (ExitBlock) {
+    // Insert a trivial phi at the exit block (where the CHR hot path and the
+    // cold path merges) for a value that's defined in the scope but used
+    // outside it (meaning it's alive at the exit block). We will add the
+    // incoming values for the CHR cold paths to it below. Without this, we'd
+    // miss updating phi's for such values unless there happens to already be a
+    // phi for that value there.
+    insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs);
+  }
+
+  // Split the entry block of the first region. The new block becomes the new
+  // entry block of the first region. The old entry block becomes the block to
+  // insert the CHR branch into. Note DT gets updated. Since DT gets updated
+  // through the split, we update the entry of the first region after the split,
+  // and Region only points to the entry and the exit blocks, rather than
+  // keeping everything in a list or set, the blocks membership and the
+  // entry/exit blocks of the region are still valid after the split.
+  CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName()
+            << " at " << *Scope->BranchInsertPoint << "\n");
+  BasicBlock *NewEntryBlock =
+      SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT);
+  assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
+         "NewEntryBlock's only pred must be EntryBlock");
+  FirstRegion->replaceEntryRecursive(NewEntryBlock);
+  BasicBlock *PreEntryBlock = EntryBlock;
+
+  ValueToValueMapTy VMap;
+  // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a
+  // hot path (originals) and a cold path (clones) and update the PHIs at the
+  // exit block.
+  cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap);
+
+  // Replace the old (placeholder) branch with the new (merged) conditional
+  // branch.
+  BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock,
+                                            NewEntryBlock, VMap);
+
+#ifndef NDEBUG
+  assertCHRRegionsHaveBiasedBranchOrSelect(Scope);
+#endif
+
+  // Hoist the conditional values of the branches/selects.
+  hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs, DT);
+
+#ifndef NDEBUG
+  assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock);
+#endif
+
+  // Create the combined branch condition and constant-fold the branches/selects
+  // in the hot path.
+  fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr,
+                          ProfileCount ? ProfileCount.getValue() : 0);
+}
+
+// A helper for transformScopes. Clone the blocks in the scope (excluding the
+// PreEntryBlock) to split into a hot path and a cold path and update the PHIs
+// at the exit block.
+void CHR::cloneScopeBlocks(CHRScope *Scope,
+                           BasicBlock *PreEntryBlock,
+                           BasicBlock *ExitBlock,
+                           Region *LastRegion,
+                           ValueToValueMapTy &VMap) {
+  // Clone all the blocks. The original blocks will be the hot-path
+  // CHR-optimized code and the cloned blocks will be the original unoptimized
+  // code. This is so that the block pointers from the
+  // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code
+  // which CHR should apply to.
+  SmallVector<BasicBlock*, 8> NewBlocks;
+  for (RegInfo &RI : Scope->RegInfos)
+    for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
+                                            // sub-Scopes.
+      assert(BB != PreEntryBlock && "Don't copy the preetntry block");
+      BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F);
+      NewBlocks.push_back(NewBB);
+      VMap[BB] = NewBB;
+    }
+
+  // Place the cloned blocks right after the original blocks (right before the
+  // exit block of.)
+  if (ExitBlock)
+    F.getBasicBlockList().splice(ExitBlock->getIterator(),
+                                 F.getBasicBlockList(),
+                                 NewBlocks[0]->getIterator(), F.end());
+
+  // Update the cloned blocks/instructions to refer to themselves.
+  for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
+    for (Instruction &I : *NewBlocks[i])
+      RemapInstruction(&I, VMap,
+                       RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+
+  // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for
+  // the top-level region but we don't need to add PHIs. The trivial PHIs
+  // inserted above will be updated here.
+  if (ExitBlock)
+    for (PHINode &PN : ExitBlock->phis())
+      for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps;
+           ++I) {
+        BasicBlock *Pred = PN.getIncomingBlock(I);
+        if (LastRegion->contains(Pred)) {
+          Value *V = PN.getIncomingValue(I);
+          auto It = VMap.find(V);
+          if (It != VMap.end()) V = It->second;
+          assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned");
+          PN.addIncoming(V, cast<BasicBlock>(VMap[Pred]));
+        }
+      }
+}
+
+// A helper for transformScope. Replace the old (placeholder) branch with the
+// new (merged) conditional branch.
+BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock,
+                                    BasicBlock *EntryBlock,
+                                    BasicBlock *NewEntryBlock,
+                                    ValueToValueMapTy &VMap) {
+  BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator());
+  assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock &&
+         "SplitBlock did not work correctly!");
+  assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
+         "NewEntryBlock's only pred must be EntryBlock");
+  assert(VMap.find(NewEntryBlock) != VMap.end() &&
+         "NewEntryBlock must have been copied");
+  OldBR->dropAllReferences();
+  OldBR->eraseFromParent();
+  // The true predicate is a placeholder. It will be replaced later in
+  // fixupBranchesAndSelects().
+  BranchInst *NewBR = BranchInst::Create(NewEntryBlock,
+                                         cast<BasicBlock>(VMap[NewEntryBlock]),
+                                         ConstantInt::getTrue(F.getContext()));
+  PreEntryBlock->getInstList().push_back(NewBR);
+  assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
+         "NewEntryBlock's only pred must be EntryBlock");
+  return NewBR;
+}
+
+// A helper for transformScopes. Create the combined branch condition and
+// constant-fold the branches/selects in the hot path.
+void CHR::fixupBranchesAndSelects(CHRScope *Scope,
+                                  BasicBlock *PreEntryBlock,
+                                  BranchInst *MergedBR,
+                                  uint64_t ProfileCount) {
+  Value *MergedCondition = ConstantInt::getTrue(F.getContext());
+  BranchProbability CHRBranchBias(1, 1);
+  uint64_t NumCHRedBranches = 0;
+  IRBuilder<> IRB(PreEntryBlock->getTerminator());
+  for (RegInfo &RI : Scope->CHRRegions) {
+    Region *R = RI.R;
+    if (RI.HasBranch) {
+      fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias);
+      ++NumCHRedBranches;
+    }
+    for (SelectInst *SI : RI.Selects) {
+      fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias);
+      ++NumCHRedBranches;
+    }
+  }
+  Stats.NumBranchesDelta += NumCHRedBranches - 1;
+  Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount;
+  ORE.emit([&]() {
+    return OptimizationRemark(DEBUG_TYPE,
+                              "CHR",
+                              // Refer to the hot (original) path
+                              MergedBR->getSuccessor(0)->getTerminator())
+        << "Merged " << ore::NV("NumCHRedBranches", NumCHRedBranches)
+        << " branches or selects";
+  });
+  MergedBR->setCondition(MergedCondition);
+  uint32_t Weights[] = {
+      static_cast<uint32_t>(CHRBranchBias.scale(1000)),
+      static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000)),
+  };
+  MDBuilder MDB(F.getContext());
+  MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
+  CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1]
+            << "\n");
+}
+
+// A helper for fixupBranchesAndSelects. Fold region R's biased entry branch
+// into MergedCondition, then constant-fold that branch in the hot path.
+void CHR::fixupBranch(Region *R, CHRScope *Scope,
+                      IRBuilder<> &IRB,
+                      Value *&MergedCondition,
+                      BranchProbability &CHRBranchBias) {
+  bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
+  assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) &&
+         "Must be truthy or falsy");
+  auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
+  assert(BranchBiasMap.find(R) != BranchBiasMap.end() &&
+         "Must be in the bias map");
+  BranchProbability Bias = BranchBiasMap[R];
+  assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
+  // Lower CHRBranchBias to this branch's bias if it is the smallest so far.
+  if (CHRBranchBias > Bias)
+    CHRBranchBias = Bias;
+  BasicBlock *IfThen = BI->getSuccessor(1);
+  BasicBlock *IfElse = BI->getSuccessor(0);
+  BasicBlock *RegionExitBlock = R->getExit();
+  assert(RegionExitBlock && "Null ExitBlock");
+  assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) &&
+         IfThen != IfElse && "Invariant from findScopes");
+  if (IfThen == RegionExitBlock) {
+    // Swap them so that IfThen means going into the region and IfElse means
+    // skipping it (i.e. branching straight to the region exit).
+    std::swap(IfThen, IfElse);
+  }
+  CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName()
+            << " IfElse " << IfElse->getName() << "\n");
+  Value *Cond = BI->getCondition();
+  BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse;
+  bool ConditionTrue = HotTarget == BI->getSuccessor(0);
+  addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB,
+                       MergedCondition);
+  // Constant-fold the branch at ClonedEntryBlock.
+  assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) &&
+         "The successor shouldn't change");
+  Value *NewCondition = ConditionTrue ?
+                        ConstantInt::getTrue(F.getContext()) :
+                        ConstantInt::getFalse(F.getContext());
+  BI->setCondition(NewCondition);
+}
+
+// A helper for fixupBranchesAndSelects. Fold the biased select SI into
+// MergedCondition, then constant-fold its condition in the hot path.
+void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope,
+                      IRBuilder<> &IRB,
+                      Value *&MergedCondition,
+                      BranchProbability &CHRBranchBias) {
+  bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
+  assert((IsTrueBiased ||
+          Scope->FalseBiasedSelects.count(SI)) && "Must be biased");
+  assert(SelectBiasMap.find(SI) != SelectBiasMap.end() &&
+         "Must be in the bias map");
+  BranchProbability Bias = SelectBiasMap[SI];
+  assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
+  // Lower CHRBranchBias to this select's bias if it is the smallest so far.
+  if (CHRBranchBias > Bias)
+    CHRBranchBias = Bias;
+  Value *Cond = SI->getCondition();
+  addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB,
+                       MergedCondition);
+  Value *NewCondition = IsTrueBiased ?
+                        ConstantInt::getTrue(F.getContext()) :
+                        ConstantInt::getFalse(F.getContext());
+  SI->setCondition(NewCondition);
+}
+
+// A helper for fixupBranch/fixupSelect. AND Cond, or its negation when
+// IsTrueBiased is false, into the merged condition.
+void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond,
+                               Instruction *BranchOrSelect,
+                               CHRScope *Scope,
+                               IRBuilder<> &IRB,
+                               Value *&MergedCondition) {
+  if (IsTrueBiased) {
+    MergedCondition = IRB.CreateAnd(MergedCondition, Cond);
+  } else {
+    // If Cond is an icmp and all users of Cond except for BranchOrSelect are
+    // branches, negate the icmp predicate and swap the branch targets to avoid
+    // inserting an Xor to negate Cond.
+    bool Done = false;
+    if (auto *ICmp = dyn_cast<ICmpInst>(Cond))
+      if (negateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) {
+        MergedCondition = IRB.CreateAnd(MergedCondition, Cond);
+        Done = true;
+      }
+    if (!Done) {
+      Value *Negate = IRB.CreateXor(
+          ConstantInt::getTrue(F.getContext()), Cond);
+      MergedCondition = IRB.CreateAnd(MergedCondition, Negate);
+    }
+  }
+}
+
+void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) {
+  unsigned I = 0; // Scope counter; only read by the debug dump below.
+  DenseSet<PHINode *> TrivialPHIs; // Shared by every per-scope transform.
+  for (CHRScope *Scope : CHRScopes) {
+    transformScopes(Scope, TrivialPHIs);
+    CHR_DEBUG(
+        std::ostringstream oss;
+        oss << " after transformScopes " << I++;
+        dumpIR(F, oss.str().c_str(), nullptr));
+    (void)I; // Presumably avoids an unused warning if CHR_DEBUG is a no-op.
+  }
+}
+
+static void LLVM_ATTRIBUTE_UNUSED
+dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) {
+  dbgs() << Label << " " << Scopes.size() << "\n";
+  for (CHRScope *Scope : Scopes) {
+    dbgs() << *Scope << "\n";
+  }
+}
+
+bool CHR::run() {
+  if (!shouldApply(F, PSI))
+    return false;
+
+  CHR_DEBUG(dumpIR(F, "before", nullptr));
+
+  bool Changed = false;
+  {
+    CHR_DEBUG(
+        dbgs() << "RegionInfo:\n";
+        RI.print(dbgs()));
+
+    // Recursively traverse the region tree and find regions that have biased
+    // branches and/or selects and create scopes.
+    SmallVector<CHRScope *, 8> AllScopes;
+    findScopes(AllScopes);
+    CHR_DEBUG(dumpScopes(AllScopes, "All scopes"));
+
+    // Split the scopes if 1) the condition values of the biased
+    // branches/selects of the inner/lower scope can't be hoisted up to the
+    // outermost/uppermost scope entry, or 2) the condition values of the biased
+    // branches/selects in a scope (including subscopes) don't share at least
+    // one common value.
+    SmallVector<CHRScope *, 8> SplitScopes;
+    splitScopes(AllScopes, SplitScopes);
+    CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes"));
+
+    // After splitting, set the biased regions and selects of a scope (a tree
+    // root) that include those of the subscopes.
+    classifyBiasedScopes(SplitScopes);
+    CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n");
+
+    // Filter out the scopes that have only one biased region or select (CHR
+    // isn't useful in such a case).
+    SmallVector<CHRScope *, 8> FilteredScopes;
+    filterScopes(SplitScopes, FilteredScopes);
+    CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes"));
+
+    // Set the regions to be CHR'ed and their hoist stops for each scope.
+    SmallVector<CHRScope *, 8> SetScopes;
+    setCHRRegions(FilteredScopes, SetScopes);
+    CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions"));
+
+    // Sort CHRScopes by the depth so that outer CHRScopes come before inner
+    // ones. We need to apply CHR from outer to inner so that we apply CHR only
+    // to the hot path, rather than both hot and cold paths.
+    SmallVector<CHRScope *, 8> SortedScopes;
+    sortScopes(SetScopes, SortedScopes);
+    CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes"));
+
+    CHR_DEBUG(
+        dbgs() << "RegionInfo:\n";
+        RI.print(dbgs()));
+
+    // Apply the CHR transformation.
+    if (!SortedScopes.empty()) {
+      transformScopes(SortedScopes);
+      Changed = true;
+    }
+  }
+
+  if (Changed) {
+    CHR_DEBUG(dumpIR(F, "after", &Stats));
+    ORE.emit([&]() {
+      return OptimizationRemark(DEBUG_TYPE, "Stats", &F)
+          << ore::NV("Function", &F) << " "
+          << "Reduced the number of branches in hot paths by "
+          << ore::NV("NumBranchesDelta", Stats.NumBranchesDelta)
+          << " (static) and "
+          << ore::NV("WeightedNumBranchesDelta", Stats.WeightedNumBranchesDelta)
+          << " (weighted by PGO count)";
+    });
+  }
+
+  return Changed;
+}
+
+bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) {
+  BlockFrequencyInfo &BFI =
+      getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
+  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  ProfileSummaryInfo &PSI =
+      getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+  RegionInfo &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
+  std::unique_ptr<OptimizationRemarkEmitter> OwnedORE =
+      std::make_unique<OptimizationRemarkEmitter>(&F);
+  return CHR(F, BFI, DT, PSI, RI, *OwnedORE.get()).run();
+}
+
+namespace llvm {
+
+ControlHeightReductionPass::ControlHeightReductionPass() {
+  parseCHRFilterFiles();
+}
+
+PreservedAnalyses ControlHeightReductionPass::run(
+    Function &F,
+    FunctionAnalysisManager &FAM) {
+  auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
+  auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
+  auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
+  auto &PSI = *MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
+  auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
+  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+  bool Changed = CHR(F, BFI, DT, PSI, RI, ORE).run();
+  if (!Changed)
+    return PreservedAnalyses::all();
+  auto PA = PreservedAnalyses();
+  PA.preserve<GlobalsAA>();
+  return PA;
+}
+
+} // namespace llvm
diff --git a/contrib/libs/llvm12/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/contrib/libs/llvm12/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index ebd7a997dd..1b14b8d569 100644
--- a/contrib/libs/llvm12/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -1,111 +1,111 @@
-//===- DataFlowSanitizer.cpp - dynamic data flow analysis -----------------===// 
-// 
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 
-// 
-//===----------------------------------------------------------------------===// 
-// 
-/// \file 
-/// This file is a part of DataFlowSanitizer, a generalised dynamic data flow 
-/// analysis. 
-/// 
-/// Unlike other Sanitizer tools, this tool is not designed to detect a specific 
-/// class of bugs on its own.  Instead, it provides a generic dynamic data flow 
-/// analysis framework to be used by clients to help detect application-specific 
-/// issues within their own code. 
-/// 
-/// The analysis is based on automatic propagation of data flow labels (also 
-/// known as taint labels) through a program as it performs computation.  Each 
-/// byte of application memory is backed by two bytes of shadow memory which 
-/// hold the label.  On Linux/x86_64, memory is laid out as follows: 
-/// 
-/// +--------------------+ 0x800000000000 (top of memory) 
-/// | application memory | 
-/// +--------------------+ 0x700000008000 (kAppAddr) 
-/// |                    | 
-/// |       unused       | 
-/// |                    | 
-/// +--------------------+ 0x200200000000 (kUnusedAddr) 
-/// |    union table     | 
-/// +--------------------+ 0x200000000000 (kUnionTableAddr) 
-/// |   shadow memory    | 
-/// +--------------------+ 0x000000010000 (kShadowAddr) 
-/// | reserved by kernel | 
-/// +--------------------+ 0x000000000000 
-/// 
-/// To derive a shadow memory address from an application memory address, 
-/// bits 44-46 are cleared to bring the address into the range 
-/// [0x000000008000,0x100000000000).  Then the address is shifted left by 1 to 
-/// account for the double byte representation of shadow labels and move the 
-/// address into the shadow memory range.  See the function 
-/// DataFlowSanitizer::getShadowAddress below. 
-/// 
-/// For more information, please refer to the design document: 
-/// http://clang.llvm.org/docs/DataFlowSanitizerDesign.html 
-// 
-//===----------------------------------------------------------------------===// 
- 
+//===- DataFlowSanitizer.cpp - dynamic data flow analysis -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file is a part of DataFlowSanitizer, a generalised dynamic data flow
+/// analysis.
+///
+/// Unlike other Sanitizer tools, this tool is not designed to detect a specific
+/// class of bugs on its own.  Instead, it provides a generic dynamic data flow
+/// analysis framework to be used by clients to help detect application-specific
+/// issues within their own code.
+///
+/// The analysis is based on automatic propagation of data flow labels (also
+/// known as taint labels) through a program as it performs computation.  Each
+/// byte of application memory is backed by two bytes of shadow memory which
+/// hold the label.  On Linux/x86_64, memory is laid out as follows:
+///
+/// +--------------------+ 0x800000000000 (top of memory)
+/// | application memory |
+/// +--------------------+ 0x700000008000 (kAppAddr)
+/// |                    |
+/// |       unused       |
+/// |                    |
+/// +--------------------+ 0x200200000000 (kUnusedAddr)
+/// |    union table     |
+/// +--------------------+ 0x200000000000 (kUnionTableAddr)
+/// |   shadow memory    |
+/// +--------------------+ 0x000000010000 (kShadowAddr)
+/// | reserved by kernel |
+/// +--------------------+ 0x000000000000
+///
+/// To derive a shadow memory address from an application memory address,
+/// bits 44-46 are cleared to bring the address into the range
+/// [0x000000008000,0x100000000000).  Then the address is shifted left by 1 to
+/// account for the double byte representation of shadow labels and move the
+/// address into the shadow memory range.  See the function
+/// DataFlowSanitizer::getShadowAddress below.
+///
+/// For more information, please refer to the design document:
+/// http://clang.llvm.org/docs/DataFlowSanitizerDesign.html
+//
+//===----------------------------------------------------------------------===//
+
 #include "llvm/Transforms/Instrumentation/DataFlowSanitizer.h"
-#include "llvm/ADT/DenseMap.h" 
-#include "llvm/ADT/DenseSet.h" 
-#include "llvm/ADT/DepthFirstIterator.h" 
-#include "llvm/ADT/None.h" 
-#include "llvm/ADT/SmallPtrSet.h" 
-#include "llvm/ADT/SmallVector.h" 
-#include "llvm/ADT/StringExtras.h" 
-#include "llvm/ADT/StringRef.h" 
-#include "llvm/ADT/Triple.h" 
-#include "llvm/Analysis/ValueTracking.h" 
-#include "llvm/IR/Argument.h" 
-#include "llvm/IR/Attributes.h" 
-#include "llvm/IR/BasicBlock.h" 
-#include "llvm/IR/Constant.h" 
-#include "llvm/IR/Constants.h" 
-#include "llvm/IR/DataLayout.h" 
-#include "llvm/IR/DerivedTypes.h" 
-#include "llvm/IR/Dominators.h" 
-#include "llvm/IR/Function.h" 
-#include "llvm/IR/GlobalAlias.h" 
-#include "llvm/IR/GlobalValue.h" 
-#include "llvm/IR/GlobalVariable.h" 
-#include "llvm/IR/IRBuilder.h" 
-#include "llvm/IR/InlineAsm.h" 
-#include "llvm/IR/InstVisitor.h" 
-#include "llvm/IR/InstrTypes.h" 
-#include "llvm/IR/Instruction.h" 
-#include "llvm/IR/Instructions.h" 
-#include "llvm/IR/IntrinsicInst.h" 
-#include "llvm/IR/LLVMContext.h" 
-#include "llvm/IR/MDBuilder.h" 
-#include "llvm/IR/Module.h" 
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
 #include "llvm/IR/PassManager.h"
-#include "llvm/IR/Type.h" 
-#include "llvm/IR/User.h" 
-#include "llvm/IR/Value.h" 
-#include "llvm/InitializePasses.h" 
-#include "llvm/Pass.h" 
-#include "llvm/Support/Casting.h" 
-#include "llvm/Support/CommandLine.h" 
-#include "llvm/Support/ErrorHandling.h" 
-#include "llvm/Support/SpecialCaseList.h" 
-#include "llvm/Support/VirtualFileSystem.h" 
-#include "llvm/Transforms/Instrumentation.h" 
-#include "llvm/Transforms/Utils/BasicBlockUtils.h" 
-#include "llvm/Transforms/Utils/Local.h" 
-#include <algorithm> 
-#include <cassert> 
-#include <cstddef> 
-#include <cstdint> 
-#include <iterator> 
-#include <memory> 
-#include <set> 
-#include <string> 
-#include <utility> 
-#include <vector> 
- 
-using namespace llvm; 
- 
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SpecialCaseList.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <memory>
+#include <set>
+#include <string>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
 // This must be consistent with ShadowWidthBits.
 static const Align kShadowTLSAlignment = Align(2);
 
@@ -114,78 +114,78 @@ static const Align kShadowTLSAlignment = Align(2);
 static const unsigned kArgTLSSize = 800;
 static const unsigned kRetvalTLSSize = 800;
 
-// External symbol to be used when generating the shadow address for 
-// architectures with multiple VMAs. Instead of using a constant integer 
-// the runtime will set the external mask based on the VMA range. 
+// External symbol to be used when generating the shadow address for
+// architectures with multiple VMAs. Instead of using a constant integer
+// the runtime will set the external mask based on the VMA range.
 const char kDFSanExternShadowPtrMask[] = "__dfsan_shadow_ptr_mask";
- 
-// The -dfsan-preserve-alignment flag controls whether this pass assumes that 
-// alignment requirements provided by the input IR are correct.  For example, 
-// if the input IR contains a load with alignment 8, this flag will cause 
-// the shadow load to have alignment 16.  This flag is disabled by default as 
-// we have unfortunately encountered too much code (including Clang itself; 
-// see PR14291) which performs misaligned access. 
-static cl::opt<bool> ClPreserveAlignment( 
-    "dfsan-preserve-alignment", 
-    cl::desc("respect alignment requirements provided by input IR"), cl::Hidden, 
-    cl::init(false)); 
- 
-// The ABI list files control how shadow parameters are passed. The pass treats 
-// every function labelled "uninstrumented" in the ABI list file as conforming 
-// to the "native" (i.e. unsanitized) ABI.  Unless the ABI list contains 
-// additional annotations for those functions, a call to one of those functions 
-// will produce a warning message, as the labelling behaviour of the function is 
-// unknown.  The other supported annotations are "functional" and "discard", 
-// which are described below under DataFlowSanitizer::WrapperKind. 
-static cl::list<std::string> ClABIListFiles( 
-    "dfsan-abilist", 
-    cl::desc("File listing native ABI functions and how the pass treats them"), 
-    cl::Hidden); 
- 
-// Controls whether the pass uses IA_Args or IA_TLS as the ABI for instrumented 
-// functions (see DataFlowSanitizer::InstrumentedABI below). 
-static cl::opt<bool> ClArgsABI( 
-    "dfsan-args-abi", 
-    cl::desc("Use the argument ABI rather than the TLS ABI"), 
-    cl::Hidden); 
- 
-// Controls whether the pass includes or ignores the labels of pointers in load 
-// instructions. 
-static cl::opt<bool> ClCombinePointerLabelsOnLoad( 
-    "dfsan-combine-pointer-labels-on-load", 
-    cl::desc("Combine the label of the pointer with the label of the data when " 
-             "loading from memory."), 
-    cl::Hidden, cl::init(true)); 
- 
-// Controls whether the pass includes or ignores the labels of pointers in 
-// stores instructions. 
-static cl::opt<bool> ClCombinePointerLabelsOnStore( 
-    "dfsan-combine-pointer-labels-on-store", 
-    cl::desc("Combine the label of the pointer with the label of the data when " 
-             "storing in memory."), 
-    cl::Hidden, cl::init(false)); 
- 
-static cl::opt<bool> ClDebugNonzeroLabels( 
-    "dfsan-debug-nonzero-labels", 
-    cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, " 
-             "load or return with a nonzero label"), 
-    cl::Hidden); 
- 
-// Experimental feature that inserts callbacks for certain data events. 
-// Currently callbacks are only inserted for loads, stores, memory transfers 
-// (i.e. memcpy and memmove), and comparisons. 
-// 
-// If this flag is set to true, the user must provide definitions for the 
-// following callback functions: 
+
+// The -dfsan-preserve-alignment flag controls whether this pass assumes that
+// alignment requirements provided by the input IR are correct.  For example,
+// if the input IR contains a load with alignment 8, this flag will cause
+// the shadow load to have alignment 16.  This flag is disabled by default as
+// we have unfortunately encountered too much code (including Clang itself;
+// see PR14291) which performs misaligned access.
+static cl::opt<bool> ClPreserveAlignment(
+    "dfsan-preserve-alignment",
+    cl::desc("respect alignment requirements provided by input IR"), cl::Hidden,
+    cl::init(false));
+
+// The ABI list files control how shadow parameters are passed. The pass treats
+// every function labelled "uninstrumented" in the ABI list file as conforming
+// to the "native" (i.e. unsanitized) ABI.  Unless the ABI list contains
+// additional annotations for those functions, a call to one of those functions
+// will produce a warning message, as the labelling behaviour of the function is
+// unknown.  The other supported annotations are "functional" and "discard",
+// which are described below under DataFlowSanitizer::WrapperKind.
+static cl::list<std::string> ClABIListFiles(
+    "dfsan-abilist",
+    cl::desc("File listing native ABI functions and how the pass treats them"),
+    cl::Hidden);
+
+// Controls whether the pass uses IA_Args or IA_TLS as the ABI for instrumented
+// functions (see DataFlowSanitizer::InstrumentedABI below).
+static cl::opt<bool> ClArgsABI(
+    "dfsan-args-abi",
+    cl::desc("Use the argument ABI rather than the TLS ABI"),
+    cl::Hidden);
+
+// Controls whether the pass includes or ignores the labels of pointers in load
+// instructions.
+static cl::opt<bool> ClCombinePointerLabelsOnLoad(
+    "dfsan-combine-pointer-labels-on-load",
+    cl::desc("Combine the label of the pointer with the label of the data when "
+             "loading from memory."),
+    cl::Hidden, cl::init(true));
+
+// Controls whether the pass includes or ignores the labels of pointers in
+// store instructions.
+static cl::opt<bool> ClCombinePointerLabelsOnStore(
+    "dfsan-combine-pointer-labels-on-store",
+    cl::desc("Combine the label of the pointer with the label of the data when "
+             "storing in memory."),
+    cl::Hidden, cl::init(false));
+
+static cl::opt<bool> ClDebugNonzeroLabels(
+    "dfsan-debug-nonzero-labels",
+    cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, "
+             "load or return with a nonzero label"),
+    cl::Hidden);
+
+// Experimental feature that inserts callbacks for certain data events.
+// Currently callbacks are only inserted for loads, stores, memory transfers
+// (i.e. memcpy and memmove), and comparisons.
+//
+// If this flag is set to true, the user must provide definitions for the
+// following callback functions:
 //   void __dfsan_load_callback(dfsan_label Label, void* addr);
 //   void __dfsan_store_callback(dfsan_label Label, void* addr);
-//   void __dfsan_mem_transfer_callback(dfsan_label *Start, size_t Len); 
-//   void __dfsan_cmp_callback(dfsan_label CombinedLabel); 
-static cl::opt<bool> ClEventCallbacks( 
-    "dfsan-event-callbacks", 
-    cl::desc("Insert calls to __dfsan_*_callback functions on data events."), 
-    cl::Hidden, cl::init(false)); 
- 
+//   void __dfsan_mem_transfer_callback(dfsan_label *Start, size_t Len);
+//   void __dfsan_cmp_callback(dfsan_label CombinedLabel);
+static cl::opt<bool> ClEventCallbacks(
+    "dfsan-event-callbacks",
+    cl::desc("Insert calls to __dfsan_*_callback functions on data events."),
+    cl::Hidden, cl::init(false));
+
 // Use a distinct bit for each base label, enabling faster unions with less
 // instrumentation.  Limits the max number of base labels to 16.
 static cl::opt<bool> ClFast16Labels(
@@ -201,220 +201,220 @@ static cl::opt<bool> ClTrackSelectControlFlow(
              "to results."),
     cl::Hidden, cl::init(true));
 
-static StringRef GetGlobalTypeString(const GlobalValue &G) { 
-  // Types of GlobalVariables are always pointer types. 
-  Type *GType = G.getValueType(); 
-  // For now we support excluding struct types only. 
-  if (StructType *SGType = dyn_cast<StructType>(GType)) { 
-    if (!SGType->isLiteral()) 
-      return SGType->getName(); 
-  } 
-  return "<unknown type>"; 
-} 
- 
-namespace { 
- 
-class DFSanABIList { 
-  std::unique_ptr<SpecialCaseList> SCL; 
- 
- public: 
-  DFSanABIList() = default; 
- 
-  void set(std::unique_ptr<SpecialCaseList> List) { SCL = std::move(List); } 
- 
-  /// Returns whether either this function or its source file are listed in the 
-  /// given category. 
-  bool isIn(const Function &F, StringRef Category) const { 
-    return isIn(*F.getParent(), Category) || 
-           SCL->inSection("dataflow", "fun", F.getName(), Category); 
-  } 
- 
-  /// Returns whether this global alias is listed in the given category. 
-  /// 
-  /// If GA aliases a function, the alias's name is matched as a function name 
-  /// would be.  Similarly, aliases of globals are matched like globals. 
-  bool isIn(const GlobalAlias &GA, StringRef Category) const { 
-    if (isIn(*GA.getParent(), Category)) 
-      return true; 
- 
-    if (isa<FunctionType>(GA.getValueType())) 
-      return SCL->inSection("dataflow", "fun", GA.getName(), Category); 
- 
-    return SCL->inSection("dataflow", "global", GA.getName(), Category) || 
-           SCL->inSection("dataflow", "type", GetGlobalTypeString(GA), 
-                          Category); 
-  } 
- 
-  /// Returns whether this module is listed in the given category. 
-  bool isIn(const Module &M, StringRef Category) const { 
-    return SCL->inSection("dataflow", "src", M.getModuleIdentifier(), Category); 
-  } 
-}; 
- 
-/// TransformedFunction is used to express the result of transforming one 
-/// function type into another.  This struct is immutable.  It holds metadata 
-/// useful for updating calls of the old function to the new type. 
-struct TransformedFunction { 
-  TransformedFunction(FunctionType* OriginalType, 
-                      FunctionType* TransformedType, 
-                      std::vector<unsigned> ArgumentIndexMapping) 
-      : OriginalType(OriginalType), 
-        TransformedType(TransformedType), 
-        ArgumentIndexMapping(ArgumentIndexMapping) {} 
- 
-  // Disallow copies. 
-  TransformedFunction(const TransformedFunction&) = delete; 
-  TransformedFunction& operator=(const TransformedFunction&) = delete; 
- 
-  // Allow moves. 
-  TransformedFunction(TransformedFunction&&) = default; 
-  TransformedFunction& operator=(TransformedFunction&&) = default; 
- 
-  /// Type of the function before the transformation. 
-  FunctionType *OriginalType; 
- 
-  /// Type of the function after the transformation. 
-  FunctionType *TransformedType; 
- 
-  /// Transforming a function may change the position of arguments.  This 
-  /// member records the mapping from each argument's old position to its new 
-  /// position.  Argument positions are zero-indexed.  If the transformation 
-  /// from F to F' made the first argument of F into the third argument of F', 
-  /// then ArgumentIndexMapping[0] will equal 2. 
-  std::vector<unsigned> ArgumentIndexMapping; 
-}; 
- 
-/// Given function attributes from a call site for the original function, 
-/// return function attributes appropriate for a call to the transformed 
-/// function. 
-AttributeList TransformFunctionAttributes( 
-    const TransformedFunction& TransformedFunction, 
-    LLVMContext& Ctx, AttributeList CallSiteAttrs) { 
- 
-  // Construct a vector of AttributeSet for each function argument. 
-  std::vector<llvm::AttributeSet> ArgumentAttributes( 
-      TransformedFunction.TransformedType->getNumParams()); 
- 
-  // Copy attributes from the parameter of the original function to the 
-  // transformed version.  'ArgumentIndexMapping' holds the mapping from 
-  // old argument position to new. 
-  for (unsigned i=0, ie = TransformedFunction.ArgumentIndexMapping.size(); 
-       i < ie; ++i) { 
-    unsigned TransformedIndex = TransformedFunction.ArgumentIndexMapping[i]; 
-    ArgumentAttributes[TransformedIndex] = CallSiteAttrs.getParamAttributes(i); 
-  } 
- 
-  // Copy annotations on varargs arguments. 
-  for (unsigned i = TransformedFunction.OriginalType->getNumParams(), 
-       ie = CallSiteAttrs.getNumAttrSets(); i<ie; ++i) { 
-    ArgumentAttributes.push_back(CallSiteAttrs.getParamAttributes(i)); 
-  } 
- 
-  return AttributeList::get( 
-      Ctx, 
-      CallSiteAttrs.getFnAttributes(), 
-      CallSiteAttrs.getRetAttributes(), 
-      llvm::makeArrayRef(ArgumentAttributes)); 
-} 
- 
+static StringRef GetGlobalTypeString(const GlobalValue &G) {
+  // Types of GlobalVariables are always pointer types.
+  Type *GType = G.getValueType();
+  // For now we support excluding struct types only.
+  if (StructType *SGType = dyn_cast<StructType>(GType)) {
+    if (!SGType->isLiteral())
+      return SGType->getName();
+  }
+  return "<unknown type>";
+}
+
+namespace {
+
+class DFSanABIList {
+  std::unique_ptr<SpecialCaseList> SCL;
+
+ public:
+  DFSanABIList() = default;
+
+  void set(std::unique_ptr<SpecialCaseList> List) { SCL = std::move(List); }
+
+  /// Returns whether either this function or its source file are listed in the
+  /// given category.
+  bool isIn(const Function &F, StringRef Category) const {
+    return isIn(*F.getParent(), Category) ||
+           SCL->inSection("dataflow", "fun", F.getName(), Category);
+  }
+
+  /// Returns whether this global alias is listed in the given category.
+  ///
+  /// If GA aliases a function, the alias's name is matched as a function name
+  /// would be.  Similarly, aliases of globals are matched like globals.
+  bool isIn(const GlobalAlias &GA, StringRef Category) const {
+    if (isIn(*GA.getParent(), Category))
+      return true;
+
+    if (isa<FunctionType>(GA.getValueType()))
+      return SCL->inSection("dataflow", "fun", GA.getName(), Category);
+
+    return SCL->inSection("dataflow", "global", GA.getName(), Category) ||
+           SCL->inSection("dataflow", "type", GetGlobalTypeString(GA),
+                          Category);
+  }
+
+  /// Returns whether this module is listed in the given category.
+  bool isIn(const Module &M, StringRef Category) const {
+    return SCL->inSection("dataflow", "src", M.getModuleIdentifier(), Category);
+  }
+};
+
+/// TransformedFunction is used to express the result of transforming one
+/// function type into another.  This struct is immutable.  It holds metadata
+/// useful for updating calls of the old function to the new type.
+struct TransformedFunction {
+  TransformedFunction(FunctionType* OriginalType,
+                      FunctionType* TransformedType,
+                      std::vector<unsigned> ArgumentIndexMapping)
+      : OriginalType(OriginalType),
+        TransformedType(TransformedType),
+        ArgumentIndexMapping(ArgumentIndexMapping) {}
+
+  // Disallow copies.
+  TransformedFunction(const TransformedFunction&) = delete;
+  TransformedFunction& operator=(const TransformedFunction&) = delete;
+
+  // Allow moves.
+  TransformedFunction(TransformedFunction&&) = default;
+  TransformedFunction& operator=(TransformedFunction&&) = default;
+
+  /// Type of the function before the transformation.
+  FunctionType *OriginalType;
+
+  /// Type of the function after the transformation.
+  FunctionType *TransformedType;
+
+  /// Transforming a function may change the position of arguments.  This
+  /// member records the mapping from each argument's old position to its new
+  /// position.  Argument positions are zero-indexed.  If the transformation
+  /// from F to F' made the first argument of F into the third argument of F',
+  /// then ArgumentIndexMapping[0] will equal 2.
+  std::vector<unsigned> ArgumentIndexMapping;
+};
+
+/// Given function attributes from a call site for the original function,
+/// return function attributes appropriate for a call to the transformed
+/// function.
+AttributeList TransformFunctionAttributes(
+    const TransformedFunction& TransformedFunction,
+    LLVMContext& Ctx, AttributeList CallSiteAttrs) {
+
+  // Construct a vector of AttributeSet for each function argument.
+  std::vector<llvm::AttributeSet> ArgumentAttributes(
+      TransformedFunction.TransformedType->getNumParams());
+
+  // Copy attributes from the parameter of the original function to the
+  // transformed version.  'ArgumentIndexMapping' holds the mapping from
+  // old argument position to new.
+  for (unsigned i=0, ie = TransformedFunction.ArgumentIndexMapping.size();
+       i < ie; ++i) {
+    unsigned TransformedIndex = TransformedFunction.ArgumentIndexMapping[i];
+    ArgumentAttributes[TransformedIndex] = CallSiteAttrs.getParamAttributes(i);
+  }
+
+  // Copy annotations on varargs arguments.
+  for (unsigned i = TransformedFunction.OriginalType->getNumParams(),
+       ie = CallSiteAttrs.getNumAttrSets(); i<ie; ++i) {
+    ArgumentAttributes.push_back(CallSiteAttrs.getParamAttributes(i));
+  }
+
+  return AttributeList::get(
+      Ctx,
+      CallSiteAttrs.getFnAttributes(),
+      CallSiteAttrs.getRetAttributes(),
+      llvm::makeArrayRef(ArgumentAttributes));
+}
+
 class DataFlowSanitizer {
-  friend struct DFSanFunction; 
-  friend class DFSanVisitor; 
- 
-  enum { ShadowWidthBits = 16, ShadowWidthBytes = ShadowWidthBits / 8 }; 
- 
-  /// Which ABI should be used for instrumented functions? 
-  enum InstrumentedABI { 
-    /// Argument and return value labels are passed through additional 
-    /// arguments and by modifying the return type. 
-    IA_Args, 
- 
-    /// Argument and return value labels are passed through TLS variables 
-    /// __dfsan_arg_tls and __dfsan_retval_tls. 
-    IA_TLS 
-  }; 
- 
-  /// How should calls to uninstrumented functions be handled? 
-  enum WrapperKind { 
-    /// This function is present in an uninstrumented form but we don't know 
-    /// how it should be handled.  Print a warning and call the function anyway. 
-    /// Don't label the return value. 
-    WK_Warning, 
- 
-    /// This function does not write to (user-accessible) memory, and its return 
-    /// value is unlabelled. 
-    WK_Discard, 
- 
-    /// This function does not write to (user-accessible) memory, and the label 
-    /// of its return value is the union of the label of its arguments. 
-    WK_Functional, 
- 
-    /// Instead of calling the function, a custom wrapper __dfsw_F is called, 
-    /// where F is the name of the function.  This function may wrap the 
-    /// original function or provide its own implementation.  This is similar to 
-    /// the IA_Args ABI, except that IA_Args uses a struct return type to 
-    /// pass the return value shadow in a register, while WK_Custom uses an 
-    /// extra pointer argument to return the shadow.  This allows the wrapped 
-    /// form of the function type to be expressed in C. 
-    WK_Custom 
-  }; 
- 
-  Module *Mod; 
-  LLVMContext *Ctx; 
+  friend struct DFSanFunction;
+  friend class DFSanVisitor;
+
+  enum { ShadowWidthBits = 16, ShadowWidthBytes = ShadowWidthBits / 8 };
+
+  /// Which ABI should be used for instrumented functions?
+  enum InstrumentedABI {
+    /// Argument and return value labels are passed through additional
+    /// arguments and by modifying the return type.
+    IA_Args,
+
+    /// Argument and return value labels are passed through TLS variables
+    /// __dfsan_arg_tls and __dfsan_retval_tls.
+    IA_TLS
+  };
+
+  /// How should calls to uninstrumented functions be handled?
+  enum WrapperKind {
+    /// This function is present in an uninstrumented form but we don't know
+    /// how it should be handled.  Print a warning and call the function anyway.
+    /// Don't label the return value.
+    WK_Warning,
+
+    /// This function does not write to (user-accessible) memory, and its return
+    /// value is unlabelled.
+    WK_Discard,
+
+    /// This function does not write to (user-accessible) memory, and the label
+    /// of its return value is the union of the label of its arguments.
+    WK_Functional,
+
+    /// Instead of calling the function, a custom wrapper __dfsw_F is called,
+    /// where F is the name of the function.  This function may wrap the
+    /// original function or provide its own implementation.  This is similar to
+    /// the IA_Args ABI, except that IA_Args uses a struct return type to
+    /// pass the return value shadow in a register, while WK_Custom uses an
+    /// extra pointer argument to return the shadow.  This allows the wrapped
+    /// form of the function type to be expressed in C.
+    WK_Custom
+  };
+
+  Module *Mod;
+  LLVMContext *Ctx;
   Type *Int8Ptr;
   /// The shadow type for all primitive types and vector types.
   IntegerType *PrimitiveShadowTy;
   PointerType *PrimitiveShadowPtrTy;
-  IntegerType *IntptrTy; 
+  IntegerType *IntptrTy;
   ConstantInt *ZeroPrimitiveShadow;
-  ConstantInt *ShadowPtrMask; 
-  ConstantInt *ShadowPtrMul; 
-  Constant *ArgTLS; 
-  Constant *RetvalTLS; 
-  Constant *ExternalShadowMask; 
-  FunctionType *DFSanUnionFnTy; 
-  FunctionType *DFSanUnionLoadFnTy; 
-  FunctionType *DFSanUnimplementedFnTy; 
-  FunctionType *DFSanSetLabelFnTy; 
-  FunctionType *DFSanNonzeroLabelFnTy; 
-  FunctionType *DFSanVarargWrapperFnTy; 
+  ConstantInt *ShadowPtrMask;
+  ConstantInt *ShadowPtrMul;
+  Constant *ArgTLS;
+  Constant *RetvalTLS;
+  Constant *ExternalShadowMask;
+  FunctionType *DFSanUnionFnTy;
+  FunctionType *DFSanUnionLoadFnTy;
+  FunctionType *DFSanUnimplementedFnTy;
+  FunctionType *DFSanSetLabelFnTy;
+  FunctionType *DFSanNonzeroLabelFnTy;
+  FunctionType *DFSanVarargWrapperFnTy;
   FunctionType *DFSanCmpCallbackFnTy;
   FunctionType *DFSanLoadStoreCallbackFnTy;
-  FunctionType *DFSanMemTransferCallbackFnTy; 
-  FunctionCallee DFSanUnionFn; 
-  FunctionCallee DFSanCheckedUnionFn; 
-  FunctionCallee DFSanUnionLoadFn; 
+  FunctionType *DFSanMemTransferCallbackFnTy;
+  FunctionCallee DFSanUnionFn;
+  FunctionCallee DFSanCheckedUnionFn;
+  FunctionCallee DFSanUnionLoadFn;
   FunctionCallee DFSanUnionLoadFast16LabelsFn;
-  FunctionCallee DFSanUnimplementedFn; 
-  FunctionCallee DFSanSetLabelFn; 
-  FunctionCallee DFSanNonzeroLabelFn; 
-  FunctionCallee DFSanVarargWrapperFn; 
-  FunctionCallee DFSanLoadCallbackFn; 
-  FunctionCallee DFSanStoreCallbackFn; 
-  FunctionCallee DFSanMemTransferCallbackFn; 
-  FunctionCallee DFSanCmpCallbackFn; 
-  MDNode *ColdCallWeights; 
-  DFSanABIList ABIList; 
-  DenseMap<Value *, Function *> UnwrappedFnMap; 
-  AttrBuilder ReadOnlyNoneAttrs; 
-  bool DFSanRuntimeShadowMask = false; 
- 
-  Value *getShadowAddress(Value *Addr, Instruction *Pos); 
-  bool isInstrumented(const Function *F); 
-  bool isInstrumented(const GlobalAlias *GA); 
-  FunctionType *getArgsFunctionType(FunctionType *T); 
-  FunctionType *getTrampolineFunctionType(FunctionType *T); 
-  TransformedFunction getCustomFunctionType(FunctionType *T); 
-  InstrumentedABI getInstrumentedABI(); 
-  WrapperKind getWrapperKind(Function *F); 
-  void addGlobalNamePrefix(GlobalValue *GV); 
-  Function *buildWrapperFunction(Function *F, StringRef NewFName, 
-                                 GlobalValue::LinkageTypes NewFLink, 
-                                 FunctionType *NewFT); 
-  Constant *getOrBuildTrampolineFunction(FunctionType *FT, StringRef FName); 
-  void initializeCallbackFunctions(Module &M); 
-  void initializeRuntimeFunctions(Module &M); 
- 
+  FunctionCallee DFSanUnimplementedFn;
+  FunctionCallee DFSanSetLabelFn;
+  FunctionCallee DFSanNonzeroLabelFn;
+  FunctionCallee DFSanVarargWrapperFn;
+  FunctionCallee DFSanLoadCallbackFn;
+  FunctionCallee DFSanStoreCallbackFn;
+  FunctionCallee DFSanMemTransferCallbackFn;
+  FunctionCallee DFSanCmpCallbackFn;
+  MDNode *ColdCallWeights;
+  DFSanABIList ABIList;
+  DenseMap<Value *, Function *> UnwrappedFnMap;
+  AttrBuilder ReadOnlyNoneAttrs;
+  bool DFSanRuntimeShadowMask = false;
+
+  Value *getShadowAddress(Value *Addr, Instruction *Pos);
+  bool isInstrumented(const Function *F);
+  bool isInstrumented(const GlobalAlias *GA);
+  FunctionType *getArgsFunctionType(FunctionType *T);
+  FunctionType *getTrampolineFunctionType(FunctionType *T);
+  TransformedFunction getCustomFunctionType(FunctionType *T);
+  InstrumentedABI getInstrumentedABI();
+  WrapperKind getWrapperKind(Function *F);
+  void addGlobalNamePrefix(GlobalValue *GV);
+  Function *buildWrapperFunction(Function *F, StringRef NewFName,
+                                 GlobalValue::LinkageTypes NewFLink,
+                                 FunctionType *NewFT);
+  Constant *getOrBuildTrampolineFunction(FunctionType *FT, StringRef FName);
+  void initializeCallbackFunctions(Module &M);
+  void initializeRuntimeFunctions(Module &M);
+
   bool init(Module &M);
 
   /// Returns whether the pass tracks labels for struct fields and array
@@ -448,30 +448,30 @@ class DataFlowSanitizer {
   /// Returns the shadow type of of V's type.
   Type *getShadowTy(Value *V);
 
-public: 
+public:
   DataFlowSanitizer(const std::vector<std::string> &ABIListFiles);
- 
+
   bool runImpl(Module &M);
-}; 
- 
-struct DFSanFunction { 
-  DataFlowSanitizer &DFS; 
-  Function *F; 
-  DominatorTree DT; 
-  DataFlowSanitizer::InstrumentedABI IA; 
-  bool IsNativeABI; 
-  AllocaInst *LabelReturnAlloca = nullptr; 
-  DenseMap<Value *, Value *> ValShadowMap; 
-  DenseMap<AllocaInst *, AllocaInst *> AllocaShadowMap; 
-  std::vector<std::pair<PHINode *, PHINode *>> PHIFixups; 
-  DenseSet<Instruction *> SkipInsts; 
-  std::vector<Value *> NonZeroChecks; 
-  bool AvoidNewBlocks; 
- 
+};
+
+struct DFSanFunction {
+  DataFlowSanitizer &DFS;
+  Function *F;
+  DominatorTree DT;
+  DataFlowSanitizer::InstrumentedABI IA;
+  bool IsNativeABI;
+  AllocaInst *LabelReturnAlloca = nullptr;
+  DenseMap<Value *, Value *> ValShadowMap;
+  DenseMap<AllocaInst *, AllocaInst *> AllocaShadowMap;
+  std::vector<std::pair<PHINode *, PHINode *>> PHIFixups;
+  DenseSet<Instruction *> SkipInsts;
+  std::vector<Value *> NonZeroChecks;
+  bool AvoidNewBlocks;
+
   struct CachedShadow {
     BasicBlock *Block; // The block where Shadow is defined.
-    Value *Shadow; 
-  }; 
+    Value *Shadow;
+  };
   /// Maps a value to its latest shadow value in terms of domination tree.
   DenseMap<std::pair<Value *, Value *>, CachedShadow> CachedShadows;
   /// Maps a value to its latest collapsed shadow value it was converted to in
@@ -479,16 +479,16 @@ struct DFSanFunction {
   /// used at a post process where CFG blocks are split. So it does not cache
   /// BasicBlock like CachedShadows, but uses domination between values.
   DenseMap<Value *, Value *> CachedCollapsedShadows;
-  DenseMap<Value *, std::set<Value *>> ShadowElements; 
- 
-  DFSanFunction(DataFlowSanitizer &DFS, Function *F, bool IsNativeABI) 
-      : DFS(DFS), F(F), IA(DFS.getInstrumentedABI()), IsNativeABI(IsNativeABI) { 
-    DT.recalculate(*F); 
-    // FIXME: Need to track down the register allocator issue which causes poor 
-    // performance in pathological cases with large numbers of basic blocks. 
-    AvoidNewBlocks = F->size() > 1000; 
-  } 
- 
+  DenseMap<Value *, std::set<Value *>> ShadowElements;
+
+  DFSanFunction(DataFlowSanitizer &DFS, Function *F, bool IsNativeABI)
+      : DFS(DFS), F(F), IA(DFS.getInstrumentedABI()), IsNativeABI(IsNativeABI) {
+    DT.recalculate(*F);
+    // FIXME: Need to track down the register allocator issue which causes poor
+    // performance in pathological cases with large numbers of basic blocks.
+    AvoidNewBlocks = F->size() > 1000;
+  }
+
   /// Computes the shadow address for a given function argument.
   ///
   /// Shadow = ArgTLS+ArgOffset.
@@ -497,18 +497,18 @@ struct DFSanFunction {
   /// Computes the shadow address for a retval.
   Value *getRetvalTLS(Type *T, IRBuilder<> &IRB);
 
-  Value *getShadow(Value *V); 
-  void setShadow(Instruction *I, Value *Shadow); 
+  Value *getShadow(Value *V);
+  void setShadow(Instruction *I, Value *Shadow);
   /// Generates IR to compute the union of the two given shadows, inserting it
   /// before Pos. The combined value is with primitive type.
-  Value *combineShadows(Value *V1, Value *V2, Instruction *Pos); 
+  Value *combineShadows(Value *V1, Value *V2, Instruction *Pos);
   /// Combines the shadow values of V1 and V2, then converts the combined value
   /// with primitive type into a shadow value with the original type T.
   Value *combineShadowsThenConvert(Type *T, Value *V1, Value *V2,
                                    Instruction *Pos);
-  Value *combineOperandShadows(Instruction *Inst); 
-  Value *loadShadow(Value *ShadowAddr, uint64_t Size, uint64_t Align, 
-                    Instruction *Pos); 
+  Value *combineOperandShadows(Instruction *Inst);
+  Value *loadShadow(Value *ShadowAddr, uint64_t Size, uint64_t Align,
+                    Instruction *Pos);
   void storePrimitiveShadow(Value *Addr, uint64_t Size, Align Alignment,
                             Value *PrimitiveShadow, Instruction *Pos);
   /// Applies PrimitiveShadow to all primitive subtypes of T, returning
@@ -539,110 +539,110 @@ private:
 
   /// Returns the shadow value of an argument A.
   Value *getShadowForTLSArgument(Argument *A);
-}; 
- 
-class DFSanVisitor : public InstVisitor<DFSanVisitor> { 
-public: 
-  DFSanFunction &DFSF; 
- 
-  DFSanVisitor(DFSanFunction &DFSF) : DFSF(DFSF) {} 
- 
-  const DataLayout &getDataLayout() const { 
-    return DFSF.F->getParent()->getDataLayout(); 
-  } 
- 
-  // Combines shadow values for all of I's operands. Returns the combined shadow 
-  // value. 
-  Value *visitOperandShadowInst(Instruction &I); 
- 
-  void visitUnaryOperator(UnaryOperator &UO); 
-  void visitBinaryOperator(BinaryOperator &BO); 
-  void visitCastInst(CastInst &CI); 
-  void visitCmpInst(CmpInst &CI); 
-  void visitGetElementPtrInst(GetElementPtrInst &GEPI); 
-  void visitLoadInst(LoadInst &LI); 
-  void visitStoreInst(StoreInst &SI); 
-  void visitReturnInst(ReturnInst &RI); 
-  void visitCallBase(CallBase &CB); 
-  void visitPHINode(PHINode &PN); 
-  void visitExtractElementInst(ExtractElementInst &I); 
-  void visitInsertElementInst(InsertElementInst &I); 
-  void visitShuffleVectorInst(ShuffleVectorInst &I); 
-  void visitExtractValueInst(ExtractValueInst &I); 
-  void visitInsertValueInst(InsertValueInst &I); 
-  void visitAllocaInst(AllocaInst &I); 
-  void visitSelectInst(SelectInst &I); 
-  void visitMemSetInst(MemSetInst &I); 
-  void visitMemTransferInst(MemTransferInst &I); 
-}; 
- 
-} // end anonymous namespace 
- 
-DataFlowSanitizer::DataFlowSanitizer( 
+};
+
+class DFSanVisitor : public InstVisitor<DFSanVisitor> {
+public:
+  DFSanFunction &DFSF;
+
+  DFSanVisitor(DFSanFunction &DFSF) : DFSF(DFSF) {}
+
+  const DataLayout &getDataLayout() const {
+    return DFSF.F->getParent()->getDataLayout();
+  }
+
+  // Combines shadow values for all of I's operands. Returns the combined shadow
+  // value.
+  Value *visitOperandShadowInst(Instruction &I);
+
+  void visitUnaryOperator(UnaryOperator &UO);
+  void visitBinaryOperator(BinaryOperator &BO);
+  void visitCastInst(CastInst &CI);
+  void visitCmpInst(CmpInst &CI);
+  void visitGetElementPtrInst(GetElementPtrInst &GEPI);
+  void visitLoadInst(LoadInst &LI);
+  void visitStoreInst(StoreInst &SI);
+  void visitReturnInst(ReturnInst &RI);
+  void visitCallBase(CallBase &CB);
+  void visitPHINode(PHINode &PN);
+  void visitExtractElementInst(ExtractElementInst &I);
+  void visitInsertElementInst(InsertElementInst &I);
+  void visitShuffleVectorInst(ShuffleVectorInst &I);
+  void visitExtractValueInst(ExtractValueInst &I);
+  void visitInsertValueInst(InsertValueInst &I);
+  void visitAllocaInst(AllocaInst &I);
+  void visitSelectInst(SelectInst &I);
+  void visitMemSetInst(MemSetInst &I);
+  void visitMemTransferInst(MemTransferInst &I);
+};
+
+} // end anonymous namespace
+
+DataFlowSanitizer::DataFlowSanitizer(
     const std::vector<std::string> &ABIListFiles) {
-  std::vector<std::string> AllABIListFiles(std::move(ABIListFiles)); 
+  std::vector<std::string> AllABIListFiles(std::move(ABIListFiles));
   llvm::append_range(AllABIListFiles, ClABIListFiles);
-  // FIXME: should we propagate vfs::FileSystem to this constructor? 
-  ABIList.set( 
-      SpecialCaseList::createOrDie(AllABIListFiles, *vfs::getRealFileSystem())); 
-} 
- 
-FunctionType *DataFlowSanitizer::getArgsFunctionType(FunctionType *T) { 
-  SmallVector<Type *, 4> ArgTypes(T->param_begin(), T->param_end()); 
+  // FIXME: should we propagate vfs::FileSystem to this constructor?
+  ABIList.set(
+      SpecialCaseList::createOrDie(AllABIListFiles, *vfs::getRealFileSystem()));
+}
+
+FunctionType *DataFlowSanitizer::getArgsFunctionType(FunctionType *T) {
+  SmallVector<Type *, 4> ArgTypes(T->param_begin(), T->param_end());
   ArgTypes.append(T->getNumParams(), PrimitiveShadowTy);
-  if (T->isVarArg()) 
+  if (T->isVarArg())
     ArgTypes.push_back(PrimitiveShadowPtrTy);
-  Type *RetType = T->getReturnType(); 
-  if (!RetType->isVoidTy()) 
+  Type *RetType = T->getReturnType();
+  if (!RetType->isVoidTy())
     RetType = StructType::get(RetType, PrimitiveShadowTy);
-  return FunctionType::get(RetType, ArgTypes, T->isVarArg()); 
-} 
- 
-FunctionType *DataFlowSanitizer::getTrampolineFunctionType(FunctionType *T) { 
-  assert(!T->isVarArg()); 
-  SmallVector<Type *, 4> ArgTypes; 
-  ArgTypes.push_back(T->getPointerTo()); 
-  ArgTypes.append(T->param_begin(), T->param_end()); 
+  return FunctionType::get(RetType, ArgTypes, T->isVarArg());
+}
+
+FunctionType *DataFlowSanitizer::getTrampolineFunctionType(FunctionType *T) {
+  assert(!T->isVarArg());
+  SmallVector<Type *, 4> ArgTypes;
+  ArgTypes.push_back(T->getPointerTo());
+  ArgTypes.append(T->param_begin(), T->param_end());
   ArgTypes.append(T->getNumParams(), PrimitiveShadowTy);
-  Type *RetType = T->getReturnType(); 
-  if (!RetType->isVoidTy()) 
+  Type *RetType = T->getReturnType();
+  if (!RetType->isVoidTy())
     ArgTypes.push_back(PrimitiveShadowPtrTy);
-  return FunctionType::get(T->getReturnType(), ArgTypes, false); 
-} 
- 
-TransformedFunction DataFlowSanitizer::getCustomFunctionType(FunctionType *T) { 
-  SmallVector<Type *, 4> ArgTypes; 
- 
-  // Some parameters of the custom function being constructed are 
-  // parameters of T.  Record the mapping from parameters of T to 
-  // parameters of the custom function, so that parameter attributes 
-  // at call sites can be updated. 
-  std::vector<unsigned> ArgumentIndexMapping; 
-  for (unsigned i = 0, ie = T->getNumParams(); i != ie; ++i) { 
-    Type* param_type = T->getParamType(i); 
-    FunctionType *FT; 
-    if (isa<PointerType>(param_type) && (FT = dyn_cast<FunctionType>( 
-            cast<PointerType>(param_type)->getElementType()))) { 
-      ArgumentIndexMapping.push_back(ArgTypes.size()); 
-      ArgTypes.push_back(getTrampolineFunctionType(FT)->getPointerTo()); 
-      ArgTypes.push_back(Type::getInt8PtrTy(*Ctx)); 
-    } else { 
-      ArgumentIndexMapping.push_back(ArgTypes.size()); 
-      ArgTypes.push_back(param_type); 
-    } 
-  } 
-  for (unsigned i = 0, e = T->getNumParams(); i != e; ++i) 
+  return FunctionType::get(T->getReturnType(), ArgTypes, false);
+}
+
+TransformedFunction DataFlowSanitizer::getCustomFunctionType(FunctionType *T) {
+  SmallVector<Type *, 4> ArgTypes;
+
+  // Some parameters of the custom function being constructed are
+  // parameters of T.  Record the mapping from parameters of T to
+  // parameters of the custom function, so that parameter attributes
+  // at call sites can be updated.
+  std::vector<unsigned> ArgumentIndexMapping;
+  for (unsigned i = 0, ie = T->getNumParams(); i != ie; ++i) {
+    Type* param_type = T->getParamType(i);
+    FunctionType *FT;
+    if (isa<PointerType>(param_type) && (FT = dyn_cast<FunctionType>(
+            cast<PointerType>(param_type)->getElementType()))) {
+      ArgumentIndexMapping.push_back(ArgTypes.size());
+      ArgTypes.push_back(getTrampolineFunctionType(FT)->getPointerTo());
+      ArgTypes.push_back(Type::getInt8PtrTy(*Ctx));
+    } else {
+      ArgumentIndexMapping.push_back(ArgTypes.size());
+      ArgTypes.push_back(param_type);
+    }
+  }
+  for (unsigned i = 0, e = T->getNumParams(); i != e; ++i)
     ArgTypes.push_back(PrimitiveShadowTy);
-  if (T->isVarArg()) 
+  if (T->isVarArg())
     ArgTypes.push_back(PrimitiveShadowPtrTy);
-  Type *RetType = T->getReturnType(); 
-  if (!RetType->isVoidTy()) 
+  Type *RetType = T->getReturnType();
+  if (!RetType->isVoidTy())
     ArgTypes.push_back(PrimitiveShadowPtrTy);
-  return TransformedFunction( 
-      T, FunctionType::get(T->getReturnType(), ArgTypes, T->isVarArg()), 
-      ArgumentIndexMapping); 
-} 
- 
+  return TransformedFunction(
+      T, FunctionType::get(T->getReturnType(), ArgTypes, T->isVarArg()),
+      ArgumentIndexMapping);
+}
+
 bool DataFlowSanitizer::isZeroShadow(Value *V) {
   if (!shouldTrackFieldsAndIndices())
     return ZeroPrimitiveShadow == V;
@@ -800,48 +800,48 @@ Type *DataFlowSanitizer::getShadowTy(Value *V) {
 }
 
 bool DataFlowSanitizer::init(Module &M) {
-  Triple TargetTriple(M.getTargetTriple()); 
-  bool IsX86_64 = TargetTriple.getArch() == Triple::x86_64; 
-  bool IsMIPS64 = TargetTriple.isMIPS64(); 
-  bool IsAArch64 = TargetTriple.getArch() == Triple::aarch64 || 
-                   TargetTriple.getArch() == Triple::aarch64_be; 
- 
-  const DataLayout &DL = M.getDataLayout(); 
- 
-  Mod = &M; 
-  Ctx = &M.getContext(); 
+  Triple TargetTriple(M.getTargetTriple());
+  bool IsX86_64 = TargetTriple.getArch() == Triple::x86_64;
+  bool IsMIPS64 = TargetTriple.isMIPS64();
+  bool IsAArch64 = TargetTriple.getArch() == Triple::aarch64 ||
+                   TargetTriple.getArch() == Triple::aarch64_be;
+
+  const DataLayout &DL = M.getDataLayout();
+
+  Mod = &M;
+  Ctx = &M.getContext();
   Int8Ptr = Type::getInt8PtrTy(*Ctx);
   PrimitiveShadowTy = IntegerType::get(*Ctx, ShadowWidthBits);
   PrimitiveShadowPtrTy = PointerType::getUnqual(PrimitiveShadowTy);
-  IntptrTy = DL.getIntPtrType(*Ctx); 
+  IntptrTy = DL.getIntPtrType(*Ctx);
   ZeroPrimitiveShadow = ConstantInt::getSigned(PrimitiveShadowTy, 0);
-  ShadowPtrMul = ConstantInt::getSigned(IntptrTy, ShadowWidthBytes); 
-  if (IsX86_64) 
-    ShadowPtrMask = ConstantInt::getSigned(IntptrTy, ~0x700000000000LL); 
-  else if (IsMIPS64) 
-    ShadowPtrMask = ConstantInt::getSigned(IntptrTy, ~0xF000000000LL); 
-  // AArch64 supports multiple VMAs and the shadow mask is set at runtime. 
-  else if (IsAArch64) 
-    DFSanRuntimeShadowMask = true; 
-  else 
-    report_fatal_error("unsupported triple"); 
- 
+  ShadowPtrMul = ConstantInt::getSigned(IntptrTy, ShadowWidthBytes);
+  if (IsX86_64)
+    ShadowPtrMask = ConstantInt::getSigned(IntptrTy, ~0x700000000000LL);
+  else if (IsMIPS64)
+    ShadowPtrMask = ConstantInt::getSigned(IntptrTy, ~0xF000000000LL);
+  // AArch64 supports multiple VMAs and the shadow mask is set at runtime.
+  else if (IsAArch64)
+    DFSanRuntimeShadowMask = true;
+  else
+    report_fatal_error("unsupported triple");
+
   Type *DFSanUnionArgs[2] = {PrimitiveShadowTy, PrimitiveShadowTy};
-  DFSanUnionFnTy = 
+  DFSanUnionFnTy =
       FunctionType::get(PrimitiveShadowTy, DFSanUnionArgs, /*isVarArg=*/false);
   Type *DFSanUnionLoadArgs[2] = {PrimitiveShadowPtrTy, IntptrTy};
   DFSanUnionLoadFnTy = FunctionType::get(PrimitiveShadowTy, DFSanUnionLoadArgs,
                                          /*isVarArg=*/false);
-  DFSanUnimplementedFnTy = FunctionType::get( 
-      Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false); 
+  DFSanUnimplementedFnTy = FunctionType::get(
+      Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false);
   Type *DFSanSetLabelArgs[3] = {PrimitiveShadowTy, Type::getInt8PtrTy(*Ctx),
                                 IntptrTy};
-  DFSanSetLabelFnTy = FunctionType::get(Type::getVoidTy(*Ctx), 
-                                        DFSanSetLabelArgs, /*isVarArg=*/false); 
+  DFSanSetLabelFnTy = FunctionType::get(Type::getVoidTy(*Ctx),
+                                        DFSanSetLabelArgs, /*isVarArg=*/false);
   DFSanNonzeroLabelFnTy =
       FunctionType::get(Type::getVoidTy(*Ctx), None, /*isVarArg=*/false);
-  DFSanVarargWrapperFnTy = FunctionType::get( 
-      Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false); 
+  DFSanVarargWrapperFnTy = FunctionType::get(
+      Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false);
   DFSanCmpCallbackFnTy =
       FunctionType::get(Type::getVoidTy(*Ctx), PrimitiveShadowTy,
                         /*isVarArg=*/false);
@@ -850,169 +850,169 @@ bool DataFlowSanitizer::init(Module &M) {
       FunctionType::get(Type::getVoidTy(*Ctx), DFSanLoadStoreCallbackArgs,
                         /*isVarArg=*/false);
   Type *DFSanMemTransferCallbackArgs[2] = {PrimitiveShadowPtrTy, IntptrTy};
-  DFSanMemTransferCallbackFnTy = 
-      FunctionType::get(Type::getVoidTy(*Ctx), DFSanMemTransferCallbackArgs, 
-                        /*isVarArg=*/false); 
- 
-  ColdCallWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000); 
-  return true; 
-} 
- 
-bool DataFlowSanitizer::isInstrumented(const Function *F) { 
-  return !ABIList.isIn(*F, "uninstrumented"); 
-} 
- 
-bool DataFlowSanitizer::isInstrumented(const GlobalAlias *GA) { 
-  return !ABIList.isIn(*GA, "uninstrumented"); 
-} 
- 
-DataFlowSanitizer::InstrumentedABI DataFlowSanitizer::getInstrumentedABI() { 
-  return ClArgsABI ? IA_Args : IA_TLS; 
-} 
- 
-DataFlowSanitizer::WrapperKind DataFlowSanitizer::getWrapperKind(Function *F) { 
-  if (ABIList.isIn(*F, "functional")) 
-    return WK_Functional; 
-  if (ABIList.isIn(*F, "discard")) 
-    return WK_Discard; 
-  if (ABIList.isIn(*F, "custom")) 
-    return WK_Custom; 
- 
-  return WK_Warning; 
-} 
- 
-void DataFlowSanitizer::addGlobalNamePrefix(GlobalValue *GV) { 
-  std::string GVName = std::string(GV->getName()), Prefix = "dfs$"; 
-  GV->setName(Prefix + GVName); 
- 
-  // Try to change the name of the function in module inline asm.  We only do 
-  // this for specific asm directives, currently only ".symver", to try to avoid 
-  // corrupting asm which happens to contain the symbol name as a substring. 
-  // Note that the substitution for .symver assumes that the versioned symbol 
-  // also has an instrumented name. 
-  std::string Asm = GV->getParent()->getModuleInlineAsm(); 
-  std::string SearchStr = ".symver " + GVName + ","; 
-  size_t Pos = Asm.find(SearchStr); 
-  if (Pos != std::string::npos) { 
-    Asm.replace(Pos, SearchStr.size(), 
-                ".symver " + Prefix + GVName + "," + Prefix); 
-    GV->getParent()->setModuleInlineAsm(Asm); 
-  } 
-} 
- 
-Function * 
-DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName, 
-                                        GlobalValue::LinkageTypes NewFLink, 
-                                        FunctionType *NewFT) { 
-  FunctionType *FT = F->getFunctionType(); 
-  Function *NewF = Function::Create(NewFT, NewFLink, F->getAddressSpace(), 
-                                    NewFName, F->getParent()); 
-  NewF->copyAttributesFrom(F); 
-  NewF->removeAttributes( 
-      AttributeList::ReturnIndex, 
-      AttributeFuncs::typeIncompatible(NewFT->getReturnType())); 
- 
-  BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF); 
-  if (F->isVarArg()) { 
-    NewF->removeAttributes(AttributeList::FunctionIndex, 
-                           AttrBuilder().addAttribute("split-stack")); 
-    CallInst::Create(DFSanVarargWrapperFn, 
-                     IRBuilder<>(BB).CreateGlobalStringPtr(F->getName()), "", 
-                     BB); 
-    new UnreachableInst(*Ctx, BB); 
-  } else { 
-    std::vector<Value *> Args; 
-    unsigned n = FT->getNumParams(); 
-    for (Function::arg_iterator ai = NewF->arg_begin(); n != 0; ++ai, --n) 
-      Args.push_back(&*ai); 
-    CallInst *CI = CallInst::Create(F, Args, "", BB); 
-    if (FT->getReturnType()->isVoidTy()) 
-      ReturnInst::Create(*Ctx, BB); 
-    else 
-      ReturnInst::Create(*Ctx, CI, BB); 
-  } 
- 
-  return NewF; 
-} 
- 
-Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT, 
-                                                          StringRef FName) { 
-  FunctionType *FTT = getTrampolineFunctionType(FT); 
-  FunctionCallee C = Mod->getOrInsertFunction(FName, FTT); 
-  Function *F = dyn_cast<Function>(C.getCallee()); 
-  if (F && F->isDeclaration()) { 
-    F->setLinkage(GlobalValue::LinkOnceODRLinkage); 
-    BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F); 
-    std::vector<Value *> Args; 
-    Function::arg_iterator AI = F->arg_begin(); ++AI; 
-    for (unsigned N = FT->getNumParams(); N != 0; ++AI, --N) 
-      Args.push_back(&*AI); 
-    CallInst *CI = CallInst::Create(FT, &*F->arg_begin(), Args, "", BB); 
-    ReturnInst *RI; 
-    if (FT->getReturnType()->isVoidTy()) 
-      RI = ReturnInst::Create(*Ctx, BB); 
-    else 
-      RI = ReturnInst::Create(*Ctx, CI, BB); 
- 
+  DFSanMemTransferCallbackFnTy =
+      FunctionType::get(Type::getVoidTy(*Ctx), DFSanMemTransferCallbackArgs,
+                        /*isVarArg=*/false);
+
+  ColdCallWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000);
+  return true;
+}
+
+bool DataFlowSanitizer::isInstrumented(const Function *F) {
+  return !ABIList.isIn(*F, "uninstrumented");
+}
+
+bool DataFlowSanitizer::isInstrumented(const GlobalAlias *GA) {
+  return !ABIList.isIn(*GA, "uninstrumented");
+}
+
+DataFlowSanitizer::InstrumentedABI DataFlowSanitizer::getInstrumentedABI() {
+  return ClArgsABI ? IA_Args : IA_TLS;
+}
+
+DataFlowSanitizer::WrapperKind DataFlowSanitizer::getWrapperKind(Function *F) {
+  if (ABIList.isIn(*F, "functional"))
+    return WK_Functional;
+  if (ABIList.isIn(*F, "discard"))
+    return WK_Discard;
+  if (ABIList.isIn(*F, "custom"))
+    return WK_Custom;
+
+  return WK_Warning;
+}
+
+void DataFlowSanitizer::addGlobalNamePrefix(GlobalValue *GV) {
+  std::string GVName = std::string(GV->getName()), Prefix = "dfs$";
+  GV->setName(Prefix + GVName);
+
+  // Try to change the name of the function in module inline asm.  We only do
+  // this for specific asm directives, currently only ".symver", to try to avoid
+  // corrupting asm which happens to contain the symbol name as a substring.
+  // Note that the substitution for .symver assumes that the versioned symbol
+  // also has an instrumented name.
+  std::string Asm = GV->getParent()->getModuleInlineAsm();
+  std::string SearchStr = ".symver " + GVName + ",";
+  size_t Pos = Asm.find(SearchStr);
+  if (Pos != std::string::npos) {
+    Asm.replace(Pos, SearchStr.size(),
+                ".symver " + Prefix + GVName + "," + Prefix);
+    GV->getParent()->setModuleInlineAsm(Asm);
+  }
+}
+
+Function *
+DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName,
+                                        GlobalValue::LinkageTypes NewFLink,
+                                        FunctionType *NewFT) {
+  FunctionType *FT = F->getFunctionType();
+  Function *NewF = Function::Create(NewFT, NewFLink, F->getAddressSpace(),
+                                    NewFName, F->getParent());
+  NewF->copyAttributesFrom(F);
+  NewF->removeAttributes(
+      AttributeList::ReturnIndex,
+      AttributeFuncs::typeIncompatible(NewFT->getReturnType()));
+
+  BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF);
+  if (F->isVarArg()) {
+    NewF->removeAttributes(AttributeList::FunctionIndex,
+                           AttrBuilder().addAttribute("split-stack"));
+    CallInst::Create(DFSanVarargWrapperFn,
+                     IRBuilder<>(BB).CreateGlobalStringPtr(F->getName()), "",
+                     BB);
+    new UnreachableInst(*Ctx, BB);
+  } else {
+    std::vector<Value *> Args;
+    unsigned n = FT->getNumParams();
+    for (Function::arg_iterator ai = NewF->arg_begin(); n != 0; ++ai, --n)
+      Args.push_back(&*ai);
+    CallInst *CI = CallInst::Create(F, Args, "", BB);
+    if (FT->getReturnType()->isVoidTy())
+      ReturnInst::Create(*Ctx, BB);
+    else
+      ReturnInst::Create(*Ctx, CI, BB);
+  }
+
+  return NewF;
+}
+
+Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT,
+                                                          StringRef FName) {
+  FunctionType *FTT = getTrampolineFunctionType(FT);
+  FunctionCallee C = Mod->getOrInsertFunction(FName, FTT);
+  Function *F = dyn_cast<Function>(C.getCallee());
+  if (F && F->isDeclaration()) {
+    F->setLinkage(GlobalValue::LinkOnceODRLinkage);
+    BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F);
+    std::vector<Value *> Args;
+    Function::arg_iterator AI = F->arg_begin(); ++AI;
+    for (unsigned N = FT->getNumParams(); N != 0; ++AI, --N)
+      Args.push_back(&*AI);
+    CallInst *CI = CallInst::Create(FT, &*F->arg_begin(), Args, "", BB);
+    ReturnInst *RI;
+    if (FT->getReturnType()->isVoidTy())
+      RI = ReturnInst::Create(*Ctx, BB);
+    else
+      RI = ReturnInst::Create(*Ctx, CI, BB);
+
     // F is called by a wrapped custom function with primitive shadows. So
     // its arguments and return value need conversion.
-    DFSanFunction DFSF(*this, F, /*IsNativeABI=*/true); 
-    Function::arg_iterator ValAI = F->arg_begin(), ShadowAI = AI; ++ValAI; 
+    DFSanFunction DFSF(*this, F, /*IsNativeABI=*/true);
+    Function::arg_iterator ValAI = F->arg_begin(), ShadowAI = AI; ++ValAI;
     for (unsigned N = FT->getNumParams(); N != 0; ++ValAI, ++ShadowAI, --N) {
       Value *Shadow =
           DFSF.expandFromPrimitiveShadow(ValAI->getType(), &*ShadowAI, CI);
       DFSF.ValShadowMap[&*ValAI] = Shadow;
     }
-    DFSanVisitor(DFSF).visitCallInst(*CI); 
+    DFSanVisitor(DFSF).visitCallInst(*CI);
     if (!FT->getReturnType()->isVoidTy()) {
       Value *PrimitiveShadow = DFSF.collapseToPrimitiveShadow(
           DFSF.getShadow(RI->getReturnValue()), RI);
       new StoreInst(PrimitiveShadow, &*std::prev(F->arg_end()), RI);
     }
-  } 
- 
-  return cast<Constant>(C.getCallee()); 
-} 
- 
-// Initialize DataFlowSanitizer runtime functions and declare them in the module 
-void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) { 
-  { 
-    AttributeList AL; 
-    AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex, 
-                         Attribute::NoUnwind); 
-    AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex, 
-                         Attribute::ReadNone); 
-    AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex, 
-                         Attribute::ZExt); 
-    AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt); 
-    AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt); 
-    DFSanUnionFn = 
-        Mod->getOrInsertFunction("__dfsan_union", DFSanUnionFnTy, AL); 
-  } 
-  { 
-    AttributeList AL; 
-    AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex, 
-                         Attribute::NoUnwind); 
-    AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex, 
-                         Attribute::ReadNone); 
-    AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex, 
-                         Attribute::ZExt); 
-    AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt); 
-    AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt); 
-    DFSanCheckedUnionFn = 
-        Mod->getOrInsertFunction("dfsan_union", DFSanUnionFnTy, AL); 
-  } 
-  { 
-    AttributeList AL; 
-    AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex, 
-                         Attribute::NoUnwind); 
-    AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex, 
-                         Attribute::ReadOnly); 
-    AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex, 
-                         Attribute::ZExt); 
-    DFSanUnionLoadFn = 
-        Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy, AL); 
-  } 
+  }
+
+  return cast<Constant>(C.getCallee());
+}
+
+// Initialize DataFlowSanitizer runtime functions and declare them in the module
+void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) {
+  {
+    AttributeList AL;
+    AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
+                         Attribute::NoUnwind);
+    AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
+                         Attribute::ReadNone);
+    AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex,
+                         Attribute::ZExt);
+    AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
+    AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
+    DFSanUnionFn =
+        Mod->getOrInsertFunction("__dfsan_union", DFSanUnionFnTy, AL);
+  }
+  {
+    AttributeList AL;
+    AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
+                         Attribute::NoUnwind);
+    AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
+                         Attribute::ReadNone);
+    AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex,
+                         Attribute::ZExt);
+    AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
+    AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
+    DFSanCheckedUnionFn =
+        Mod->getOrInsertFunction("dfsan_union", DFSanUnionFnTy, AL);
+  }
+  {
+    AttributeList AL;
+    AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
+                         Attribute::NoUnwind);
+    AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
+                         Attribute::ReadOnly);
+    AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex,
+                         Attribute::ZExt);
+    DFSanUnionLoadFn =
+        Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy, AL);
+  }
   {
     AttributeList AL;
     AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
@@ -1024,285 +1024,285 @@ void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) {
     DFSanUnionLoadFast16LabelsFn = Mod->getOrInsertFunction(
         "__dfsan_union_load_fast16labels", DFSanUnionLoadFnTy, AL);
   }
-  DFSanUnimplementedFn = 
-      Mod->getOrInsertFunction("__dfsan_unimplemented", DFSanUnimplementedFnTy); 
-  { 
-    AttributeList AL; 
-    AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt); 
-    DFSanSetLabelFn = 
-        Mod->getOrInsertFunction("__dfsan_set_label", DFSanSetLabelFnTy, AL); 
-  } 
-  DFSanNonzeroLabelFn = 
-      Mod->getOrInsertFunction("__dfsan_nonzero_label", DFSanNonzeroLabelFnTy); 
-  DFSanVarargWrapperFn = Mod->getOrInsertFunction("__dfsan_vararg_wrapper", 
-                                                  DFSanVarargWrapperFnTy); 
-} 
- 
-// Initializes event callback functions and declare them in the module 
-void DataFlowSanitizer::initializeCallbackFunctions(Module &M) { 
-  DFSanLoadCallbackFn = Mod->getOrInsertFunction("__dfsan_load_callback", 
+  DFSanUnimplementedFn =
+      Mod->getOrInsertFunction("__dfsan_unimplemented", DFSanUnimplementedFnTy);
+  {
+    AttributeList AL;
+    AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
+    DFSanSetLabelFn =
+        Mod->getOrInsertFunction("__dfsan_set_label", DFSanSetLabelFnTy, AL);
+  }
+  DFSanNonzeroLabelFn =
+      Mod->getOrInsertFunction("__dfsan_nonzero_label", DFSanNonzeroLabelFnTy);
+  DFSanVarargWrapperFn = Mod->getOrInsertFunction("__dfsan_vararg_wrapper",
+                                                  DFSanVarargWrapperFnTy);
+}
+
+// Initializes event callback functions and declare them in the module
+void DataFlowSanitizer::initializeCallbackFunctions(Module &M) {
+  DFSanLoadCallbackFn = Mod->getOrInsertFunction("__dfsan_load_callback",
                                                  DFSanLoadStoreCallbackFnTy);
   DFSanStoreCallbackFn = Mod->getOrInsertFunction("__dfsan_store_callback",
                                                   DFSanLoadStoreCallbackFnTy);
-  DFSanMemTransferCallbackFn = Mod->getOrInsertFunction( 
-      "__dfsan_mem_transfer_callback", DFSanMemTransferCallbackFnTy); 
+  DFSanMemTransferCallbackFn = Mod->getOrInsertFunction(
+      "__dfsan_mem_transfer_callback", DFSanMemTransferCallbackFnTy);
   DFSanCmpCallbackFn =
       Mod->getOrInsertFunction("__dfsan_cmp_callback", DFSanCmpCallbackFnTy);
-} 
- 
+}
+
 bool DataFlowSanitizer::runImpl(Module &M) {
   init(M);
 
-  if (ABIList.isIn(M, "skip")) 
-    return false; 
- 
-  const unsigned InitialGlobalSize = M.global_size(); 
-  const unsigned InitialModuleSize = M.size(); 
- 
-  bool Changed = false; 
- 
+  if (ABIList.isIn(M, "skip"))
+    return false;
+
+  const unsigned InitialGlobalSize = M.global_size();
+  const unsigned InitialModuleSize = M.size();
+
+  bool Changed = false;
+
   Type *ArgTLSTy = ArrayType::get(Type::getInt64Ty(*Ctx), kArgTLSSize / 8);
   ArgTLS = Mod->getOrInsertGlobal("__dfsan_arg_tls", ArgTLSTy);
   if (GlobalVariable *G = dyn_cast<GlobalVariable>(ArgTLS)) {
     Changed |= G->getThreadLocalMode() != GlobalVariable::InitialExecTLSModel;
     G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel);
-  } 
+  }
   Type *RetvalTLSTy =
       ArrayType::get(Type::getInt64Ty(*Ctx), kRetvalTLSSize / 8);
   RetvalTLS = Mod->getOrInsertGlobal("__dfsan_retval_tls", RetvalTLSTy);
   if (GlobalVariable *G = dyn_cast<GlobalVariable>(RetvalTLS)) {
     Changed |= G->getThreadLocalMode() != GlobalVariable::InitialExecTLSModel;
     G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel);
-  } 
- 
-  ExternalShadowMask = 
-      Mod->getOrInsertGlobal(kDFSanExternShadowPtrMask, IntptrTy); 
- 
-  initializeCallbackFunctions(M); 
-  initializeRuntimeFunctions(M); 
- 
-  std::vector<Function *> FnsToInstrument; 
-  SmallPtrSet<Function *, 2> FnsWithNativeABI; 
-  for (Function &i : M) { 
-    if (!i.isIntrinsic() && 
-        &i != DFSanUnionFn.getCallee()->stripPointerCasts() && 
-        &i != DFSanCheckedUnionFn.getCallee()->stripPointerCasts() && 
-        &i != DFSanUnionLoadFn.getCallee()->stripPointerCasts() && 
+  }
+
+  ExternalShadowMask =
+      Mod->getOrInsertGlobal(kDFSanExternShadowPtrMask, IntptrTy);
+
+  initializeCallbackFunctions(M);
+  initializeRuntimeFunctions(M);
+
+  std::vector<Function *> FnsToInstrument;
+  SmallPtrSet<Function *, 2> FnsWithNativeABI;
+  for (Function &i : M) {
+    if (!i.isIntrinsic() &&
+        &i != DFSanUnionFn.getCallee()->stripPointerCasts() &&
+        &i != DFSanCheckedUnionFn.getCallee()->stripPointerCasts() &&
+        &i != DFSanUnionLoadFn.getCallee()->stripPointerCasts() &&
         &i != DFSanUnionLoadFast16LabelsFn.getCallee()->stripPointerCasts() &&
-        &i != DFSanUnimplementedFn.getCallee()->stripPointerCasts() && 
-        &i != DFSanSetLabelFn.getCallee()->stripPointerCasts() && 
-        &i != DFSanNonzeroLabelFn.getCallee()->stripPointerCasts() && 
-        &i != DFSanVarargWrapperFn.getCallee()->stripPointerCasts() && 
-        &i != DFSanLoadCallbackFn.getCallee()->stripPointerCasts() && 
-        &i != DFSanStoreCallbackFn.getCallee()->stripPointerCasts() && 
-        &i != DFSanMemTransferCallbackFn.getCallee()->stripPointerCasts() && 
-        &i != DFSanCmpCallbackFn.getCallee()->stripPointerCasts()) 
-      FnsToInstrument.push_back(&i); 
-  } 
- 
-  // Give function aliases prefixes when necessary, and build wrappers where the 
-  // instrumentedness is inconsistent. 
-  for (Module::alias_iterator i = M.alias_begin(), e = M.alias_end(); i != e;) { 
-    GlobalAlias *GA = &*i; 
-    ++i; 
-    // Don't stop on weak.  We assume people aren't playing games with the 
-    // instrumentedness of overridden weak aliases. 
-    if (auto F = dyn_cast<Function>(GA->getBaseObject())) { 
-      bool GAInst = isInstrumented(GA), FInst = isInstrumented(F); 
-      if (GAInst && FInst) { 
-        addGlobalNamePrefix(GA); 
-      } else if (GAInst != FInst) { 
-        // Non-instrumented alias of an instrumented function, or vice versa. 
-        // Replace the alias with a native-ABI wrapper of the aliasee.  The pass 
-        // below will take care of instrumenting it. 
-        Function *NewF = 
-            buildWrapperFunction(F, "", GA->getLinkage(), F->getFunctionType()); 
-        GA->replaceAllUsesWith(ConstantExpr::getBitCast(NewF, GA->getType())); 
-        NewF->takeName(GA); 
-        GA->eraseFromParent(); 
-        FnsToInstrument.push_back(NewF); 
-      } 
-    } 
-  } 
- 
-  ReadOnlyNoneAttrs.addAttribute(Attribute::ReadOnly) 
-      .addAttribute(Attribute::ReadNone); 
- 
-  // First, change the ABI of every function in the module.  ABI-listed 
-  // functions keep their original ABI and get a wrapper function. 
-  for (std::vector<Function *>::iterator i = FnsToInstrument.begin(), 
-                                         e = FnsToInstrument.end(); 
-       i != e; ++i) { 
-    Function &F = **i; 
-    FunctionType *FT = F.getFunctionType(); 
- 
-    bool IsZeroArgsVoidRet = (FT->getNumParams() == 0 && !FT->isVarArg() && 
-                              FT->getReturnType()->isVoidTy()); 
- 
-    if (isInstrumented(&F)) { 
-      // Instrumented functions get a 'dfs$' prefix.  This allows us to more 
-      // easily identify cases of mismatching ABIs. 
-      if (getInstrumentedABI() == IA_Args && !IsZeroArgsVoidRet) { 
-        FunctionType *NewFT = getArgsFunctionType(FT); 
-        Function *NewF = Function::Create(NewFT, F.getLinkage(), 
-                                          F.getAddressSpace(), "", &M); 
-        NewF->copyAttributesFrom(&F); 
-        NewF->removeAttributes( 
-            AttributeList::ReturnIndex, 
-            AttributeFuncs::typeIncompatible(NewFT->getReturnType())); 
-        for (Function::arg_iterator FArg = F.arg_begin(), 
-                                    NewFArg = NewF->arg_begin(), 
-                                    FArgEnd = F.arg_end(); 
-             FArg != FArgEnd; ++FArg, ++NewFArg) { 
-          FArg->replaceAllUsesWith(&*NewFArg); 
-        } 
-        NewF->getBasicBlockList().splice(NewF->begin(), F.getBasicBlockList()); 
- 
-        for (Function::user_iterator UI = F.user_begin(), UE = F.user_end(); 
-             UI != UE;) { 
-          BlockAddress *BA = dyn_cast<BlockAddress>(*UI); 
-          ++UI; 
-          if (BA) { 
-            BA->replaceAllUsesWith( 
-                BlockAddress::get(NewF, BA->getBasicBlock())); 
-            delete BA; 
-          } 
-        } 
-        F.replaceAllUsesWith( 
-            ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT))); 
-        NewF->takeName(&F); 
-        F.eraseFromParent(); 
-        *i = NewF; 
-        addGlobalNamePrefix(NewF); 
-      } else { 
-        addGlobalNamePrefix(&F); 
-      } 
-    } else if (!IsZeroArgsVoidRet || getWrapperKind(&F) == WK_Custom) { 
-      // Build a wrapper function for F.  The wrapper simply calls F, and is 
-      // added to FnsToInstrument so that any instrumentation according to its 
-      // WrapperKind is done in the second pass below. 
-      FunctionType *NewFT = getInstrumentedABI() == IA_Args 
-                                ? getArgsFunctionType(FT) 
-                                : FT; 
- 
-      // If the function being wrapped has local linkage, then preserve the 
-      // function's linkage in the wrapper function. 
-      GlobalValue::LinkageTypes wrapperLinkage = 
-          F.hasLocalLinkage() 
-              ? F.getLinkage() 
-              : GlobalValue::LinkOnceODRLinkage; 
- 
-      Function *NewF = buildWrapperFunction( 
-          &F, std::string("dfsw$") + std::string(F.getName()), 
-          wrapperLinkage, NewFT); 
-      if (getInstrumentedABI() == IA_TLS) 
-        NewF->removeAttributes(AttributeList::FunctionIndex, ReadOnlyNoneAttrs); 
- 
-      Value *WrappedFnCst = 
-          ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT)); 
-      F.replaceAllUsesWith(WrappedFnCst); 
- 
-      UnwrappedFnMap[WrappedFnCst] = &F; 
-      *i = NewF; 
- 
-      if (!F.isDeclaration()) { 
-        // This function is probably defining an interposition of an 
-        // uninstrumented function and hence needs to keep the original ABI. 
-        // But any functions it may call need to use the instrumented ABI, so 
-        // we instrument it in a mode which preserves the original ABI. 
-        FnsWithNativeABI.insert(&F); 
- 
-        // This code needs to rebuild the iterators, as they may be invalidated 
-        // by the push_back, taking care that the new range does not include 
-        // any functions added by this code. 
-        size_t N = i - FnsToInstrument.begin(), 
-               Count = e - FnsToInstrument.begin(); 
-        FnsToInstrument.push_back(&F); 
-        i = FnsToInstrument.begin() + N; 
-        e = FnsToInstrument.begin() + Count; 
-      } 
-               // Hopefully, nobody will try to indirectly call a vararg 
-               // function... yet. 
-    } else if (FT->isVarArg()) { 
-      UnwrappedFnMap[&F] = &F; 
-      *i = nullptr; 
-    } 
-  } 
- 
-  for (Function *i : FnsToInstrument) { 
-    if (!i || i->isDeclaration()) 
-      continue; 
- 
-    removeUnreachableBlocks(*i); 
- 
-    DFSanFunction DFSF(*this, i, FnsWithNativeABI.count(i)); 
- 
-    // DFSanVisitor may create new basic blocks, which confuses df_iterator. 
-    // Build a copy of the list before iterating over it. 
-    SmallVector<BasicBlock *, 4> BBList(depth_first(&i->getEntryBlock())); 
- 
-    for (BasicBlock *i : BBList) { 
-      Instruction *Inst = &i->front(); 
-      while (true) { 
-        // DFSanVisitor may split the current basic block, changing the current 
-        // instruction's next pointer and moving the next instruction to the 
-        // tail block from which we should continue. 
-        Instruction *Next = Inst->getNextNode(); 
-        // DFSanVisitor may delete Inst, so keep track of whether it was a 
-        // terminator. 
-        bool IsTerminator = Inst->isTerminator(); 
-        if (!DFSF.SkipInsts.count(Inst)) 
-          DFSanVisitor(DFSF).visit(Inst); 
-        if (IsTerminator) 
-          break; 
-        Inst = Next; 
-      } 
-    } 
- 
-    // We will not necessarily be able to compute the shadow for every phi node 
-    // until we have visited every block.  Therefore, the code that handles phi 
-    // nodes adds them to the PHIFixups list so that they can be properly 
-    // handled here. 
-    for (std::vector<std::pair<PHINode *, PHINode *>>::iterator 
-             i = DFSF.PHIFixups.begin(), 
-             e = DFSF.PHIFixups.end(); 
-         i != e; ++i) { 
-      for (unsigned val = 0, n = i->first->getNumIncomingValues(); val != n; 
-           ++val) { 
-        i->second->setIncomingValue( 
-            val, DFSF.getShadow(i->first->getIncomingValue(val))); 
-      } 
-    } 
- 
-    // -dfsan-debug-nonzero-labels will split the CFG in all kinds of crazy 
-    // places (i.e. instructions in basic blocks we haven't even begun visiting 
-    // yet).  To make our life easier, do this work in a pass after the main 
-    // instrumentation. 
-    if (ClDebugNonzeroLabels) { 
-      for (Value *V : DFSF.NonZeroChecks) { 
-        Instruction *Pos; 
-        if (Instruction *I = dyn_cast<Instruction>(V)) 
-          Pos = I->getNextNode(); 
-        else 
-          Pos = &DFSF.F->getEntryBlock().front(); 
-        while (isa<PHINode>(Pos) || isa<AllocaInst>(Pos)) 
-          Pos = Pos->getNextNode(); 
-        IRBuilder<> IRB(Pos); 
+        &i != DFSanUnimplementedFn.getCallee()->stripPointerCasts() &&
+        &i != DFSanSetLabelFn.getCallee()->stripPointerCasts() &&
+        &i != DFSanNonzeroLabelFn.getCallee()->stripPointerCasts() &&
+        &i != DFSanVarargWrapperFn.getCallee()->stripPointerCasts() &&
+        &i != DFSanLoadCallbackFn.getCallee()->stripPointerCasts() &&
+        &i != DFSanStoreCallbackFn.getCallee()->stripPointerCasts() &&
+        &i != DFSanMemTransferCallbackFn.getCallee()->stripPointerCasts() &&
+        &i != DFSanCmpCallbackFn.getCallee()->stripPointerCasts())
+      FnsToInstrument.push_back(&i);
+  }
+
+  // Give function aliases prefixes when necessary, and build wrappers where the
+  // instrumentedness is inconsistent.
+  for (Module::alias_iterator i = M.alias_begin(), e = M.alias_end(); i != e;) {
+    GlobalAlias *GA = &*i;
+    ++i;
+    // Don't stop on weak.  We assume people aren't playing games with the
+    // instrumentedness of overridden weak aliases.
+    if (auto F = dyn_cast<Function>(GA->getBaseObject())) {
+      bool GAInst = isInstrumented(GA), FInst = isInstrumented(F);
+      if (GAInst && FInst) {
+        addGlobalNamePrefix(GA);
+      } else if (GAInst != FInst) {
+        // Non-instrumented alias of an instrumented function, or vice versa.
+        // Replace the alias with a native-ABI wrapper of the aliasee.  The pass
+        // below will take care of instrumenting it.
+        Function *NewF =
+            buildWrapperFunction(F, "", GA->getLinkage(), F->getFunctionType());
+        GA->replaceAllUsesWith(ConstantExpr::getBitCast(NewF, GA->getType()));
+        NewF->takeName(GA);
+        GA->eraseFromParent();
+        FnsToInstrument.push_back(NewF);
+      }
+    }
+  }
+
+  ReadOnlyNoneAttrs.addAttribute(Attribute::ReadOnly)
+      .addAttribute(Attribute::ReadNone);
+
+  // First, change the ABI of every function in the module.  ABI-listed
+  // functions keep their original ABI and get a wrapper function.
+  for (std::vector<Function *>::iterator i = FnsToInstrument.begin(),
+                                         e = FnsToInstrument.end();
+       i != e; ++i) {
+    Function &F = **i;
+    FunctionType *FT = F.getFunctionType();
+
+    bool IsZeroArgsVoidRet = (FT->getNumParams() == 0 && !FT->isVarArg() &&
+                              FT->getReturnType()->isVoidTy());
+
+    if (isInstrumented(&F)) {
+      // Instrumented functions get a 'dfs$' prefix.  This allows us to more
+      // easily identify cases of mismatching ABIs.
+      if (getInstrumentedABI() == IA_Args && !IsZeroArgsVoidRet) {
+        FunctionType *NewFT = getArgsFunctionType(FT);
+        Function *NewF = Function::Create(NewFT, F.getLinkage(),
+                                          F.getAddressSpace(), "", &M);
+        NewF->copyAttributesFrom(&F);
+        NewF->removeAttributes(
+            AttributeList::ReturnIndex,
+            AttributeFuncs::typeIncompatible(NewFT->getReturnType()));
+        for (Function::arg_iterator FArg = F.arg_begin(),
+                                    NewFArg = NewF->arg_begin(),
+                                    FArgEnd = F.arg_end();
+             FArg != FArgEnd; ++FArg, ++NewFArg) {
+          FArg->replaceAllUsesWith(&*NewFArg);
+        }
+        NewF->getBasicBlockList().splice(NewF->begin(), F.getBasicBlockList());
+
+        for (Function::user_iterator UI = F.user_begin(), UE = F.user_end();
+             UI != UE;) {
+          BlockAddress *BA = dyn_cast<BlockAddress>(*UI);
+          ++UI;
+          if (BA) {
+            BA->replaceAllUsesWith(
+                BlockAddress::get(NewF, BA->getBasicBlock()));
+            delete BA;
+          }
+        }
+        F.replaceAllUsesWith(
+            ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT)));
+        NewF->takeName(&F);
+        F.eraseFromParent();
+        *i = NewF;
+        addGlobalNamePrefix(NewF);
+      } else {
+        addGlobalNamePrefix(&F);
+      }
+    } else if (!IsZeroArgsVoidRet || getWrapperKind(&F) == WK_Custom) {
+      // Build a wrapper function for F.  The wrapper simply calls F, and is
+      // added to FnsToInstrument so that any instrumentation according to its
+      // WrapperKind is done in the second pass below.
+      FunctionType *NewFT = getInstrumentedABI() == IA_Args
+                                ? getArgsFunctionType(FT)
+                                : FT;
+
+      // If the function being wrapped has local linkage, then preserve the
+      // function's linkage in the wrapper function.
+      GlobalValue::LinkageTypes wrapperLinkage =
+          F.hasLocalLinkage()
+              ? F.getLinkage()
+              : GlobalValue::LinkOnceODRLinkage;
+
+      Function *NewF = buildWrapperFunction(
+          &F, std::string("dfsw$") + std::string(F.getName()),
+          wrapperLinkage, NewFT);
+      if (getInstrumentedABI() == IA_TLS)
+        NewF->removeAttributes(AttributeList::FunctionIndex, ReadOnlyNoneAttrs);
+
+      Value *WrappedFnCst =
+          ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT));
+      F.replaceAllUsesWith(WrappedFnCst);
+
+      UnwrappedFnMap[WrappedFnCst] = &F;
+      *i = NewF;
+
+      if (!F.isDeclaration()) {
+        // This function is probably defining an interposition of an
+        // uninstrumented function and hence needs to keep the original ABI.
+        // But any functions it may call need to use the instrumented ABI, so
+        // we instrument it in a mode which preserves the original ABI.
+        FnsWithNativeABI.insert(&F);
+
+        // This code needs to rebuild the iterators, as they may be invalidated
+        // by the push_back, taking care that the new range does not include
+        // any functions added by this code.
+        size_t N = i - FnsToInstrument.begin(),
+               Count = e - FnsToInstrument.begin();
+        FnsToInstrument.push_back(&F);
+        i = FnsToInstrument.begin() + N;
+        e = FnsToInstrument.begin() + Count;
+      }
+               // Hopefully, nobody will try to indirectly call a vararg
+               // function... yet.
+    } else if (FT->isVarArg()) {
+      UnwrappedFnMap[&F] = &F;
+      *i = nullptr;
+    }
+  }
+
+  for (Function *i : FnsToInstrument) {
+    if (!i || i->isDeclaration())
+      continue;
+
+    removeUnreachableBlocks(*i);
+
+    DFSanFunction DFSF(*this, i, FnsWithNativeABI.count(i));
+
+    // DFSanVisitor may create new basic blocks, which confuses df_iterator.
+    // Build a copy of the list before iterating over it.
+    SmallVector<BasicBlock *, 4> BBList(depth_first(&i->getEntryBlock()));
+
+    for (BasicBlock *i : BBList) {
+      Instruction *Inst = &i->front();
+      while (true) {
+        // DFSanVisitor may split the current basic block, changing the current
+        // instruction's next pointer and moving the next instruction to the
+        // tail block from which we should continue.
+        Instruction *Next = Inst->getNextNode();
+        // DFSanVisitor may delete Inst, so keep track of whether it was a
+        // terminator.
+        bool IsTerminator = Inst->isTerminator();
+        if (!DFSF.SkipInsts.count(Inst))
+          DFSanVisitor(DFSF).visit(Inst);
+        if (IsTerminator)
+          break;
+        Inst = Next;
+      }
+    }
+
+    // We will not necessarily be able to compute the shadow for every phi node
+    // until we have visited every block.  Therefore, the code that handles phi
+    // nodes adds them to the PHIFixups list so that they can be properly
+    // handled here.
+    for (std::vector<std::pair<PHINode *, PHINode *>>::iterator
+             i = DFSF.PHIFixups.begin(),
+             e = DFSF.PHIFixups.end();
+         i != e; ++i) {
+      for (unsigned val = 0, n = i->first->getNumIncomingValues(); val != n;
+           ++val) {
+        i->second->setIncomingValue(
+            val, DFSF.getShadow(i->first->getIncomingValue(val)));
+      }
+    }
+
+    // -dfsan-debug-nonzero-labels will split the CFG in all kinds of crazy
+    // places (i.e. instructions in basic blocks we haven't even begun visiting
+    // yet).  To make our life easier, do this work in a pass after the main
+    // instrumentation.
+    if (ClDebugNonzeroLabels) {
+      for (Value *V : DFSF.NonZeroChecks) {
+        Instruction *Pos;
+        if (Instruction *I = dyn_cast<Instruction>(V))
+          Pos = I->getNextNode();
+        else
+          Pos = &DFSF.F->getEntryBlock().front();
+        while (isa<PHINode>(Pos) || isa<AllocaInst>(Pos))
+          Pos = Pos->getNextNode();
+        IRBuilder<> IRB(Pos);
         Value *PrimitiveShadow = DFSF.collapseToPrimitiveShadow(V, Pos);
         Value *Ne =
             IRB.CreateICmpNE(PrimitiveShadow, DFSF.DFS.ZeroPrimitiveShadow);
-        BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen( 
-            Ne, Pos, /*Unreachable=*/false, ColdCallWeights)); 
-        IRBuilder<> ThenIRB(BI); 
-        ThenIRB.CreateCall(DFSF.DFS.DFSanNonzeroLabelFn, {}); 
-      } 
-    } 
-  } 
- 
-  return Changed || !FnsToInstrument.empty() || 
-         M.global_size() != InitialGlobalSize || M.size() != InitialModuleSize; 
-} 
- 
+        BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
+            Ne, Pos, /*Unreachable=*/false, ColdCallWeights));
+        IRBuilder<> ThenIRB(BI);
+        ThenIRB.CreateCall(DFSF.DFS.DFSanNonzeroLabelFn, {});
+      }
+    }
+  }
+
+  return Changed || !FnsToInstrument.empty() ||
+         M.global_size() != InitialGlobalSize || M.size() != InitialModuleSize;
+}
+
 Value *DFSanFunction::getArgTLS(Type *T, unsigned ArgOffset, IRBuilder<> &IRB) {
   Value *Base = IRB.CreatePointerCast(DFS.ArgTLS, DFS.IntptrTy);
   if (ArgOffset)
@@ -1310,12 +1310,12 @@ Value *DFSanFunction::getArgTLS(Type *T, unsigned ArgOffset, IRBuilder<> &IRB) {
   return IRB.CreateIntToPtr(Base, PointerType::get(DFS.getShadowTy(T), 0),
                             "_dfsarg");
 }
- 
+
 Value *DFSanFunction::getRetvalTLS(Type *T, IRBuilder<> &IRB) {
   return IRB.CreatePointerCast(
       DFS.RetvalTLS, PointerType::get(DFS.getShadowTy(T), 0), "_dfsret");
-} 
- 
+}
+
 Value *DFSanFunction::getShadowForTLSArgument(Argument *A) {
   unsigned ArgOffset = 0;
   const DataLayout &DL = F->getParent()->getDataLayout();
@@ -1325,7 +1325,7 @@ Value *DFSanFunction::getShadowForTLSArgument(Argument *A) {
         break;
       continue;
     }
- 
+
     unsigned Size = DL.getTypeAllocSize(DFS.getShadowTy(&FArg));
     if (A != &FArg) {
       ArgOffset += alignTo(Size, kShadowTLSAlignment);
@@ -1333,7 +1333,7 @@ Value *DFSanFunction::getShadowForTLSArgument(Argument *A) {
         break; // ArgTLS overflows, uses a zero shadow.
       continue;
     }
- 
+
     if (ArgOffset + Size > kArgTLSSize)
       break; // ArgTLS overflows, uses a zero shadow.
 
@@ -1345,224 +1345,224 @@ Value *DFSanFunction::getShadowForTLSArgument(Argument *A) {
   }
 
   return DFS.getZeroShadow(A);
-} 
- 
-Value *DFSanFunction::getShadow(Value *V) { 
-  if (!isa<Argument>(V) && !isa<Instruction>(V)) 
+}
+
+Value *DFSanFunction::getShadow(Value *V) {
+  if (!isa<Argument>(V) && !isa<Instruction>(V))
     return DFS.getZeroShadow(V);
-  Value *&Shadow = ValShadowMap[V]; 
-  if (!Shadow) { 
-    if (Argument *A = dyn_cast<Argument>(V)) { 
-      if (IsNativeABI) 
+  Value *&Shadow = ValShadowMap[V];
+  if (!Shadow) {
+    if (Argument *A = dyn_cast<Argument>(V)) {
+      if (IsNativeABI)
         return DFS.getZeroShadow(V);
-      switch (IA) { 
-      case DataFlowSanitizer::IA_TLS: { 
+      switch (IA) {
+      case DataFlowSanitizer::IA_TLS: {
         Shadow = getShadowForTLSArgument(A);
-        break; 
-      } 
-      case DataFlowSanitizer::IA_Args: { 
-        unsigned ArgIdx = A->getArgNo() + F->arg_size() / 2; 
-        Function::arg_iterator i = F->arg_begin(); 
-        while (ArgIdx--) 
-          ++i; 
-        Shadow = &*i; 
+        break;
+      }
+      case DataFlowSanitizer::IA_Args: {
+        unsigned ArgIdx = A->getArgNo() + F->arg_size() / 2;
+        Function::arg_iterator i = F->arg_begin();
+        while (ArgIdx--)
+          ++i;
+        Shadow = &*i;
         assert(Shadow->getType() == DFS.PrimitiveShadowTy);
-        break; 
-      } 
-      } 
-      NonZeroChecks.push_back(Shadow); 
-    } else { 
+        break;
+      }
+      }
+      NonZeroChecks.push_back(Shadow);
+    } else {
       Shadow = DFS.getZeroShadow(V);
-    } 
-  } 
-  return Shadow; 
-} 
- 
-void DFSanFunction::setShadow(Instruction *I, Value *Shadow) { 
-  assert(!ValShadowMap.count(I)); 
+    }
+  }
+  return Shadow;
+}
+
+void DFSanFunction::setShadow(Instruction *I, Value *Shadow) {
+  assert(!ValShadowMap.count(I));
   assert(DFS.shouldTrackFieldsAndIndices() ||
          Shadow->getType() == DFS.PrimitiveShadowTy);
-  ValShadowMap[I] = Shadow; 
-} 
- 
-Value *DataFlowSanitizer::getShadowAddress(Value *Addr, Instruction *Pos) { 
-  assert(Addr != RetvalTLS && "Reinstrumenting?"); 
-  IRBuilder<> IRB(Pos); 
-  Value *ShadowPtrMaskValue; 
-  if (DFSanRuntimeShadowMask) 
-    ShadowPtrMaskValue = IRB.CreateLoad(IntptrTy, ExternalShadowMask); 
-  else 
-    ShadowPtrMaskValue = ShadowPtrMask; 
-  return IRB.CreateIntToPtr( 
-      IRB.CreateMul( 
-          IRB.CreateAnd(IRB.CreatePtrToInt(Addr, IntptrTy), 
-                        IRB.CreatePtrToInt(ShadowPtrMaskValue, IntptrTy)), 
-          ShadowPtrMul), 
+  ValShadowMap[I] = Shadow;
+}
+
+Value *DataFlowSanitizer::getShadowAddress(Value *Addr, Instruction *Pos) {
+  assert(Addr != RetvalTLS && "Reinstrumenting?");
+  IRBuilder<> IRB(Pos);
+  Value *ShadowPtrMaskValue;
+  if (DFSanRuntimeShadowMask)
+    ShadowPtrMaskValue = IRB.CreateLoad(IntptrTy, ExternalShadowMask);
+  else
+    ShadowPtrMaskValue = ShadowPtrMask;
+  return IRB.CreateIntToPtr(
+      IRB.CreateMul(
+          IRB.CreateAnd(IRB.CreatePtrToInt(Addr, IntptrTy),
+                        IRB.CreatePtrToInt(ShadowPtrMaskValue, IntptrTy)),
+          ShadowPtrMul),
       PrimitiveShadowPtrTy);
-} 
- 
+}
+
 Value *DFSanFunction::combineShadowsThenConvert(Type *T, Value *V1, Value *V2,
                                                 Instruction *Pos) {
   Value *PrimitiveValue = combineShadows(V1, V2, Pos);
   return expandFromPrimitiveShadow(T, PrimitiveValue, Pos);
 }
 
-// Generates IR to compute the union of the two given shadows, inserting it 
+// Generates IR to compute the union of the two given shadows, inserting it
 // before Pos. The combined value is with primitive type.
-Value *DFSanFunction::combineShadows(Value *V1, Value *V2, Instruction *Pos) { 
+Value *DFSanFunction::combineShadows(Value *V1, Value *V2, Instruction *Pos) {
   if (DFS.isZeroShadow(V1))
     return collapseToPrimitiveShadow(V2, Pos);
   if (DFS.isZeroShadow(V2))
     return collapseToPrimitiveShadow(V1, Pos);
-  if (V1 == V2) 
+  if (V1 == V2)
     return collapseToPrimitiveShadow(V1, Pos);
- 
-  auto V1Elems = ShadowElements.find(V1); 
-  auto V2Elems = ShadowElements.find(V2); 
-  if (V1Elems != ShadowElements.end() && V2Elems != ShadowElements.end()) { 
-    if (std::includes(V1Elems->second.begin(), V1Elems->second.end(), 
-                      V2Elems->second.begin(), V2Elems->second.end())) { 
+
+  auto V1Elems = ShadowElements.find(V1);
+  auto V2Elems = ShadowElements.find(V2);
+  if (V1Elems != ShadowElements.end() && V2Elems != ShadowElements.end()) {
+    if (std::includes(V1Elems->second.begin(), V1Elems->second.end(),
+                      V2Elems->second.begin(), V2Elems->second.end())) {
       return collapseToPrimitiveShadow(V1, Pos);
-    } else if (std::includes(V2Elems->second.begin(), V2Elems->second.end(), 
-                             V1Elems->second.begin(), V1Elems->second.end())) { 
+    } else if (std::includes(V2Elems->second.begin(), V2Elems->second.end(),
+                             V1Elems->second.begin(), V1Elems->second.end())) {
       return collapseToPrimitiveShadow(V2, Pos);
-    } 
-  } else if (V1Elems != ShadowElements.end()) { 
-    if (V1Elems->second.count(V2)) 
+    }
+  } else if (V1Elems != ShadowElements.end()) {
+    if (V1Elems->second.count(V2))
       return collapseToPrimitiveShadow(V1, Pos);
-  } else if (V2Elems != ShadowElements.end()) { 
-    if (V2Elems->second.count(V1)) 
+  } else if (V2Elems != ShadowElements.end()) {
+    if (V2Elems->second.count(V1))
       return collapseToPrimitiveShadow(V2, Pos);
-  } 
- 
-  auto Key = std::make_pair(V1, V2); 
-  if (V1 > V2) 
-    std::swap(Key.first, Key.second); 
+  }
+
+  auto Key = std::make_pair(V1, V2);
+  if (V1 > V2)
+    std::swap(Key.first, Key.second);
   CachedShadow &CCS = CachedShadows[Key];
-  if (CCS.Block && DT.dominates(CCS.Block, Pos->getParent())) 
-    return CCS.Shadow; 
- 
+  if (CCS.Block && DT.dominates(CCS.Block, Pos->getParent()))
+    return CCS.Shadow;
+
   // Converts inputs shadows to shadows with primitive types.
   Value *PV1 = collapseToPrimitiveShadow(V1, Pos);
   Value *PV2 = collapseToPrimitiveShadow(V2, Pos);
 
-  IRBuilder<> IRB(Pos); 
+  IRBuilder<> IRB(Pos);
   if (ClFast16Labels) {
     CCS.Block = Pos->getParent();
     CCS.Shadow = IRB.CreateOr(PV1, PV2);
   } else if (AvoidNewBlocks) {
     CallInst *Call = IRB.CreateCall(DFS.DFSanCheckedUnionFn, {PV1, PV2});
-    Call->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt); 
-    Call->addParamAttr(0, Attribute::ZExt); 
-    Call->addParamAttr(1, Attribute::ZExt); 
- 
-    CCS.Block = Pos->getParent(); 
-    CCS.Shadow = Call; 
-  } else { 
-    BasicBlock *Head = Pos->getParent(); 
+    Call->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
+    Call->addParamAttr(0, Attribute::ZExt);
+    Call->addParamAttr(1, Attribute::ZExt);
+
+    CCS.Block = Pos->getParent();
+    CCS.Shadow = Call;
+  } else {
+    BasicBlock *Head = Pos->getParent();
     Value *Ne = IRB.CreateICmpNE(PV1, PV2);
-    BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen( 
-        Ne, Pos, /*Unreachable=*/false, DFS.ColdCallWeights, &DT)); 
-    IRBuilder<> ThenIRB(BI); 
+    BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
+        Ne, Pos, /*Unreachable=*/false, DFS.ColdCallWeights, &DT));
+    IRBuilder<> ThenIRB(BI);
     CallInst *Call = ThenIRB.CreateCall(DFS.DFSanUnionFn, {PV1, PV2});
-    Call->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt); 
-    Call->addParamAttr(0, Attribute::ZExt); 
-    Call->addParamAttr(1, Attribute::ZExt); 
- 
-    BasicBlock *Tail = BI->getSuccessor(0); 
+    Call->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
+    Call->addParamAttr(0, Attribute::ZExt);
+    Call->addParamAttr(1, Attribute::ZExt);
+
+    BasicBlock *Tail = BI->getSuccessor(0);
     PHINode *Phi =
         PHINode::Create(DFS.PrimitiveShadowTy, 2, "", &Tail->front());
-    Phi->addIncoming(Call, Call->getParent()); 
+    Phi->addIncoming(Call, Call->getParent());
     Phi->addIncoming(PV1, Head);
- 
-    CCS.Block = Tail; 
-    CCS.Shadow = Phi; 
-  } 
- 
-  std::set<Value *> UnionElems; 
-  if (V1Elems != ShadowElements.end()) { 
-    UnionElems = V1Elems->second; 
-  } else { 
-    UnionElems.insert(V1); 
-  } 
-  if (V2Elems != ShadowElements.end()) { 
-    UnionElems.insert(V2Elems->second.begin(), V2Elems->second.end()); 
-  } else { 
-    UnionElems.insert(V2); 
-  } 
-  ShadowElements[CCS.Shadow] = std::move(UnionElems); 
- 
-  return CCS.Shadow; 
-} 
- 
-// A convenience function which folds the shadows of each of the operands 
-// of the provided instruction Inst, inserting the IR before Inst.  Returns 
-// the computed union Value. 
-Value *DFSanFunction::combineOperandShadows(Instruction *Inst) { 
-  if (Inst->getNumOperands() == 0) 
+
+    CCS.Block = Tail;
+    CCS.Shadow = Phi;
+  }
+
+  std::set<Value *> UnionElems;
+  if (V1Elems != ShadowElements.end()) {
+    UnionElems = V1Elems->second;
+  } else {
+    UnionElems.insert(V1);
+  }
+  if (V2Elems != ShadowElements.end()) {
+    UnionElems.insert(V2Elems->second.begin(), V2Elems->second.end());
+  } else {
+    UnionElems.insert(V2);
+  }
+  ShadowElements[CCS.Shadow] = std::move(UnionElems);
+
+  return CCS.Shadow;
+}
+
+// A convenience function which folds the shadows of each of the operands
+// of the provided instruction Inst, inserting the IR before Inst.  Returns
+// the computed union Value.
+Value *DFSanFunction::combineOperandShadows(Instruction *Inst) {
+  if (Inst->getNumOperands() == 0)
     return DFS.getZeroShadow(Inst);
- 
-  Value *Shadow = getShadow(Inst->getOperand(0)); 
-  for (unsigned i = 1, n = Inst->getNumOperands(); i != n; ++i) { 
-    Shadow = combineShadows(Shadow, getShadow(Inst->getOperand(i)), Inst); 
-  } 
+
+  Value *Shadow = getShadow(Inst->getOperand(0));
+  for (unsigned i = 1, n = Inst->getNumOperands(); i != n; ++i) {
+    Shadow = combineShadows(Shadow, getShadow(Inst->getOperand(i)), Inst);
+  }
   return expandFromPrimitiveShadow(Inst->getType(), Shadow, Inst);
-} 
- 
-Value *DFSanVisitor::visitOperandShadowInst(Instruction &I) { 
-  Value *CombinedShadow = DFSF.combineOperandShadows(&I); 
-  DFSF.setShadow(&I, CombinedShadow); 
-  return CombinedShadow; 
-} 
- 
-// Generates IR to load shadow corresponding to bytes [Addr, Addr+Size), where 
+}
+
+Value *DFSanVisitor::visitOperandShadowInst(Instruction &I) {
+  Value *CombinedShadow = DFSF.combineOperandShadows(&I);
+  DFSF.setShadow(&I, CombinedShadow);
+  return CombinedShadow;
+}
+
+// Generates IR to load shadow corresponding to bytes [Addr, Addr+Size), where
 // Addr has alignment Align, and take the union of each of those shadows. The
 // returned shadow always has primitive type.
-Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align, 
-                                 Instruction *Pos) { 
-  if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) { 
-    const auto i = AllocaShadowMap.find(AI); 
-    if (i != AllocaShadowMap.end()) { 
-      IRBuilder<> IRB(Pos); 
+Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
+                                 Instruction *Pos) {
+  if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
+    const auto i = AllocaShadowMap.find(AI);
+    if (i != AllocaShadowMap.end()) {
+      IRBuilder<> IRB(Pos);
       return IRB.CreateLoad(DFS.PrimitiveShadowTy, i->second);
-    } 
-  } 
- 
-  const llvm::Align ShadowAlign(Align * DFS.ShadowWidthBytes); 
-  SmallVector<const Value *, 2> Objs; 
+    }
+  }
+
+  const llvm::Align ShadowAlign(Align * DFS.ShadowWidthBytes);
+  SmallVector<const Value *, 2> Objs;
   getUnderlyingObjects(Addr, Objs);
-  bool AllConstants = true; 
-  for (const Value *Obj : Objs) { 
-    if (isa<Function>(Obj) || isa<BlockAddress>(Obj)) 
-      continue; 
-    if (isa<GlobalVariable>(Obj) && cast<GlobalVariable>(Obj)->isConstant()) 
-      continue; 
- 
-    AllConstants = false; 
-    break; 
-  } 
-  if (AllConstants) 
+  bool AllConstants = true;
+  for (const Value *Obj : Objs) {
+    if (isa<Function>(Obj) || isa<BlockAddress>(Obj))
+      continue;
+    if (isa<GlobalVariable>(Obj) && cast<GlobalVariable>(Obj)->isConstant())
+      continue;
+
+    AllConstants = false;
+    break;
+  }
+  if (AllConstants)
     return DFS.ZeroPrimitiveShadow;
- 
-  Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos); 
-  switch (Size) { 
-  case 0: 
+
+  Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos);
+  switch (Size) {
+  case 0:
     return DFS.ZeroPrimitiveShadow;
-  case 1: { 
+  case 1: {
     LoadInst *LI = new LoadInst(DFS.PrimitiveShadowTy, ShadowAddr, "", Pos);
-    LI->setAlignment(ShadowAlign); 
-    return LI; 
-  } 
-  case 2: { 
-    IRBuilder<> IRB(Pos); 
+    LI->setAlignment(ShadowAlign);
+    return LI;
+  }
+  case 2: {
+    IRBuilder<> IRB(Pos);
     Value *ShadowAddr1 = IRB.CreateGEP(DFS.PrimitiveShadowTy, ShadowAddr,
-                                       ConstantInt::get(DFS.IntptrTy, 1)); 
-    return combineShadows( 
+                                       ConstantInt::get(DFS.IntptrTy, 1));
+    return combineShadows(
         IRB.CreateAlignedLoad(DFS.PrimitiveShadowTy, ShadowAddr, ShadowAlign),
         IRB.CreateAlignedLoad(DFS.PrimitiveShadowTy, ShadowAddr1, ShadowAlign),
         Pos);
-  } 
-  } 
+  }
+  }
 
   if (ClFast16Labels && Size % (64 / DFS.ShadowWidthBits) == 0) {
     // First OR all the WideShadows, then OR individual shadows within the
@@ -1587,226 +1587,226 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
     }
     return IRB.CreateTrunc(CombinedWideShadow, DFS.PrimitiveShadowTy);
   }
-  if (!AvoidNewBlocks && Size % (64 / DFS.ShadowWidthBits) == 0) { 
-    // Fast path for the common case where each byte has identical shadow: load 
-    // shadow 64 bits at a time, fall out to a __dfsan_union_load call if any 
-    // shadow is non-equal. 
-    BasicBlock *FallbackBB = BasicBlock::Create(*DFS.Ctx, "", F); 
-    IRBuilder<> FallbackIRB(FallbackBB); 
-    CallInst *FallbackCall = FallbackIRB.CreateCall( 
-        DFS.DFSanUnionLoadFn, 
-        {ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)}); 
-    FallbackCall->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt); 
- 
-    // Compare each of the shadows stored in the loaded 64 bits to each other, 
-    // by computing (WideShadow rotl ShadowWidthBits) == WideShadow. 
-    IRBuilder<> IRB(Pos); 
-    Value *WideAddr = 
-        IRB.CreateBitCast(ShadowAddr, Type::getInt64PtrTy(*DFS.Ctx)); 
-    Value *WideShadow = 
-        IRB.CreateAlignedLoad(IRB.getInt64Ty(), WideAddr, ShadowAlign); 
+  if (!AvoidNewBlocks && Size % (64 / DFS.ShadowWidthBits) == 0) {
+    // Fast path for the common case where each byte has identical shadow: load
+    // shadow 64 bits at a time, fall out to a __dfsan_union_load call if any
+    // shadow is non-equal.
+    BasicBlock *FallbackBB = BasicBlock::Create(*DFS.Ctx, "", F);
+    IRBuilder<> FallbackIRB(FallbackBB);
+    CallInst *FallbackCall = FallbackIRB.CreateCall(
+        DFS.DFSanUnionLoadFn,
+        {ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)});
+    FallbackCall->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
+
+    // Compare each of the shadows stored in the loaded 64 bits to each other,
+    // by computing (WideShadow rotl ShadowWidthBits) == WideShadow.
+    IRBuilder<> IRB(Pos);
+    Value *WideAddr =
+        IRB.CreateBitCast(ShadowAddr, Type::getInt64PtrTy(*DFS.Ctx));
+    Value *WideShadow =
+        IRB.CreateAlignedLoad(IRB.getInt64Ty(), WideAddr, ShadowAlign);
     Value *TruncShadow = IRB.CreateTrunc(WideShadow, DFS.PrimitiveShadowTy);
-    Value *ShlShadow = IRB.CreateShl(WideShadow, DFS.ShadowWidthBits); 
-    Value *ShrShadow = IRB.CreateLShr(WideShadow, 64 - DFS.ShadowWidthBits); 
-    Value *RotShadow = IRB.CreateOr(ShlShadow, ShrShadow); 
-    Value *ShadowsEq = IRB.CreateICmpEQ(WideShadow, RotShadow); 
- 
-    BasicBlock *Head = Pos->getParent(); 
-    BasicBlock *Tail = Head->splitBasicBlock(Pos->getIterator()); 
- 
-    if (DomTreeNode *OldNode = DT.getNode(Head)) { 
-      std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end()); 
- 
-      DomTreeNode *NewNode = DT.addNewBlock(Tail, Head); 
-      for (auto Child : Children) 
-        DT.changeImmediateDominator(Child, NewNode); 
-    } 
- 
-    // In the following code LastBr will refer to the previous basic block's 
-    // conditional branch instruction, whose true successor is fixed up to point 
-    // to the next block during the loop below or to the tail after the final 
-    // iteration. 
-    BranchInst *LastBr = BranchInst::Create(FallbackBB, FallbackBB, ShadowsEq); 
-    ReplaceInstWithInst(Head->getTerminator(), LastBr); 
-    DT.addNewBlock(FallbackBB, Head); 
- 
-    for (uint64_t Ofs = 64 / DFS.ShadowWidthBits; Ofs != Size; 
-         Ofs += 64 / DFS.ShadowWidthBits) { 
-      BasicBlock *NextBB = BasicBlock::Create(*DFS.Ctx, "", F); 
-      DT.addNewBlock(NextBB, LastBr->getParent()); 
-      IRBuilder<> NextIRB(NextBB); 
-      WideAddr = NextIRB.CreateGEP(Type::getInt64Ty(*DFS.Ctx), WideAddr, 
-                                   ConstantInt::get(DFS.IntptrTy, 1)); 
-      Value *NextWideShadow = NextIRB.CreateAlignedLoad(NextIRB.getInt64Ty(), 
-                                                        WideAddr, ShadowAlign); 
-      ShadowsEq = NextIRB.CreateICmpEQ(WideShadow, NextWideShadow); 
-      LastBr->setSuccessor(0, NextBB); 
-      LastBr = NextIRB.CreateCondBr(ShadowsEq, FallbackBB, FallbackBB); 
-    } 
- 
-    LastBr->setSuccessor(0, Tail); 
-    FallbackIRB.CreateBr(Tail); 
+    Value *ShlShadow = IRB.CreateShl(WideShadow, DFS.ShadowWidthBits);
+    Value *ShrShadow = IRB.CreateLShr(WideShadow, 64 - DFS.ShadowWidthBits);
+    Value *RotShadow = IRB.CreateOr(ShlShadow, ShrShadow);
+    Value *ShadowsEq = IRB.CreateICmpEQ(WideShadow, RotShadow);
+
+    BasicBlock *Head = Pos->getParent();
+    BasicBlock *Tail = Head->splitBasicBlock(Pos->getIterator());
+
+    if (DomTreeNode *OldNode = DT.getNode(Head)) {
+      std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end());
+
+      DomTreeNode *NewNode = DT.addNewBlock(Tail, Head);
+      for (auto Child : Children)
+        DT.changeImmediateDominator(Child, NewNode);
+    }
+
+    // In the following code LastBr will refer to the previous basic block's
+    // conditional branch instruction, whose true successor is fixed up to point
+    // to the next block during the loop below or to the tail after the final
+    // iteration.
+    BranchInst *LastBr = BranchInst::Create(FallbackBB, FallbackBB, ShadowsEq);
+    ReplaceInstWithInst(Head->getTerminator(), LastBr);
+    DT.addNewBlock(FallbackBB, Head);
+
+    for (uint64_t Ofs = 64 / DFS.ShadowWidthBits; Ofs != Size;
+         Ofs += 64 / DFS.ShadowWidthBits) {
+      BasicBlock *NextBB = BasicBlock::Create(*DFS.Ctx, "", F);
+      DT.addNewBlock(NextBB, LastBr->getParent());
+      IRBuilder<> NextIRB(NextBB);
+      WideAddr = NextIRB.CreateGEP(Type::getInt64Ty(*DFS.Ctx), WideAddr,
+                                   ConstantInt::get(DFS.IntptrTy, 1));
+      Value *NextWideShadow = NextIRB.CreateAlignedLoad(NextIRB.getInt64Ty(),
+                                                        WideAddr, ShadowAlign);
+      ShadowsEq = NextIRB.CreateICmpEQ(WideShadow, NextWideShadow);
+      LastBr->setSuccessor(0, NextBB);
+      LastBr = NextIRB.CreateCondBr(ShadowsEq, FallbackBB, FallbackBB);
+    }
+
+    LastBr->setSuccessor(0, Tail);
+    FallbackIRB.CreateBr(Tail);
     PHINode *Shadow =
         PHINode::Create(DFS.PrimitiveShadowTy, 2, "", &Tail->front());
-    Shadow->addIncoming(FallbackCall, FallbackBB); 
-    Shadow->addIncoming(TruncShadow, LastBr->getParent()); 
-    return Shadow; 
-  } 
- 
-  IRBuilder<> IRB(Pos); 
+    Shadow->addIncoming(FallbackCall, FallbackBB);
+    Shadow->addIncoming(TruncShadow, LastBr->getParent());
+    return Shadow;
+  }
+
+  IRBuilder<> IRB(Pos);
   FunctionCallee &UnionLoadFn =
       ClFast16Labels ? DFS.DFSanUnionLoadFast16LabelsFn : DFS.DFSanUnionLoadFn;
-  CallInst *FallbackCall = IRB.CreateCall( 
+  CallInst *FallbackCall = IRB.CreateCall(
       UnionLoadFn, {ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)});
-  FallbackCall->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt); 
-  return FallbackCall; 
-} 
- 
-void DFSanVisitor::visitLoadInst(LoadInst &LI) { 
-  auto &DL = LI.getModule()->getDataLayout(); 
-  uint64_t Size = DL.getTypeStoreSize(LI.getType()); 
-  if (Size == 0) { 
+  FallbackCall->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
+  return FallbackCall;
+}
+
+void DFSanVisitor::visitLoadInst(LoadInst &LI) {
+  auto &DL = LI.getModule()->getDataLayout();
+  uint64_t Size = DL.getTypeStoreSize(LI.getType());
+  if (Size == 0) {
     DFSF.setShadow(&LI, DFSF.DFS.getZeroShadow(&LI));
-    return; 
-  } 
- 
-  Align Alignment = ClPreserveAlignment ? LI.getAlign() : Align(1); 
+    return;
+  }
+
+  Align Alignment = ClPreserveAlignment ? LI.getAlign() : Align(1);
   Value *PrimitiveShadow =
-      DFSF.loadShadow(LI.getPointerOperand(), Size, Alignment.value(), &LI); 
-  if (ClCombinePointerLabelsOnLoad) { 
-    Value *PtrShadow = DFSF.getShadow(LI.getPointerOperand()); 
+      DFSF.loadShadow(LI.getPointerOperand(), Size, Alignment.value(), &LI);
+  if (ClCombinePointerLabelsOnLoad) {
+    Value *PtrShadow = DFSF.getShadow(LI.getPointerOperand());
     PrimitiveShadow = DFSF.combineShadows(PrimitiveShadow, PtrShadow, &LI);
-  } 
+  }
   if (!DFSF.DFS.isZeroShadow(PrimitiveShadow))
     DFSF.NonZeroChecks.push_back(PrimitiveShadow);
- 
+
   Value *Shadow =
       DFSF.expandFromPrimitiveShadow(LI.getType(), PrimitiveShadow, &LI);
-  DFSF.setShadow(&LI, Shadow); 
-  if (ClEventCallbacks) { 
-    IRBuilder<> IRB(&LI); 
+  DFSF.setShadow(&LI, Shadow);
+  if (ClEventCallbacks) {
+    IRBuilder<> IRB(&LI);
     Value *Addr8 = IRB.CreateBitCast(LI.getPointerOperand(), DFSF.DFS.Int8Ptr);
     IRB.CreateCall(DFSF.DFS.DFSanLoadCallbackFn, {PrimitiveShadow, Addr8});
-  } 
-} 
- 
+  }
+}
+
 void DFSanFunction::storePrimitiveShadow(Value *Addr, uint64_t Size,
                                          Align Alignment,
                                          Value *PrimitiveShadow,
                                          Instruction *Pos) {
-  if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) { 
-    const auto i = AllocaShadowMap.find(AI); 
-    if (i != AllocaShadowMap.end()) { 
-      IRBuilder<> IRB(Pos); 
+  if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
+    const auto i = AllocaShadowMap.find(AI);
+    if (i != AllocaShadowMap.end()) {
+      IRBuilder<> IRB(Pos);
       IRB.CreateStore(PrimitiveShadow, i->second);
-      return; 
-    } 
-  } 
- 
-  const Align ShadowAlign(Alignment.value() * DFS.ShadowWidthBytes); 
-  IRBuilder<> IRB(Pos); 
-  Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos); 
+      return;
+    }
+  }
+
+  const Align ShadowAlign(Alignment.value() * DFS.ShadowWidthBytes);
+  IRBuilder<> IRB(Pos);
+  Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos);
   if (DFS.isZeroShadow(PrimitiveShadow)) {
-    IntegerType *ShadowTy = 
-        IntegerType::get(*DFS.Ctx, Size * DFS.ShadowWidthBits); 
-    Value *ExtZeroShadow = ConstantInt::get(ShadowTy, 0); 
-    Value *ExtShadowAddr = 
-        IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowTy)); 
-    IRB.CreateAlignedStore(ExtZeroShadow, ExtShadowAddr, ShadowAlign); 
-    return; 
-  } 
- 
-  const unsigned ShadowVecSize = 128 / DFS.ShadowWidthBits; 
-  uint64_t Offset = 0; 
-  if (Size >= ShadowVecSize) { 
+    IntegerType *ShadowTy =
+        IntegerType::get(*DFS.Ctx, Size * DFS.ShadowWidthBits);
+    Value *ExtZeroShadow = ConstantInt::get(ShadowTy, 0);
+    Value *ExtShadowAddr =
+        IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowTy));
+    IRB.CreateAlignedStore(ExtZeroShadow, ExtShadowAddr, ShadowAlign);
+    return;
+  }
+
+  const unsigned ShadowVecSize = 128 / DFS.ShadowWidthBits;
+  uint64_t Offset = 0;
+  if (Size >= ShadowVecSize) {
     auto *ShadowVecTy =
         FixedVectorType::get(DFS.PrimitiveShadowTy, ShadowVecSize);
-    Value *ShadowVec = UndefValue::get(ShadowVecTy); 
-    for (unsigned i = 0; i != ShadowVecSize; ++i) { 
-      ShadowVec = IRB.CreateInsertElement( 
+    Value *ShadowVec = UndefValue::get(ShadowVecTy);
+    for (unsigned i = 0; i != ShadowVecSize; ++i) {
+      ShadowVec = IRB.CreateInsertElement(
           ShadowVec, PrimitiveShadow,
           ConstantInt::get(Type::getInt32Ty(*DFS.Ctx), i));
-    } 
-    Value *ShadowVecAddr = 
-        IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowVecTy)); 
-    do { 
-      Value *CurShadowVecAddr = 
-          IRB.CreateConstGEP1_32(ShadowVecTy, ShadowVecAddr, Offset); 
-      IRB.CreateAlignedStore(ShadowVec, CurShadowVecAddr, ShadowAlign); 
-      Size -= ShadowVecSize; 
-      ++Offset; 
-    } while (Size >= ShadowVecSize); 
-    Offset *= ShadowVecSize; 
-  } 
-  while (Size > 0) { 
-    Value *CurShadowAddr = 
+    }
+    Value *ShadowVecAddr =
+        IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowVecTy));
+    do {
+      Value *CurShadowVecAddr =
+          IRB.CreateConstGEP1_32(ShadowVecTy, ShadowVecAddr, Offset);
+      IRB.CreateAlignedStore(ShadowVec, CurShadowVecAddr, ShadowAlign);
+      Size -= ShadowVecSize;
+      ++Offset;
+    } while (Size >= ShadowVecSize);
+    Offset *= ShadowVecSize;
+  }
+  while (Size > 0) {
+    Value *CurShadowAddr =
         IRB.CreateConstGEP1_32(DFS.PrimitiveShadowTy, ShadowAddr, Offset);
     IRB.CreateAlignedStore(PrimitiveShadow, CurShadowAddr, ShadowAlign);
-    --Size; 
-    ++Offset; 
-  } 
-} 
- 
-void DFSanVisitor::visitStoreInst(StoreInst &SI) { 
-  auto &DL = SI.getModule()->getDataLayout(); 
-  uint64_t Size = DL.getTypeStoreSize(SI.getValueOperand()->getType()); 
-  if (Size == 0) 
-    return; 
- 
-  const Align Alignment = ClPreserveAlignment ? SI.getAlign() : Align(1); 
- 
-  Value* Shadow = DFSF.getShadow(SI.getValueOperand()); 
+    --Size;
+    ++Offset;
+  }
+}
+
+void DFSanVisitor::visitStoreInst(StoreInst &SI) {
+  auto &DL = SI.getModule()->getDataLayout();
+  uint64_t Size = DL.getTypeStoreSize(SI.getValueOperand()->getType());
+  if (Size == 0)
+    return;
+
+  const Align Alignment = ClPreserveAlignment ? SI.getAlign() : Align(1);
+
+  Value* Shadow = DFSF.getShadow(SI.getValueOperand());
   Value *PrimitiveShadow;
-  if (ClCombinePointerLabelsOnStore) { 
-    Value *PtrShadow = DFSF.getShadow(SI.getPointerOperand()); 
+  if (ClCombinePointerLabelsOnStore) {
+    Value *PtrShadow = DFSF.getShadow(SI.getPointerOperand());
     PrimitiveShadow = DFSF.combineShadows(Shadow, PtrShadow, &SI);
   } else {
     PrimitiveShadow = DFSF.collapseToPrimitiveShadow(Shadow, &SI);
-  } 
+  }
   DFSF.storePrimitiveShadow(SI.getPointerOperand(), Size, Alignment,
                             PrimitiveShadow, &SI);
-  if (ClEventCallbacks) { 
-    IRBuilder<> IRB(&SI); 
+  if (ClEventCallbacks) {
+    IRBuilder<> IRB(&SI);
     Value *Addr8 = IRB.CreateBitCast(SI.getPointerOperand(), DFSF.DFS.Int8Ptr);
     IRB.CreateCall(DFSF.DFS.DFSanStoreCallbackFn, {PrimitiveShadow, Addr8});
-  } 
-} 
- 
-void DFSanVisitor::visitUnaryOperator(UnaryOperator &UO) { 
-  visitOperandShadowInst(UO); 
-} 
- 
-void DFSanVisitor::visitBinaryOperator(BinaryOperator &BO) { 
-  visitOperandShadowInst(BO); 
-} 
- 
-void DFSanVisitor::visitCastInst(CastInst &CI) { visitOperandShadowInst(CI); } 
- 
-void DFSanVisitor::visitCmpInst(CmpInst &CI) { 
-  Value *CombinedShadow = visitOperandShadowInst(CI); 
-  if (ClEventCallbacks) { 
-    IRBuilder<> IRB(&CI); 
-    IRB.CreateCall(DFSF.DFS.DFSanCmpCallbackFn, CombinedShadow); 
-  } 
-} 
- 
-void DFSanVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) { 
-  visitOperandShadowInst(GEPI); 
-} 
- 
-void DFSanVisitor::visitExtractElementInst(ExtractElementInst &I) { 
-  visitOperandShadowInst(I); 
-} 
- 
-void DFSanVisitor::visitInsertElementInst(InsertElementInst &I) { 
-  visitOperandShadowInst(I); 
-} 
- 
-void DFSanVisitor::visitShuffleVectorInst(ShuffleVectorInst &I) { 
-  visitOperandShadowInst(I); 
-} 
- 
-void DFSanVisitor::visitExtractValueInst(ExtractValueInst &I) { 
+  }
+}
+
+void DFSanVisitor::visitUnaryOperator(UnaryOperator &UO) {
+  visitOperandShadowInst(UO);
+}
+
+void DFSanVisitor::visitBinaryOperator(BinaryOperator &BO) {
+  visitOperandShadowInst(BO);
+}
+
+void DFSanVisitor::visitCastInst(CastInst &CI) { visitOperandShadowInst(CI); }
+
+void DFSanVisitor::visitCmpInst(CmpInst &CI) {
+  Value *CombinedShadow = visitOperandShadowInst(CI);
+  if (ClEventCallbacks) {
+    IRBuilder<> IRB(&CI);
+    IRB.CreateCall(DFSF.DFS.DFSanCmpCallbackFn, CombinedShadow);
+  }
+}
+
+void DFSanVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
+  visitOperandShadowInst(GEPI);
+}
+
+void DFSanVisitor::visitExtractElementInst(ExtractElementInst &I) {
+  visitOperandShadowInst(I);
+}
+
+void DFSanVisitor::visitInsertElementInst(InsertElementInst &I) {
+  visitOperandShadowInst(I);
+}
+
+void DFSanVisitor::visitShuffleVectorInst(ShuffleVectorInst &I) {
+  visitOperandShadowInst(I);
+}
+
+void DFSanVisitor::visitExtractValueInst(ExtractValueInst &I) {
   if (!DFSF.DFS.shouldTrackFieldsAndIndices()) {
     visitOperandShadowInst(I);
     return;
@@ -1817,9 +1817,9 @@ void DFSanVisitor::visitExtractValueInst(ExtractValueInst &I) {
   Value *AggShadow = DFSF.getShadow(Agg);
   Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices());
   DFSF.setShadow(&I, ResShadow);
-} 
- 
-void DFSanVisitor::visitInsertValueInst(InsertValueInst &I) { 
+}
+
+void DFSanVisitor::visitInsertValueInst(InsertValueInst &I) {
   if (!DFSF.DFS.shouldTrackFieldsAndIndices()) {
     visitOperandShadowInst(I);
     return;
@@ -1830,93 +1830,93 @@ void DFSanVisitor::visitInsertValueInst(InsertValueInst &I) {
   Value *InsShadow = DFSF.getShadow(I.getInsertedValueOperand());
   Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices());
   DFSF.setShadow(&I, Res);
-} 
- 
-void DFSanVisitor::visitAllocaInst(AllocaInst &I) { 
-  bool AllLoadsStores = true; 
-  for (User *U : I.users()) { 
-    if (isa<LoadInst>(U)) 
-      continue; 
- 
-    if (StoreInst *SI = dyn_cast<StoreInst>(U)) { 
-      if (SI->getPointerOperand() == &I) 
-        continue; 
-    } 
- 
-    AllLoadsStores = false; 
-    break; 
-  } 
-  if (AllLoadsStores) { 
-    IRBuilder<> IRB(&I); 
+}
+
+void DFSanVisitor::visitAllocaInst(AllocaInst &I) {
+  bool AllLoadsStores = true;
+  for (User *U : I.users()) {
+    if (isa<LoadInst>(U))
+      continue;
+
+    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+      if (SI->getPointerOperand() == &I)
+        continue;
+    }
+
+    AllLoadsStores = false;
+    break;
+  }
+  if (AllLoadsStores) {
+    IRBuilder<> IRB(&I);
     DFSF.AllocaShadowMap[&I] = IRB.CreateAlloca(DFSF.DFS.PrimitiveShadowTy);
-  } 
+  }
   DFSF.setShadow(&I, DFSF.DFS.ZeroPrimitiveShadow);
-} 
- 
-void DFSanVisitor::visitSelectInst(SelectInst &I) { 
-  Value *CondShadow = DFSF.getShadow(I.getCondition()); 
-  Value *TrueShadow = DFSF.getShadow(I.getTrueValue()); 
-  Value *FalseShadow = DFSF.getShadow(I.getFalseValue()); 
+}
+
+void DFSanVisitor::visitSelectInst(SelectInst &I) {
+  Value *CondShadow = DFSF.getShadow(I.getCondition());
+  Value *TrueShadow = DFSF.getShadow(I.getTrueValue());
+  Value *FalseShadow = DFSF.getShadow(I.getFalseValue());
   Value *ShadowSel = nullptr;
- 
-  if (isa<VectorType>(I.getCondition()->getType())) { 
+
+  if (isa<VectorType>(I.getCondition()->getType())) {
     ShadowSel = DFSF.combineShadowsThenConvert(I.getType(), TrueShadow,
                                                FalseShadow, &I);
-  } else { 
-    if (TrueShadow == FalseShadow) { 
-      ShadowSel = TrueShadow; 
-    } else { 
-      ShadowSel = 
-          SelectInst::Create(I.getCondition(), TrueShadow, FalseShadow, "", &I); 
-    } 
-  } 
+  } else {
+    if (TrueShadow == FalseShadow) {
+      ShadowSel = TrueShadow;
+    } else {
+      ShadowSel =
+          SelectInst::Create(I.getCondition(), TrueShadow, FalseShadow, "", &I);
+    }
+  }
   DFSF.setShadow(&I, ClTrackSelectControlFlow
                          ? DFSF.combineShadowsThenConvert(
                                I.getType(), CondShadow, ShadowSel, &I)
                          : ShadowSel);
-} 
- 
-void DFSanVisitor::visitMemSetInst(MemSetInst &I) { 
-  IRBuilder<> IRB(&I); 
-  Value *ValShadow = DFSF.getShadow(I.getValue()); 
-  IRB.CreateCall(DFSF.DFS.DFSanSetLabelFn, 
-                 {ValShadow, IRB.CreateBitCast(I.getDest(), Type::getInt8PtrTy( 
-                                                                *DFSF.DFS.Ctx)), 
-                  IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)}); 
-} 
- 
-void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) { 
-  IRBuilder<> IRB(&I); 
-  Value *RawDestShadow = DFSF.DFS.getShadowAddress(I.getDest(), &I); 
-  Value *SrcShadow = DFSF.DFS.getShadowAddress(I.getSource(), &I); 
-  Value *LenShadow = 
-      IRB.CreateMul(I.getLength(), ConstantInt::get(I.getLength()->getType(), 
-                                                    DFSF.DFS.ShadowWidthBytes)); 
-  Type *Int8Ptr = Type::getInt8PtrTy(*DFSF.DFS.Ctx); 
-  Value *DestShadow = IRB.CreateBitCast(RawDestShadow, Int8Ptr); 
-  SrcShadow = IRB.CreateBitCast(SrcShadow, Int8Ptr); 
-  auto *MTI = cast<MemTransferInst>( 
-      IRB.CreateCall(I.getFunctionType(), I.getCalledOperand(), 
-                     {DestShadow, SrcShadow, LenShadow, I.getVolatileCst()})); 
-  if (ClPreserveAlignment) { 
-    MTI->setDestAlignment(I.getDestAlign() * DFSF.DFS.ShadowWidthBytes); 
-    MTI->setSourceAlignment(I.getSourceAlign() * DFSF.DFS.ShadowWidthBytes); 
-  } else { 
-    MTI->setDestAlignment(Align(DFSF.DFS.ShadowWidthBytes)); 
-    MTI->setSourceAlignment(Align(DFSF.DFS.ShadowWidthBytes)); 
-  } 
-  if (ClEventCallbacks) { 
-    IRB.CreateCall(DFSF.DFS.DFSanMemTransferCallbackFn, 
-                   {RawDestShadow, I.getLength()}); 
-  } 
-} 
- 
-void DFSanVisitor::visitReturnInst(ReturnInst &RI) { 
-  if (!DFSF.IsNativeABI && RI.getReturnValue()) { 
-    switch (DFSF.IA) { 
-    case DataFlowSanitizer::IA_TLS: { 
-      Value *S = DFSF.getShadow(RI.getReturnValue()); 
-      IRBuilder<> IRB(&RI); 
+}
+
+void DFSanVisitor::visitMemSetInst(MemSetInst &I) {
+  IRBuilder<> IRB(&I);
+  Value *ValShadow = DFSF.getShadow(I.getValue());
+  IRB.CreateCall(DFSF.DFS.DFSanSetLabelFn,
+                 {ValShadow, IRB.CreateBitCast(I.getDest(), Type::getInt8PtrTy(
+                                                                *DFSF.DFS.Ctx)),
+                  IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)});
+}
+
+void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) {
+  IRBuilder<> IRB(&I);
+  Value *RawDestShadow = DFSF.DFS.getShadowAddress(I.getDest(), &I);
+  Value *SrcShadow = DFSF.DFS.getShadowAddress(I.getSource(), &I);
+  Value *LenShadow =
+      IRB.CreateMul(I.getLength(), ConstantInt::get(I.getLength()->getType(),
+                                                    DFSF.DFS.ShadowWidthBytes));
+  Type *Int8Ptr = Type::getInt8PtrTy(*DFSF.DFS.Ctx);
+  Value *DestShadow = IRB.CreateBitCast(RawDestShadow, Int8Ptr);
+  SrcShadow = IRB.CreateBitCast(SrcShadow, Int8Ptr);
+  auto *MTI = cast<MemTransferInst>(
+      IRB.CreateCall(I.getFunctionType(), I.getCalledOperand(),
+                     {DestShadow, SrcShadow, LenShadow, I.getVolatileCst()}));
+  if (ClPreserveAlignment) {
+    MTI->setDestAlignment(I.getDestAlign() * DFSF.DFS.ShadowWidthBytes);
+    MTI->setSourceAlignment(I.getSourceAlign() * DFSF.DFS.ShadowWidthBytes);
+  } else {
+    MTI->setDestAlignment(Align(DFSF.DFS.ShadowWidthBytes));
+    MTI->setSourceAlignment(Align(DFSF.DFS.ShadowWidthBytes));
+  }
+  if (ClEventCallbacks) {
+    IRB.CreateCall(DFSF.DFS.DFSanMemTransferCallbackFn,
+                   {RawDestShadow, I.getLength()});
+  }
+}
+
+void DFSanVisitor::visitReturnInst(ReturnInst &RI) {
+  if (!DFSF.IsNativeABI && RI.getReturnValue()) {
+    switch (DFSF.IA) {
+    case DataFlowSanitizer::IA_TLS: {
+      Value *S = DFSF.getShadow(RI.getReturnValue());
+      IRBuilder<> IRB(&RI);
       Type *RT = DFSF.F->getFunctionType()->getReturnType();
       unsigned Size =
           getDataLayout().getTypeAllocSize(DFSF.DFS.getShadowTy(RT));
@@ -1926,166 +1926,166 @@ void DFSanVisitor::visitReturnInst(ReturnInst &RI) {
         IRB.CreateAlignedStore(S, DFSF.getRetvalTLS(RT, IRB),
                                kShadowTLSAlignment);
       }
-      break; 
-    } 
-    case DataFlowSanitizer::IA_Args: { 
-      IRBuilder<> IRB(&RI); 
-      Type *RT = DFSF.F->getFunctionType()->getReturnType(); 
-      Value *InsVal = 
-          IRB.CreateInsertValue(UndefValue::get(RT), RI.getReturnValue(), 0); 
-      Value *InsShadow = 
-          IRB.CreateInsertValue(InsVal, DFSF.getShadow(RI.getReturnValue()), 1); 
-      RI.setOperand(0, InsShadow); 
-      break; 
-    } 
-    } 
-  } 
-} 
- 
-void DFSanVisitor::visitCallBase(CallBase &CB) { 
-  Function *F = CB.getCalledFunction(); 
-  if ((F && F->isIntrinsic()) || CB.isInlineAsm()) { 
-    visitOperandShadowInst(CB); 
-    return; 
-  } 
- 
-  // Calls to this function are synthesized in wrappers, and we shouldn't 
-  // instrument them. 
-  if (F == DFSF.DFS.DFSanVarargWrapperFn.getCallee()->stripPointerCasts()) 
-    return; 
- 
-  IRBuilder<> IRB(&CB); 
- 
-  DenseMap<Value *, Function *>::iterator i = 
-      DFSF.DFS.UnwrappedFnMap.find(CB.getCalledOperand()); 
-  if (i != DFSF.DFS.UnwrappedFnMap.end()) { 
-    Function *F = i->second; 
-    switch (DFSF.DFS.getWrapperKind(F)) { 
-    case DataFlowSanitizer::WK_Warning: 
-      CB.setCalledFunction(F); 
-      IRB.CreateCall(DFSF.DFS.DFSanUnimplementedFn, 
-                     IRB.CreateGlobalStringPtr(F->getName())); 
+      break;
+    }
+    case DataFlowSanitizer::IA_Args: {
+      IRBuilder<> IRB(&RI);
+      Type *RT = DFSF.F->getFunctionType()->getReturnType();
+      Value *InsVal =
+          IRB.CreateInsertValue(UndefValue::get(RT), RI.getReturnValue(), 0);
+      Value *InsShadow =
+          IRB.CreateInsertValue(InsVal, DFSF.getShadow(RI.getReturnValue()), 1);
+      RI.setOperand(0, InsShadow);
+      break;
+    }
+    }
+  }
+}
+
+void DFSanVisitor::visitCallBase(CallBase &CB) {
+  Function *F = CB.getCalledFunction();
+  if ((F && F->isIntrinsic()) || CB.isInlineAsm()) {
+    visitOperandShadowInst(CB);
+    return;
+  }
+
+  // Calls to this function are synthesized in wrappers, and we shouldn't
+  // instrument them.
+  if (F == DFSF.DFS.DFSanVarargWrapperFn.getCallee()->stripPointerCasts())
+    return;
+
+  IRBuilder<> IRB(&CB);
+
+  DenseMap<Value *, Function *>::iterator i =
+      DFSF.DFS.UnwrappedFnMap.find(CB.getCalledOperand());
+  if (i != DFSF.DFS.UnwrappedFnMap.end()) {
+    Function *F = i->second;
+    switch (DFSF.DFS.getWrapperKind(F)) {
+    case DataFlowSanitizer::WK_Warning:
+      CB.setCalledFunction(F);
+      IRB.CreateCall(DFSF.DFS.DFSanUnimplementedFn,
+                     IRB.CreateGlobalStringPtr(F->getName()));
       DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
-      return; 
-    case DataFlowSanitizer::WK_Discard: 
-      CB.setCalledFunction(F); 
+      return;
+    case DataFlowSanitizer::WK_Discard:
+      CB.setCalledFunction(F);
       DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
-      return; 
-    case DataFlowSanitizer::WK_Functional: 
-      CB.setCalledFunction(F); 
-      visitOperandShadowInst(CB); 
-      return; 
-    case DataFlowSanitizer::WK_Custom: 
-      // Don't try to handle invokes of custom functions, it's too complicated. 
-      // Instead, invoke the dfsw$ wrapper, which will in turn call the __dfsw_ 
-      // wrapper. 
-      if (CallInst *CI = dyn_cast<CallInst>(&CB)) { 
-        FunctionType *FT = F->getFunctionType(); 
-        TransformedFunction CustomFn = DFSF.DFS.getCustomFunctionType(FT); 
-        std::string CustomFName = "__dfsw_"; 
-        CustomFName += F->getName(); 
-        FunctionCallee CustomF = DFSF.DFS.Mod->getOrInsertFunction( 
-            CustomFName, CustomFn.TransformedType); 
-        if (Function *CustomFn = dyn_cast<Function>(CustomF.getCallee())) { 
-          CustomFn->copyAttributesFrom(F); 
- 
-          // Custom functions returning non-void will write to the return label. 
-          if (!FT->getReturnType()->isVoidTy()) { 
-            CustomFn->removeAttributes(AttributeList::FunctionIndex, 
-                                       DFSF.DFS.ReadOnlyNoneAttrs); 
-          } 
-        } 
- 
-        std::vector<Value *> Args; 
- 
-        auto i = CB.arg_begin(); 
-        for (unsigned n = FT->getNumParams(); n != 0; ++i, --n) { 
-          Type *T = (*i)->getType(); 
-          FunctionType *ParamFT; 
-          if (isa<PointerType>(T) && 
-              (ParamFT = dyn_cast<FunctionType>( 
-                   cast<PointerType>(T)->getElementType()))) { 
-            std::string TName = "dfst"; 
-            TName += utostr(FT->getNumParams() - n); 
-            TName += "$"; 
-            TName += F->getName(); 
-            Constant *T = DFSF.DFS.getOrBuildTrampolineFunction(ParamFT, TName); 
-            Args.push_back(T); 
-            Args.push_back( 
-                IRB.CreateBitCast(*i, Type::getInt8PtrTy(*DFSF.DFS.Ctx))); 
-          } else { 
-            Args.push_back(*i); 
-          } 
-        } 
- 
-        i = CB.arg_begin(); 
-        const unsigned ShadowArgStart = Args.size(); 
-        for (unsigned n = FT->getNumParams(); n != 0; ++i, --n) 
+      return;
+    case DataFlowSanitizer::WK_Functional:
+      CB.setCalledFunction(F);
+      visitOperandShadowInst(CB);
+      return;
+    case DataFlowSanitizer::WK_Custom:
+      // Don't try to handle invokes of custom functions, it's too complicated.
+      // Instead, invoke the dfsw$ wrapper, which will in turn call the __dfsw_
+      // wrapper.
+      if (CallInst *CI = dyn_cast<CallInst>(&CB)) {
+        FunctionType *FT = F->getFunctionType();
+        TransformedFunction CustomFn = DFSF.DFS.getCustomFunctionType(FT);
+        std::string CustomFName = "__dfsw_";
+        CustomFName += F->getName();
+        FunctionCallee CustomF = DFSF.DFS.Mod->getOrInsertFunction(
+            CustomFName, CustomFn.TransformedType);
+        if (Function *CustomFn = dyn_cast<Function>(CustomF.getCallee())) {
+          CustomFn->copyAttributesFrom(F);
+
+          // Custom functions returning non-void will write to the return label.
+          if (!FT->getReturnType()->isVoidTy()) {
+            CustomFn->removeAttributes(AttributeList::FunctionIndex,
+                                       DFSF.DFS.ReadOnlyNoneAttrs);
+          }
+        }
+
+        std::vector<Value *> Args;
+
+        auto i = CB.arg_begin();
+        for (unsigned n = FT->getNumParams(); n != 0; ++i, --n) {
+          Type *T = (*i)->getType();
+          FunctionType *ParamFT;
+          if (isa<PointerType>(T) &&
+              (ParamFT = dyn_cast<FunctionType>(
+                   cast<PointerType>(T)->getElementType()))) {
+            std::string TName = "dfst";
+            TName += utostr(FT->getNumParams() - n);
+            TName += "$";
+            TName += F->getName();
+            Constant *T = DFSF.DFS.getOrBuildTrampolineFunction(ParamFT, TName);
+            Args.push_back(T);
+            Args.push_back(
+                IRB.CreateBitCast(*i, Type::getInt8PtrTy(*DFSF.DFS.Ctx)));
+          } else {
+            Args.push_back(*i);
+          }
+        }
+
+        i = CB.arg_begin();
+        const unsigned ShadowArgStart = Args.size();
+        for (unsigned n = FT->getNumParams(); n != 0; ++i, --n)
           Args.push_back(
               DFSF.collapseToPrimitiveShadow(DFSF.getShadow(*i), &CB));
- 
-        if (FT->isVarArg()) { 
+
+        if (FT->isVarArg()) {
           auto *LabelVATy = ArrayType::get(DFSF.DFS.PrimitiveShadowTy,
-                                           CB.arg_size() - FT->getNumParams()); 
-          auto *LabelVAAlloca = new AllocaInst( 
-              LabelVATy, getDataLayout().getAllocaAddrSpace(), 
-              "labelva", &DFSF.F->getEntryBlock().front()); 
- 
-          for (unsigned n = 0; i != CB.arg_end(); ++i, ++n) { 
-            auto LabelVAPtr = IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, n); 
+                                           CB.arg_size() - FT->getNumParams());
+          auto *LabelVAAlloca = new AllocaInst(
+              LabelVATy, getDataLayout().getAllocaAddrSpace(),
+              "labelva", &DFSF.F->getEntryBlock().front());
+
+          for (unsigned n = 0; i != CB.arg_end(); ++i, ++n) {
+            auto LabelVAPtr = IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, n);
             IRB.CreateStore(
                 DFSF.collapseToPrimitiveShadow(DFSF.getShadow(*i), &CB),
                 LabelVAPtr);
-          } 
- 
-          Args.push_back(IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, 0)); 
-        } 
- 
-        if (!FT->getReturnType()->isVoidTy()) { 
-          if (!DFSF.LabelReturnAlloca) { 
-            DFSF.LabelReturnAlloca = 
+          }
+
+          Args.push_back(IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, 0));
+        }
+
+        if (!FT->getReturnType()->isVoidTy()) {
+          if (!DFSF.LabelReturnAlloca) {
+            DFSF.LabelReturnAlloca =
                 new AllocaInst(DFSF.DFS.PrimitiveShadowTy,
                                getDataLayout().getAllocaAddrSpace(),
                                "labelreturn", &DFSF.F->getEntryBlock().front());
-          } 
-          Args.push_back(DFSF.LabelReturnAlloca); 
-        } 
- 
-        for (i = CB.arg_begin() + FT->getNumParams(); i != CB.arg_end(); ++i) 
-          Args.push_back(*i); 
- 
-        CallInst *CustomCI = IRB.CreateCall(CustomF, Args); 
-        CustomCI->setCallingConv(CI->getCallingConv()); 
-        CustomCI->setAttributes(TransformFunctionAttributes(CustomFn, 
-            CI->getContext(), CI->getAttributes())); 
- 
-        // Update the parameter attributes of the custom call instruction to 
-        // zero extend the shadow parameters. This is required for targets 
+          }
+          Args.push_back(DFSF.LabelReturnAlloca);
+        }
+
+        for (i = CB.arg_begin() + FT->getNumParams(); i != CB.arg_end(); ++i)
+          Args.push_back(*i);
+
+        CallInst *CustomCI = IRB.CreateCall(CustomF, Args);
+        CustomCI->setCallingConv(CI->getCallingConv());
+        CustomCI->setAttributes(TransformFunctionAttributes(CustomFn,
+            CI->getContext(), CI->getAttributes()));
+
+        // Update the parameter attributes of the custom call instruction to
+        // zero extend the shadow parameters. This is required for targets
         // which consider PrimitiveShadowTy an illegal type.
-        for (unsigned n = 0; n < FT->getNumParams(); n++) { 
-          const unsigned ArgNo = ShadowArgStart + n; 
+        for (unsigned n = 0; n < FT->getNumParams(); n++) {
+          const unsigned ArgNo = ShadowArgStart + n;
           if (CustomCI->getArgOperand(ArgNo)->getType() ==
               DFSF.DFS.PrimitiveShadowTy)
-            CustomCI->addParamAttr(ArgNo, Attribute::ZExt); 
-        } 
- 
-        if (!FT->getReturnType()->isVoidTy()) { 
+            CustomCI->addParamAttr(ArgNo, Attribute::ZExt);
+        }
+
+        if (!FT->getReturnType()->isVoidTy()) {
           LoadInst *LabelLoad = IRB.CreateLoad(DFSF.DFS.PrimitiveShadowTy,
                                                DFSF.LabelReturnAlloca);
           DFSF.setShadow(CustomCI, DFSF.expandFromPrimitiveShadow(
                                        FT->getReturnType(), LabelLoad, &CB));
-        } 
- 
-        CI->replaceAllUsesWith(CustomCI); 
-        CI->eraseFromParent(); 
-        return; 
-      } 
-      break; 
-    } 
-  } 
- 
-  FunctionType *FT = CB.getFunctionType(); 
-  if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) { 
+        }
+
+        CI->replaceAllUsesWith(CustomCI);
+        CI->eraseFromParent();
+        return;
+      }
+      break;
+    }
+  }
+
+  FunctionType *FT = CB.getFunctionType();
+  if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) {
     unsigned ArgOffset = 0;
     const DataLayout &DL = getDataLayout();
     for (unsigned I = 0, N = FT->getNumParams(); I != N; ++I) {
@@ -2100,26 +2100,26 @@ void DFSanVisitor::visitCallBase(CallBase &CB) {
           DFSF.getArgTLS(FT->getParamType(I), ArgOffset, IRB),
           kShadowTLSAlignment);
       ArgOffset += alignTo(Size, kShadowTLSAlignment);
-    } 
-  } 
- 
-  Instruction *Next = nullptr; 
-  if (!CB.getType()->isVoidTy()) { 
-    if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) { 
-      if (II->getNormalDest()->getSinglePredecessor()) { 
-        Next = &II->getNormalDest()->front(); 
-      } else { 
-        BasicBlock *NewBB = 
-            SplitEdge(II->getParent(), II->getNormalDest(), &DFSF.DT); 
-        Next = &NewBB->front(); 
-      } 
-    } else { 
-      assert(CB.getIterator() != CB.getParent()->end()); 
-      Next = CB.getNextNode(); 
-    } 
- 
-    if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) { 
-      IRBuilder<> NextIRB(Next); 
+    }
+  }
+
+  Instruction *Next = nullptr;
+  if (!CB.getType()->isVoidTy()) {
+    if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
+      if (II->getNormalDest()->getSinglePredecessor()) {
+        Next = &II->getNormalDest()->front();
+      } else {
+        BasicBlock *NewBB =
+            SplitEdge(II->getParent(), II->getNormalDest(), &DFSF.DT);
+        Next = &NewBB->front();
+      }
+    } else {
+      assert(CB.getIterator() != CB.getParent()->end());
+      Next = CB.getNextNode();
+    }
+
+    if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) {
+      IRBuilder<> NextIRB(Next);
       const DataLayout &DL = getDataLayout();
       unsigned Size = DL.getTypeAllocSize(DFSF.DFS.getShadowTy(&CB));
       if (Size > kRetvalTLSSize) {
@@ -2133,83 +2133,83 @@ void DFSanVisitor::visitCallBase(CallBase &CB) {
         DFSF.setShadow(&CB, LI);
         DFSF.NonZeroChecks.push_back(LI);
       }
-    } 
-  } 
- 
-  // Do all instrumentation for IA_Args down here to defer tampering with the 
-  // CFG in a way that SplitEdge may be able to detect. 
-  if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_Args) { 
-    FunctionType *NewFT = DFSF.DFS.getArgsFunctionType(FT); 
-    Value *Func = 
-        IRB.CreateBitCast(CB.getCalledOperand(), PointerType::getUnqual(NewFT)); 
-    std::vector<Value *> Args; 
- 
-    auto i = CB.arg_begin(), E = CB.arg_end(); 
-    for (unsigned n = FT->getNumParams(); n != 0; ++i, --n) 
-      Args.push_back(*i); 
- 
-    i = CB.arg_begin(); 
-    for (unsigned n = FT->getNumParams(); n != 0; ++i, --n) 
-      Args.push_back(DFSF.getShadow(*i)); 
- 
-    if (FT->isVarArg()) { 
-      unsigned VarArgSize = CB.arg_size() - FT->getNumParams(); 
+    }
+  }
+
+  // Do all instrumentation for IA_Args down here to defer tampering with the
+  // CFG in a way that SplitEdge may be able to detect.
+  if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_Args) {
+    FunctionType *NewFT = DFSF.DFS.getArgsFunctionType(FT);
+    Value *Func =
+        IRB.CreateBitCast(CB.getCalledOperand(), PointerType::getUnqual(NewFT));
+    std::vector<Value *> Args;
+
+    auto i = CB.arg_begin(), E = CB.arg_end();
+    for (unsigned n = FT->getNumParams(); n != 0; ++i, --n)
+      Args.push_back(*i);
+
+    i = CB.arg_begin();
+    for (unsigned n = FT->getNumParams(); n != 0; ++i, --n)
+      Args.push_back(DFSF.getShadow(*i));
+
+    if (FT->isVarArg()) {
+      unsigned VarArgSize = CB.arg_size() - FT->getNumParams();
       ArrayType *VarArgArrayTy =
           ArrayType::get(DFSF.DFS.PrimitiveShadowTy, VarArgSize);
-      AllocaInst *VarArgShadow = 
-        new AllocaInst(VarArgArrayTy, getDataLayout().getAllocaAddrSpace(), 
-                       "", &DFSF.F->getEntryBlock().front()); 
-      Args.push_back(IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, 0)); 
-      for (unsigned n = 0; i != E; ++i, ++n) { 
-        IRB.CreateStore( 
-            DFSF.getShadow(*i), 
-            IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, n)); 
-        Args.push_back(*i); 
-      } 
-    } 
- 
-    CallBase *NewCB; 
-    if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) { 
-      NewCB = IRB.CreateInvoke(NewFT, Func, II->getNormalDest(), 
-                               II->getUnwindDest(), Args); 
-    } else { 
-      NewCB = IRB.CreateCall(NewFT, Func, Args); 
-    } 
-    NewCB->setCallingConv(CB.getCallingConv()); 
-    NewCB->setAttributes(CB.getAttributes().removeAttributes( 
-        *DFSF.DFS.Ctx, AttributeList::ReturnIndex, 
-        AttributeFuncs::typeIncompatible(NewCB->getType()))); 
- 
-    if (Next) { 
-      ExtractValueInst *ExVal = ExtractValueInst::Create(NewCB, 0, "", Next); 
-      DFSF.SkipInsts.insert(ExVal); 
-      ExtractValueInst *ExShadow = ExtractValueInst::Create(NewCB, 1, "", Next); 
-      DFSF.SkipInsts.insert(ExShadow); 
-      DFSF.setShadow(ExVal, ExShadow); 
-      DFSF.NonZeroChecks.push_back(ExShadow); 
- 
-      CB.replaceAllUsesWith(ExVal); 
-    } 
- 
-    CB.eraseFromParent(); 
-  } 
-} 
- 
-void DFSanVisitor::visitPHINode(PHINode &PN) { 
+      AllocaInst *VarArgShadow =
+        new AllocaInst(VarArgArrayTy, getDataLayout().getAllocaAddrSpace(),
+                       "", &DFSF.F->getEntryBlock().front());
+      Args.push_back(IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, 0));
+      for (unsigned n = 0; i != E; ++i, ++n) {
+        IRB.CreateStore(
+            DFSF.getShadow(*i),
+            IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, n));
+        Args.push_back(*i);
+      }
+    }
+
+    CallBase *NewCB;
+    if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
+      NewCB = IRB.CreateInvoke(NewFT, Func, II->getNormalDest(),
+                               II->getUnwindDest(), Args);
+    } else {
+      NewCB = IRB.CreateCall(NewFT, Func, Args);
+    }
+    NewCB->setCallingConv(CB.getCallingConv());
+    NewCB->setAttributes(CB.getAttributes().removeAttributes(
+        *DFSF.DFS.Ctx, AttributeList::ReturnIndex,
+        AttributeFuncs::typeIncompatible(NewCB->getType())));
+
+    if (Next) {
+      ExtractValueInst *ExVal = ExtractValueInst::Create(NewCB, 0, "", Next);
+      DFSF.SkipInsts.insert(ExVal);
+      ExtractValueInst *ExShadow = ExtractValueInst::Create(NewCB, 1, "", Next);
+      DFSF.SkipInsts.insert(ExShadow);
+      DFSF.setShadow(ExVal, ExShadow);
+      DFSF.NonZeroChecks.push_back(ExShadow);
+
+      CB.replaceAllUsesWith(ExVal);
+    }
+
+    CB.eraseFromParent();
+  }
+}
+
+void DFSanVisitor::visitPHINode(PHINode &PN) {
   Type *ShadowTy = DFSF.DFS.getShadowTy(&PN);
-  PHINode *ShadowPN = 
+  PHINode *ShadowPN =
       PHINode::Create(ShadowTy, PN.getNumIncomingValues(), "", &PN);
- 
-  // Give the shadow phi node valid predecessors to fool SplitEdge into working. 
+
+  // Give the shadow phi node valid predecessors to fool SplitEdge into working.
   Value *UndefShadow = UndefValue::get(ShadowTy);
-  for (PHINode::block_iterator i = PN.block_begin(), e = PN.block_end(); i != e; 
-       ++i) { 
-    ShadowPN->addIncoming(UndefShadow, *i); 
-  } 
- 
-  DFSF.PHIFixups.push_back(std::make_pair(&PN, ShadowPN)); 
-  DFSF.setShadow(&PN, ShadowPN); 
-} 
+  for (PHINode::block_iterator i = PN.block_begin(), e = PN.block_end(); i != e;
+       ++i) {
+    ShadowPN->addIncoming(UndefShadow, *i);
+  }
+
+  DFSF.PHIFixups.push_back(std::make_pair(&PN, ShadowPN));
+  DFSF.setShadow(&PN, ShadowPN);
+}
 
 namespace {
 class DataFlowSanitizerLegacyPass : public ModulePass {
diff --git a/contrib/libs/llvm12/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/contrib/libs/llvm12/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index 8d53a5d27f..527644a69d 100644
--- a/contrib/libs/llvm12/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -1,185 +1,185 @@
-//===- GCOVProfiling.cpp - Insert edge counters for gcov profiling --------===// 
-// 
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 
-// 
-//===----------------------------------------------------------------------===// 
-// 
-// This pass implements GCOV-style profiling. When this pass is run it emits 
-// "gcno" files next to the existing source, and instruments the code that runs 
-// to records the edges between blocks that run and emit a complementary "gcda" 
-// file on exit. 
-// 
-//===----------------------------------------------------------------------===// 
- 
+//===- GCOVProfiling.cpp - Insert edge counters for gcov profiling --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements GCOV-style profiling. When this pass is run it emits
+// "gcno" files next to the existing source, and instruments the code that runs
+// to records the edges between blocks that run and emit a complementary "gcda"
+// file on exit.
+//
+//===----------------------------------------------------------------------===//
+
 #include "CFGMST.h"
-#include "llvm/ADT/DenseMap.h" 
-#include "llvm/ADT/Hashing.h" 
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/STLExtras.h" 
-#include "llvm/ADT/Sequence.h" 
-#include "llvm/ADT/Statistic.h" 
-#include "llvm/ADT/StringExtras.h" 
-#include "llvm/ADT/StringMap.h" 
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Sequence.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
 #include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/Analysis/EHPersonalities.h" 
-#include "llvm/Analysis/TargetLibraryInfo.h" 
-#include "llvm/IR/CFG.h" 
-#include "llvm/IR/DebugInfo.h" 
-#include "llvm/IR/DebugLoc.h" 
-#include "llvm/IR/IRBuilder.h" 
-#include "llvm/IR/InstIterator.h" 
-#include "llvm/IR/Instructions.h" 
-#include "llvm/IR/IntrinsicInst.h" 
-#include "llvm/IR/Module.h" 
-#include "llvm/InitializePasses.h" 
-#include "llvm/Pass.h" 
+#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/CRC.h"
-#include "llvm/Support/CommandLine.h" 
-#include "llvm/Support/Debug.h" 
-#include "llvm/Support/FileSystem.h" 
-#include "llvm/Support/Path.h" 
-#include "llvm/Support/Regex.h" 
-#include "llvm/Support/raw_ostream.h" 
-#include "llvm/Transforms/Instrumentation.h" 
-#include "llvm/Transforms/Instrumentation/GCOVProfiler.h" 
-#include "llvm/Transforms/Utils/ModuleUtils.h" 
-#include <algorithm> 
-#include <memory> 
-#include <string> 
-#include <utility> 
- 
-using namespace llvm; 
-namespace endian = llvm::support::endian; 
- 
-#define DEBUG_TYPE "insert-gcov-profiling" 
- 
-enum : uint32_t { 
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Instrumentation/GCOVProfiler.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <utility>
+
+using namespace llvm;
+namespace endian = llvm::support::endian;
+
+#define DEBUG_TYPE "insert-gcov-profiling"
+
+enum : uint32_t {
   GCOV_ARC_ON_TREE = 1 << 0,
 
-  GCOV_TAG_FUNCTION = 0x01000000, 
-  GCOV_TAG_BLOCKS = 0x01410000, 
-  GCOV_TAG_ARCS = 0x01430000, 
-  GCOV_TAG_LINES = 0x01450000, 
-}; 
- 
-static cl::opt<std::string> DefaultGCOVVersion("default-gcov-version", 
-                                               cl::init("408*"), cl::Hidden, 
-                                               cl::ValueRequired); 
- 
+  GCOV_TAG_FUNCTION = 0x01000000,
+  GCOV_TAG_BLOCKS = 0x01410000,
+  GCOV_TAG_ARCS = 0x01430000,
+  GCOV_TAG_LINES = 0x01450000,
+};
+
+static cl::opt<std::string> DefaultGCOVVersion("default-gcov-version",
+                                               cl::init("408*"), cl::Hidden,
+                                               cl::ValueRequired);
+
 static cl::opt<bool> AtomicCounter("gcov-atomic-counter", cl::Hidden,
                                    cl::desc("Make counter updates atomic"));
 
-// Returns the number of words which will be used to represent this string. 
-static unsigned wordsOfString(StringRef s) { 
-  // Length + NUL-terminated string + 0~3 padding NULs. 
-  return (s.size() / 4) + 2; 
-} 
- 
-GCOVOptions GCOVOptions::getDefault() { 
-  GCOVOptions Options; 
-  Options.EmitNotes = true; 
-  Options.EmitData = true; 
-  Options.NoRedZone = false; 
+// Returns the number of words which will be used to represent this string.
+static unsigned wordsOfString(StringRef s) {
+  // Length + NUL-terminated string + 0~3 padding NULs.
+  return (s.size() / 4) + 2;
+}
+
+GCOVOptions GCOVOptions::getDefault() {
+  GCOVOptions Options;
+  Options.EmitNotes = true;
+  Options.EmitData = true;
+  Options.NoRedZone = false;
   Options.Atomic = AtomicCounter;
- 
-  if (DefaultGCOVVersion.size() != 4) { 
-    llvm::report_fatal_error(std::string("Invalid -default-gcov-version: ") + 
-                             DefaultGCOVVersion); 
-  } 
-  memcpy(Options.Version, DefaultGCOVVersion.c_str(), 4); 
-  return Options; 
-} 
- 
-namespace { 
-class GCOVFunction; 
- 
-class GCOVProfiler { 
-public: 
-  GCOVProfiler() : GCOVProfiler(GCOVOptions::getDefault()) {} 
-  GCOVProfiler(const GCOVOptions &Opts) : Options(Opts) {} 
-  bool 
+
+  if (DefaultGCOVVersion.size() != 4) {
+    llvm::report_fatal_error(std::string("Invalid -default-gcov-version: ") +
+                             DefaultGCOVVersion);
+  }
+  memcpy(Options.Version, DefaultGCOVVersion.c_str(), 4);
+  return Options;
+}
+
+namespace {
+class GCOVFunction;
+
+class GCOVProfiler {
+public:
+  GCOVProfiler() : GCOVProfiler(GCOVOptions::getDefault()) {}
+  GCOVProfiler(const GCOVOptions &Opts) : Options(Opts) {}
+  bool
   runOnModule(Module &M, function_ref<BlockFrequencyInfo *(Function &F)> GetBFI,
               function_ref<BranchProbabilityInfo *(Function &F)> GetBPI,
-              std::function<const TargetLibraryInfo &(Function &F)> GetTLI); 
- 
-  void write(uint32_t i) { 
-    char Bytes[4]; 
-    endian::write32(Bytes, i, Endian); 
-    os->write(Bytes, 4); 
-  } 
-  void writeString(StringRef s) { 
-    write(wordsOfString(s) - 1); 
-    os->write(s.data(), s.size()); 
-    os->write_zeros(4 - s.size() % 4); 
-  } 
-  void writeBytes(const char *Bytes, int Size) { os->write(Bytes, Size); } 
- 
-private: 
-  // Create the .gcno files for the Module based on DebugInfo. 
+              std::function<const TargetLibraryInfo &(Function &F)> GetTLI);
+
+  void write(uint32_t i) {
+    char Bytes[4];
+    endian::write32(Bytes, i, Endian);
+    os->write(Bytes, 4);
+  }
+  void writeString(StringRef s) {
+    write(wordsOfString(s) - 1);
+    os->write(s.data(), s.size());
+    os->write_zeros(4 - s.size() % 4);
+  }
+  void writeBytes(const char *Bytes, int Size) { os->write(Bytes, Size); }
+
+private:
+  // Create the .gcno files for the Module based on DebugInfo.
   bool
   emitProfileNotes(NamedMDNode *CUNode, bool HasExecOrFork,
                    function_ref<BlockFrequencyInfo *(Function &F)> GetBFI,
                    function_ref<BranchProbabilityInfo *(Function &F)> GetBPI,
                    function_ref<const TargetLibraryInfo &(Function &F)> GetTLI);
- 
+
   void emitGlobalConstructor(
       SmallVectorImpl<std::pair<GlobalVariable *, MDNode *>> &CountersBySP);
- 
-  bool isFunctionInstrumented(const Function &F); 
-  std::vector<Regex> createRegexesFromString(StringRef RegexesStr); 
-  static bool doesFilenameMatchARegex(StringRef Filename, 
-                                      std::vector<Regex> &Regexes); 
- 
-  // Get pointers to the functions in the runtime library. 
-  FunctionCallee getStartFileFunc(const TargetLibraryInfo *TLI); 
-  FunctionCallee getEmitFunctionFunc(const TargetLibraryInfo *TLI); 
-  FunctionCallee getEmitArcsFunc(const TargetLibraryInfo *TLI); 
-  FunctionCallee getSummaryInfoFunc(); 
-  FunctionCallee getEndFileFunc(); 
- 
-  // Add the function to write out all our counters to the global destructor 
-  // list. 
-  Function * 
-  insertCounterWriteout(ArrayRef<std::pair<GlobalVariable *, MDNode *>>); 
-  Function *insertReset(ArrayRef<std::pair<GlobalVariable *, MDNode *>>); 
- 
-  bool AddFlushBeforeForkAndExec(); 
- 
-  enum class GCovFileType { GCNO, GCDA }; 
-  std::string mangleName(const DICompileUnit *CU, GCovFileType FileType); 
- 
-  GCOVOptions Options; 
-  support::endianness Endian; 
-  raw_ostream *os; 
- 
-  // Checksum, produced by hash of EdgeDestinations 
-  SmallVector<uint32_t, 4> FileChecksums; 
- 
-  Module *M = nullptr; 
-  std::function<const TargetLibraryInfo &(Function &F)> GetTLI; 
-  LLVMContext *Ctx = nullptr; 
-  SmallVector<std::unique_ptr<GCOVFunction>, 16> Funcs; 
-  std::vector<Regex> FilterRe; 
-  std::vector<Regex> ExcludeRe; 
+
+  bool isFunctionInstrumented(const Function &F);
+  std::vector<Regex> createRegexesFromString(StringRef RegexesStr);
+  static bool doesFilenameMatchARegex(StringRef Filename,
+                                      std::vector<Regex> &Regexes);
+
+  // Get pointers to the functions in the runtime library.
+  FunctionCallee getStartFileFunc(const TargetLibraryInfo *TLI);
+  FunctionCallee getEmitFunctionFunc(const TargetLibraryInfo *TLI);
+  FunctionCallee getEmitArcsFunc(const TargetLibraryInfo *TLI);
+  FunctionCallee getSummaryInfoFunc();
+  FunctionCallee getEndFileFunc();
+
+  // Add the function to write out all our counters to the global destructor
+  // list.
+  Function *
+  insertCounterWriteout(ArrayRef<std::pair<GlobalVariable *, MDNode *>>);
+  Function *insertReset(ArrayRef<std::pair<GlobalVariable *, MDNode *>>);
+
+  bool AddFlushBeforeForkAndExec();
+
+  enum class GCovFileType { GCNO, GCDA };
+  std::string mangleName(const DICompileUnit *CU, GCovFileType FileType);
+
+  GCOVOptions Options;
+  support::endianness Endian;
+  raw_ostream *os;
+
+  // Checksum, produced by hash of EdgeDestinations
+  SmallVector<uint32_t, 4> FileChecksums;
+
+  Module *M = nullptr;
+  std::function<const TargetLibraryInfo &(Function &F)> GetTLI;
+  LLVMContext *Ctx = nullptr;
+  SmallVector<std::unique_ptr<GCOVFunction>, 16> Funcs;
+  std::vector<Regex> FilterRe;
+  std::vector<Regex> ExcludeRe;
   DenseSet<const BasicBlock *> ExecBlocks;
-  StringMap<bool> InstrumentedFiles; 
-}; 
- 
-class GCOVProfilerLegacyPass : public ModulePass { 
-public: 
-  static char ID; 
-  GCOVProfilerLegacyPass() 
-      : GCOVProfilerLegacyPass(GCOVOptions::getDefault()) {} 
-  GCOVProfilerLegacyPass(const GCOVOptions &Opts) 
-      : ModulePass(ID), Profiler(Opts) { 
-    initializeGCOVProfilerLegacyPassPass(*PassRegistry::getPassRegistry()); 
-  } 
-  StringRef getPassName() const override { return "GCOV Profiler"; } 
- 
-  bool runOnModule(Module &M) override { 
+  StringMap<bool> InstrumentedFiles;
+};
+
+class GCOVProfilerLegacyPass : public ModulePass {
+public:
+  static char ID;
+  GCOVProfilerLegacyPass()
+      : GCOVProfilerLegacyPass(GCOVOptions::getDefault()) {}
+  GCOVProfilerLegacyPass(const GCOVOptions &Opts)
+      : ModulePass(ID), Profiler(Opts) {
+    initializeGCOVProfilerLegacyPassPass(*PassRegistry::getPassRegistry());
+  }
+  StringRef getPassName() const override { return "GCOV Profiler"; }
+
+  bool runOnModule(Module &M) override {
     auto GetBFI = [this](Function &F) {
       return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
     };
@@ -190,16 +190,16 @@ public:
       return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
     };
     return Profiler.runOnModule(M, GetBFI, GetBPI, GetTLI);
-  } 
- 
-  void getAnalysisUsage(AnalysisUsage &AU) const override { 
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<BlockFrequencyInfoWrapperPass>();
-    AU.addRequired<TargetLibraryInfoWrapperPass>(); 
-  } 
- 
-private: 
-  GCOVProfiler Profiler; 
-}; 
+    AU.addRequired<TargetLibraryInfoWrapperPass>();
+  }
+
+private:
+  GCOVProfiler Profiler;
+};
 
 struct BBInfo {
   BBInfo *Group;
@@ -234,225 +234,225 @@ struct Edge {
         .str();
   }
 };
-} 
- 
-char GCOVProfilerLegacyPass::ID = 0; 
-INITIALIZE_PASS_BEGIN( 
-    GCOVProfilerLegacyPass, "insert-gcov-profiling", 
-    "Insert instrumentation for GCOV profiling", false, false) 
+}
+
+char GCOVProfilerLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(
+    GCOVProfilerLegacyPass, "insert-gcov-profiling",
+    "Insert instrumentation for GCOV profiling", false, false)
 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 
-INITIALIZE_PASS_END( 
-    GCOVProfilerLegacyPass, "insert-gcov-profiling", 
-    "Insert instrumentation for GCOV profiling", false, false) 
- 
-ModulePass *llvm::createGCOVProfilerPass(const GCOVOptions &Options) { 
-  return new GCOVProfilerLegacyPass(Options); 
-} 
- 
-static StringRef getFunctionName(const DISubprogram *SP) { 
-  if (!SP->getLinkageName().empty()) 
-    return SP->getLinkageName(); 
-  return SP->getName(); 
-} 
- 
-/// Extract a filename for a DISubprogram. 
-/// 
-/// Prefer relative paths in the coverage notes. Clang also may split 
-/// up absolute paths into a directory and filename component. When 
-/// the relative path doesn't exist, reconstruct the absolute path. 
-static SmallString<128> getFilename(const DISubprogram *SP) { 
-  SmallString<128> Path; 
-  StringRef RelPath = SP->getFilename(); 
-  if (sys::fs::exists(RelPath)) 
-    Path = RelPath; 
-  else 
-    sys::path::append(Path, SP->getDirectory(), SP->getFilename()); 
-  return Path; 
-} 
- 
-namespace { 
-  class GCOVRecord { 
-  protected: 
-    GCOVProfiler *P; 
- 
-    GCOVRecord(GCOVProfiler *P) : P(P) {} 
- 
-    void write(uint32_t i) { P->write(i); } 
-    void writeString(StringRef s) { P->writeString(s); } 
-    void writeBytes(const char *Bytes, int Size) { P->writeBytes(Bytes, Size); } 
-  }; 
- 
-  class GCOVFunction; 
-  class GCOVBlock; 
- 
-  // Constructed only by requesting it from a GCOVBlock, this object stores a 
-  // list of line numbers and a single filename, representing lines that belong 
-  // to the block. 
-  class GCOVLines : public GCOVRecord { 
-   public: 
-    void addLine(uint32_t Line) { 
-      assert(Line != 0 && "Line zero is not a valid real line number."); 
-      Lines.push_back(Line); 
-    } 
- 
-    uint32_t length() const { 
-      return 1 + wordsOfString(Filename) + Lines.size(); 
-    } 
- 
-    void writeOut() { 
-      write(0); 
-      writeString(Filename); 
-      for (int i = 0, e = Lines.size(); i != e; ++i) 
-        write(Lines[i]); 
-    } 
- 
-    GCOVLines(GCOVProfiler *P, StringRef F) 
-        : GCOVRecord(P), Filename(std::string(F)) {} 
- 
-  private: 
-    std::string Filename; 
-    SmallVector<uint32_t, 32> Lines; 
-  }; 
- 
- 
-  // Represent a basic block in GCOV. Each block has a unique number in the 
-  // function, number of lines belonging to each block, and a set of edges to 
-  // other blocks. 
-  class GCOVBlock : public GCOVRecord { 
-   public: 
-    GCOVLines &getFile(StringRef Filename) { 
-      return LinesByFile.try_emplace(Filename, P, Filename).first->second; 
-    } 
- 
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(
+    GCOVProfilerLegacyPass, "insert-gcov-profiling",
+    "Insert instrumentation for GCOV profiling", false, false)
+
+ModulePass *llvm::createGCOVProfilerPass(const GCOVOptions &Options) {
+  return new GCOVProfilerLegacyPass(Options);
+}
+
+static StringRef getFunctionName(const DISubprogram *SP) {
+  if (!SP->getLinkageName().empty())
+    return SP->getLinkageName();
+  return SP->getName();
+}
+
+/// Extract a filename for a DISubprogram.
+///
+/// Prefer relative paths in the coverage notes. Clang also may split
+/// up absolute paths into a directory and filename component. When
+/// the relative path doesn't exist, reconstruct the absolute path.
+static SmallString<128> getFilename(const DISubprogram *SP) {
+  SmallString<128> Path;
+  StringRef RelPath = SP->getFilename();
+  if (sys::fs::exists(RelPath))
+    Path = RelPath;
+  else
+    sys::path::append(Path, SP->getDirectory(), SP->getFilename());
+  return Path;
+}
+
+namespace {
+  class GCOVRecord {
+  protected:
+    GCOVProfiler *P;
+
+    GCOVRecord(GCOVProfiler *P) : P(P) {}
+
+    void write(uint32_t i) { P->write(i); }
+    void writeString(StringRef s) { P->writeString(s); }
+    void writeBytes(const char *Bytes, int Size) { P->writeBytes(Bytes, Size); }
+  };
+
+  class GCOVFunction;
+  class GCOVBlock;
+
+  // Constructed only by requesting it from a GCOVBlock, this object stores a
+  // list of line numbers and a single filename, representing lines that belong
+  // to the block.
+  class GCOVLines : public GCOVRecord {
+   public:
+    void addLine(uint32_t Line) {
+      assert(Line != 0 && "Line zero is not a valid real line number.");
+      Lines.push_back(Line);
+    }
+
+    uint32_t length() const {
+      return 1 + wordsOfString(Filename) + Lines.size();
+    }
+
+    void writeOut() {
+      write(0);
+      writeString(Filename);
+      for (int i = 0, e = Lines.size(); i != e; ++i)
+        write(Lines[i]);
+    }
+
+    GCOVLines(GCOVProfiler *P, StringRef F)
+        : GCOVRecord(P), Filename(std::string(F)) {}
+
+  private:
+    std::string Filename;
+    SmallVector<uint32_t, 32> Lines;
+  };
+
+
+  // Represent a basic block in GCOV. Each block has a unique number in the
+  // function, number of lines belonging to each block, and a set of edges to
+  // other blocks.
+  class GCOVBlock : public GCOVRecord {
+   public:
+    GCOVLines &getFile(StringRef Filename) {
+      return LinesByFile.try_emplace(Filename, P, Filename).first->second;
+    }
+
     void addEdge(GCOVBlock &Successor, uint32_t Flags) {
       OutEdges.emplace_back(&Successor, Flags);
-    } 
- 
-    void writeOut() { 
-      uint32_t Len = 3; 
-      SmallVector<StringMapEntry<GCOVLines> *, 32> SortedLinesByFile; 
-      for (auto &I : LinesByFile) { 
-        Len += I.second.length(); 
-        SortedLinesByFile.push_back(&I); 
-      } 
- 
-      write(GCOV_TAG_LINES); 
-      write(Len); 
-      write(Number); 
- 
-      llvm::sort(SortedLinesByFile, [](StringMapEntry<GCOVLines> *LHS, 
-                                       StringMapEntry<GCOVLines> *RHS) { 
-        return LHS->getKey() < RHS->getKey(); 
-      }); 
-      for (auto &I : SortedLinesByFile) 
-        I->getValue().writeOut(); 
-      write(0); 
-      write(0); 
-    } 
- 
-    GCOVBlock(const GCOVBlock &RHS) : GCOVRecord(RHS), Number(RHS.Number) { 
-      // Only allow copy before edges and lines have been added. After that, 
-      // there are inter-block pointers (eg: edges) that won't take kindly to 
-      // blocks being copied or moved around. 
-      assert(LinesByFile.empty()); 
-      assert(OutEdges.empty()); 
-    } 
- 
+    }
+
+    void writeOut() {
+      uint32_t Len = 3;
+      SmallVector<StringMapEntry<GCOVLines> *, 32> SortedLinesByFile;
+      for (auto &I : LinesByFile) {
+        Len += I.second.length();
+        SortedLinesByFile.push_back(&I);
+      }
+
+      write(GCOV_TAG_LINES);
+      write(Len);
+      write(Number);
+
+      llvm::sort(SortedLinesByFile, [](StringMapEntry<GCOVLines> *LHS,
+                                       StringMapEntry<GCOVLines> *RHS) {
+        return LHS->getKey() < RHS->getKey();
+      });
+      for (auto &I : SortedLinesByFile)
+        I->getValue().writeOut();
+      write(0);
+      write(0);
+    }
+
+    GCOVBlock(const GCOVBlock &RHS) : GCOVRecord(RHS), Number(RHS.Number) {
+      // Only allow copy before edges and lines have been added. After that,
+      // there are inter-block pointers (eg: edges) that won't take kindly to
+      // blocks being copied or moved around.
+      assert(LinesByFile.empty());
+      assert(OutEdges.empty());
+    }
+
     uint32_t Number;
     SmallVector<std::pair<GCOVBlock *, uint32_t>, 4> OutEdges;
 
   private:
-    friend class GCOVFunction; 
- 
-    GCOVBlock(GCOVProfiler *P, uint32_t Number) 
-        : GCOVRecord(P), Number(Number) {} 
- 
-    StringMap<GCOVLines> LinesByFile; 
-  }; 
- 
-  // A function has a unique identifier, a checksum (we leave as zero) and a 
-  // set of blocks and a map of edges between blocks. This is the only GCOV 
-  // object users can construct, the blocks and lines will be rooted here. 
-  class GCOVFunction : public GCOVRecord { 
-  public: 
-    GCOVFunction(GCOVProfiler *P, Function *F, const DISubprogram *SP, 
-                 unsigned EndLine, uint32_t Ident, int Version) 
-        : GCOVRecord(P), SP(SP), EndLine(EndLine), Ident(Ident), 
+    friend class GCOVFunction;
+
+    GCOVBlock(GCOVProfiler *P, uint32_t Number)
+        : GCOVRecord(P), Number(Number) {}
+
+    StringMap<GCOVLines> LinesByFile;
+  };
+
+  // A function has a unique identifier, a checksum (we leave as zero) and a
+  // set of blocks and a map of edges between blocks. This is the only GCOV
+  // object users can construct, the blocks and lines will be rooted here.
+  class GCOVFunction : public GCOVRecord {
+  public:
+    GCOVFunction(GCOVProfiler *P, Function *F, const DISubprogram *SP,
+                 unsigned EndLine, uint32_t Ident, int Version)
+        : GCOVRecord(P), SP(SP), EndLine(EndLine), Ident(Ident),
           Version(Version), EntryBlock(P, 0), ReturnBlock(P, 1) {
-      LLVM_DEBUG(dbgs() << "Function: " << getFunctionName(SP) << "\n"); 
-      bool ExitBlockBeforeBody = Version >= 48; 
+      LLVM_DEBUG(dbgs() << "Function: " << getFunctionName(SP) << "\n");
+      bool ExitBlockBeforeBody = Version >= 48;
       uint32_t i = ExitBlockBeforeBody ? 2 : 1;
       for (BasicBlock &BB : *F)
-        Blocks.insert(std::make_pair(&BB, GCOVBlock(P, i++))); 
-      if (!ExitBlockBeforeBody) 
-        ReturnBlock.Number = i; 
- 
-      std::string FunctionNameAndLine; 
-      raw_string_ostream FNLOS(FunctionNameAndLine); 
-      FNLOS << getFunctionName(SP) << SP->getLine(); 
-      FNLOS.flush(); 
-      FuncChecksum = hash_value(FunctionNameAndLine); 
-    } 
- 
+        Blocks.insert(std::make_pair(&BB, GCOVBlock(P, i++)));
+      if (!ExitBlockBeforeBody)
+        ReturnBlock.Number = i;
+
+      std::string FunctionNameAndLine;
+      raw_string_ostream FNLOS(FunctionNameAndLine);
+      FNLOS << getFunctionName(SP) << SP->getLine();
+      FNLOS.flush();
+      FuncChecksum = hash_value(FunctionNameAndLine);
+    }
+
     GCOVBlock &getBlock(const BasicBlock *BB) {
       return Blocks.find(const_cast<BasicBlock *>(BB))->second;
-    } 
- 
+    }
+
     GCOVBlock &getEntryBlock() { return EntryBlock; }
-    GCOVBlock &getReturnBlock() { 
-      return ReturnBlock; 
-    } 
- 
-    uint32_t getFuncChecksum() const { 
-      return FuncChecksum; 
-    } 
- 
-    void writeOut(uint32_t CfgChecksum) { 
-      write(GCOV_TAG_FUNCTION); 
-      SmallString<128> Filename = getFilename(SP); 
-      uint32_t BlockLen = 
-          2 + (Version >= 47) + wordsOfString(getFunctionName(SP)); 
-      if (Version < 80) 
-        BlockLen += wordsOfString(Filename) + 1; 
-      else 
-        BlockLen += 1 + wordsOfString(Filename) + 3 + (Version >= 90); 
- 
-      write(BlockLen); 
-      write(Ident); 
-      write(FuncChecksum); 
-      if (Version >= 47) 
-        write(CfgChecksum); 
-      writeString(getFunctionName(SP)); 
-      if (Version < 80) { 
-        writeString(Filename); 
-        write(SP->getLine()); 
-      } else { 
-        write(SP->isArtificial()); // artificial 
-        writeString(Filename); 
-        write(SP->getLine()); // start_line 
-        write(0);             // start_column 
-        // EndLine is the last line with !dbg. It is not the } line as in GCC, 
-        // but good enough. 
-        write(EndLine); 
-        if (Version >= 90) 
-          write(0); // end_column 
-      } 
- 
-      // Emit count of blocks. 
-      write(GCOV_TAG_BLOCKS); 
-      if (Version < 80) { 
+    GCOVBlock &getReturnBlock() {
+      return ReturnBlock;
+    }
+
+    uint32_t getFuncChecksum() const {
+      return FuncChecksum;
+    }
+
+    void writeOut(uint32_t CfgChecksum) {
+      write(GCOV_TAG_FUNCTION);
+      SmallString<128> Filename = getFilename(SP);
+      uint32_t BlockLen =
+          2 + (Version >= 47) + wordsOfString(getFunctionName(SP));
+      if (Version < 80)
+        BlockLen += wordsOfString(Filename) + 1;
+      else
+        BlockLen += 1 + wordsOfString(Filename) + 3 + (Version >= 90);
+
+      write(BlockLen);
+      write(Ident);
+      write(FuncChecksum);
+      if (Version >= 47)
+        write(CfgChecksum);
+      writeString(getFunctionName(SP));
+      if (Version < 80) {
+        writeString(Filename);
+        write(SP->getLine());
+      } else {
+        write(SP->isArtificial()); // artificial
+        writeString(Filename);
+        write(SP->getLine()); // start_line
+        write(0);             // start_column
+        // EndLine is the last line with !dbg. It is not the } line as in GCC,
+        // but good enough.
+        write(EndLine);
+        if (Version >= 90)
+          write(0); // end_column
+      }
+
+      // Emit count of blocks.
+      write(GCOV_TAG_BLOCKS);
+      if (Version < 80) {
         write(Blocks.size() + 2);
         for (int i = Blocks.size() + 2; i; --i)
-          write(0); 
-      } else { 
-        write(1); 
+          write(0);
+      } else {
+        write(1);
         write(Blocks.size() + 2);
-      } 
-      LLVM_DEBUG(dbgs() << (Blocks.size() + 1) << " blocks\n"); 
- 
-      // Emit edges between blocks. 
+      }
+      LLVM_DEBUG(dbgs() << (Blocks.size() + 1) << " blocks\n");
+
+      // Emit edges between blocks.
       const uint32_t Outgoing = EntryBlock.OutEdges.size();
       if (Outgoing) {
         write(GCOV_TAG_ARCS);
@@ -465,169 +465,169 @@ namespace {
       }
       for (auto &It : Blocks) {
         const GCOVBlock &Block = It.second;
-        if (Block.OutEdges.empty()) continue; 
- 
-        write(GCOV_TAG_ARCS); 
-        write(Block.OutEdges.size() * 2 + 1); 
-        write(Block.Number); 
+        if (Block.OutEdges.empty()) continue;
+
+        write(GCOV_TAG_ARCS);
+        write(Block.OutEdges.size() * 2 + 1);
+        write(Block.Number);
         for (const auto &E : Block.OutEdges) {
           write(E.first->Number);
           write(E.second);
-        } 
-      } 
- 
-      // Emit lines for each block. 
+        }
+      }
+
+      // Emit lines for each block.
       for (auto &It : Blocks)
         It.second.writeOut();
-    } 
- 
+    }
+
   public:
-    const DISubprogram *SP; 
-    unsigned EndLine; 
-    uint32_t Ident; 
-    uint32_t FuncChecksum; 
-    int Version; 
+    const DISubprogram *SP;
+    unsigned EndLine;
+    uint32_t Ident;
+    uint32_t FuncChecksum;
+    int Version;
     MapVector<BasicBlock *, GCOVBlock> Blocks;
     GCOVBlock EntryBlock;
-    GCOVBlock ReturnBlock; 
-  }; 
-} 
- 
-// RegexesStr is a string containing differents regex separated by a semi-colon. 
-// For example "foo\..*$;bar\..*$". 
-std::vector<Regex> GCOVProfiler::createRegexesFromString(StringRef RegexesStr) { 
-  std::vector<Regex> Regexes; 
-  while (!RegexesStr.empty()) { 
-    std::pair<StringRef, StringRef> HeadTail = RegexesStr.split(';'); 
-    if (!HeadTail.first.empty()) { 
-      Regex Re(HeadTail.first); 
-      std::string Err; 
-      if (!Re.isValid(Err)) { 
-        Ctx->emitError(Twine("Regex ") + HeadTail.first + 
-                       " is not valid: " + Err); 
-      } 
-      Regexes.emplace_back(std::move(Re)); 
-    } 
-    RegexesStr = HeadTail.second; 
-  } 
-  return Regexes; 
-} 
- 
-bool GCOVProfiler::doesFilenameMatchARegex(StringRef Filename, 
-                                           std::vector<Regex> &Regexes) { 
-  for (Regex &Re : Regexes) 
-    if (Re.match(Filename)) 
-      return true; 
-  return false; 
-} 
- 
-bool GCOVProfiler::isFunctionInstrumented(const Function &F) { 
-  if (FilterRe.empty() && ExcludeRe.empty()) { 
-    return true; 
-  } 
-  SmallString<128> Filename = getFilename(F.getSubprogram()); 
-  auto It = InstrumentedFiles.find(Filename); 
-  if (It != InstrumentedFiles.end()) { 
-    return It->second; 
-  } 
- 
-  SmallString<256> RealPath; 
-  StringRef RealFilename; 
- 
-  // Path can be 
-  // /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/c++/8/bits/*.h so for 
-  // such a case we must get the real_path. 
-  if (sys::fs::real_path(Filename, RealPath)) { 
-    // real_path can fail with path like "foo.c". 
-    RealFilename = Filename; 
-  } else { 
-    RealFilename = RealPath; 
-  } 
- 
-  bool ShouldInstrument; 
-  if (FilterRe.empty()) { 
-    ShouldInstrument = !doesFilenameMatchARegex(RealFilename, ExcludeRe); 
-  } else if (ExcludeRe.empty()) { 
-    ShouldInstrument = doesFilenameMatchARegex(RealFilename, FilterRe); 
-  } else { 
-    ShouldInstrument = doesFilenameMatchARegex(RealFilename, FilterRe) && 
-                       !doesFilenameMatchARegex(RealFilename, ExcludeRe); 
-  } 
-  InstrumentedFiles[Filename] = ShouldInstrument; 
-  return ShouldInstrument; 
-} 
- 
-std::string GCOVProfiler::mangleName(const DICompileUnit *CU, 
-                                     GCovFileType OutputType) { 
-  bool Notes = OutputType == GCovFileType::GCNO; 
- 
-  if (NamedMDNode *GCov = M->getNamedMetadata("llvm.gcov")) { 
-    for (int i = 0, e = GCov->getNumOperands(); i != e; ++i) { 
-      MDNode *N = GCov->getOperand(i); 
-      bool ThreeElement = N->getNumOperands() == 3; 
-      if (!ThreeElement && N->getNumOperands() != 2) 
-        continue; 
-      if (dyn_cast<MDNode>(N->getOperand(ThreeElement ? 2 : 1)) != CU) 
-        continue; 
- 
-      if (ThreeElement) { 
-        // These nodes have no mangling to apply, it's stored mangled in the 
-        // bitcode. 
-        MDString *NotesFile = dyn_cast<MDString>(N->getOperand(0)); 
-        MDString *DataFile = dyn_cast<MDString>(N->getOperand(1)); 
-        if (!NotesFile || !DataFile) 
-          continue; 
-        return std::string(Notes ? NotesFile->getString() 
-                                 : DataFile->getString()); 
-      } 
- 
-      MDString *GCovFile = dyn_cast<MDString>(N->getOperand(0)); 
-      if (!GCovFile) 
-        continue; 
- 
-      SmallString<128> Filename = GCovFile->getString(); 
-      sys::path::replace_extension(Filename, Notes ? "gcno" : "gcda"); 
-      return std::string(Filename.str()); 
-    } 
-  } 
- 
-  SmallString<128> Filename = CU->getFilename(); 
-  sys::path::replace_extension(Filename, Notes ? "gcno" : "gcda"); 
-  StringRef FName = sys::path::filename(Filename); 
-  SmallString<128> CurPath; 
-  if (sys::fs::current_path(CurPath)) 
-    return std::string(FName); 
-  sys::path::append(CurPath, FName); 
-  return std::string(CurPath.str()); 
-} 
- 
-bool GCOVProfiler::runOnModule( 
+    GCOVBlock ReturnBlock;
+  };
+}
+
+// RegexesStr is a string containing differents regex separated by a semi-colon.
+// For example "foo\..*$;bar\..*$".
+std::vector<Regex> GCOVProfiler::createRegexesFromString(StringRef RegexesStr) {
+  std::vector<Regex> Regexes;
+  while (!RegexesStr.empty()) {
+    std::pair<StringRef, StringRef> HeadTail = RegexesStr.split(';');
+    if (!HeadTail.first.empty()) {
+      Regex Re(HeadTail.first);
+      std::string Err;
+      if (!Re.isValid(Err)) {
+        Ctx->emitError(Twine("Regex ") + HeadTail.first +
+                       " is not valid: " + Err);
+      }
+      Regexes.emplace_back(std::move(Re));
+    }
+    RegexesStr = HeadTail.second;
+  }
+  return Regexes;
+}
+
+bool GCOVProfiler::doesFilenameMatchARegex(StringRef Filename,
+                                           std::vector<Regex> &Regexes) {
+  for (Regex &Re : Regexes)
+    if (Re.match(Filename))
+      return true;
+  return false;
+}
+
+bool GCOVProfiler::isFunctionInstrumented(const Function &F) {
+  if (FilterRe.empty() && ExcludeRe.empty()) {
+    return true;
+  }
+  SmallString<128> Filename = getFilename(F.getSubprogram());
+  auto It = InstrumentedFiles.find(Filename);
+  if (It != InstrumentedFiles.end()) {
+    return It->second;
+  }
+
+  SmallString<256> RealPath;
+  StringRef RealFilename;
+
+  // Path can be
+  // /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/c++/8/bits/*.h so for
+  // such a case we must get the real_path.
+  if (sys::fs::real_path(Filename, RealPath)) {
+    // real_path can fail with path like "foo.c".
+    RealFilename = Filename;
+  } else {
+    RealFilename = RealPath;
+  }
+
+  bool ShouldInstrument;
+  if (FilterRe.empty()) {
+    ShouldInstrument = !doesFilenameMatchARegex(RealFilename, ExcludeRe);
+  } else if (ExcludeRe.empty()) {
+    ShouldInstrument = doesFilenameMatchARegex(RealFilename, FilterRe);
+  } else {
+    ShouldInstrument = doesFilenameMatchARegex(RealFilename, FilterRe) &&
+                       !doesFilenameMatchARegex(RealFilename, ExcludeRe);
+  }
+  InstrumentedFiles[Filename] = ShouldInstrument;
+  return ShouldInstrument;
+}
+
+std::string GCOVProfiler::mangleName(const DICompileUnit *CU,
+                                     GCovFileType OutputType) {
+  bool Notes = OutputType == GCovFileType::GCNO;
+
+  if (NamedMDNode *GCov = M->getNamedMetadata("llvm.gcov")) {
+    for (int i = 0, e = GCov->getNumOperands(); i != e; ++i) {
+      MDNode *N = GCov->getOperand(i);
+      bool ThreeElement = N->getNumOperands() == 3;
+      if (!ThreeElement && N->getNumOperands() != 2)
+        continue;
+      if (dyn_cast<MDNode>(N->getOperand(ThreeElement ? 2 : 1)) != CU)
+        continue;
+
+      if (ThreeElement) {
+        // These nodes have no mangling to apply, it's stored mangled in the
+        // bitcode.
+        MDString *NotesFile = dyn_cast<MDString>(N->getOperand(0));
+        MDString *DataFile = dyn_cast<MDString>(N->getOperand(1));
+        if (!NotesFile || !DataFile)
+          continue;
+        return std::string(Notes ? NotesFile->getString()
+                                 : DataFile->getString());
+      }
+
+      MDString *GCovFile = dyn_cast<MDString>(N->getOperand(0));
+      if (!GCovFile)
+        continue;
+
+      SmallString<128> Filename = GCovFile->getString();
+      sys::path::replace_extension(Filename, Notes ? "gcno" : "gcda");
+      return std::string(Filename.str());
+    }
+  }
+
+  SmallString<128> Filename = CU->getFilename();
+  sys::path::replace_extension(Filename, Notes ? "gcno" : "gcda");
+  StringRef FName = sys::path::filename(Filename);
+  SmallString<128> CurPath;
+  if (sys::fs::current_path(CurPath))
+    return std::string(FName);
+  sys::path::append(CurPath, FName);
+  return std::string(CurPath.str());
+}
+
+bool GCOVProfiler::runOnModule(
     Module &M, function_ref<BlockFrequencyInfo *(Function &F)> GetBFI,
     function_ref<BranchProbabilityInfo *(Function &F)> GetBPI,
     std::function<const TargetLibraryInfo &(Function &F)> GetTLI) {
-  this->M = &M; 
-  this->GetTLI = std::move(GetTLI); 
-  Ctx = &M.getContext(); 
- 
+  this->M = &M;
+  this->GetTLI = std::move(GetTLI);
+  Ctx = &M.getContext();
+
   NamedMDNode *CUNode = M.getNamedMetadata("llvm.dbg.cu");
   if (!CUNode || (!Options.EmitNotes && !Options.EmitData))
     return false;
- 
+
   bool HasExecOrFork = AddFlushBeforeForkAndExec();
 
-  FilterRe = createRegexesFromString(Options.Filter); 
-  ExcludeRe = createRegexesFromString(Options.Exclude); 
+  FilterRe = createRegexesFromString(Options.Filter);
+  ExcludeRe = createRegexesFromString(Options.Exclude);
   emitProfileNotes(CUNode, HasExecOrFork, GetBFI, GetBPI, this->GetTLI);
   return true;
-} 
- 
-PreservedAnalyses GCOVProfilerPass::run(Module &M, 
-                                        ModuleAnalysisManager &AM) { 
- 
-  GCOVProfiler Profiler(GCOVOpts); 
-  FunctionAnalysisManager &FAM = 
-      AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 
- 
+}
+
+PreservedAnalyses GCOVProfilerPass::run(Module &M,
+                                        ModuleAnalysisManager &AM) {
+
+  GCOVProfiler Profiler(GCOVOpts);
+  FunctionAnalysisManager &FAM =
+      AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+
   auto GetBFI = [&FAM](Function &F) {
     return &FAM.getResult<BlockFrequencyAnalysis>(F);
   };
@@ -639,124 +639,124 @@ PreservedAnalyses GCOVProfilerPass::run(Module &M,
   };
 
   if (!Profiler.runOnModule(M, GetBFI, GetBPI, GetTLI))
-    return PreservedAnalyses::all(); 
- 
-  return PreservedAnalyses::none(); 
-} 
- 
-static bool functionHasLines(const Function &F, unsigned &EndLine) { 
-  // Check whether this function actually has any source lines. Not only 
-  // do these waste space, they also can crash gcov. 
-  EndLine = 0; 
-  for (auto &BB : F) { 
-    for (auto &I : BB) { 
-      // Debug intrinsic locations correspond to the location of the 
-      // declaration, not necessarily any statements or expressions. 
-      if (isa<DbgInfoIntrinsic>(&I)) continue; 
- 
-      const DebugLoc &Loc = I.getDebugLoc(); 
-      if (!Loc) 
-        continue; 
- 
-      // Artificial lines such as calls to the global constructors. 
-      if (Loc.getLine() == 0) continue; 
-      EndLine = std::max(EndLine, Loc.getLine()); 
- 
-      return true; 
-    } 
-  } 
-  return false; 
-} 
- 
-static bool isUsingScopeBasedEH(Function &F) { 
-  if (!F.hasPersonalityFn()) return false; 
- 
-  EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn()); 
-  return isScopedEHPersonality(Personality); 
-} 
- 
-bool GCOVProfiler::AddFlushBeforeForkAndExec() { 
-  SmallVector<CallInst *, 2> Forks; 
-  SmallVector<CallInst *, 2> Execs; 
-  for (auto &F : M->functions()) { 
-    auto *TLI = &GetTLI(F); 
-    for (auto &I : instructions(F)) { 
-      if (CallInst *CI = dyn_cast<CallInst>(&I)) { 
-        if (Function *Callee = CI->getCalledFunction()) { 
-          LibFunc LF; 
-          if (TLI->getLibFunc(*Callee, LF)) { 
-            if (LF == LibFunc_fork) { 
-#if !defined(_WIN32) 
-              Forks.push_back(CI); 
-#endif 
-            } else if (LF == LibFunc_execl || LF == LibFunc_execle || 
-                       LF == LibFunc_execlp || LF == LibFunc_execv || 
-                       LF == LibFunc_execvp || LF == LibFunc_execve || 
-                       LF == LibFunc_execvpe || LF == LibFunc_execvP) { 
-              Execs.push_back(CI); 
-            } 
-          } 
-        } 
-      } 
-    } 
-  } 
- 
-  for (auto F : Forks) { 
-    IRBuilder<> Builder(F); 
-    BasicBlock *Parent = F->getParent(); 
-    auto NextInst = ++F->getIterator(); 
- 
-    // We've a fork so just reset the counters in the child process 
-    FunctionType *FTy = FunctionType::get(Builder.getInt32Ty(), {}, false); 
-    FunctionCallee GCOVFork = M->getOrInsertFunction("__gcov_fork", FTy); 
-    F->setCalledFunction(GCOVFork); 
- 
-    // We split just after the fork to have a counter for the lines after 
-    // Anyway there's a bug: 
-    // void foo() { fork(); } 
-    // void bar() { foo(); blah(); } 
-    // then "blah();" will be called 2 times but showed as 1 
-    // because "blah()" belongs to the same block as "foo();" 
-    Parent->splitBasicBlock(NextInst); 
- 
-    // back() is a br instruction with a debug location 
-    // equals to the one from NextAfterFork 
-    // So to avoid to have two debug locs on two blocks just change it 
-    DebugLoc Loc = F->getDebugLoc(); 
-    Parent->back().setDebugLoc(Loc); 
-  } 
- 
-  for (auto E : Execs) { 
-    IRBuilder<> Builder(E); 
-    BasicBlock *Parent = E->getParent(); 
-    auto NextInst = ++E->getIterator(); 
- 
-    // Since the process is replaced by a new one we need to write out gcdas 
-    // No need to reset the counters since they'll be lost after the exec** 
-    FunctionType *FTy = FunctionType::get(Builder.getVoidTy(), {}, false); 
-    FunctionCallee WriteoutF = 
-        M->getOrInsertFunction("llvm_writeout_files", FTy); 
-    Builder.CreateCall(WriteoutF); 
- 
-    DebugLoc Loc = E->getDebugLoc(); 
-    Builder.SetInsertPoint(&*NextInst); 
-    // If the exec** fails we must reset the counters since they've been 
-    // dumped 
-    FunctionCallee ResetF = M->getOrInsertFunction("llvm_reset_counters", FTy); 
-    Builder.CreateCall(ResetF)->setDebugLoc(Loc); 
+    return PreservedAnalyses::all();
+
+  return PreservedAnalyses::none();
+}
+
+static bool functionHasLines(const Function &F, unsigned &EndLine) {
+  // Check whether this function actually has any source lines. Not only
+  // do these waste space, they also can crash gcov.
+  EndLine = 0;
+  for (auto &BB : F) {
+    for (auto &I : BB) {
+      // Debug intrinsic locations correspond to the location of the
+      // declaration, not necessarily any statements or expressions.
+      if (isa<DbgInfoIntrinsic>(&I)) continue;
+
+      const DebugLoc &Loc = I.getDebugLoc();
+      if (!Loc)
+        continue;
+
+      // Artificial lines such as calls to the global constructors.
+      if (Loc.getLine() == 0) continue;
+      EndLine = std::max(EndLine, Loc.getLine());
+
+      return true;
+    }
+  }
+  return false;
+}
+
+static bool isUsingScopeBasedEH(Function &F) {
+  if (!F.hasPersonalityFn()) return false;
+
+  EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
+  return isScopedEHPersonality(Personality);
+}
+
+bool GCOVProfiler::AddFlushBeforeForkAndExec() {
+  SmallVector<CallInst *, 2> Forks;
+  SmallVector<CallInst *, 2> Execs;
+  for (auto &F : M->functions()) {
+    auto *TLI = &GetTLI(F);
+    for (auto &I : instructions(F)) {
+      if (CallInst *CI = dyn_cast<CallInst>(&I)) {
+        if (Function *Callee = CI->getCalledFunction()) {
+          LibFunc LF;
+          if (TLI->getLibFunc(*Callee, LF)) {
+            if (LF == LibFunc_fork) {
+#if !defined(_WIN32)
+              Forks.push_back(CI);
+#endif
+            } else if (LF == LibFunc_execl || LF == LibFunc_execle ||
+                       LF == LibFunc_execlp || LF == LibFunc_execv ||
+                       LF == LibFunc_execvp || LF == LibFunc_execve ||
+                       LF == LibFunc_execvpe || LF == LibFunc_execvP) {
+              Execs.push_back(CI);
+            }
+          }
+        }
+      }
+    }
+  }
+
+  for (auto F : Forks) {
+    IRBuilder<> Builder(F);
+    BasicBlock *Parent = F->getParent();
+    auto NextInst = ++F->getIterator();
+
+    // We've a fork so just reset the counters in the child process
+    FunctionType *FTy = FunctionType::get(Builder.getInt32Ty(), {}, false);
+    FunctionCallee GCOVFork = M->getOrInsertFunction("__gcov_fork", FTy);
+    F->setCalledFunction(GCOVFork);
+
+    // We split just after the fork to have a counter for the lines after
+    // Anyway there's a bug:
+    // void foo() { fork(); }
+    // void bar() { foo(); blah(); }
+    // then "blah();" will be called 2 times but showed as 1
+    // because "blah()" belongs to the same block as "foo();"
+    Parent->splitBasicBlock(NextInst);
+
+    // back() is a br instruction with a debug location
+    // equals to the one from NextAfterFork
+    // So to avoid to have two debug locs on two blocks just change it
+    DebugLoc Loc = F->getDebugLoc();
+    Parent->back().setDebugLoc(Loc);
+  }
+
+  for (auto E : Execs) {
+    IRBuilder<> Builder(E);
+    BasicBlock *Parent = E->getParent();
+    auto NextInst = ++E->getIterator();
+
+    // Since the process is replaced by a new one we need to write out gcdas
+    // No need to reset the counters since they'll be lost after the exec**
+    FunctionType *FTy = FunctionType::get(Builder.getVoidTy(), {}, false);
+    FunctionCallee WriteoutF =
+        M->getOrInsertFunction("llvm_writeout_files", FTy);
+    Builder.CreateCall(WriteoutF);
+
+    DebugLoc Loc = E->getDebugLoc();
+    Builder.SetInsertPoint(&*NextInst);
+    // If the exec** fails we must reset the counters since they've been
+    // dumped
+    FunctionCallee ResetF = M->getOrInsertFunction("llvm_reset_counters", FTy);
+    Builder.CreateCall(ResetF)->setDebugLoc(Loc);
     ExecBlocks.insert(Parent);
-    Parent->splitBasicBlock(NextInst); 
-    Parent->back().setDebugLoc(Loc); 
-  } 
- 
-  return !Forks.empty() || !Execs.empty(); 
-} 
- 
+    Parent->splitBasicBlock(NextInst);
+    Parent->back().setDebugLoc(Loc);
+  }
+
+  return !Forks.empty() || !Execs.empty();
+}
+
 static BasicBlock *getInstrBB(CFGMST<Edge, BBInfo> &MST, Edge &E,
                               const DenseSet<const BasicBlock *> &ExecBlocks) {
   if (E.InMST || E.Removed)
     return nullptr;
- 
+
   BasicBlock *SrcBB = const_cast<BasicBlock *>(E.SrcBB);
   BasicBlock *DestBB = const_cast<BasicBlock *>(E.DestBB);
   // For a fake edge, instrument the real BB.
@@ -813,42 +813,42 @@ bool GCOVProfiler::emitProfileNotes(
     function_ref<BlockFrequencyInfo *(Function &F)> GetBFI,
     function_ref<BranchProbabilityInfo *(Function &F)> GetBPI,
     function_ref<const TargetLibraryInfo &(Function &F)> GetTLI) {
-  int Version; 
-  { 
-    uint8_t c3 = Options.Version[0]; 
-    uint8_t c2 = Options.Version[1]; 
-    uint8_t c1 = Options.Version[2]; 
-    Version = c3 >= 'A' ? (c3 - 'A') * 100 + (c2 - '0') * 10 + c1 - '0' 
-                        : (c3 - '0') * 10 + c1 - '0'; 
-  } 
- 
+  int Version;
+  {
+    uint8_t c3 = Options.Version[0];
+    uint8_t c2 = Options.Version[1];
+    uint8_t c1 = Options.Version[2];
+    Version = c3 >= 'A' ? (c3 - 'A') * 100 + (c2 - '0') * 10 + c1 - '0'
+                        : (c3 - '0') * 10 + c1 - '0';
+  }
+
   bool EmitGCDA = Options.EmitData;
   for (unsigned i = 0, e = CUNode->getNumOperands(); i != e; ++i) {
-    // Each compile unit gets its own .gcno file. This means that whether we run 
-    // this pass over the original .o's as they're produced, or run it after 
-    // LTO, we'll generate the same .gcno files. 
- 
+    // Each compile unit gets its own .gcno file. This means that whether we run
+    // this pass over the original .o's as they're produced, or run it after
+    // LTO, we'll generate the same .gcno files.
+
     auto *CU = cast<DICompileUnit>(CUNode->getOperand(i));
- 
-    // Skip module skeleton (and module) CUs. 
-    if (CU->getDWOId()) 
-      continue; 
- 
+
+    // Skip module skeleton (and module) CUs.
+    if (CU->getDWOId())
+      continue;
+
     std::vector<uint8_t> EdgeDestinations;
     SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP;
- 
-    Endian = M->getDataLayout().isLittleEndian() ? support::endianness::little 
-                                                 : support::endianness::big; 
-    unsigned FunctionIdent = 0; 
-    for (auto &F : M->functions()) { 
-      DISubprogram *SP = F.getSubprogram(); 
-      unsigned EndLine; 
-      if (!SP) continue; 
-      if (!functionHasLines(F, EndLine) || !isFunctionInstrumented(F)) 
-        continue; 
-      // TODO: Functions using scope-based EH are currently not supported. 
-      if (isUsingScopeBasedEH(F)) continue; 
- 
+
+    Endian = M->getDataLayout().isLittleEndian() ? support::endianness::little
+                                                 : support::endianness::big;
+    unsigned FunctionIdent = 0;
+    for (auto &F : M->functions()) {
+      DISubprogram *SP = F.getSubprogram();
+      unsigned EndLine;
+      if (!SP) continue;
+      if (!functionHasLines(F, EndLine) || !isFunctionInstrumented(F))
+        continue;
+      // TODO: Functions using scope-based EH are currently not supported.
+      if (isUsingScopeBasedEH(F)) continue;
+
       // Add the function line number to the lines of the entry block
       // to have a counter for the function definition.
       uint32_t Line = SP->getLine();
@@ -873,11 +873,11 @@ bool GCOVProfiler::emitProfileNotes(
         E.Place = getInstrBB(MST, E, ExecBlocks);
       }
       // Basic blocks in F are finalized at this point.
-      BasicBlock &EntryBlock = F.getEntryBlock(); 
-      Funcs.push_back(std::make_unique<GCOVFunction>(this, &F, SP, EndLine, 
-                                                     FunctionIdent++, Version)); 
-      GCOVFunction &Func = *Funcs.back(); 
- 
+      BasicBlock &EntryBlock = F.getEntryBlock();
+      Funcs.push_back(std::make_unique<GCOVFunction>(this, &F, SP, EndLine,
+                                                     FunctionIdent++, Version));
+      GCOVFunction &Func = *Funcs.back();
+
       // Some non-tree edges are IndirectBr which cannot be split. Ignore them
       // as well.
       llvm::erase_if(MST.AllEdges, [](std::unique_ptr<Edge> &E) {
@@ -903,7 +903,7 @@ bool GCOVProfiler::emitProfileNotes(
             return L->SrcNumber != R->SrcNumber ? L->SrcNumber < R->SrcNumber
                                                 : L->DstNumber < R->DstNumber;
           });
- 
+
       for (const Edge &E : make_pointee_range(MST.AllEdges)) {
         GCOVBlock &Src =
             E.SrcBB ? Func.getBlock(E.SrcBB) : Func.getEntryBlock();
@@ -912,10 +912,10 @@ bool GCOVProfiler::emitProfileNotes(
         Src.addEdge(Dst, E.Place ? 0 : uint32_t(GCOV_ARC_ON_TREE));
       }
 
-      // Artificial functions such as global initializers 
-      if (!SP->isArtificial()) 
-        Func.getBlock(&EntryBlock).getFile(Filename).addLine(Line); 
- 
+      // Artificial functions such as global initializers
+      if (!SP->isArtificial())
+        Func.getBlock(&EntryBlock).getFile(Filename).addLine(Line);
+
       LLVM_DEBUG(dumpEdges(MST, Func));
 
       for (auto &GB : Func.Blocks) {
@@ -925,31 +925,31 @@ bool GCOVProfiler::emitProfileNotes(
           uint32_t Idx = Succ.first->Number;
           do EdgeDestinations.push_back(Idx & 255);
           while ((Idx >>= 8) > 0);
-        } 
- 
-        for (auto &I : BB) { 
-          // Debug intrinsic locations correspond to the location of the 
-          // declaration, not necessarily any statements or expressions. 
-          if (isa<DbgInfoIntrinsic>(&I)) continue; 
- 
-          const DebugLoc &Loc = I.getDebugLoc(); 
-          if (!Loc) 
-            continue; 
- 
-          // Artificial lines such as calls to the global constructors. 
-          if (Loc.getLine() == 0 || Loc.isImplicitCode()) 
-            continue; 
- 
-          if (Line == Loc.getLine()) continue; 
-          Line = Loc.getLine(); 
-          if (SP != getDISubprogram(Loc.getScope())) 
-            continue; 
- 
-          GCOVLines &Lines = Block.getFile(Filename); 
-          Lines.addLine(Loc.getLine()); 
-        } 
-        Line = 0; 
-      } 
+        }
+
+        for (auto &I : BB) {
+          // Debug intrinsic locations correspond to the location of the
+          // declaration, not necessarily any statements or expressions.
+          if (isa<DbgInfoIntrinsic>(&I)) continue;
+
+          const DebugLoc &Loc = I.getDebugLoc();
+          if (!Loc)
+            continue;
+
+          // Artificial lines such as calls to the global constructors.
+          if (Loc.getLine() == 0 || Loc.isImplicitCode())
+            continue;
+
+          if (Line == Loc.getLine()) continue;
+          Line = Loc.getLine();
+          if (SP != getDISubprogram(Loc.getScope()))
+            continue;
+
+          GCOVLines &Lines = Block.getFile(Filename);
+          Lines.addLine(Loc.getLine());
+        }
+        Line = 0;
+      }
       if (EmitGCDA) {
         DISubprogram *SP = F.getSubprogram();
         ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(*Ctx), Measured);
@@ -974,14 +974,14 @@ bool GCOVProfiler::emitProfileNotes(
           }
         }
       }
-    } 
- 
-    char Tmp[4]; 
+    }
+
+    char Tmp[4];
     JamCRC JC;
     JC.update(EdgeDestinations);
     uint32_t Stamp = JC.getCRC();
-    FileChecksums.push_back(Stamp); 
- 
+    FileChecksums.push_back(Stamp);
+
     if (Options.EmitNotes) {
       std::error_code EC;
       raw_fd_ostream out(mangleName(CU, GCovFileType::GCNO), EC,
@@ -990,8 +990,8 @@ bool GCOVProfiler::emitProfileNotes(
         Ctx->emitError(
             Twine("failed to open coverage notes file for writing: ") +
             EC.message());
-        continue; 
-      } 
+        continue;
+      }
       os = &out;
       if (Endian == support::endianness::big) {
         out.write("gcno", 4);
@@ -1006,28 +1006,28 @@ bool GCOVProfiler::emitProfileNotes(
         writeString(""); // unuseful current_working_directory
       if (Version >= 80)
         write(0); // unuseful has_unexecuted_blocks
- 
+
       for (auto &Func : Funcs)
         Func->writeOut(Stamp);
- 
+
       write(0);
       write(0);
       out.close();
     }
- 
+
     if (EmitGCDA) {
       emitGlobalConstructor(CountersBySP);
       EmitGCDA = false;
-    } 
+    }
   }
   return true;
 }
- 
+
 void GCOVProfiler::emitGlobalConstructor(
     SmallVectorImpl<std::pair<GlobalVariable *, MDNode *>> &CountersBySP) {
   Function *WriteoutF = insertCounterWriteout(CountersBySP);
   Function *ResetF = insertReset(CountersBySP);
- 
+
   // Create a small bit of code that registers the "__llvm_gcov_writeout" to
   // be executed at exit and the "__llvm_gcov_flush" function to be executed
   // when "__gcov_flush" is called.
@@ -1039,355 +1039,355 @@ void GCOVProfiler::emitGlobalConstructor(
   F->addFnAttr(Attribute::NoInline);
   if (Options.NoRedZone)
     F->addFnAttr(Attribute::NoRedZone);
- 
+
   BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F);
   IRBuilder<> Builder(BB);
- 
+
   FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
   auto *PFTy = PointerType::get(FTy, 0);
   FTy = FunctionType::get(Builder.getVoidTy(), {PFTy, PFTy}, false);
- 
+
   // Initialize the environment and register the local writeout, flush and
   // reset functions.
   FunctionCallee GCOVInit = M->getOrInsertFunction("llvm_gcov_init", FTy);
   Builder.CreateCall(GCOVInit, {WriteoutF, ResetF});
   Builder.CreateRetVoid();
- 
+
   appendToGlobalCtors(*M, F, 0);
-} 
- 
-FunctionCallee GCOVProfiler::getStartFileFunc(const TargetLibraryInfo *TLI) { 
-  Type *Args[] = { 
-      Type::getInt8PtrTy(*Ctx), // const char *orig_filename 
-      Type::getInt32Ty(*Ctx),   // uint32_t version 
-      Type::getInt32Ty(*Ctx),   // uint32_t checksum 
-  }; 
-  FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false); 
-  AttributeList AL; 
-  if (auto AK = TLI->getExtAttrForI32Param(false)) 
-    AL = AL.addParamAttribute(*Ctx, 2, AK); 
-  FunctionCallee Res = M->getOrInsertFunction("llvm_gcda_start_file", FTy, AL); 
-  return Res; 
-} 
- 
-FunctionCallee GCOVProfiler::getEmitFunctionFunc(const TargetLibraryInfo *TLI) { 
-  Type *Args[] = { 
-    Type::getInt32Ty(*Ctx),    // uint32_t ident 
-    Type::getInt32Ty(*Ctx),    // uint32_t func_checksum 
-    Type::getInt32Ty(*Ctx),    // uint32_t cfg_checksum 
-  }; 
-  FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false); 
-  AttributeList AL; 
-  if (auto AK = TLI->getExtAttrForI32Param(false)) { 
-    AL = AL.addParamAttribute(*Ctx, 0, AK); 
-    AL = AL.addParamAttribute(*Ctx, 1, AK); 
-    AL = AL.addParamAttribute(*Ctx, 2, AK); 
-  } 
-  return M->getOrInsertFunction("llvm_gcda_emit_function", FTy); 
-} 
- 
-FunctionCallee GCOVProfiler::getEmitArcsFunc(const TargetLibraryInfo *TLI) { 
-  Type *Args[] = { 
-    Type::getInt32Ty(*Ctx),     // uint32_t num_counters 
-    Type::getInt64PtrTy(*Ctx),  // uint64_t *counters 
-  }; 
-  FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false); 
-  AttributeList AL; 
-  if (auto AK = TLI->getExtAttrForI32Param(false)) 
-    AL = AL.addParamAttribute(*Ctx, 0, AK); 
-  return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy, AL); 
-} 
- 
-FunctionCallee GCOVProfiler::getSummaryInfoFunc() { 
-  FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); 
-  return M->getOrInsertFunction("llvm_gcda_summary_info", FTy); 
-} 
- 
-FunctionCallee GCOVProfiler::getEndFileFunc() { 
-  FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); 
-  return M->getOrInsertFunction("llvm_gcda_end_file", FTy); 
-} 
- 
-Function *GCOVProfiler::insertCounterWriteout( 
-    ArrayRef<std::pair<GlobalVariable *, MDNode *> > CountersBySP) { 
-  FunctionType *WriteoutFTy = FunctionType::get(Type::getVoidTy(*Ctx), false); 
-  Function *WriteoutF = M->getFunction("__llvm_gcov_writeout"); 
-  if (!WriteoutF) 
-    WriteoutF = Function::Create(WriteoutFTy, GlobalValue::InternalLinkage, 
-                                 "__llvm_gcov_writeout", M); 
-  WriteoutF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); 
-  WriteoutF->addFnAttr(Attribute::NoInline); 
-  if (Options.NoRedZone) 
-    WriteoutF->addFnAttr(Attribute::NoRedZone); 
- 
-  BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", WriteoutF); 
-  IRBuilder<> Builder(BB); 
- 
-  auto *TLI = &GetTLI(*WriteoutF); 
- 
-  FunctionCallee StartFile = getStartFileFunc(TLI); 
-  FunctionCallee EmitFunction = getEmitFunctionFunc(TLI); 
-  FunctionCallee EmitArcs = getEmitArcsFunc(TLI); 
-  FunctionCallee SummaryInfo = getSummaryInfoFunc(); 
-  FunctionCallee EndFile = getEndFileFunc(); 
- 
-  NamedMDNode *CUNodes = M->getNamedMetadata("llvm.dbg.cu"); 
-  if (!CUNodes) { 
-    Builder.CreateRetVoid(); 
-    return WriteoutF; 
-  } 
- 
-  // Collect the relevant data into a large constant data structure that we can 
-  // walk to write out everything. 
-  StructType *StartFileCallArgsTy = StructType::create( 
+}
+
+FunctionCallee GCOVProfiler::getStartFileFunc(const TargetLibraryInfo *TLI) {
+  Type *Args[] = {
+      Type::getInt8PtrTy(*Ctx), // const char *orig_filename
+      Type::getInt32Ty(*Ctx),   // uint32_t version
+      Type::getInt32Ty(*Ctx),   // uint32_t checksum
+  };
+  FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
+  AttributeList AL;
+  if (auto AK = TLI->getExtAttrForI32Param(false))
+    AL = AL.addParamAttribute(*Ctx, 2, AK);
+  FunctionCallee Res = M->getOrInsertFunction("llvm_gcda_start_file", FTy, AL);
+  return Res;
+}
+
+FunctionCallee GCOVProfiler::getEmitFunctionFunc(const TargetLibraryInfo *TLI) {
+  Type *Args[] = {
+    Type::getInt32Ty(*Ctx),    // uint32_t ident
+    Type::getInt32Ty(*Ctx),    // uint32_t func_checksum
+    Type::getInt32Ty(*Ctx),    // uint32_t cfg_checksum
+  };
+  FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
+  AttributeList AL;
+  if (auto AK = TLI->getExtAttrForI32Param(false)) {
+    AL = AL.addParamAttribute(*Ctx, 0, AK);
+    AL = AL.addParamAttribute(*Ctx, 1, AK);
+    AL = AL.addParamAttribute(*Ctx, 2, AK);
+  }
+  return M->getOrInsertFunction("llvm_gcda_emit_function", FTy);
+}
+
+FunctionCallee GCOVProfiler::getEmitArcsFunc(const TargetLibraryInfo *TLI) {
+  Type *Args[] = {
+    Type::getInt32Ty(*Ctx),     // uint32_t num_counters
+    Type::getInt64PtrTy(*Ctx),  // uint64_t *counters
+  };
+  FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
+  AttributeList AL;
+  if (auto AK = TLI->getExtAttrForI32Param(false))
+    AL = AL.addParamAttribute(*Ctx, 0, AK);
+  return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy, AL);
+}
+
+FunctionCallee GCOVProfiler::getSummaryInfoFunc() {
+  FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+  return M->getOrInsertFunction("llvm_gcda_summary_info", FTy);
+}
+
+FunctionCallee GCOVProfiler::getEndFileFunc() {
+  FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+  return M->getOrInsertFunction("llvm_gcda_end_file", FTy);
+}
+
+Function *GCOVProfiler::insertCounterWriteout(
+    ArrayRef<std::pair<GlobalVariable *, MDNode *> > CountersBySP) {
+  FunctionType *WriteoutFTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+  Function *WriteoutF = M->getFunction("__llvm_gcov_writeout");
+  if (!WriteoutF)
+    WriteoutF = Function::Create(WriteoutFTy, GlobalValue::InternalLinkage,
+                                 "__llvm_gcov_writeout", M);
+  WriteoutF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+  WriteoutF->addFnAttr(Attribute::NoInline);
+  if (Options.NoRedZone)
+    WriteoutF->addFnAttr(Attribute::NoRedZone);
+
+  BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", WriteoutF);
+  IRBuilder<> Builder(BB);
+
+  auto *TLI = &GetTLI(*WriteoutF);
+
+  FunctionCallee StartFile = getStartFileFunc(TLI);
+  FunctionCallee EmitFunction = getEmitFunctionFunc(TLI);
+  FunctionCallee EmitArcs = getEmitArcsFunc(TLI);
+  FunctionCallee SummaryInfo = getSummaryInfoFunc();
+  FunctionCallee EndFile = getEndFileFunc();
+
+  NamedMDNode *CUNodes = M->getNamedMetadata("llvm.dbg.cu");
+  if (!CUNodes) {
+    Builder.CreateRetVoid();
+    return WriteoutF;
+  }
+
+  // Collect the relevant data into a large constant data structure that we can
+  // walk to write out everything.
+  StructType *StartFileCallArgsTy = StructType::create(
       {Builder.getInt8PtrTy(), Builder.getInt32Ty(), Builder.getInt32Ty()},
       "start_file_args_ty");
-  StructType *EmitFunctionCallArgsTy = StructType::create( 
+  StructType *EmitFunctionCallArgsTy = StructType::create(
       {Builder.getInt32Ty(), Builder.getInt32Ty(), Builder.getInt32Ty()},
       "emit_function_args_ty");
-  StructType *EmitArcsCallArgsTy = StructType::create( 
+  StructType *EmitArcsCallArgsTy = StructType::create(
       {Builder.getInt32Ty(), Builder.getInt64Ty()->getPointerTo()},
       "emit_arcs_args_ty");
-  StructType *FileInfoTy = 
-      StructType::create({StartFileCallArgsTy, Builder.getInt32Ty(), 
-                          EmitFunctionCallArgsTy->getPointerTo(), 
+  StructType *FileInfoTy =
+      StructType::create({StartFileCallArgsTy, Builder.getInt32Ty(),
+                          EmitFunctionCallArgsTy->getPointerTo(),
                           EmitArcsCallArgsTy->getPointerTo()},
                          "file_info");
- 
-  Constant *Zero32 = Builder.getInt32(0); 
-  // Build an explicit array of two zeros for use in ConstantExpr GEP building. 
-  Constant *TwoZero32s[] = {Zero32, Zero32}; 
- 
-  SmallVector<Constant *, 8> FileInfos; 
-  for (int i : llvm::seq<int>(0, CUNodes->getNumOperands())) { 
-    auto *CU = cast<DICompileUnit>(CUNodes->getOperand(i)); 
- 
-    // Skip module skeleton (and module) CUs. 
-    if (CU->getDWOId()) 
-      continue; 
- 
-    std::string FilenameGcda = mangleName(CU, GCovFileType::GCDA); 
-    uint32_t CfgChecksum = FileChecksums.empty() ? 0 : FileChecksums[i]; 
-    auto *StartFileCallArgs = ConstantStruct::get( 
-        StartFileCallArgsTy, 
-        {Builder.CreateGlobalStringPtr(FilenameGcda), 
-         Builder.getInt32(endian::read32be(Options.Version)), 
-         Builder.getInt32(CfgChecksum)}); 
- 
-    SmallVector<Constant *, 8> EmitFunctionCallArgsArray; 
-    SmallVector<Constant *, 8> EmitArcsCallArgsArray; 
-    for (int j : llvm::seq<int>(0, CountersBySP.size())) { 
-      uint32_t FuncChecksum = Funcs.empty() ? 0 : Funcs[j]->getFuncChecksum(); 
-      EmitFunctionCallArgsArray.push_back(ConstantStruct::get( 
-          EmitFunctionCallArgsTy, 
-          {Builder.getInt32(j), 
-           Builder.getInt32(FuncChecksum), 
-           Builder.getInt32(CfgChecksum)})); 
- 
-      GlobalVariable *GV = CountersBySP[j].first; 
-      unsigned Arcs = cast<ArrayType>(GV->getValueType())->getNumElements(); 
-      EmitArcsCallArgsArray.push_back(ConstantStruct::get( 
-          EmitArcsCallArgsTy, 
-          {Builder.getInt32(Arcs), ConstantExpr::getInBoundsGetElementPtr( 
-                                       GV->getValueType(), GV, TwoZero32s)})); 
-    } 
-    // Create global arrays for the two emit calls. 
-    int CountersSize = CountersBySP.size(); 
-    assert(CountersSize == (int)EmitFunctionCallArgsArray.size() && 
-           "Mismatched array size!"); 
-    assert(CountersSize == (int)EmitArcsCallArgsArray.size() && 
-           "Mismatched array size!"); 
-    auto *EmitFunctionCallArgsArrayTy = 
-        ArrayType::get(EmitFunctionCallArgsTy, CountersSize); 
-    auto *EmitFunctionCallArgsArrayGV = new GlobalVariable( 
-        *M, EmitFunctionCallArgsArrayTy, /*isConstant*/ true, 
-        GlobalValue::InternalLinkage, 
-        ConstantArray::get(EmitFunctionCallArgsArrayTy, 
-                           EmitFunctionCallArgsArray), 
-        Twine("__llvm_internal_gcov_emit_function_args.") + Twine(i)); 
-    auto *EmitArcsCallArgsArrayTy = 
-        ArrayType::get(EmitArcsCallArgsTy, CountersSize); 
-    EmitFunctionCallArgsArrayGV->setUnnamedAddr( 
-        GlobalValue::UnnamedAddr::Global); 
-    auto *EmitArcsCallArgsArrayGV = new GlobalVariable( 
-        *M, EmitArcsCallArgsArrayTy, /*isConstant*/ true, 
-        GlobalValue::InternalLinkage, 
-        ConstantArray::get(EmitArcsCallArgsArrayTy, EmitArcsCallArgsArray), 
-        Twine("__llvm_internal_gcov_emit_arcs_args.") + Twine(i)); 
-    EmitArcsCallArgsArrayGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); 
- 
-    FileInfos.push_back(ConstantStruct::get( 
-        FileInfoTy, 
-        {StartFileCallArgs, Builder.getInt32(CountersSize), 
-         ConstantExpr::getInBoundsGetElementPtr(EmitFunctionCallArgsArrayTy, 
-                                                EmitFunctionCallArgsArrayGV, 
-                                                TwoZero32s), 
-         ConstantExpr::getInBoundsGetElementPtr( 
-             EmitArcsCallArgsArrayTy, EmitArcsCallArgsArrayGV, TwoZero32s)})); 
-  } 
- 
-  // If we didn't find anything to actually emit, bail on out. 
-  if (FileInfos.empty()) { 
-    Builder.CreateRetVoid(); 
-    return WriteoutF; 
-  } 
- 
-  // To simplify code, we cap the number of file infos we write out to fit 
-  // easily in a 32-bit signed integer. This gives consistent behavior between 
-  // 32-bit and 64-bit systems without requiring (potentially very slow) 64-bit 
-  // operations on 32-bit systems. It also seems unreasonable to try to handle 
-  // more than 2 billion files. 
-  if ((int64_t)FileInfos.size() > (int64_t)INT_MAX) 
-    FileInfos.resize(INT_MAX); 
- 
-  // Create a global for the entire data structure so we can walk it more 
-  // easily. 
-  auto *FileInfoArrayTy = ArrayType::get(FileInfoTy, FileInfos.size()); 
-  auto *FileInfoArrayGV = new GlobalVariable( 
-      *M, FileInfoArrayTy, /*isConstant*/ true, GlobalValue::InternalLinkage, 
-      ConstantArray::get(FileInfoArrayTy, FileInfos), 
-      "__llvm_internal_gcov_emit_file_info"); 
-  FileInfoArrayGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); 
- 
-  // Create the CFG for walking this data structure. 
-  auto *FileLoopHeader = 
-      BasicBlock::Create(*Ctx, "file.loop.header", WriteoutF); 
-  auto *CounterLoopHeader = 
-      BasicBlock::Create(*Ctx, "counter.loop.header", WriteoutF); 
-  auto *FileLoopLatch = BasicBlock::Create(*Ctx, "file.loop.latch", WriteoutF); 
-  auto *ExitBB = BasicBlock::Create(*Ctx, "exit", WriteoutF); 
- 
-  // We always have at least one file, so just branch to the header. 
-  Builder.CreateBr(FileLoopHeader); 
- 
-  // The index into the files structure is our loop induction variable. 
-  Builder.SetInsertPoint(FileLoopHeader); 
+
+  Constant *Zero32 = Builder.getInt32(0);
+  // Build an explicit array of two zeros for use in ConstantExpr GEP building.
+  Constant *TwoZero32s[] = {Zero32, Zero32};
+
+  SmallVector<Constant *, 8> FileInfos;
+  for (int i : llvm::seq<int>(0, CUNodes->getNumOperands())) {
+    auto *CU = cast<DICompileUnit>(CUNodes->getOperand(i));
+
+    // Skip module skeleton (and module) CUs.
+    if (CU->getDWOId())
+      continue;
+
+    std::string FilenameGcda = mangleName(CU, GCovFileType::GCDA);
+    uint32_t CfgChecksum = FileChecksums.empty() ? 0 : FileChecksums[i];
+    auto *StartFileCallArgs = ConstantStruct::get(
+        StartFileCallArgsTy,
+        {Builder.CreateGlobalStringPtr(FilenameGcda),
+         Builder.getInt32(endian::read32be(Options.Version)),
+         Builder.getInt32(CfgChecksum)});
+
+    SmallVector<Constant *, 8> EmitFunctionCallArgsArray;
+    SmallVector<Constant *, 8> EmitArcsCallArgsArray;
+    for (int j : llvm::seq<int>(0, CountersBySP.size())) {
+      uint32_t FuncChecksum = Funcs.empty() ? 0 : Funcs[j]->getFuncChecksum();
+      EmitFunctionCallArgsArray.push_back(ConstantStruct::get(
+          EmitFunctionCallArgsTy,
+          {Builder.getInt32(j),
+           Builder.getInt32(FuncChecksum),
+           Builder.getInt32(CfgChecksum)}));
+
+      GlobalVariable *GV = CountersBySP[j].first;
+      unsigned Arcs = cast<ArrayType>(GV->getValueType())->getNumElements();
+      EmitArcsCallArgsArray.push_back(ConstantStruct::get(
+          EmitArcsCallArgsTy,
+          {Builder.getInt32(Arcs), ConstantExpr::getInBoundsGetElementPtr(
+                                       GV->getValueType(), GV, TwoZero32s)}));
+    }
+    // Create global arrays for the two emit calls.
+    int CountersSize = CountersBySP.size();
+    assert(CountersSize == (int)EmitFunctionCallArgsArray.size() &&
+           "Mismatched array size!");
+    assert(CountersSize == (int)EmitArcsCallArgsArray.size() &&
+           "Mismatched array size!");
+    auto *EmitFunctionCallArgsArrayTy =
+        ArrayType::get(EmitFunctionCallArgsTy, CountersSize);
+    auto *EmitFunctionCallArgsArrayGV = new GlobalVariable(
+        *M, EmitFunctionCallArgsArrayTy, /*isConstant*/ true,
+        GlobalValue::InternalLinkage,
+        ConstantArray::get(EmitFunctionCallArgsArrayTy,
+                           EmitFunctionCallArgsArray),
+        Twine("__llvm_internal_gcov_emit_function_args.") + Twine(i));
+    auto *EmitArcsCallArgsArrayTy =
+        ArrayType::get(EmitArcsCallArgsTy, CountersSize);
+    EmitFunctionCallArgsArrayGV->setUnnamedAddr(
+        GlobalValue::UnnamedAddr::Global);
+    auto *EmitArcsCallArgsArrayGV = new GlobalVariable(
+        *M, EmitArcsCallArgsArrayTy, /*isConstant*/ true,
+        GlobalValue::InternalLinkage,
+        ConstantArray::get(EmitArcsCallArgsArrayTy, EmitArcsCallArgsArray),
+        Twine("__llvm_internal_gcov_emit_arcs_args.") + Twine(i));
+    EmitArcsCallArgsArrayGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+
+    FileInfos.push_back(ConstantStruct::get(
+        FileInfoTy,
+        {StartFileCallArgs, Builder.getInt32(CountersSize),
+         ConstantExpr::getInBoundsGetElementPtr(EmitFunctionCallArgsArrayTy,
+                                                EmitFunctionCallArgsArrayGV,
+                                                TwoZero32s),
+         ConstantExpr::getInBoundsGetElementPtr(
+             EmitArcsCallArgsArrayTy, EmitArcsCallArgsArrayGV, TwoZero32s)}));
+  }
+
+  // If we didn't find anything to actually emit, bail on out.
+  if (FileInfos.empty()) {
+    Builder.CreateRetVoid();
+    return WriteoutF;
+  }
+
+  // To simplify code, we cap the number of file infos we write out to fit
+  // easily in a 32-bit signed integer. This gives consistent behavior between
+  // 32-bit and 64-bit systems without requiring (potentially very slow) 64-bit
+  // operations on 32-bit systems. It also seems unreasonable to try to handle
+  // more than 2 billion files.
+  if ((int64_t)FileInfos.size() > (int64_t)INT_MAX)
+    FileInfos.resize(INT_MAX);
+
+  // Create a global for the entire data structure so we can walk it more
+  // easily.
+  auto *FileInfoArrayTy = ArrayType::get(FileInfoTy, FileInfos.size());
+  auto *FileInfoArrayGV = new GlobalVariable(
+      *M, FileInfoArrayTy, /*isConstant*/ true, GlobalValue::InternalLinkage,
+      ConstantArray::get(FileInfoArrayTy, FileInfos),
+      "__llvm_internal_gcov_emit_file_info");
+  FileInfoArrayGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+
+  // Create the CFG for walking this data structure.
+  auto *FileLoopHeader =
+      BasicBlock::Create(*Ctx, "file.loop.header", WriteoutF);
+  auto *CounterLoopHeader =
+      BasicBlock::Create(*Ctx, "counter.loop.header", WriteoutF);
+  auto *FileLoopLatch = BasicBlock::Create(*Ctx, "file.loop.latch", WriteoutF);
+  auto *ExitBB = BasicBlock::Create(*Ctx, "exit", WriteoutF);
+
+  // We always have at least one file, so just branch to the header.
+  Builder.CreateBr(FileLoopHeader);
+
+  // The index into the files structure is our loop induction variable.
+  Builder.SetInsertPoint(FileLoopHeader);
   PHINode *IV = Builder.CreatePHI(Builder.getInt32Ty(), /*NumReservedValues*/ 2,
                                   "file_idx");
-  IV->addIncoming(Builder.getInt32(0), BB); 
-  auto *FileInfoPtr = Builder.CreateInBoundsGEP( 
-      FileInfoArrayTy, FileInfoArrayGV, {Builder.getInt32(0), IV}); 
-  auto *StartFileCallArgsPtr = 
+  IV->addIncoming(Builder.getInt32(0), BB);
+  auto *FileInfoPtr = Builder.CreateInBoundsGEP(
+      FileInfoArrayTy, FileInfoArrayGV, {Builder.getInt32(0), IV});
+  auto *StartFileCallArgsPtr =
       Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 0, "start_file_args");
-  auto *StartFileCall = Builder.CreateCall( 
-      StartFile, 
-      {Builder.CreateLoad(StartFileCallArgsTy->getElementType(0), 
-                          Builder.CreateStructGEP(StartFileCallArgsTy, 
+  auto *StartFileCall = Builder.CreateCall(
+      StartFile,
+      {Builder.CreateLoad(StartFileCallArgsTy->getElementType(0),
+                          Builder.CreateStructGEP(StartFileCallArgsTy,
                                                   StartFileCallArgsPtr, 0),
                           "filename"),
-       Builder.CreateLoad(StartFileCallArgsTy->getElementType(1), 
-                          Builder.CreateStructGEP(StartFileCallArgsTy, 
+       Builder.CreateLoad(StartFileCallArgsTy->getElementType(1),
+                          Builder.CreateStructGEP(StartFileCallArgsTy,
                                                   StartFileCallArgsPtr, 1),
                           "version"),
-       Builder.CreateLoad(StartFileCallArgsTy->getElementType(2), 
-                          Builder.CreateStructGEP(StartFileCallArgsTy, 
+       Builder.CreateLoad(StartFileCallArgsTy->getElementType(2),
+                          Builder.CreateStructGEP(StartFileCallArgsTy,
                                                   StartFileCallArgsPtr, 2),
                           "stamp")});
-  if (auto AK = TLI->getExtAttrForI32Param(false)) 
-    StartFileCall->addParamAttr(2, AK); 
+  if (auto AK = TLI->getExtAttrForI32Param(false))
+    StartFileCall->addParamAttr(2, AK);
   auto *NumCounters = Builder.CreateLoad(
       FileInfoTy->getElementType(1),
       Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 1), "num_ctrs");
-  auto *EmitFunctionCallArgsArray = 
-      Builder.CreateLoad(FileInfoTy->getElementType(2), 
+  auto *EmitFunctionCallArgsArray =
+      Builder.CreateLoad(FileInfoTy->getElementType(2),
                          Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 2),
                          "emit_function_args");
   auto *EmitArcsCallArgsArray = Builder.CreateLoad(
       FileInfoTy->getElementType(3),
       Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 3), "emit_arcs_args");
-  auto *EnterCounterLoopCond = 
-      Builder.CreateICmpSLT(Builder.getInt32(0), NumCounters); 
-  Builder.CreateCondBr(EnterCounterLoopCond, CounterLoopHeader, FileLoopLatch); 
- 
-  Builder.SetInsertPoint(CounterLoopHeader); 
+  auto *EnterCounterLoopCond =
+      Builder.CreateICmpSLT(Builder.getInt32(0), NumCounters);
+  Builder.CreateCondBr(EnterCounterLoopCond, CounterLoopHeader, FileLoopLatch);
+
+  Builder.SetInsertPoint(CounterLoopHeader);
   auto *JV = Builder.CreatePHI(Builder.getInt32Ty(), /*NumReservedValues*/ 2,
                                "ctr_idx");
-  JV->addIncoming(Builder.getInt32(0), FileLoopHeader); 
-  auto *EmitFunctionCallArgsPtr = Builder.CreateInBoundsGEP( 
-      EmitFunctionCallArgsTy, EmitFunctionCallArgsArray, JV); 
-  auto *EmitFunctionCall = Builder.CreateCall( 
-      EmitFunction, 
-      {Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(0), 
-                          Builder.CreateStructGEP(EmitFunctionCallArgsTy, 
+  JV->addIncoming(Builder.getInt32(0), FileLoopHeader);
+  auto *EmitFunctionCallArgsPtr = Builder.CreateInBoundsGEP(
+      EmitFunctionCallArgsTy, EmitFunctionCallArgsArray, JV);
+  auto *EmitFunctionCall = Builder.CreateCall(
+      EmitFunction,
+      {Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(0),
+                          Builder.CreateStructGEP(EmitFunctionCallArgsTy,
                                                   EmitFunctionCallArgsPtr, 0),
                           "ident"),
-       Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(1), 
-                          Builder.CreateStructGEP(EmitFunctionCallArgsTy, 
+       Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(1),
+                          Builder.CreateStructGEP(EmitFunctionCallArgsTy,
                                                   EmitFunctionCallArgsPtr, 1),
                           "func_checkssum"),
-       Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(2), 
-                          Builder.CreateStructGEP(EmitFunctionCallArgsTy, 
+       Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(2),
+                          Builder.CreateStructGEP(EmitFunctionCallArgsTy,
                                                   EmitFunctionCallArgsPtr, 2),
                           "cfg_checksum")});
-  if (auto AK = TLI->getExtAttrForI32Param(false)) { 
-    EmitFunctionCall->addParamAttr(0, AK); 
-    EmitFunctionCall->addParamAttr(1, AK); 
-    EmitFunctionCall->addParamAttr(2, AK); 
-  } 
-  auto *EmitArcsCallArgsPtr = 
-      Builder.CreateInBoundsGEP(EmitArcsCallArgsTy, EmitArcsCallArgsArray, JV); 
-  auto *EmitArcsCall = Builder.CreateCall( 
-      EmitArcs, 
-      {Builder.CreateLoad( 
-           EmitArcsCallArgsTy->getElementType(0), 
+  if (auto AK = TLI->getExtAttrForI32Param(false)) {
+    EmitFunctionCall->addParamAttr(0, AK);
+    EmitFunctionCall->addParamAttr(1, AK);
+    EmitFunctionCall->addParamAttr(2, AK);
+  }
+  auto *EmitArcsCallArgsPtr =
+      Builder.CreateInBoundsGEP(EmitArcsCallArgsTy, EmitArcsCallArgsArray, JV);
+  auto *EmitArcsCall = Builder.CreateCall(
+      EmitArcs,
+      {Builder.CreateLoad(
+           EmitArcsCallArgsTy->getElementType(0),
            Builder.CreateStructGEP(EmitArcsCallArgsTy, EmitArcsCallArgsPtr, 0),
            "num_counters"),
        Builder.CreateLoad(
            EmitArcsCallArgsTy->getElementType(1),
            Builder.CreateStructGEP(EmitArcsCallArgsTy, EmitArcsCallArgsPtr, 1),
            "counters")});
-  if (auto AK = TLI->getExtAttrForI32Param(false)) 
-    EmitArcsCall->addParamAttr(0, AK); 
-  auto *NextJV = Builder.CreateAdd(JV, Builder.getInt32(1)); 
-  auto *CounterLoopCond = Builder.CreateICmpSLT(NextJV, NumCounters); 
-  Builder.CreateCondBr(CounterLoopCond, CounterLoopHeader, FileLoopLatch); 
-  JV->addIncoming(NextJV, CounterLoopHeader); 
- 
-  Builder.SetInsertPoint(FileLoopLatch); 
-  Builder.CreateCall(SummaryInfo, {}); 
-  Builder.CreateCall(EndFile, {}); 
+  if (auto AK = TLI->getExtAttrForI32Param(false))
+    EmitArcsCall->addParamAttr(0, AK);
+  auto *NextJV = Builder.CreateAdd(JV, Builder.getInt32(1));
+  auto *CounterLoopCond = Builder.CreateICmpSLT(NextJV, NumCounters);
+  Builder.CreateCondBr(CounterLoopCond, CounterLoopHeader, FileLoopLatch);
+  JV->addIncoming(NextJV, CounterLoopHeader);
+
+  Builder.SetInsertPoint(FileLoopLatch);
+  Builder.CreateCall(SummaryInfo, {});
+  Builder.CreateCall(EndFile, {});
   auto *NextIV = Builder.CreateAdd(IV, Builder.getInt32(1), "next_file_idx");
-  auto *FileLoopCond = 
-      Builder.CreateICmpSLT(NextIV, Builder.getInt32(FileInfos.size())); 
-  Builder.CreateCondBr(FileLoopCond, FileLoopHeader, ExitBB); 
-  IV->addIncoming(NextIV, FileLoopLatch); 
- 
-  Builder.SetInsertPoint(ExitBB); 
-  Builder.CreateRetVoid(); 
- 
-  return WriteoutF; 
-} 
- 
-Function *GCOVProfiler::insertReset( 
-    ArrayRef<std::pair<GlobalVariable *, MDNode *>> CountersBySP) { 
-  FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); 
-  Function *ResetF = M->getFunction("__llvm_gcov_reset"); 
-  if (!ResetF) 
-    ResetF = Function::Create(FTy, GlobalValue::InternalLinkage, 
-                              "__llvm_gcov_reset", M); 
-  ResetF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); 
-  ResetF->addFnAttr(Attribute::NoInline); 
-  if (Options.NoRedZone) 
-    ResetF->addFnAttr(Attribute::NoRedZone); 
- 
-  BasicBlock *Entry = BasicBlock::Create(*Ctx, "entry", ResetF); 
-  IRBuilder<> Builder(Entry); 
- 
-  // Zero out the counters. 
-  for (const auto &I : CountersBySP) { 
-    GlobalVariable *GV = I.first; 
-    Constant *Null = Constant::getNullValue(GV->getValueType()); 
-    Builder.CreateStore(Null, GV); 
-  } 
- 
-  Type *RetTy = ResetF->getReturnType(); 
-  if (RetTy->isVoidTy()) 
-    Builder.CreateRetVoid(); 
-  else if (RetTy->isIntegerTy()) 
-    // Used if __llvm_gcov_reset was implicitly declared. 
-    Builder.CreateRet(ConstantInt::get(RetTy, 0)); 
-  else 
-    report_fatal_error("invalid return type for __llvm_gcov_reset"); 
- 
-  return ResetF; 
-} 
+  auto *FileLoopCond =
+      Builder.CreateICmpSLT(NextIV, Builder.getInt32(FileInfos.size()));
+  Builder.CreateCondBr(FileLoopCond, FileLoopHeader, ExitBB);
+  IV->addIncoming(NextIV, FileLoopLatch);
+
+  Builder.SetInsertPoint(ExitBB);
+  Builder.CreateRetVoid();
+
+  return WriteoutF;
+}
+
+Function *GCOVProfiler::insertReset(
+    ArrayRef<std::pair<GlobalVariable *, MDNode *>> CountersBySP) {
+  FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+  Function *ResetF = M->getFunction("__llvm_gcov_reset");
+  if (!ResetF)
+    ResetF = Function::Create(FTy, GlobalValue::InternalLinkage,
+                              "__llvm_gcov_reset", M);
+  ResetF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+  ResetF->addFnAttr(Attribute::NoInline);
+  if (Options.NoRedZone)
+    ResetF->addFnAttr(Attribute::NoRedZone);
+
+  BasicBlock *Entry = BasicBlock::Create(*Ctx, "entry", ResetF);
+  IRBuilder<> Builder(Entry);
+
+  // Zero out the counters.
+  for (const auto &I : CountersBySP) {
+    GlobalVariable *GV = I.first;
+    Constant *Null = Constant::getNullValue(GV->getValueType());
+    Builder.CreateStore(Null, GV);
+  }
+
+  Type *RetTy = ResetF->getReturnType();
+  if (RetTy->isVoidTy())
+    Builder.CreateRetVoid();
+  else if (RetTy->isIntegerTy())
+    // Used if __llvm_gcov_reset was implicitly declared.
+    Builder.CreateRet(ConstantInt::get(RetTy, 0));
+  else
+    report_fatal_error("invalid return type for __llvm_gcov_reset");
+
+  return ResetF;
+}
diff --git a/contrib/libs/llvm12/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/contrib/libs/llvm12/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index 1dffdacc3a..fedd9bfc97 100644
--- a/contrib/libs/llvm12/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -1,375 +1,375 @@
-//===- HWAddressSanitizer.cpp - detector of uninitialized reads -------===// 
-// 
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 
-// 
-//===----------------------------------------------------------------------===// 
-// 
-/// \file 
-/// This file is a part of HWAddressSanitizer, an address sanity checker 
-/// based on tagged addressing. 
-//===----------------------------------------------------------------------===// 
- 
-#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h" 
-#include "llvm/ADT/MapVector.h" 
-#include "llvm/ADT/SmallVector.h" 
-#include "llvm/ADT/StringExtras.h" 
-#include "llvm/ADT/StringRef.h" 
-#include "llvm/ADT/Triple.h" 
-#include "llvm/BinaryFormat/ELF.h" 
-#include "llvm/IR/Attributes.h" 
-#include "llvm/IR/BasicBlock.h" 
-#include "llvm/IR/Constant.h" 
-#include "llvm/IR/Constants.h" 
-#include "llvm/IR/DataLayout.h" 
-#include "llvm/IR/DebugInfoMetadata.h" 
-#include "llvm/IR/DerivedTypes.h" 
-#include "llvm/IR/Function.h" 
-#include "llvm/IR/IRBuilder.h" 
-#include "llvm/IR/InlineAsm.h" 
-#include "llvm/IR/InstVisitor.h" 
-#include "llvm/IR/Instruction.h" 
-#include "llvm/IR/Instructions.h" 
-#include "llvm/IR/IntrinsicInst.h" 
-#include "llvm/IR/Intrinsics.h" 
-#include "llvm/IR/LLVMContext.h" 
-#include "llvm/IR/MDBuilder.h" 
-#include "llvm/IR/Module.h" 
-#include "llvm/IR/Type.h" 
-#include "llvm/IR/Value.h" 
-#include "llvm/InitializePasses.h" 
-#include "llvm/Pass.h" 
-#include "llvm/Support/Casting.h" 
-#include "llvm/Support/CommandLine.h" 
-#include "llvm/Support/Debug.h" 
-#include "llvm/Support/raw_ostream.h" 
-#include "llvm/Transforms/Instrumentation.h" 
-#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h" 
-#include "llvm/Transforms/Utils/BasicBlockUtils.h" 
-#include "llvm/Transforms/Utils/ModuleUtils.h" 
-#include "llvm/Transforms/Utils/PromoteMemToReg.h" 
-#include <sstream> 
- 
-using namespace llvm; 
- 
-#define DEBUG_TYPE "hwasan" 
- 
+//===- HWAddressSanitizer.cpp - detector of uninitialized reads -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file is a part of HWAddressSanitizer, an address sanity checker
+/// based on tagged addressing.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include <sstream>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "hwasan"
+
 const char kHwasanModuleCtorName[] = "hwasan.module_ctor";
 const char kHwasanNoteName[] = "hwasan.note";
 const char kHwasanInitName[] = "__hwasan_init";
 const char kHwasanPersonalityThunkName[] = "__hwasan_personality_thunk";
- 
+
 const char kHwasanShadowMemoryDynamicAddress[] =
-    "__hwasan_shadow_memory_dynamic_address"; 
- 
-// Accesses sizes are powers of two: 1, 2, 4, 8, 16. 
-static const size_t kNumberOfAccessSizes = 5; 
- 
-static const size_t kDefaultShadowScale = 4; 
-static const uint64_t kDynamicShadowSentinel = 
-    std::numeric_limits<uint64_t>::max(); 
-static const unsigned kPointerTagShift = 56; 
- 
-static const unsigned kShadowBaseAlignment = 32; 
- 
-static cl::opt<std::string> ClMemoryAccessCallbackPrefix( 
-    "hwasan-memory-access-callback-prefix", 
-    cl::desc("Prefix for memory access callbacks"), cl::Hidden, 
-    cl::init("__hwasan_")); 
- 
-static cl::opt<bool> 
-    ClInstrumentWithCalls("hwasan-instrument-with-calls", 
-                cl::desc("instrument reads and writes with callbacks"), 
-                cl::Hidden, cl::init(false)); 
- 
-static cl::opt<bool> ClInstrumentReads("hwasan-instrument-reads", 
-                                       cl::desc("instrument read instructions"), 
-                                       cl::Hidden, cl::init(true)); 
- 
-static cl::opt<bool> ClInstrumentWrites( 
-    "hwasan-instrument-writes", cl::desc("instrument write instructions"), 
-    cl::Hidden, cl::init(true)); 
- 
-static cl::opt<bool> ClInstrumentAtomics( 
-    "hwasan-instrument-atomics", 
-    cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden, 
-    cl::init(true)); 
- 
-static cl::opt<bool> ClInstrumentByval("hwasan-instrument-byval", 
-                                       cl::desc("instrument byval arguments"), 
-                                       cl::Hidden, cl::init(true)); 
- 
-static cl::opt<bool> ClRecover( 
-    "hwasan-recover", 
-    cl::desc("Enable recovery mode (continue-after-error)."), 
-    cl::Hidden, cl::init(false)); 
- 
-static cl::opt<bool> ClInstrumentStack("hwasan-instrument-stack", 
-                                       cl::desc("instrument stack (allocas)"), 
-                                       cl::Hidden, cl::init(true)); 
- 
-static cl::opt<bool> ClUARRetagToZero( 
-    "hwasan-uar-retag-to-zero", 
-    cl::desc("Clear alloca tags before returning from the function to allow " 
-             "non-instrumented and instrumented function calls mix. When set " 
-             "to false, allocas are retagged before returning from the " 
-             "function to detect use after return."), 
-    cl::Hidden, cl::init(true)); 
- 
-static cl::opt<bool> ClGenerateTagsWithCalls( 
-    "hwasan-generate-tags-with-calls", 
-    cl::desc("generate new tags with runtime library calls"), cl::Hidden, 
-    cl::init(false)); 
- 
-static cl::opt<bool> ClGlobals("hwasan-globals", cl::desc("Instrument globals"), 
-                               cl::Hidden, cl::init(false), cl::ZeroOrMore); 
- 
-static cl::opt<int> ClMatchAllTag( 
-    "hwasan-match-all-tag", 
-    cl::desc("don't report bad accesses via pointers with this tag"), 
-    cl::Hidden, cl::init(-1)); 
- 
-static cl::opt<bool> ClEnableKhwasan( 
-    "hwasan-kernel", 
-    cl::desc("Enable KernelHWAddressSanitizer instrumentation"), 
-    cl::Hidden, cl::init(false)); 
- 
-// These flags allow to change the shadow mapping and control how shadow memory 
-// is accessed. The shadow mapping looks like: 
-//    Shadow = (Mem >> scale) + offset 
- 
-static cl::opt<uint64_t> 
-    ClMappingOffset("hwasan-mapping-offset", 
-                    cl::desc("HWASan shadow mapping offset [EXPERIMENTAL]"), 
-                    cl::Hidden, cl::init(0)); 
- 
-static cl::opt<bool> 
-    ClWithIfunc("hwasan-with-ifunc", 
-                cl::desc("Access dynamic shadow through an ifunc global on " 
-                         "platforms that support this"), 
-                cl::Hidden, cl::init(false)); 
- 
-static cl::opt<bool> ClWithTls( 
-    "hwasan-with-tls", 
-    cl::desc("Access dynamic shadow through an thread-local pointer on " 
-             "platforms that support this"), 
-    cl::Hidden, cl::init(true)); 
- 
-static cl::opt<bool> 
-    ClRecordStackHistory("hwasan-record-stack-history", 
-                         cl::desc("Record stack frames with tagged allocations " 
-                                  "in a thread-local ring buffer"), 
-                         cl::Hidden, cl::init(true)); 
-static cl::opt<bool> 
-    ClInstrumentMemIntrinsics("hwasan-instrument-mem-intrinsics", 
-                              cl::desc("instrument memory intrinsics"), 
-                              cl::Hidden, cl::init(true)); 
- 
-static cl::opt<bool> 
-    ClInstrumentLandingPads("hwasan-instrument-landing-pads", 
-                            cl::desc("instrument landing pads"), cl::Hidden, 
-                            cl::init(false), cl::ZeroOrMore); 
- 
-static cl::opt<bool> ClUseShortGranules( 
-    "hwasan-use-short-granules", 
-    cl::desc("use short granules in allocas and outlined checks"), cl::Hidden, 
-    cl::init(false), cl::ZeroOrMore); 
- 
-static cl::opt<bool> ClInstrumentPersonalityFunctions( 
-    "hwasan-instrument-personality-functions", 
-    cl::desc("instrument personality functions"), cl::Hidden, cl::init(false), 
-    cl::ZeroOrMore); 
- 
-static cl::opt<bool> ClInlineAllChecks("hwasan-inline-all-checks", 
-                                       cl::desc("inline all checks"), 
-                                       cl::Hidden, cl::init(false)); 
- 
-namespace { 
- 
-/// An instrumentation pass implementing detection of addressability bugs 
-/// using tagged pointers. 
-class HWAddressSanitizer { 
-public: 
-  explicit HWAddressSanitizer(Module &M, bool CompileKernel = false, 
-                              bool Recover = false) : M(M) { 
-    this->Recover = ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover; 
-    this->CompileKernel = ClEnableKhwasan.getNumOccurrences() > 0 ? 
-        ClEnableKhwasan : CompileKernel; 
- 
-    initializeModule(); 
-  } 
- 
-  bool sanitizeFunction(Function &F); 
-  void initializeModule(); 
+    "__hwasan_shadow_memory_dynamic_address";
+
+// Accesses sizes are powers of two: 1, 2, 4, 8, 16.
+static const size_t kNumberOfAccessSizes = 5;
+
+static const size_t kDefaultShadowScale = 4;
+static const uint64_t kDynamicShadowSentinel =
+    std::numeric_limits<uint64_t>::max();
+static const unsigned kPointerTagShift = 56;
+
+static const unsigned kShadowBaseAlignment = 32;
+
+static cl::opt<std::string> ClMemoryAccessCallbackPrefix(
+    "hwasan-memory-access-callback-prefix",
+    cl::desc("Prefix for memory access callbacks"), cl::Hidden,
+    cl::init("__hwasan_"));
+
+static cl::opt<bool>
+    ClInstrumentWithCalls("hwasan-instrument-with-calls",
+                cl::desc("instrument reads and writes with callbacks"),
+                cl::Hidden, cl::init(false));
+
+static cl::opt<bool> ClInstrumentReads("hwasan-instrument-reads",
+                                       cl::desc("instrument read instructions"),
+                                       cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClInstrumentWrites(
+    "hwasan-instrument-writes", cl::desc("instrument write instructions"),
+    cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClInstrumentAtomics(
+    "hwasan-instrument-atomics",
+    cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,
+    cl::init(true));
+
+static cl::opt<bool> ClInstrumentByval("hwasan-instrument-byval",
+                                       cl::desc("instrument byval arguments"),
+                                       cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClRecover(
+    "hwasan-recover",
+    cl::desc("Enable recovery mode (continue-after-error)."),
+    cl::Hidden, cl::init(false));
+
+static cl::opt<bool> ClInstrumentStack("hwasan-instrument-stack",
+                                       cl::desc("instrument stack (allocas)"),
+                                       cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClUARRetagToZero(
+    "hwasan-uar-retag-to-zero",
+    cl::desc("Clear alloca tags before returning from the function to allow "
+             "non-instrumented and instrumented function calls mix. When set "
+             "to false, allocas are retagged before returning from the "
+             "function to detect use after return."),
+    cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClGenerateTagsWithCalls(
+    "hwasan-generate-tags-with-calls",
+    cl::desc("generate new tags with runtime library calls"), cl::Hidden,
+    cl::init(false));
+
+static cl::opt<bool> ClGlobals("hwasan-globals", cl::desc("Instrument globals"),
+                               cl::Hidden, cl::init(false), cl::ZeroOrMore);
+
+static cl::opt<int> ClMatchAllTag(
+    "hwasan-match-all-tag",
+    cl::desc("don't report bad accesses via pointers with this tag"),
+    cl::Hidden, cl::init(-1));
+
+static cl::opt<bool> ClEnableKhwasan(
+    "hwasan-kernel",
+    cl::desc("Enable KernelHWAddressSanitizer instrumentation"),
+    cl::Hidden, cl::init(false));
+
+// These flags allow to change the shadow mapping and control how shadow memory
+// is accessed. The shadow mapping looks like:
+//    Shadow = (Mem >> scale) + offset
+
+static cl::opt<uint64_t>
+    ClMappingOffset("hwasan-mapping-offset",
+                    cl::desc("HWASan shadow mapping offset [EXPERIMENTAL]"),
+                    cl::Hidden, cl::init(0));
+
+static cl::opt<bool>
+    ClWithIfunc("hwasan-with-ifunc",
+                cl::desc("Access dynamic shadow through an ifunc global on "
+                         "platforms that support this"),
+                cl::Hidden, cl::init(false));
+
+static cl::opt<bool> ClWithTls(
+    "hwasan-with-tls",
+    cl::desc("Access dynamic shadow through an thread-local pointer on "
+             "platforms that support this"),
+    cl::Hidden, cl::init(true));
+
+static cl::opt<bool>
+    ClRecordStackHistory("hwasan-record-stack-history",
+                         cl::desc("Record stack frames with tagged allocations "
+                                  "in a thread-local ring buffer"),
+                         cl::Hidden, cl::init(true));
+static cl::opt<bool>
+    ClInstrumentMemIntrinsics("hwasan-instrument-mem-intrinsics",
+                              cl::desc("instrument memory intrinsics"),
+                              cl::Hidden, cl::init(true));
+
+static cl::opt<bool>
+    ClInstrumentLandingPads("hwasan-instrument-landing-pads",
+                            cl::desc("instrument landing pads"), cl::Hidden,
+                            cl::init(false), cl::ZeroOrMore);
+
+static cl::opt<bool> ClUseShortGranules(
+    "hwasan-use-short-granules",
+    cl::desc("use short granules in allocas and outlined checks"), cl::Hidden,
+    cl::init(false), cl::ZeroOrMore);
+
+static cl::opt<bool> ClInstrumentPersonalityFunctions(
+    "hwasan-instrument-personality-functions",
+    cl::desc("instrument personality functions"), cl::Hidden, cl::init(false),
+    cl::ZeroOrMore);
+
+static cl::opt<bool> ClInlineAllChecks("hwasan-inline-all-checks",
+                                       cl::desc("inline all checks"),
+                                       cl::Hidden, cl::init(false));
+
+namespace {
+
+/// An instrumentation pass implementing detection of addressability bugs
+/// using tagged pointers.
+class HWAddressSanitizer {
+public:
+  explicit HWAddressSanitizer(Module &M, bool CompileKernel = false,
+                              bool Recover = false) : M(M) {
+    this->Recover = ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover;
+    this->CompileKernel = ClEnableKhwasan.getNumOccurrences() > 0 ?
+        ClEnableKhwasan : CompileKernel;
+
+    initializeModule();
+  }
+
+  bool sanitizeFunction(Function &F);
+  void initializeModule();
   void createHwasanCtorComdat();
- 
-  void initializeCallbacks(Module &M); 
- 
+
+  void initializeCallbacks(Module &M);
+
   Value *getOpaqueNoopCast(IRBuilder<> &IRB, Value *Val);
 
-  Value *getDynamicShadowIfunc(IRBuilder<> &IRB); 
+  Value *getDynamicShadowIfunc(IRBuilder<> &IRB);
   Value *getShadowNonTls(IRBuilder<> &IRB);
- 
-  void untagPointerOperand(Instruction *I, Value *Addr); 
-  Value *memToShadow(Value *Shadow, IRBuilder<> &IRB); 
-  void instrumentMemAccessInline(Value *Ptr, bool IsWrite, 
-                                 unsigned AccessSizeIndex, 
-                                 Instruction *InsertBefore); 
-  void instrumentMemIntrinsic(MemIntrinsic *MI); 
-  bool instrumentMemAccess(InterestingMemoryOperand &O); 
-  bool ignoreAccess(Value *Ptr); 
-  void getInterestingMemoryOperands( 
-      Instruction *I, SmallVectorImpl<InterestingMemoryOperand> &Interesting); 
- 
-  bool isInterestingAlloca(const AllocaInst &AI); 
-  bool tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag, size_t Size); 
-  Value *tagPointer(IRBuilder<> &IRB, Type *Ty, Value *PtrLong, Value *Tag); 
-  Value *untagPointer(IRBuilder<> &IRB, Value *PtrLong); 
-  bool instrumentStack( 
-      SmallVectorImpl<AllocaInst *> &Allocas, 
-      DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> &AllocaDbgMap, 
-      SmallVectorImpl<Instruction *> &RetVec, Value *StackTag); 
-  Value *readRegister(IRBuilder<> &IRB, StringRef Name); 
-  bool instrumentLandingPads(SmallVectorImpl<Instruction *> &RetVec); 
-  Value *getNextTagWithCall(IRBuilder<> &IRB); 
-  Value *getStackBaseTag(IRBuilder<> &IRB); 
-  Value *getAllocaTag(IRBuilder<> &IRB, Value *StackTag, AllocaInst *AI, 
-                     unsigned AllocaNo); 
-  Value *getUARTag(IRBuilder<> &IRB, Value *StackTag); 
- 
-  Value *getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty); 
-  void emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord); 
- 
-  void instrumentGlobal(GlobalVariable *GV, uint8_t Tag); 
-  void instrumentGlobals(); 
- 
-  void instrumentPersonalityFunctions(); 
- 
-private: 
-  LLVMContext *C; 
-  Module &M; 
-  Triple TargetTriple; 
-  FunctionCallee HWAsanMemmove, HWAsanMemcpy, HWAsanMemset; 
-  FunctionCallee HWAsanHandleVfork; 
- 
-  /// This struct defines the shadow mapping using the rule: 
-  ///   shadow = (mem >> Scale) + Offset. 
-  /// If InGlobal is true, then 
-  ///   extern char __hwasan_shadow[]; 
-  ///   shadow = (mem >> Scale) + &__hwasan_shadow 
-  /// If InTls is true, then 
-  ///   extern char *__hwasan_tls; 
-  ///   shadow = (mem>>Scale) + align_up(__hwasan_shadow, kShadowBaseAlignment) 
-  struct ShadowMapping { 
-    int Scale; 
-    uint64_t Offset; 
-    bool InGlobal; 
-    bool InTls; 
- 
-    void init(Triple &TargetTriple); 
-    unsigned getObjectAlignment() const { return 1U << Scale; } 
-  }; 
-  ShadowMapping Mapping; 
- 
-  Type *VoidTy = Type::getVoidTy(M.getContext()); 
-  Type *IntptrTy; 
-  Type *Int8PtrTy; 
-  Type *Int8Ty; 
-  Type *Int32Ty; 
-  Type *Int64Ty = Type::getInt64Ty(M.getContext()); 
- 
-  bool CompileKernel; 
-  bool Recover; 
+
+  void untagPointerOperand(Instruction *I, Value *Addr);
+  Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
+  void instrumentMemAccessInline(Value *Ptr, bool IsWrite,
+                                 unsigned AccessSizeIndex,
+                                 Instruction *InsertBefore);
+  void instrumentMemIntrinsic(MemIntrinsic *MI);
+  bool instrumentMemAccess(InterestingMemoryOperand &O);
+  bool ignoreAccess(Value *Ptr);
+  void getInterestingMemoryOperands(
+      Instruction *I, SmallVectorImpl<InterestingMemoryOperand> &Interesting);
+
+  bool isInterestingAlloca(const AllocaInst &AI);
+  bool tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag, size_t Size);
+  Value *tagPointer(IRBuilder<> &IRB, Type *Ty, Value *PtrLong, Value *Tag);
+  Value *untagPointer(IRBuilder<> &IRB, Value *PtrLong);
+  bool instrumentStack(
+      SmallVectorImpl<AllocaInst *> &Allocas,
+      DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> &AllocaDbgMap,
+      SmallVectorImpl<Instruction *> &RetVec, Value *StackTag);
+  Value *readRegister(IRBuilder<> &IRB, StringRef Name);
+  bool instrumentLandingPads(SmallVectorImpl<Instruction *> &RetVec);
+  Value *getNextTagWithCall(IRBuilder<> &IRB);
+  Value *getStackBaseTag(IRBuilder<> &IRB);
+  Value *getAllocaTag(IRBuilder<> &IRB, Value *StackTag, AllocaInst *AI,
+                     unsigned AllocaNo);
+  Value *getUARTag(IRBuilder<> &IRB, Value *StackTag);
+
+  Value *getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty);
+  void emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord);
+
+  void instrumentGlobal(GlobalVariable *GV, uint8_t Tag);
+  void instrumentGlobals();
+
+  void instrumentPersonalityFunctions();
+
+private:
+  LLVMContext *C;
+  Module &M;
+  Triple TargetTriple;
+  FunctionCallee HWAsanMemmove, HWAsanMemcpy, HWAsanMemset;
+  FunctionCallee HWAsanHandleVfork;
+
+  /// This struct defines the shadow mapping using the rule:
+  ///   shadow = (mem >> Scale) + Offset.
+  /// If InGlobal is true, then
+  ///   extern char __hwasan_shadow[];
+  ///   shadow = (mem >> Scale) + &__hwasan_shadow
+  /// If InTls is true, then
+  ///   extern char *__hwasan_tls;
+  ///   shadow = (mem>>Scale) + align_up(__hwasan_shadow, kShadowBaseAlignment)
+  struct ShadowMapping {
+    int Scale;
+    uint64_t Offset;
+    bool InGlobal;
+    bool InTls;
+
+    void init(Triple &TargetTriple);
+    unsigned getObjectAlignment() const { return 1U << Scale; }
+  };
+  ShadowMapping Mapping;
+
+  Type *VoidTy = Type::getVoidTy(M.getContext());
+  Type *IntptrTy;
+  Type *Int8PtrTy;
+  Type *Int8Ty;
+  Type *Int32Ty;
+  Type *Int64Ty = Type::getInt64Ty(M.getContext());
+
+  bool CompileKernel;
+  bool Recover;
   bool OutlinedChecks;
-  bool UseShortGranules; 
-  bool InstrumentLandingPads; 
- 
+  bool UseShortGranules;
+  bool InstrumentLandingPads;
+
   bool HasMatchAllTag = false;
   uint8_t MatchAllTag = 0;
 
-  Function *HwasanCtorFunction; 
- 
-  FunctionCallee HwasanMemoryAccessCallback[2][kNumberOfAccessSizes]; 
-  FunctionCallee HwasanMemoryAccessCallbackSized[2]; 
- 
-  FunctionCallee HwasanTagMemoryFunc; 
-  FunctionCallee HwasanGenerateTagFunc; 
- 
-  Constant *ShadowGlobal; 
- 
+  Function *HwasanCtorFunction;
+
+  FunctionCallee HwasanMemoryAccessCallback[2][kNumberOfAccessSizes];
+  FunctionCallee HwasanMemoryAccessCallbackSized[2];
+
+  FunctionCallee HwasanTagMemoryFunc;
+  FunctionCallee HwasanGenerateTagFunc;
+
+  Constant *ShadowGlobal;
+
   Value *ShadowBase = nullptr;
-  Value *StackBaseTag = nullptr; 
-  GlobalValue *ThreadPtrGlobal = nullptr; 
-}; 
- 
-class HWAddressSanitizerLegacyPass : public FunctionPass { 
-public: 
-  // Pass identification, replacement for typeid. 
-  static char ID; 
- 
-  explicit HWAddressSanitizerLegacyPass(bool CompileKernel = false, 
-                                        bool Recover = false) 
-      : FunctionPass(ID), CompileKernel(CompileKernel), Recover(Recover) { 
-    initializeHWAddressSanitizerLegacyPassPass( 
-        *PassRegistry::getPassRegistry()); 
-  } 
- 
-  StringRef getPassName() const override { return "HWAddressSanitizer"; } 
- 
-  bool doInitialization(Module &M) override { 
-    HWASan = std::make_unique<HWAddressSanitizer>(M, CompileKernel, Recover); 
-    return true; 
-  } 
- 
-  bool runOnFunction(Function &F) override { 
-    return HWASan->sanitizeFunction(F); 
-  } 
- 
-  bool doFinalization(Module &M) override { 
-    HWASan.reset(); 
-    return false; 
-  } 
- 
-private: 
-  std::unique_ptr<HWAddressSanitizer> HWASan; 
-  bool CompileKernel; 
-  bool Recover; 
-}; 
- 
-} // end anonymous namespace 
- 
-char HWAddressSanitizerLegacyPass::ID = 0; 
- 
-INITIALIZE_PASS_BEGIN( 
-    HWAddressSanitizerLegacyPass, "hwasan", 
-    "HWAddressSanitizer: detect memory bugs using tagged addressing.", false, 
-    false) 
-INITIALIZE_PASS_END( 
-    HWAddressSanitizerLegacyPass, "hwasan", 
-    "HWAddressSanitizer: detect memory bugs using tagged addressing.", false, 
-    false) 
- 
-FunctionPass *llvm::createHWAddressSanitizerLegacyPassPass(bool CompileKernel, 
-                                                           bool Recover) { 
-  assert(!CompileKernel || Recover); 
-  return new HWAddressSanitizerLegacyPass(CompileKernel, Recover); 
-} 
- 
-HWAddressSanitizerPass::HWAddressSanitizerPass(bool CompileKernel, bool Recover) 
-    : CompileKernel(CompileKernel), Recover(Recover) {} 
- 
-PreservedAnalyses HWAddressSanitizerPass::run(Module &M, 
-                                              ModuleAnalysisManager &MAM) { 
-  HWAddressSanitizer HWASan(M, CompileKernel, Recover); 
-  bool Modified = false; 
-  for (Function &F : M) 
-    Modified |= HWASan.sanitizeFunction(F); 
-  if (Modified) 
-    return PreservedAnalyses::none(); 
-  return PreservedAnalyses::all(); 
-} 
- 
+  Value *StackBaseTag = nullptr;
+  GlobalValue *ThreadPtrGlobal = nullptr;
+};
+
+class HWAddressSanitizerLegacyPass : public FunctionPass {
+public:
+  // Pass identification, replacement for typeid.
+  static char ID;
+
+  explicit HWAddressSanitizerLegacyPass(bool CompileKernel = false,
+                                        bool Recover = false)
+      : FunctionPass(ID), CompileKernel(CompileKernel), Recover(Recover) {
+    initializeHWAddressSanitizerLegacyPassPass(
+        *PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override { return "HWAddressSanitizer"; }
+
+  bool doInitialization(Module &M) override {
+    HWASan = std::make_unique<HWAddressSanitizer>(M, CompileKernel, Recover);
+    return true;
+  }
+
+  bool runOnFunction(Function &F) override {
+    return HWASan->sanitizeFunction(F);
+  }
+
+  bool doFinalization(Module &M) override {
+    HWASan.reset();
+    return false;
+  }
+
+private:
+  std::unique_ptr<HWAddressSanitizer> HWASan;
+  bool CompileKernel;
+  bool Recover;
+};
+
+} // end anonymous namespace
+
+char HWAddressSanitizerLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(
+    HWAddressSanitizerLegacyPass, "hwasan",
+    "HWAddressSanitizer: detect memory bugs using tagged addressing.", false,
+    false)
+INITIALIZE_PASS_END(
+    HWAddressSanitizerLegacyPass, "hwasan",
+    "HWAddressSanitizer: detect memory bugs using tagged addressing.", false,
+    false)
+
+FunctionPass *llvm::createHWAddressSanitizerLegacyPassPass(bool CompileKernel,
+                                                           bool Recover) {
+  assert(!CompileKernel || Recover);
+  return new HWAddressSanitizerLegacyPass(CompileKernel, Recover);
+}
+
+HWAddressSanitizerPass::HWAddressSanitizerPass(bool CompileKernel, bool Recover)
+    : CompileKernel(CompileKernel), Recover(Recover) {}
+
+PreservedAnalyses HWAddressSanitizerPass::run(Module &M,
+                                              ModuleAnalysisManager &MAM) {
+  HWAddressSanitizer HWASan(M, CompileKernel, Recover);
+  bool Modified = false;
+  for (Function &F : M)
+    Modified |= HWASan.sanitizeFunction(F);
+  if (Modified)
+    return PreservedAnalyses::none();
+  return PreservedAnalyses::all();
+}
+
 void HWAddressSanitizer::createHwasanCtorComdat() {
   std::tie(HwasanCtorFunction, std::ignore) =
       getOrCreateSanitizerCtorAndInitFunctions(
@@ -470,38 +470,38 @@ void HWAddressSanitizer::createHwasanCtorComdat() {
   appendToCompilerUsed(M, Dummy);
 }
 
-/// Module-level initialization. 
-/// 
-/// inserts a call to __hwasan_init to the module's constructor list. 
-void HWAddressSanitizer::initializeModule() { 
-  LLVM_DEBUG(dbgs() << "Init " << M.getName() << "\n"); 
-  auto &DL = M.getDataLayout(); 
- 
-  TargetTriple = Triple(M.getTargetTriple()); 
- 
-  Mapping.init(TargetTriple); 
- 
-  C = &(M.getContext()); 
-  IRBuilder<> IRB(*C); 
-  IntptrTy = IRB.getIntPtrTy(DL); 
-  Int8PtrTy = IRB.getInt8PtrTy(); 
-  Int8Ty = IRB.getInt8Ty(); 
-  Int32Ty = IRB.getInt32Ty(); 
- 
-  HwasanCtorFunction = nullptr; 
- 
-  // Older versions of Android do not have the required runtime support for 
-  // short granules, global or personality function instrumentation. On other 
-  // platforms we currently require using the latest version of the runtime. 
-  bool NewRuntime = 
-      !TargetTriple.isAndroid() || !TargetTriple.isAndroidVersionLT(30); 
- 
-  UseShortGranules = 
-      ClUseShortGranules.getNumOccurrences() ? ClUseShortGranules : NewRuntime; 
+/// Module-level initialization.
+///
+/// inserts a call to __hwasan_init to the module's constructor list.
+void HWAddressSanitizer::initializeModule() {
+  LLVM_DEBUG(dbgs() << "Init " << M.getName() << "\n");
+  auto &DL = M.getDataLayout();
+
+  TargetTriple = Triple(M.getTargetTriple());
+
+  Mapping.init(TargetTriple);
+
+  C = &(M.getContext());
+  IRBuilder<> IRB(*C);
+  IntptrTy = IRB.getIntPtrTy(DL);
+  Int8PtrTy = IRB.getInt8PtrTy();
+  Int8Ty = IRB.getInt8Ty();
+  Int32Ty = IRB.getInt32Ty();
+
+  HwasanCtorFunction = nullptr;
+
+  // Older versions of Android do not have the required runtime support for
+  // short granules, global or personality function instrumentation. On other
+  // platforms we currently require using the latest version of the runtime.
+  bool NewRuntime =
+      !TargetTriple.isAndroid() || !TargetTriple.isAndroidVersionLT(30);
+
+  UseShortGranules =
+      ClUseShortGranules.getNumOccurrences() ? ClUseShortGranules : NewRuntime;
   OutlinedChecks =
       TargetTriple.isAArch64() && TargetTriple.isOSBinFormatELF() &&
       (ClInlineAllChecks.getNumOccurrences() ? !ClInlineAllChecks : !Recover);
- 
+
   if (ClMatchAllTag.getNumOccurrences()) {
     if (ClMatchAllTag != -1) {
       HasMatchAllTag = true;
@@ -512,86 +512,86 @@ void HWAddressSanitizer::initializeModule() {
     MatchAllTag = 0xFF;
   }
 
-  // If we don't have personality function support, fall back to landing pads. 
-  InstrumentLandingPads = ClInstrumentLandingPads.getNumOccurrences() 
-                              ? ClInstrumentLandingPads 
-                              : !NewRuntime; 
- 
-  if (!CompileKernel) { 
+  // If we don't have personality function support, fall back to landing pads.
+  InstrumentLandingPads = ClInstrumentLandingPads.getNumOccurrences()
+                              ? ClInstrumentLandingPads
+                              : !NewRuntime;
+
+  if (!CompileKernel) {
     createHwasanCtorComdat();
-    bool InstrumentGlobals = 
-        ClGlobals.getNumOccurrences() ? ClGlobals : NewRuntime; 
-    if (InstrumentGlobals) 
-      instrumentGlobals(); 
- 
-    bool InstrumentPersonalityFunctions = 
-        ClInstrumentPersonalityFunctions.getNumOccurrences() 
-            ? ClInstrumentPersonalityFunctions 
-            : NewRuntime; 
-    if (InstrumentPersonalityFunctions) 
-      instrumentPersonalityFunctions(); 
-  } 
- 
-  if (!TargetTriple.isAndroid()) { 
-    Constant *C = M.getOrInsertGlobal("__hwasan_tls", IntptrTy, [&] { 
-      auto *GV = new GlobalVariable(M, IntptrTy, /*isConstant=*/false, 
-                                    GlobalValue::ExternalLinkage, nullptr, 
-                                    "__hwasan_tls", nullptr, 
-                                    GlobalVariable::InitialExecTLSModel); 
-      appendToCompilerUsed(M, GV); 
-      return GV; 
-    }); 
-    ThreadPtrGlobal = cast<GlobalVariable>(C); 
-  } 
-} 
- 
-void HWAddressSanitizer::initializeCallbacks(Module &M) { 
-  IRBuilder<> IRB(*C); 
-  for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) { 
-    const std::string TypeStr = AccessIsWrite ? "store" : "load"; 
-    const std::string EndingStr = Recover ? "_noabort" : ""; 
- 
-    HwasanMemoryAccessCallbackSized[AccessIsWrite] = M.getOrInsertFunction( 
-        ClMemoryAccessCallbackPrefix + TypeStr + "N" + EndingStr, 
-        FunctionType::get(IRB.getVoidTy(), {IntptrTy, IntptrTy}, false)); 
- 
-    for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes; 
-         AccessSizeIndex++) { 
-      HwasanMemoryAccessCallback[AccessIsWrite][AccessSizeIndex] = 
-          M.getOrInsertFunction( 
-              ClMemoryAccessCallbackPrefix + TypeStr + 
-                  itostr(1ULL << AccessSizeIndex) + EndingStr, 
-              FunctionType::get(IRB.getVoidTy(), {IntptrTy}, false)); 
-    } 
-  } 
- 
-  HwasanTagMemoryFunc = M.getOrInsertFunction( 
-      "__hwasan_tag_memory", IRB.getVoidTy(), Int8PtrTy, Int8Ty, IntptrTy); 
-  HwasanGenerateTagFunc = 
-      M.getOrInsertFunction("__hwasan_generate_tag", Int8Ty); 
- 
-  ShadowGlobal = M.getOrInsertGlobal("__hwasan_shadow", 
-                                     ArrayType::get(IRB.getInt8Ty(), 0)); 
- 
-  const std::string MemIntrinCallbackPrefix = 
-      CompileKernel ? std::string("") : ClMemoryAccessCallbackPrefix; 
-  HWAsanMemmove = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memmove", 
-                                        IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), 
-                                        IRB.getInt8PtrTy(), IntptrTy); 
-  HWAsanMemcpy = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memcpy", 
-                                       IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), 
-                                       IRB.getInt8PtrTy(), IntptrTy); 
-  HWAsanMemset = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memset", 
-                                       IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), 
-                                       IRB.getInt32Ty(), IntptrTy); 
- 
-  HWAsanHandleVfork = 
-      M.getOrInsertFunction("__hwasan_handle_vfork", IRB.getVoidTy(), IntptrTy); 
-} 
- 
+    bool InstrumentGlobals =
+        ClGlobals.getNumOccurrences() ? ClGlobals : NewRuntime;
+    if (InstrumentGlobals)
+      instrumentGlobals();
+
+    bool InstrumentPersonalityFunctions =
+        ClInstrumentPersonalityFunctions.getNumOccurrences()
+            ? ClInstrumentPersonalityFunctions
+            : NewRuntime;
+    if (InstrumentPersonalityFunctions)
+      instrumentPersonalityFunctions();
+  }
+
+  if (!TargetTriple.isAndroid()) {
+    Constant *C = M.getOrInsertGlobal("__hwasan_tls", IntptrTy, [&] {
+      auto *GV = new GlobalVariable(M, IntptrTy, /*isConstant=*/false,
+                                    GlobalValue::ExternalLinkage, nullptr,
+                                    "__hwasan_tls", nullptr,
+                                    GlobalVariable::InitialExecTLSModel);
+      appendToCompilerUsed(M, GV);
+      return GV;
+    });
+    ThreadPtrGlobal = cast<GlobalVariable>(C);
+  }
+}
+
+void HWAddressSanitizer::initializeCallbacks(Module &M) {
+  IRBuilder<> IRB(*C);
+  for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
+    const std::string TypeStr = AccessIsWrite ? "store" : "load";
+    const std::string EndingStr = Recover ? "_noabort" : "";
+
+    HwasanMemoryAccessCallbackSized[AccessIsWrite] = M.getOrInsertFunction(
+        ClMemoryAccessCallbackPrefix + TypeStr + "N" + EndingStr,
+        FunctionType::get(IRB.getVoidTy(), {IntptrTy, IntptrTy}, false));
+
+    for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
+         AccessSizeIndex++) {
+      HwasanMemoryAccessCallback[AccessIsWrite][AccessSizeIndex] =
+          M.getOrInsertFunction(
+              ClMemoryAccessCallbackPrefix + TypeStr +
+                  itostr(1ULL << AccessSizeIndex) + EndingStr,
+              FunctionType::get(IRB.getVoidTy(), {IntptrTy}, false));
+    }
+  }
+
+  HwasanTagMemoryFunc = M.getOrInsertFunction(
+      "__hwasan_tag_memory", IRB.getVoidTy(), Int8PtrTy, Int8Ty, IntptrTy);
+  HwasanGenerateTagFunc =
+      M.getOrInsertFunction("__hwasan_generate_tag", Int8Ty);
+
+  ShadowGlobal = M.getOrInsertGlobal("__hwasan_shadow",
+                                     ArrayType::get(IRB.getInt8Ty(), 0));
+
+  const std::string MemIntrinCallbackPrefix =
+      CompileKernel ? std::string("") : ClMemoryAccessCallbackPrefix;
+  HWAsanMemmove = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memmove",
+                                        IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+                                        IRB.getInt8PtrTy(), IntptrTy);
+  HWAsanMemcpy = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memcpy",
+                                       IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+                                       IRB.getInt8PtrTy(), IntptrTy);
+  HWAsanMemset = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memset",
+                                       IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+                                       IRB.getInt32Ty(), IntptrTy);
+
+  HWAsanHandleVfork =
+      M.getOrInsertFunction("__hwasan_handle_vfork", IRB.getVoidTy(), IntptrTy);
+}
+
 Value *HWAddressSanitizer::getOpaqueNoopCast(IRBuilder<> &IRB, Value *Val) {
-  // An empty inline asm with input reg == output reg. 
-  // An opaque no-op cast, basically. 
+  // An empty inline asm with input reg == output reg.
+  // An opaque no-op cast, basically.
   // This prevents code bloat as a result of rematerializing trivial definitions
   // such as constants or global addresses at every load and store.
   InlineAsm *Asm =
@@ -599,128 +599,128 @@ Value *HWAddressSanitizer::getOpaqueNoopCast(IRBuilder<> &IRB, Value *Val) {
                      StringRef(""), StringRef("=r,0"),
                      /*hasSideEffects=*/false);
   return IRB.CreateCall(Asm, {Val}, ".hwasan.shadow");
-} 
- 
+}
+
 Value *HWAddressSanitizer::getDynamicShadowIfunc(IRBuilder<> &IRB) {
   return getOpaqueNoopCast(IRB, ShadowGlobal);
 }
 
 Value *HWAddressSanitizer::getShadowNonTls(IRBuilder<> &IRB) {
-  if (Mapping.Offset != kDynamicShadowSentinel) 
+  if (Mapping.Offset != kDynamicShadowSentinel)
     return getOpaqueNoopCast(
         IRB, ConstantExpr::getIntToPtr(
                  ConstantInt::get(IntptrTy, Mapping.Offset), Int8PtrTy));
- 
-  if (Mapping.InGlobal) { 
-    return getDynamicShadowIfunc(IRB); 
-  } else { 
-    Value *GlobalDynamicAddress = 
-        IRB.GetInsertBlock()->getParent()->getParent()->getOrInsertGlobal( 
-            kHwasanShadowMemoryDynamicAddress, Int8PtrTy); 
-    return IRB.CreateLoad(Int8PtrTy, GlobalDynamicAddress); 
-  } 
-} 
- 
-bool HWAddressSanitizer::ignoreAccess(Value *Ptr) { 
-  // Do not instrument acesses from different address spaces; we cannot deal 
-  // with them. 
-  Type *PtrTy = cast<PointerType>(Ptr->getType()->getScalarType()); 
-  if (PtrTy->getPointerAddressSpace() != 0) 
-    return true; 
- 
-  // Ignore swifterror addresses. 
-  // swifterror memory addresses are mem2reg promoted by instruction 
-  // selection. As such they cannot have regular uses like an instrumentation 
-  // function and it makes no sense to track them as memory. 
-  if (Ptr->isSwiftError()) 
-    return true; 
- 
-  return false; 
-} 
- 
-void HWAddressSanitizer::getInterestingMemoryOperands( 
-    Instruction *I, SmallVectorImpl<InterestingMemoryOperand> &Interesting) { 
-  // Skip memory accesses inserted by another instrumentation. 
-  if (I->hasMetadata("nosanitize")) 
-    return; 
- 
-  // Do not instrument the load fetching the dynamic shadow address. 
+
+  if (Mapping.InGlobal) {
+    return getDynamicShadowIfunc(IRB);
+  } else {
+    Value *GlobalDynamicAddress =
+        IRB.GetInsertBlock()->getParent()->getParent()->getOrInsertGlobal(
+            kHwasanShadowMemoryDynamicAddress, Int8PtrTy);
+    return IRB.CreateLoad(Int8PtrTy, GlobalDynamicAddress);
+  }
+}
+
+bool HWAddressSanitizer::ignoreAccess(Value *Ptr) {
+  // Do not instrument acesses from different address spaces; we cannot deal
+  // with them.
+  Type *PtrTy = cast<PointerType>(Ptr->getType()->getScalarType());
+  if (PtrTy->getPointerAddressSpace() != 0)
+    return true;
+
+  // Ignore swifterror addresses.
+  // swifterror memory addresses are mem2reg promoted by instruction
+  // selection. As such they cannot have regular uses like an instrumentation
+  // function and it makes no sense to track them as memory.
+  if (Ptr->isSwiftError())
+    return true;
+
+  return false;
+}
+
+void HWAddressSanitizer::getInterestingMemoryOperands(
+    Instruction *I, SmallVectorImpl<InterestingMemoryOperand> &Interesting) {
+  // Skip memory accesses inserted by another instrumentation.
+  if (I->hasMetadata("nosanitize"))
+    return;
+
+  // Do not instrument the load fetching the dynamic shadow address.
   if (ShadowBase == I)
-    return; 
- 
-  if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 
-    if (!ClInstrumentReads || ignoreAccess(LI->getPointerOperand())) 
-      return; 
-    Interesting.emplace_back(I, LI->getPointerOperandIndex(), false, 
-                             LI->getType(), LI->getAlign()); 
-  } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { 
-    if (!ClInstrumentWrites || ignoreAccess(SI->getPointerOperand())) 
-      return; 
-    Interesting.emplace_back(I, SI->getPointerOperandIndex(), true, 
-                             SI->getValueOperand()->getType(), SI->getAlign()); 
-  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) { 
-    if (!ClInstrumentAtomics || ignoreAccess(RMW->getPointerOperand())) 
-      return; 
-    Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true, 
-                             RMW->getValOperand()->getType(), None); 
-  } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) { 
-    if (!ClInstrumentAtomics || ignoreAccess(XCHG->getPointerOperand())) 
-      return; 
-    Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true, 
-                             XCHG->getCompareOperand()->getType(), None); 
-  } else if (auto CI = dyn_cast<CallInst>(I)) { 
-    for (unsigned ArgNo = 0; ArgNo < CI->getNumArgOperands(); ArgNo++) { 
-      if (!ClInstrumentByval || !CI->isByValArgument(ArgNo) || 
-          ignoreAccess(CI->getArgOperand(ArgNo))) 
-        continue; 
-      Type *Ty = CI->getParamByValType(ArgNo); 
-      Interesting.emplace_back(I, ArgNo, false, Ty, Align(1)); 
-    } 
-  } 
-} 
- 
-static unsigned getPointerOperandIndex(Instruction *I) { 
-  if (LoadInst *LI = dyn_cast<LoadInst>(I)) 
-    return LI->getPointerOperandIndex(); 
-  if (StoreInst *SI = dyn_cast<StoreInst>(I)) 
-    return SI->getPointerOperandIndex(); 
-  if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) 
-    return RMW->getPointerOperandIndex(); 
-  if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) 
-    return XCHG->getPointerOperandIndex(); 
-  report_fatal_error("Unexpected instruction"); 
-  return -1; 
-} 
- 
-static size_t TypeSizeToSizeIndex(uint32_t TypeSize) { 
-  size_t Res = countTrailingZeros(TypeSize / 8); 
-  assert(Res < kNumberOfAccessSizes); 
-  return Res; 
-} 
- 
-void HWAddressSanitizer::untagPointerOperand(Instruction *I, Value *Addr) { 
-  if (TargetTriple.isAArch64()) 
-    return; 
- 
-  IRBuilder<> IRB(I); 
-  Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); 
-  Value *UntaggedPtr = 
-      IRB.CreateIntToPtr(untagPointer(IRB, AddrLong), Addr->getType()); 
-  I->setOperand(getPointerOperandIndex(I), UntaggedPtr); 
-} 
- 
-Value *HWAddressSanitizer::memToShadow(Value *Mem, IRBuilder<> &IRB) { 
-  // Mem >> Scale 
-  Value *Shadow = IRB.CreateLShr(Mem, Mapping.Scale); 
-  if (Mapping.Offset == 0) 
-    return IRB.CreateIntToPtr(Shadow, Int8PtrTy); 
-  // (Mem >> Scale) + Offset 
+    return;
+
+  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+    if (!ClInstrumentReads || ignoreAccess(LI->getPointerOperand()))
+      return;
+    Interesting.emplace_back(I, LI->getPointerOperandIndex(), false,
+                             LI->getType(), LI->getAlign());
+  } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+    if (!ClInstrumentWrites || ignoreAccess(SI->getPointerOperand()))
+      return;
+    Interesting.emplace_back(I, SI->getPointerOperandIndex(), true,
+                             SI->getValueOperand()->getType(), SI->getAlign());
+  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
+    if (!ClInstrumentAtomics || ignoreAccess(RMW->getPointerOperand()))
+      return;
+    Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true,
+                             RMW->getValOperand()->getType(), None);
+  } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
+    if (!ClInstrumentAtomics || ignoreAccess(XCHG->getPointerOperand()))
+      return;
+    Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true,
+                             XCHG->getCompareOperand()->getType(), None);
+  } else if (auto CI = dyn_cast<CallInst>(I)) {
+    for (unsigned ArgNo = 0; ArgNo < CI->getNumArgOperands(); ArgNo++) {
+      if (!ClInstrumentByval || !CI->isByValArgument(ArgNo) ||
+          ignoreAccess(CI->getArgOperand(ArgNo)))
+        continue;
+      Type *Ty = CI->getParamByValType(ArgNo);
+      Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
+    }
+  }
+}
+
+static unsigned getPointerOperandIndex(Instruction *I) {
+  if (LoadInst *LI = dyn_cast<LoadInst>(I))
+    return LI->getPointerOperandIndex();
+  if (StoreInst *SI = dyn_cast<StoreInst>(I))
+    return SI->getPointerOperandIndex();
+  if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I))
+    return RMW->getPointerOperandIndex();
+  if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I))
+    return XCHG->getPointerOperandIndex();
+  report_fatal_error("Unexpected instruction");
+  return -1;
+}
+
+static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
+  size_t Res = countTrailingZeros(TypeSize / 8);
+  assert(Res < kNumberOfAccessSizes);
+  return Res;
+}
+
+void HWAddressSanitizer::untagPointerOperand(Instruction *I, Value *Addr) {
+  if (TargetTriple.isAArch64())
+    return;
+
+  IRBuilder<> IRB(I);
+  Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
+  Value *UntaggedPtr =
+      IRB.CreateIntToPtr(untagPointer(IRB, AddrLong), Addr->getType());
+  I->setOperand(getPointerOperandIndex(I), UntaggedPtr);
+}
+
+Value *HWAddressSanitizer::memToShadow(Value *Mem, IRBuilder<> &IRB) {
+  // Mem >> Scale
+  Value *Shadow = IRB.CreateLShr(Mem, Mapping.Scale);
+  if (Mapping.Offset == 0)
+    return IRB.CreateIntToPtr(Shadow, Int8PtrTy);
+  // (Mem >> Scale) + Offset
   return IRB.CreateGEP(Int8Ty, ShadowBase, Shadow);
-} 
- 
-void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite, 
-                                                   unsigned AccessSizeIndex, 
-                                                   Instruction *InsertBefore) { 
+}
+
+void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
+                                                   unsigned AccessSizeIndex,
+                                                   Instruction *InsertBefore) {
   const int64_t AccessInfo =
       (CompileKernel << HWASanAccessInfo::CompileKernelShift) +
       (HasMatchAllTag << HWASanAccessInfo::HasMatchAllShift) +
@@ -728,809 +728,809 @@ void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
       (Recover << HWASanAccessInfo::RecoverShift) +
       (IsWrite << HWASanAccessInfo::IsWriteShift) +
       (AccessSizeIndex << HWASanAccessInfo::AccessSizeShift);
-  IRBuilder<> IRB(InsertBefore); 
- 
+  IRBuilder<> IRB(InsertBefore);
+
   if (OutlinedChecks) {
-    Module *M = IRB.GetInsertBlock()->getParent()->getParent(); 
-    Ptr = IRB.CreateBitCast(Ptr, Int8PtrTy); 
-    IRB.CreateCall(Intrinsic::getDeclaration( 
-                       M, UseShortGranules 
-                              ? Intrinsic::hwasan_check_memaccess_shortgranules 
-                              : Intrinsic::hwasan_check_memaccess), 
+    Module *M = IRB.GetInsertBlock()->getParent()->getParent();
+    Ptr = IRB.CreateBitCast(Ptr, Int8PtrTy);
+    IRB.CreateCall(Intrinsic::getDeclaration(
+                       M, UseShortGranules
+                              ? Intrinsic::hwasan_check_memaccess_shortgranules
+                              : Intrinsic::hwasan_check_memaccess),
                    {ShadowBase, Ptr, ConstantInt::get(Int32Ty, AccessInfo)});
-    return; 
-  } 
- 
-  Value *PtrLong = IRB.CreatePointerCast(Ptr, IntptrTy); 
-  Value *PtrTag = IRB.CreateTrunc(IRB.CreateLShr(PtrLong, kPointerTagShift), 
-                                  IRB.getInt8Ty()); 
-  Value *AddrLong = untagPointer(IRB, PtrLong); 
-  Value *Shadow = memToShadow(AddrLong, IRB); 
-  Value *MemTag = IRB.CreateLoad(Int8Ty, Shadow); 
-  Value *TagMismatch = IRB.CreateICmpNE(PtrTag, MemTag); 
- 
+    return;
+  }
+
+  Value *PtrLong = IRB.CreatePointerCast(Ptr, IntptrTy);
+  Value *PtrTag = IRB.CreateTrunc(IRB.CreateLShr(PtrLong, kPointerTagShift),
+                                  IRB.getInt8Ty());
+  Value *AddrLong = untagPointer(IRB, PtrLong);
+  Value *Shadow = memToShadow(AddrLong, IRB);
+  Value *MemTag = IRB.CreateLoad(Int8Ty, Shadow);
+  Value *TagMismatch = IRB.CreateICmpNE(PtrTag, MemTag);
+
   if (HasMatchAllTag) {
     Value *TagNotIgnored = IRB.CreateICmpNE(
         PtrTag, ConstantInt::get(PtrTag->getType(), MatchAllTag));
-    TagMismatch = IRB.CreateAnd(TagMismatch, TagNotIgnored); 
-  } 
- 
-  Instruction *CheckTerm = 
-      SplitBlockAndInsertIfThen(TagMismatch, InsertBefore, false, 
-                                MDBuilder(*C).createBranchWeights(1, 100000)); 
- 
-  IRB.SetInsertPoint(CheckTerm); 
-  Value *OutOfShortGranuleTagRange = 
-      IRB.CreateICmpUGT(MemTag, ConstantInt::get(Int8Ty, 15)); 
-  Instruction *CheckFailTerm = 
-      SplitBlockAndInsertIfThen(OutOfShortGranuleTagRange, CheckTerm, !Recover, 
-                                MDBuilder(*C).createBranchWeights(1, 100000)); 
- 
-  IRB.SetInsertPoint(CheckTerm); 
-  Value *PtrLowBits = IRB.CreateTrunc(IRB.CreateAnd(PtrLong, 15), Int8Ty); 
-  PtrLowBits = IRB.CreateAdd( 
-      PtrLowBits, ConstantInt::get(Int8Ty, (1 << AccessSizeIndex) - 1)); 
-  Value *PtrLowBitsOOB = IRB.CreateICmpUGE(PtrLowBits, MemTag); 
-  SplitBlockAndInsertIfThen(PtrLowBitsOOB, CheckTerm, false, 
-                            MDBuilder(*C).createBranchWeights(1, 100000), 
+    TagMismatch = IRB.CreateAnd(TagMismatch, TagNotIgnored);
+  }
+
+  Instruction *CheckTerm =
+      SplitBlockAndInsertIfThen(TagMismatch, InsertBefore, false,
+                                MDBuilder(*C).createBranchWeights(1, 100000));
+
+  IRB.SetInsertPoint(CheckTerm);
+  Value *OutOfShortGranuleTagRange =
+      IRB.CreateICmpUGT(MemTag, ConstantInt::get(Int8Ty, 15));
+  Instruction *CheckFailTerm =
+      SplitBlockAndInsertIfThen(OutOfShortGranuleTagRange, CheckTerm, !Recover,
+                                MDBuilder(*C).createBranchWeights(1, 100000));
+
+  IRB.SetInsertPoint(CheckTerm);
+  Value *PtrLowBits = IRB.CreateTrunc(IRB.CreateAnd(PtrLong, 15), Int8Ty);
+  PtrLowBits = IRB.CreateAdd(
+      PtrLowBits, ConstantInt::get(Int8Ty, (1 << AccessSizeIndex) - 1));
+  Value *PtrLowBitsOOB = IRB.CreateICmpUGE(PtrLowBits, MemTag);
+  SplitBlockAndInsertIfThen(PtrLowBitsOOB, CheckTerm, false,
+                            MDBuilder(*C).createBranchWeights(1, 100000),
                             (DomTreeUpdater *)nullptr, nullptr,
                             CheckFailTerm->getParent());
- 
-  IRB.SetInsertPoint(CheckTerm); 
-  Value *InlineTagAddr = IRB.CreateOr(AddrLong, 15); 
-  InlineTagAddr = IRB.CreateIntToPtr(InlineTagAddr, Int8PtrTy); 
-  Value *InlineTag = IRB.CreateLoad(Int8Ty, InlineTagAddr); 
-  Value *InlineTagMismatch = IRB.CreateICmpNE(PtrTag, InlineTag); 
-  SplitBlockAndInsertIfThen(InlineTagMismatch, CheckTerm, false, 
-                            MDBuilder(*C).createBranchWeights(1, 100000), 
+
+  IRB.SetInsertPoint(CheckTerm);
+  Value *InlineTagAddr = IRB.CreateOr(AddrLong, 15);
+  InlineTagAddr = IRB.CreateIntToPtr(InlineTagAddr, Int8PtrTy);
+  Value *InlineTag = IRB.CreateLoad(Int8Ty, InlineTagAddr);
+  Value *InlineTagMismatch = IRB.CreateICmpNE(PtrTag, InlineTag);
+  SplitBlockAndInsertIfThen(InlineTagMismatch, CheckTerm, false,
+                            MDBuilder(*C).createBranchWeights(1, 100000),
                             (DomTreeUpdater *)nullptr, nullptr,
                             CheckFailTerm->getParent());
- 
-  IRB.SetInsertPoint(CheckFailTerm); 
-  InlineAsm *Asm; 
-  switch (TargetTriple.getArch()) { 
-    case Triple::x86_64: 
-      // The signal handler will find the data address in rdi. 
-      Asm = InlineAsm::get( 
-          FunctionType::get(IRB.getVoidTy(), {PtrLong->getType()}, false), 
+
+  IRB.SetInsertPoint(CheckFailTerm);
+  InlineAsm *Asm;
+  switch (TargetTriple.getArch()) {
+    case Triple::x86_64:
+      // The signal handler will find the data address in rdi.
+      Asm = InlineAsm::get(
+          FunctionType::get(IRB.getVoidTy(), {PtrLong->getType()}, false),
           "int3\nnopl " +
               itostr(0x40 + (AccessInfo & HWASanAccessInfo::RuntimeMask)) +
               "(%rax)",
-          "{rdi}", 
-          /*hasSideEffects=*/true); 
-      break; 
-    case Triple::aarch64: 
-    case Triple::aarch64_be: 
-      // The signal handler will find the data address in x0. 
-      Asm = InlineAsm::get( 
-          FunctionType::get(IRB.getVoidTy(), {PtrLong->getType()}, false), 
+          "{rdi}",
+          /*hasSideEffects=*/true);
+      break;
+    case Triple::aarch64:
+    case Triple::aarch64_be:
+      // The signal handler will find the data address in x0.
+      Asm = InlineAsm::get(
+          FunctionType::get(IRB.getVoidTy(), {PtrLong->getType()}, false),
           "brk #" +
               itostr(0x900 + (AccessInfo & HWASanAccessInfo::RuntimeMask)),
-          "{x0}", 
-          /*hasSideEffects=*/true); 
-      break; 
-    default: 
-      report_fatal_error("unsupported architecture"); 
-  } 
-  IRB.CreateCall(Asm, PtrLong); 
-  if (Recover) 
-    cast<BranchInst>(CheckFailTerm)->setSuccessor(0, CheckTerm->getParent()); 
-} 
- 
-void HWAddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) { 
-  IRBuilder<> IRB(MI); 
-  if (isa<MemTransferInst>(MI)) { 
-    IRB.CreateCall( 
-        isa<MemMoveInst>(MI) ? HWAsanMemmove : HWAsanMemcpy, 
-        {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()), 
-         IRB.CreatePointerCast(MI->getOperand(1), IRB.getInt8PtrTy()), 
-         IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)}); 
-  } else if (isa<MemSetInst>(MI)) { 
-    IRB.CreateCall( 
-        HWAsanMemset, 
-        {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()), 
-         IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false), 
-         IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)}); 
-  } 
-  MI->eraseFromParent(); 
-} 
- 
-bool HWAddressSanitizer::instrumentMemAccess(InterestingMemoryOperand &O) { 
-  Value *Addr = O.getPtr(); 
- 
-  LLVM_DEBUG(dbgs() << "Instrumenting: " << O.getInsn() << "\n"); 
- 
-  if (O.MaybeMask) 
-    return false; //FIXME 
- 
-  IRBuilder<> IRB(O.getInsn()); 
-  if (isPowerOf2_64(O.TypeSize) && 
-      (O.TypeSize / 8 <= (1ULL << (kNumberOfAccessSizes - 1))) && 
-      (!O.Alignment || *O.Alignment >= (1ULL << Mapping.Scale) || 
-       *O.Alignment >= O.TypeSize / 8)) { 
-    size_t AccessSizeIndex = TypeSizeToSizeIndex(O.TypeSize); 
-    if (ClInstrumentWithCalls) { 
-      IRB.CreateCall(HwasanMemoryAccessCallback[O.IsWrite][AccessSizeIndex], 
-                     IRB.CreatePointerCast(Addr, IntptrTy)); 
-    } else { 
-      instrumentMemAccessInline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn()); 
-    } 
-  } else { 
-    IRB.CreateCall(HwasanMemoryAccessCallbackSized[O.IsWrite], 
-                   {IRB.CreatePointerCast(Addr, IntptrTy), 
-                    ConstantInt::get(IntptrTy, O.TypeSize / 8)}); 
-  } 
-  untagPointerOperand(O.getInsn(), Addr); 
- 
-  return true; 
-} 
- 
-static uint64_t getAllocaSizeInBytes(const AllocaInst &AI) { 
-  uint64_t ArraySize = 1; 
-  if (AI.isArrayAllocation()) { 
-    const ConstantInt *CI = dyn_cast<ConstantInt>(AI.getArraySize()); 
-    assert(CI && "non-constant array size"); 
-    ArraySize = CI->getZExtValue(); 
-  } 
-  Type *Ty = AI.getAllocatedType(); 
-  uint64_t SizeInBytes = AI.getModule()->getDataLayout().getTypeAllocSize(Ty); 
-  return SizeInBytes * ArraySize; 
-} 
- 
-bool HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, 
-                                   Value *Tag, size_t Size) { 
-  size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment()); 
-  if (!UseShortGranules) 
-    Size = AlignedSize; 
- 
-  Value *JustTag = IRB.CreateTrunc(Tag, IRB.getInt8Ty()); 
-  if (ClInstrumentWithCalls) { 
-    IRB.CreateCall(HwasanTagMemoryFunc, 
-                   {IRB.CreatePointerCast(AI, Int8PtrTy), JustTag, 
-                    ConstantInt::get(IntptrTy, AlignedSize)}); 
-  } else { 
-    size_t ShadowSize = Size >> Mapping.Scale; 
-    Value *ShadowPtr = memToShadow(IRB.CreatePointerCast(AI, IntptrTy), IRB); 
-    // If this memset is not inlined, it will be intercepted in the hwasan 
-    // runtime library. That's OK, because the interceptor skips the checks if 
-    // the address is in the shadow region. 
-    // FIXME: the interceptor is not as fast as real memset. Consider lowering 
-    // llvm.memset right here into either a sequence of stores, or a call to 
-    // hwasan_tag_memory. 
-    if (ShadowSize) 
-      IRB.CreateMemSet(ShadowPtr, JustTag, ShadowSize, Align(1)); 
-    if (Size != AlignedSize) { 
-      IRB.CreateStore( 
-          ConstantInt::get(Int8Ty, Size % Mapping.getObjectAlignment()), 
-          IRB.CreateConstGEP1_32(Int8Ty, ShadowPtr, ShadowSize)); 
-      IRB.CreateStore(JustTag, IRB.CreateConstGEP1_32( 
-                                   Int8Ty, IRB.CreateBitCast(AI, Int8PtrTy), 
-                                   AlignedSize - 1)); 
-    } 
-  } 
-  return true; 
-} 
- 
-static unsigned RetagMask(unsigned AllocaNo) { 
-  // A list of 8-bit numbers that have at most one run of non-zero bits. 
-  // x = x ^ (mask << 56) can be encoded as a single armv8 instruction for these 
-  // masks. 
-  // The list does not include the value 255, which is used for UAR. 
-  // 
-  // Because we are more likely to use earlier elements of this list than later 
-  // ones, it is sorted in increasing order of probability of collision with a 
-  // mask allocated (temporally) nearby. The program that generated this list 
-  // can be found at: 
-  // https://github.com/google/sanitizers/blob/master/hwaddress-sanitizer/sort_masks.py 
-  static unsigned FastMasks[] = {0,  128, 64,  192, 32,  96,  224, 112, 240, 
-                                 48, 16,  120, 248, 56,  24,  8,   124, 252, 
-                                 60, 28,  12,  4,   126, 254, 62,  30,  14, 
-                                 6,  2,   127, 63,  31,  15,  7,   3,   1}; 
-  return FastMasks[AllocaNo % (sizeof(FastMasks) / sizeof(FastMasks[0]))]; 
-} 
- 
-Value *HWAddressSanitizer::getNextTagWithCall(IRBuilder<> &IRB) { 
-  return IRB.CreateZExt(IRB.CreateCall(HwasanGenerateTagFunc), IntptrTy); 
-} 
- 
-Value *HWAddressSanitizer::getStackBaseTag(IRBuilder<> &IRB) { 
-  if (ClGenerateTagsWithCalls) 
-    return getNextTagWithCall(IRB); 
-  if (StackBaseTag) 
-    return StackBaseTag; 
-  // FIXME: use addressofreturnaddress (but implement it in aarch64 backend 
-  // first). 
-  Module *M = IRB.GetInsertBlock()->getParent()->getParent(); 
-  auto GetStackPointerFn = Intrinsic::getDeclaration( 
-      M, Intrinsic::frameaddress, 
-      IRB.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace())); 
-  Value *StackPointer = IRB.CreateCall( 
-      GetStackPointerFn, {Constant::getNullValue(IRB.getInt32Ty())}); 
- 
-  // Extract some entropy from the stack pointer for the tags. 
-  // Take bits 20..28 (ASLR entropy) and xor with bits 0..8 (these differ 
-  // between functions). 
-  Value *StackPointerLong = IRB.CreatePointerCast(StackPointer, IntptrTy); 
-  Value *StackTag = 
-      IRB.CreateXor(StackPointerLong, IRB.CreateLShr(StackPointerLong, 20), 
-                    "hwasan.stack.base.tag"); 
-  return StackTag; 
-} 
- 
-Value *HWAddressSanitizer::getAllocaTag(IRBuilder<> &IRB, Value *StackTag, 
-                                        AllocaInst *AI, unsigned AllocaNo) { 
-  if (ClGenerateTagsWithCalls) 
-    return getNextTagWithCall(IRB); 
-  return IRB.CreateXor(StackTag, 
-                       ConstantInt::get(IntptrTy, RetagMask(AllocaNo))); 
-} 
- 
-Value *HWAddressSanitizer::getUARTag(IRBuilder<> &IRB, Value *StackTag) { 
-  if (ClUARRetagToZero) 
-    return ConstantInt::get(IntptrTy, 0); 
-  if (ClGenerateTagsWithCalls) 
-    return getNextTagWithCall(IRB); 
-  return IRB.CreateXor(StackTag, ConstantInt::get(IntptrTy, 0xFFU)); 
-} 
- 
-// Add a tag to an address. 
-Value *HWAddressSanitizer::tagPointer(IRBuilder<> &IRB, Type *Ty, 
-                                      Value *PtrLong, Value *Tag) { 
-  Value *TaggedPtrLong; 
-  if (CompileKernel) { 
-    // Kernel addresses have 0xFF in the most significant byte. 
-    Value *ShiftedTag = IRB.CreateOr( 
-        IRB.CreateShl(Tag, kPointerTagShift), 
-        ConstantInt::get(IntptrTy, (1ULL << kPointerTagShift) - 1)); 
-    TaggedPtrLong = IRB.CreateAnd(PtrLong, ShiftedTag); 
-  } else { 
-    // Userspace can simply do OR (tag << 56); 
-    Value *ShiftedTag = IRB.CreateShl(Tag, kPointerTagShift); 
-    TaggedPtrLong = IRB.CreateOr(PtrLong, ShiftedTag); 
-  } 
-  return IRB.CreateIntToPtr(TaggedPtrLong, Ty); 
-} 
- 
-// Remove tag from an address. 
-Value *HWAddressSanitizer::untagPointer(IRBuilder<> &IRB, Value *PtrLong) { 
-  Value *UntaggedPtrLong; 
-  if (CompileKernel) { 
-    // Kernel addresses have 0xFF in the most significant byte. 
-    UntaggedPtrLong = IRB.CreateOr(PtrLong, 
-        ConstantInt::get(PtrLong->getType(), 0xFFULL << kPointerTagShift)); 
-  } else { 
-    // Userspace addresses have 0x00. 
-    UntaggedPtrLong = IRB.CreateAnd(PtrLong, 
-        ConstantInt::get(PtrLong->getType(), ~(0xFFULL << kPointerTagShift))); 
-  } 
-  return UntaggedPtrLong; 
-} 
- 
-Value *HWAddressSanitizer::getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty) { 
-  Module *M = IRB.GetInsertBlock()->getParent()->getParent(); 
-  if (TargetTriple.isAArch64() && TargetTriple.isAndroid()) { 
-    // Android provides a fixed TLS slot for sanitizers. See TLS_SLOT_SANITIZER 
-    // in Bionic's libc/private/bionic_tls.h. 
-    Function *ThreadPointerFunc = 
-        Intrinsic::getDeclaration(M, Intrinsic::thread_pointer); 
-    Value *SlotPtr = IRB.CreatePointerCast( 
-        IRB.CreateConstGEP1_32(IRB.getInt8Ty(), 
-                               IRB.CreateCall(ThreadPointerFunc), 0x30), 
-        Ty->getPointerTo(0)); 
-    return SlotPtr; 
-  } 
-  if (ThreadPtrGlobal) 
-    return ThreadPtrGlobal; 
- 
- 
-  return nullptr; 
-} 
- 
-void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) { 
-  if (!Mapping.InTls) { 
+          "{x0}",
+          /*hasSideEffects=*/true);
+      break;
+    default:
+      report_fatal_error("unsupported architecture");
+  }
+  IRB.CreateCall(Asm, PtrLong);
+  if (Recover)
+    cast<BranchInst>(CheckFailTerm)->setSuccessor(0, CheckTerm->getParent());
+}
+
+void HWAddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
+  IRBuilder<> IRB(MI);
+  if (isa<MemTransferInst>(MI)) {
+    IRB.CreateCall(
+        isa<MemMoveInst>(MI) ? HWAsanMemmove : HWAsanMemcpy,
+        {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
+         IRB.CreatePointerCast(MI->getOperand(1), IRB.getInt8PtrTy()),
+         IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
+  } else if (isa<MemSetInst>(MI)) {
+    IRB.CreateCall(
+        HWAsanMemset,
+        {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
+         IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
+         IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
+  }
+  MI->eraseFromParent();
+}
+
+bool HWAddressSanitizer::instrumentMemAccess(InterestingMemoryOperand &O) {
+  Value *Addr = O.getPtr();
+
+  LLVM_DEBUG(dbgs() << "Instrumenting: " << O.getInsn() << "\n");
+
+  if (O.MaybeMask)
+    return false; //FIXME
+
+  IRBuilder<> IRB(O.getInsn());
+  if (isPowerOf2_64(O.TypeSize) &&
+      (O.TypeSize / 8 <= (1ULL << (kNumberOfAccessSizes - 1))) &&
+      (!O.Alignment || *O.Alignment >= (1ULL << Mapping.Scale) ||
+       *O.Alignment >= O.TypeSize / 8)) {
+    size_t AccessSizeIndex = TypeSizeToSizeIndex(O.TypeSize);
+    if (ClInstrumentWithCalls) {
+      IRB.CreateCall(HwasanMemoryAccessCallback[O.IsWrite][AccessSizeIndex],
+                     IRB.CreatePointerCast(Addr, IntptrTy));
+    } else {
+      instrumentMemAccessInline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn());
+    }
+  } else {
+    IRB.CreateCall(HwasanMemoryAccessCallbackSized[O.IsWrite],
+                   {IRB.CreatePointerCast(Addr, IntptrTy),
+                    ConstantInt::get(IntptrTy, O.TypeSize / 8)});
+  }
+  untagPointerOperand(O.getInsn(), Addr);
+
+  return true;
+}
+
+static uint64_t getAllocaSizeInBytes(const AllocaInst &AI) {
+  uint64_t ArraySize = 1;
+  if (AI.isArrayAllocation()) {
+    const ConstantInt *CI = dyn_cast<ConstantInt>(AI.getArraySize());
+    assert(CI && "non-constant array size");
+    ArraySize = CI->getZExtValue();
+  }
+  Type *Ty = AI.getAllocatedType();
+  uint64_t SizeInBytes = AI.getModule()->getDataLayout().getTypeAllocSize(Ty);
+  return SizeInBytes * ArraySize;
+}
+
+bool HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI,
+                                   Value *Tag, size_t Size) {
+  size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
+  if (!UseShortGranules)
+    Size = AlignedSize;
+
+  Value *JustTag = IRB.CreateTrunc(Tag, IRB.getInt8Ty());
+  if (ClInstrumentWithCalls) {
+    IRB.CreateCall(HwasanTagMemoryFunc,
+                   {IRB.CreatePointerCast(AI, Int8PtrTy), JustTag,
+                    ConstantInt::get(IntptrTy, AlignedSize)});
+  } else {
+    size_t ShadowSize = Size >> Mapping.Scale;
+    Value *ShadowPtr = memToShadow(IRB.CreatePointerCast(AI, IntptrTy), IRB);
+    // If this memset is not inlined, it will be intercepted in the hwasan
+    // runtime library. That's OK, because the interceptor skips the checks if
+    // the address is in the shadow region.
+    // FIXME: the interceptor is not as fast as real memset. Consider lowering
+    // llvm.memset right here into either a sequence of stores, or a call to
+    // hwasan_tag_memory.
+    if (ShadowSize)
+      IRB.CreateMemSet(ShadowPtr, JustTag, ShadowSize, Align(1));
+    if (Size != AlignedSize) {
+      IRB.CreateStore(
+          ConstantInt::get(Int8Ty, Size % Mapping.getObjectAlignment()),
+          IRB.CreateConstGEP1_32(Int8Ty, ShadowPtr, ShadowSize));
+      IRB.CreateStore(JustTag, IRB.CreateConstGEP1_32(
+                                   Int8Ty, IRB.CreateBitCast(AI, Int8PtrTy),
+                                   AlignedSize - 1));
+    }
+  }
+  return true;
+}
+
+static unsigned RetagMask(unsigned AllocaNo) {
+  // A list of 8-bit numbers that have at most one run of non-zero bits.
+  // x = x ^ (mask << 56) can be encoded as a single armv8 instruction for these
+  // masks.
+  // The list does not include the value 255, which is used for UAR.
+  //
+  // Because we are more likely to use earlier elements of this list than later
+  // ones, it is sorted in increasing order of probability of collision with a
+  // mask allocated (temporally) nearby. The program that generated this list
+  // can be found at:
+  // https://github.com/google/sanitizers/blob/master/hwaddress-sanitizer/sort_masks.py
+  static unsigned FastMasks[] = {0,  128, 64,  192, 32,  96,  224, 112, 240,
+                                 48, 16,  120, 248, 56,  24,  8,   124, 252,
+                                 60, 28,  12,  4,   126, 254, 62,  30,  14,
+                                 6,  2,   127, 63,  31,  15,  7,   3,   1};
+  return FastMasks[AllocaNo % (sizeof(FastMasks) / sizeof(FastMasks[0]))];
+}
+
+Value *HWAddressSanitizer::getNextTagWithCall(IRBuilder<> &IRB) {
+  return IRB.CreateZExt(IRB.CreateCall(HwasanGenerateTagFunc), IntptrTy);
+}
+
+Value *HWAddressSanitizer::getStackBaseTag(IRBuilder<> &IRB) {
+  if (ClGenerateTagsWithCalls)
+    return getNextTagWithCall(IRB);
+  if (StackBaseTag)
+    return StackBaseTag;
+  // FIXME: use addressofreturnaddress (but implement it in aarch64 backend
+  // first).
+  Module *M = IRB.GetInsertBlock()->getParent()->getParent();
+  auto GetStackPointerFn = Intrinsic::getDeclaration(
+      M, Intrinsic::frameaddress,
+      IRB.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace()));
+  Value *StackPointer = IRB.CreateCall(
+      GetStackPointerFn, {Constant::getNullValue(IRB.getInt32Ty())});
+
+  // Extract some entropy from the stack pointer for the tags.
+  // Take bits 20..28 (ASLR entropy) and xor with bits 0..8 (these differ
+  // between functions).
+  Value *StackPointerLong = IRB.CreatePointerCast(StackPointer, IntptrTy);
+  Value *StackTag =
+      IRB.CreateXor(StackPointerLong, IRB.CreateLShr(StackPointerLong, 20),
+                    "hwasan.stack.base.tag");
+  return StackTag;
+}
+
+Value *HWAddressSanitizer::getAllocaTag(IRBuilder<> &IRB, Value *StackTag,
+                                        AllocaInst *AI, unsigned AllocaNo) {
+  if (ClGenerateTagsWithCalls)
+    return getNextTagWithCall(IRB);
+  return IRB.CreateXor(StackTag,
+                       ConstantInt::get(IntptrTy, RetagMask(AllocaNo)));
+}
+
+Value *HWAddressSanitizer::getUARTag(IRBuilder<> &IRB, Value *StackTag) {
+  if (ClUARRetagToZero)
+    return ConstantInt::get(IntptrTy, 0);
+  if (ClGenerateTagsWithCalls)
+    return getNextTagWithCall(IRB);
+  return IRB.CreateXor(StackTag, ConstantInt::get(IntptrTy, 0xFFU));
+}
+
+// Add a tag to an address.
+Value *HWAddressSanitizer::tagPointer(IRBuilder<> &IRB, Type *Ty,
+                                      Value *PtrLong, Value *Tag) {
+  Value *TaggedPtrLong;
+  if (CompileKernel) {
+    // Kernel addresses have 0xFF in the most significant byte.
+    Value *ShiftedTag = IRB.CreateOr(
+        IRB.CreateShl(Tag, kPointerTagShift),
+        ConstantInt::get(IntptrTy, (1ULL << kPointerTagShift) - 1));
+    TaggedPtrLong = IRB.CreateAnd(PtrLong, ShiftedTag);
+  } else {
+    // Userspace can simply do OR (tag << 56);
+    Value *ShiftedTag = IRB.CreateShl(Tag, kPointerTagShift);
+    TaggedPtrLong = IRB.CreateOr(PtrLong, ShiftedTag);
+  }
+  return IRB.CreateIntToPtr(TaggedPtrLong, Ty);
+}
+
+// Remove tag from an address.
+Value *HWAddressSanitizer::untagPointer(IRBuilder<> &IRB, Value *PtrLong) {
+  Value *UntaggedPtrLong;
+  if (CompileKernel) {
+    // Kernel addresses have 0xFF in the most significant byte.
+    UntaggedPtrLong = IRB.CreateOr(PtrLong,
+        ConstantInt::get(PtrLong->getType(), 0xFFULL << kPointerTagShift));
+  } else {
+    // Userspace addresses have 0x00.
+    UntaggedPtrLong = IRB.CreateAnd(PtrLong,
+        ConstantInt::get(PtrLong->getType(), ~(0xFFULL << kPointerTagShift)));
+  }
+  return UntaggedPtrLong;
+}
+
+Value *HWAddressSanitizer::getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty) {
+  Module *M = IRB.GetInsertBlock()->getParent()->getParent();
+  if (TargetTriple.isAArch64() && TargetTriple.isAndroid()) {
+    // Android provides a fixed TLS slot for sanitizers. See TLS_SLOT_SANITIZER
+    // in Bionic's libc/private/bionic_tls.h.
+    Function *ThreadPointerFunc =
+        Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
+    Value *SlotPtr = IRB.CreatePointerCast(
+        IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
+                               IRB.CreateCall(ThreadPointerFunc), 0x30),
+        Ty->getPointerTo(0));
+    return SlotPtr;
+  }
+  if (ThreadPtrGlobal)
+    return ThreadPtrGlobal;
+
+
+  return nullptr;
+}
+
+void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) {
+  if (!Mapping.InTls) {
     ShadowBase = getShadowNonTls(IRB);
-    return; 
-  } 
- 
-  if (!WithFrameRecord && TargetTriple.isAndroid()) { 
+    return;
+  }
+
+  if (!WithFrameRecord && TargetTriple.isAndroid()) {
     ShadowBase = getDynamicShadowIfunc(IRB);
-    return; 
-  } 
- 
-  Value *SlotPtr = getHwasanThreadSlotPtr(IRB, IntptrTy); 
-  assert(SlotPtr); 
- 
-  Value *ThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr); 
-  // Extract the address field from ThreadLong. Unnecessary on AArch64 with TBI. 
-  Value *ThreadLongMaybeUntagged = 
-      TargetTriple.isAArch64() ? ThreadLong : untagPointer(IRB, ThreadLong); 
- 
-  if (WithFrameRecord) { 
-    Function *F = IRB.GetInsertBlock()->getParent(); 
-    StackBaseTag = IRB.CreateAShr(ThreadLong, 3); 
- 
-    // Prepare ring buffer data. 
-    Value *PC; 
-    if (TargetTriple.getArch() == Triple::aarch64) 
-      PC = readRegister(IRB, "pc"); 
-    else 
-      PC = IRB.CreatePtrToInt(F, IntptrTy); 
-    Module *M = F->getParent(); 
-    auto GetStackPointerFn = Intrinsic::getDeclaration( 
-        M, Intrinsic::frameaddress, 
-        IRB.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace())); 
-    Value *SP = IRB.CreatePtrToInt( 
-        IRB.CreateCall(GetStackPointerFn, 
-                       {Constant::getNullValue(IRB.getInt32Ty())}), 
-        IntptrTy); 
-    // Mix SP and PC. 
-    // Assumptions: 
-    // PC is 0x0000PPPPPPPPPPPP  (48 bits are meaningful, others are zero) 
-    // SP is 0xsssssssssssSSSS0  (4 lower bits are zero) 
-    // We only really need ~20 lower non-zero bits (SSSS), so we mix like this: 
-    //       0xSSSSPPPPPPPPPPPP 
-    SP = IRB.CreateShl(SP, 44); 
- 
-    // Store data to ring buffer. 
-    Value *RecordPtr = 
-        IRB.CreateIntToPtr(ThreadLongMaybeUntagged, IntptrTy->getPointerTo(0)); 
-    IRB.CreateStore(IRB.CreateOr(PC, SP), RecordPtr); 
- 
-    // Update the ring buffer. Top byte of ThreadLong defines the size of the 
-    // buffer in pages, it must be a power of two, and the start of the buffer 
-    // must be aligned by twice that much. Therefore wrap around of the ring 
-    // buffer is simply Addr &= ~((ThreadLong >> 56) << 12). 
-    // The use of AShr instead of LShr is due to 
-    //   https://bugs.llvm.org/show_bug.cgi?id=39030 
-    // Runtime library makes sure not to use the highest bit. 
-    Value *WrapMask = IRB.CreateXor( 
-        IRB.CreateShl(IRB.CreateAShr(ThreadLong, 56), 12, "", true, true), 
-        ConstantInt::get(IntptrTy, (uint64_t)-1)); 
-    Value *ThreadLongNew = IRB.CreateAnd( 
-        IRB.CreateAdd(ThreadLong, ConstantInt::get(IntptrTy, 8)), WrapMask); 
-    IRB.CreateStore(ThreadLongNew, SlotPtr); 
-  } 
- 
-  // Get shadow base address by aligning RecordPtr up. 
-  // Note: this is not correct if the pointer is already aligned. 
-  // Runtime library will make sure this never happens. 
+    return;
+  }
+
+  Value *SlotPtr = getHwasanThreadSlotPtr(IRB, IntptrTy);
+  assert(SlotPtr);
+
+  Value *ThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
+  // Extract the address field from ThreadLong. Unnecessary on AArch64 with TBI.
+  Value *ThreadLongMaybeUntagged =
+      TargetTriple.isAArch64() ? ThreadLong : untagPointer(IRB, ThreadLong);
+
+  if (WithFrameRecord) {
+    Function *F = IRB.GetInsertBlock()->getParent();
+    StackBaseTag = IRB.CreateAShr(ThreadLong, 3);
+
+    // Prepare ring buffer data.
+    Value *PC;
+    if (TargetTriple.getArch() == Triple::aarch64)
+      PC = readRegister(IRB, "pc");
+    else
+      PC = IRB.CreatePtrToInt(F, IntptrTy);
+    Module *M = F->getParent();
+    auto GetStackPointerFn = Intrinsic::getDeclaration(
+        M, Intrinsic::frameaddress,
+        IRB.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace()));
+    Value *SP = IRB.CreatePtrToInt(
+        IRB.CreateCall(GetStackPointerFn,
+                       {Constant::getNullValue(IRB.getInt32Ty())}),
+        IntptrTy);
+    // Mix SP and PC.
+    // Assumptions:
+    // PC is 0x0000PPPPPPPPPPPP  (48 bits are meaningful, others are zero)
+    // SP is 0xsssssssssssSSSS0  (4 lower bits are zero)
+    // We only really need ~20 lower non-zero bits (SSSS), so we mix like this:
+    //       0xSSSSPPPPPPPPPPPP
+    SP = IRB.CreateShl(SP, 44);
+
+    // Store data to ring buffer.
+    Value *RecordPtr =
+        IRB.CreateIntToPtr(ThreadLongMaybeUntagged, IntptrTy->getPointerTo(0));
+    IRB.CreateStore(IRB.CreateOr(PC, SP), RecordPtr);
+
+    // Update the ring buffer. Top byte of ThreadLong defines the size of the
+    // buffer in pages, it must be a power of two, and the start of the buffer
+    // must be aligned by twice that much. Therefore wrap around of the ring
+    // buffer is simply Addr &= ~((ThreadLong >> 56) << 12).
+    // The use of AShr instead of LShr is due to
+    //   https://bugs.llvm.org/show_bug.cgi?id=39030
+    // Runtime library makes sure not to use the highest bit.
+    Value *WrapMask = IRB.CreateXor(
+        IRB.CreateShl(IRB.CreateAShr(ThreadLong, 56), 12, "", true, true),
+        ConstantInt::get(IntptrTy, (uint64_t)-1));
+    Value *ThreadLongNew = IRB.CreateAnd(
+        IRB.CreateAdd(ThreadLong, ConstantInt::get(IntptrTy, 8)), WrapMask);
+    IRB.CreateStore(ThreadLongNew, SlotPtr);
+  }
+
+  // Get shadow base address by aligning RecordPtr up.
+  // Note: this is not correct if the pointer is already aligned.
+  // Runtime library will make sure this never happens.
   ShadowBase = IRB.CreateAdd(
-      IRB.CreateOr( 
-          ThreadLongMaybeUntagged, 
-          ConstantInt::get(IntptrTy, (1ULL << kShadowBaseAlignment) - 1)), 
-      ConstantInt::get(IntptrTy, 1), "hwasan.shadow"); 
+      IRB.CreateOr(
+          ThreadLongMaybeUntagged,
+          ConstantInt::get(IntptrTy, (1ULL << kShadowBaseAlignment) - 1)),
+      ConstantInt::get(IntptrTy, 1), "hwasan.shadow");
   ShadowBase = IRB.CreateIntToPtr(ShadowBase, Int8PtrTy);
-} 
- 
-Value *HWAddressSanitizer::readRegister(IRBuilder<> &IRB, StringRef Name) { 
-  Module *M = IRB.GetInsertBlock()->getParent()->getParent(); 
-  Function *ReadRegister = 
-      Intrinsic::getDeclaration(M, Intrinsic::read_register, IntptrTy); 
-  MDNode *MD = MDNode::get(*C, {MDString::get(*C, Name)}); 
-  Value *Args[] = {MetadataAsValue::get(*C, MD)}; 
-  return IRB.CreateCall(ReadRegister, Args); 
-} 
- 
-bool HWAddressSanitizer::instrumentLandingPads( 
-    SmallVectorImpl<Instruction *> &LandingPadVec) { 
-  for (auto *LP : LandingPadVec) { 
-    IRBuilder<> IRB(LP->getNextNode()); 
-    IRB.CreateCall( 
-        HWAsanHandleVfork, 
-        {readRegister(IRB, (TargetTriple.getArch() == Triple::x86_64) ? "rsp" 
-                                                                      : "sp")}); 
-  } 
-  return true; 
-} 
- 
-bool HWAddressSanitizer::instrumentStack( 
-    SmallVectorImpl<AllocaInst *> &Allocas, 
-    DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> &AllocaDbgMap, 
-    SmallVectorImpl<Instruction *> &RetVec, Value *StackTag) { 
-  // Ideally, we want to calculate tagged stack base pointer, and rewrite all 
-  // alloca addresses using that. Unfortunately, offsets are not known yet 
-  // (unless we use ASan-style mega-alloca). Instead we keep the base tag in a 
-  // temp, shift-OR it into each alloca address and xor with the retag mask. 
-  // This generates one extra instruction per alloca use. 
-  for (unsigned N = 0; N < Allocas.size(); ++N) { 
-    auto *AI = Allocas[N]; 
-    IRBuilder<> IRB(AI->getNextNode()); 
- 
-    // Replace uses of the alloca with tagged address. 
-    Value *Tag = getAllocaTag(IRB, StackTag, AI, N); 
-    Value *AILong = IRB.CreatePointerCast(AI, IntptrTy); 
-    Value *Replacement = tagPointer(IRB, AI->getType(), AILong, Tag); 
-    std::string Name = 
-        AI->hasName() ? AI->getName().str() : "alloca." + itostr(N); 
-    Replacement->setName(Name + ".hwasan"); 
- 
-    AI->replaceUsesWithIf(Replacement, 
-                          [AILong](Use &U) { return U.getUser() != AILong; }); 
- 
-    for (auto *DDI : AllocaDbgMap.lookup(AI)) { 
-      // Prepend "tag_offset, N" to the dwarf expression. 
-      // Tag offset logically applies to the alloca pointer, and it makes sense 
-      // to put it at the beginning of the expression. 
-      SmallVector<uint64_t, 8> NewOps = {dwarf::DW_OP_LLVM_tag_offset, 
-                                         RetagMask(N)}; 
-      DDI->setArgOperand( 
-          2, MetadataAsValue::get(*C, DIExpression::prependOpcodes( 
-                                          DDI->getExpression(), NewOps))); 
-    } 
- 
-    size_t Size = getAllocaSizeInBytes(*AI); 
-    tagAlloca(IRB, AI, Tag, Size); 
- 
-    for (auto RI : RetVec) { 
-      IRB.SetInsertPoint(RI); 
- 
-      // Re-tag alloca memory with the special UAR tag. 
-      Value *Tag = getUARTag(IRB, StackTag); 
-      tagAlloca(IRB, AI, Tag, alignTo(Size, Mapping.getObjectAlignment())); 
-    } 
-  } 
- 
-  return true; 
-} 
- 
-bool HWAddressSanitizer::isInterestingAlloca(const AllocaInst &AI) { 
-  return (AI.getAllocatedType()->isSized() && 
-          // FIXME: instrument dynamic allocas, too 
-          AI.isStaticAlloca() && 
-          // alloca() may be called with 0 size, ignore it. 
-          getAllocaSizeInBytes(AI) > 0 && 
-          // We are only interested in allocas not promotable to registers. 
-          // Promotable allocas are common under -O0. 
-          !isAllocaPromotable(&AI) && 
-          // inalloca allocas are not treated as static, and we don't want 
-          // dynamic alloca instrumentation for them as well. 
-          !AI.isUsedWithInAlloca() && 
-          // swifterror allocas are register promoted by ISel 
-          !AI.isSwiftError()); 
-} 
- 
-bool HWAddressSanitizer::sanitizeFunction(Function &F) { 
-  if (&F == HwasanCtorFunction) 
-    return false; 
- 
-  if (!F.hasFnAttribute(Attribute::SanitizeHWAddress)) 
-    return false; 
- 
-  LLVM_DEBUG(dbgs() << "Function: " << F.getName() << "\n"); 
- 
-  SmallVector<InterestingMemoryOperand, 16> OperandsToInstrument; 
-  SmallVector<MemIntrinsic *, 16> IntrinToInstrument; 
-  SmallVector<AllocaInst*, 8> AllocasToInstrument; 
-  SmallVector<Instruction*, 8> RetVec; 
-  SmallVector<Instruction*, 8> LandingPadVec; 
-  DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> AllocaDbgMap; 
-  for (auto &BB : F) { 
-    for (auto &Inst : BB) { 
-      if (ClInstrumentStack) 
-        if (AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) { 
-          if (isInterestingAlloca(*AI)) 
-            AllocasToInstrument.push_back(AI); 
-          continue; 
-        } 
- 
-      if (isa<ReturnInst>(Inst) || isa<ResumeInst>(Inst) || 
-          isa<CleanupReturnInst>(Inst)) 
-        RetVec.push_back(&Inst); 
- 
-      if (auto *DDI = dyn_cast<DbgVariableIntrinsic>(&Inst)) 
-        if (auto *Alloca = 
-                dyn_cast_or_null<AllocaInst>(DDI->getVariableLocation())) 
-          AllocaDbgMap[Alloca].push_back(DDI); 
- 
-      if (InstrumentLandingPads && isa<LandingPadInst>(Inst)) 
-        LandingPadVec.push_back(&Inst); 
- 
-      getInterestingMemoryOperands(&Inst, OperandsToInstrument); 
- 
-      if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&Inst)) 
-        IntrinToInstrument.push_back(MI); 
-    } 
-  } 
- 
-  initializeCallbacks(*F.getParent()); 
- 
-  bool Changed = false; 
- 
-  if (!LandingPadVec.empty()) 
-    Changed |= instrumentLandingPads(LandingPadVec); 
- 
-  if (AllocasToInstrument.empty() && F.hasPersonalityFn() && 
-      F.getPersonalityFn()->getName() == kHwasanPersonalityThunkName) { 
-    // __hwasan_personality_thunk is a no-op for functions without an 
-    // instrumented stack, so we can drop it. 
-    F.setPersonalityFn(nullptr); 
-    Changed = true; 
-  } 
- 
-  if (AllocasToInstrument.empty() && OperandsToInstrument.empty() && 
-      IntrinToInstrument.empty()) 
-    return Changed; 
- 
+}
+
+Value *HWAddressSanitizer::readRegister(IRBuilder<> &IRB, StringRef Name) {
+  Module *M = IRB.GetInsertBlock()->getParent()->getParent();
+  Function *ReadRegister =
+      Intrinsic::getDeclaration(M, Intrinsic::read_register, IntptrTy);
+  MDNode *MD = MDNode::get(*C, {MDString::get(*C, Name)});
+  Value *Args[] = {MetadataAsValue::get(*C, MD)};
+  return IRB.CreateCall(ReadRegister, Args);
+}
+
+bool HWAddressSanitizer::instrumentLandingPads(
+    SmallVectorImpl<Instruction *> &LandingPadVec) {
+  for (auto *LP : LandingPadVec) {
+    IRBuilder<> IRB(LP->getNextNode());
+    IRB.CreateCall(
+        HWAsanHandleVfork,
+        {readRegister(IRB, (TargetTriple.getArch() == Triple::x86_64) ? "rsp"
+                                                                      : "sp")});
+  }
+  return true;
+}
+
+bool HWAddressSanitizer::instrumentStack(
+    SmallVectorImpl<AllocaInst *> &Allocas,
+    DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> &AllocaDbgMap,
+    SmallVectorImpl<Instruction *> &RetVec, Value *StackTag) {
+  // Ideally, we want to calculate tagged stack base pointer, and rewrite all
+  // alloca addresses using that. Unfortunately, offsets are not known yet
+  // (unless we use ASan-style mega-alloca). Instead we keep the base tag in a
+  // temp, shift-OR it into each alloca address and xor with the retag mask.
+  // This generates one extra instruction per alloca use.
+  for (unsigned N = 0; N < Allocas.size(); ++N) {
+    auto *AI = Allocas[N];
+    IRBuilder<> IRB(AI->getNextNode());
+
+    // Replace uses of the alloca with tagged address.
+    Value *Tag = getAllocaTag(IRB, StackTag, AI, N);
+    Value *AILong = IRB.CreatePointerCast(AI, IntptrTy);
+    Value *Replacement = tagPointer(IRB, AI->getType(), AILong, Tag);
+    std::string Name =
+        AI->hasName() ? AI->getName().str() : "alloca." + itostr(N);
+    Replacement->setName(Name + ".hwasan");
+
+    AI->replaceUsesWithIf(Replacement,
+                          [AILong](Use &U) { return U.getUser() != AILong; });
+
+    for (auto *DDI : AllocaDbgMap.lookup(AI)) {
+      // Prepend "tag_offset, N" to the dwarf expression.
+      // Tag offset logically applies to the alloca pointer, and it makes sense
+      // to put it at the beginning of the expression.
+      SmallVector<uint64_t, 8> NewOps = {dwarf::DW_OP_LLVM_tag_offset,
+                                         RetagMask(N)};
+      DDI->setArgOperand(
+          2, MetadataAsValue::get(*C, DIExpression::prependOpcodes(
+                                          DDI->getExpression(), NewOps)));
+    }
+
+    size_t Size = getAllocaSizeInBytes(*AI);
+    tagAlloca(IRB, AI, Tag, Size);
+
+    for (auto RI : RetVec) {
+      IRB.SetInsertPoint(RI);
+
+      // Re-tag alloca memory with the special UAR tag.
+      Value *Tag = getUARTag(IRB, StackTag);
+      tagAlloca(IRB, AI, Tag, alignTo(Size, Mapping.getObjectAlignment()));
+    }
+  }
+
+  return true;
+}
+
+bool HWAddressSanitizer::isInterestingAlloca(const AllocaInst &AI) {
+  return (AI.getAllocatedType()->isSized() &&
+          // FIXME: instrument dynamic allocas, too
+          AI.isStaticAlloca() &&
+          // alloca() may be called with 0 size, ignore it.
+          getAllocaSizeInBytes(AI) > 0 &&
+          // We are only interested in allocas not promotable to registers.
+          // Promotable allocas are common under -O0.
+          !isAllocaPromotable(&AI) &&
+          // inalloca allocas are not treated as static, and we don't want
+          // dynamic alloca instrumentation for them as well.
+          !AI.isUsedWithInAlloca() &&
+          // swifterror allocas are register promoted by ISel
+          !AI.isSwiftError());
+}
+
+bool HWAddressSanitizer::sanitizeFunction(Function &F) {
+  if (&F == HwasanCtorFunction)
+    return false;
+
+  if (!F.hasFnAttribute(Attribute::SanitizeHWAddress))
+    return false;
+
+  LLVM_DEBUG(dbgs() << "Function: " << F.getName() << "\n");
+
+  SmallVector<InterestingMemoryOperand, 16> OperandsToInstrument;
+  SmallVector<MemIntrinsic *, 16> IntrinToInstrument;
+  SmallVector<AllocaInst*, 8> AllocasToInstrument;
+  SmallVector<Instruction*, 8> RetVec;
+  SmallVector<Instruction*, 8> LandingPadVec;
+  DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> AllocaDbgMap;
+  for (auto &BB : F) {
+    for (auto &Inst : BB) {
+      if (ClInstrumentStack)
+        if (AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
+          if (isInterestingAlloca(*AI))
+            AllocasToInstrument.push_back(AI);
+          continue;
+        }
+
+      if (isa<ReturnInst>(Inst) || isa<ResumeInst>(Inst) ||
+          isa<CleanupReturnInst>(Inst))
+        RetVec.push_back(&Inst);
+
+      if (auto *DDI = dyn_cast<DbgVariableIntrinsic>(&Inst))
+        if (auto *Alloca =
+                dyn_cast_or_null<AllocaInst>(DDI->getVariableLocation()))
+          AllocaDbgMap[Alloca].push_back(DDI);
+
+      if (InstrumentLandingPads && isa<LandingPadInst>(Inst))
+        LandingPadVec.push_back(&Inst);
+
+      getInterestingMemoryOperands(&Inst, OperandsToInstrument);
+
+      if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&Inst))
+        IntrinToInstrument.push_back(MI);
+    }
+  }
+
+  initializeCallbacks(*F.getParent());
+
+  bool Changed = false;
+
+  if (!LandingPadVec.empty())
+    Changed |= instrumentLandingPads(LandingPadVec);
+
+  if (AllocasToInstrument.empty() && F.hasPersonalityFn() &&
+      F.getPersonalityFn()->getName() == kHwasanPersonalityThunkName) {
+    // __hwasan_personality_thunk is a no-op for functions without an
+    // instrumented stack, so we can drop it.
+    F.setPersonalityFn(nullptr);
+    Changed = true;
+  }
+
+  if (AllocasToInstrument.empty() && OperandsToInstrument.empty() &&
+      IntrinToInstrument.empty())
+    return Changed;
+
   assert(!ShadowBase);
- 
-  Instruction *InsertPt = &*F.getEntryBlock().begin(); 
-  IRBuilder<> EntryIRB(InsertPt); 
-  emitPrologue(EntryIRB, 
-               /*WithFrameRecord*/ ClRecordStackHistory && 
-                   !AllocasToInstrument.empty()); 
- 
-  if (!AllocasToInstrument.empty()) { 
-    Value *StackTag = 
-        ClGenerateTagsWithCalls ? nullptr : getStackBaseTag(EntryIRB); 
-    instrumentStack(AllocasToInstrument, AllocaDbgMap, RetVec, StackTag); 
-  } 
-  // Pad and align each of the allocas that we instrumented to stop small 
-  // uninteresting allocas from hiding in instrumented alloca's padding and so 
-  // that we have enough space to store real tags for short granules. 
-  DenseMap<AllocaInst *, AllocaInst *> AllocaToPaddedAllocaMap; 
-  for (AllocaInst *AI : AllocasToInstrument) { 
-    uint64_t Size = getAllocaSizeInBytes(*AI); 
-    uint64_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment()); 
-    AI->setAlignment( 
-        Align(std::max(AI->getAlignment(), Mapping.getObjectAlignment()))); 
-    if (Size != AlignedSize) { 
-      Type *AllocatedType = AI->getAllocatedType(); 
-      if (AI->isArrayAllocation()) { 
-        uint64_t ArraySize = 
-            cast<ConstantInt>(AI->getArraySize())->getZExtValue(); 
-        AllocatedType = ArrayType::get(AllocatedType, ArraySize); 
-      } 
-      Type *TypeWithPadding = StructType::get( 
-          AllocatedType, ArrayType::get(Int8Ty, AlignedSize - Size)); 
-      auto *NewAI = new AllocaInst( 
-          TypeWithPadding, AI->getType()->getAddressSpace(), nullptr, "", AI); 
-      NewAI->takeName(AI); 
-      NewAI->setAlignment(AI->getAlign()); 
-      NewAI->setUsedWithInAlloca(AI->isUsedWithInAlloca()); 
-      NewAI->setSwiftError(AI->isSwiftError()); 
-      NewAI->copyMetadata(*AI); 
-      auto *Bitcast = new BitCastInst(NewAI, AI->getType(), "", AI); 
-      AI->replaceAllUsesWith(Bitcast); 
-      AllocaToPaddedAllocaMap[AI] = NewAI; 
-    } 
-  } 
- 
-  if (!AllocaToPaddedAllocaMap.empty()) { 
-    for (auto &BB : F) 
-      for (auto &Inst : BB) 
-        if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&Inst)) 
-          if (auto *AI = 
-                  dyn_cast_or_null<AllocaInst>(DVI->getVariableLocation())) 
-            if (auto *NewAI = AllocaToPaddedAllocaMap.lookup(AI)) 
-              DVI->setArgOperand( 
-                  0, MetadataAsValue::get(*C, LocalAsMetadata::get(NewAI))); 
-    for (auto &P : AllocaToPaddedAllocaMap) 
-      P.first->eraseFromParent(); 
-  } 
- 
-  // If we split the entry block, move any allocas that were originally in the 
-  // entry block back into the entry block so that they aren't treated as 
-  // dynamic allocas. 
-  if (EntryIRB.GetInsertBlock() != &F.getEntryBlock()) { 
-    InsertPt = &*F.getEntryBlock().begin(); 
-    for (auto II = EntryIRB.GetInsertBlock()->begin(), 
-              IE = EntryIRB.GetInsertBlock()->end(); 
-         II != IE;) { 
-      Instruction *I = &*II++; 
-      if (auto *AI = dyn_cast<AllocaInst>(I)) 
-        if (isa<ConstantInt>(AI->getArraySize())) 
-          I->moveBefore(InsertPt); 
-    } 
-  } 
- 
-  for (auto &Operand : OperandsToInstrument) 
-    instrumentMemAccess(Operand); 
- 
-  if (ClInstrumentMemIntrinsics && !IntrinToInstrument.empty()) { 
-    for (auto Inst : IntrinToInstrument) 
-      instrumentMemIntrinsic(cast<MemIntrinsic>(Inst)); 
-  } 
- 
+
+  Instruction *InsertPt = &*F.getEntryBlock().begin();
+  IRBuilder<> EntryIRB(InsertPt);
+  emitPrologue(EntryIRB,
+               /*WithFrameRecord*/ ClRecordStackHistory &&
+                   !AllocasToInstrument.empty());
+
+  if (!AllocasToInstrument.empty()) {
+    Value *StackTag =
+        ClGenerateTagsWithCalls ? nullptr : getStackBaseTag(EntryIRB);
+    instrumentStack(AllocasToInstrument, AllocaDbgMap, RetVec, StackTag);
+  }
+  // Pad and align each of the allocas that we instrumented to stop small
+  // uninteresting allocas from hiding in instrumented alloca's padding and so
+  // that we have enough space to store real tags for short granules.
+  DenseMap<AllocaInst *, AllocaInst *> AllocaToPaddedAllocaMap;
+  for (AllocaInst *AI : AllocasToInstrument) {
+    uint64_t Size = getAllocaSizeInBytes(*AI);
+    uint64_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
+    AI->setAlignment(
+        Align(std::max(AI->getAlignment(), Mapping.getObjectAlignment())));
+    if (Size != AlignedSize) {
+      Type *AllocatedType = AI->getAllocatedType();
+      if (AI->isArrayAllocation()) {
+        uint64_t ArraySize =
+            cast<ConstantInt>(AI->getArraySize())->getZExtValue();
+        AllocatedType = ArrayType::get(AllocatedType, ArraySize);
+      }
+      Type *TypeWithPadding = StructType::get(
+          AllocatedType, ArrayType::get(Int8Ty, AlignedSize - Size));
+      auto *NewAI = new AllocaInst(
+          TypeWithPadding, AI->getType()->getAddressSpace(), nullptr, "", AI);
+      NewAI->takeName(AI);
+      NewAI->setAlignment(AI->getAlign());
+      NewAI->setUsedWithInAlloca(AI->isUsedWithInAlloca());
+      NewAI->setSwiftError(AI->isSwiftError());
+      NewAI->copyMetadata(*AI);
+      auto *Bitcast = new BitCastInst(NewAI, AI->getType(), "", AI);
+      AI->replaceAllUsesWith(Bitcast);
+      AllocaToPaddedAllocaMap[AI] = NewAI;
+    }
+  }
+
+  if (!AllocaToPaddedAllocaMap.empty()) {
+    for (auto &BB : F)
+      for (auto &Inst : BB)
+        if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&Inst))
+          if (auto *AI =
+                  dyn_cast_or_null<AllocaInst>(DVI->getVariableLocation()))
+            if (auto *NewAI = AllocaToPaddedAllocaMap.lookup(AI))
+              DVI->setArgOperand(
+                  0, MetadataAsValue::get(*C, LocalAsMetadata::get(NewAI)));
+    for (auto &P : AllocaToPaddedAllocaMap)
+      P.first->eraseFromParent();
+  }
+
+  // If we split the entry block, move any allocas that were originally in the
+  // entry block back into the entry block so that they aren't treated as
+  // dynamic allocas.
+  if (EntryIRB.GetInsertBlock() != &F.getEntryBlock()) {
+    InsertPt = &*F.getEntryBlock().begin();
+    for (auto II = EntryIRB.GetInsertBlock()->begin(),
+              IE = EntryIRB.GetInsertBlock()->end();
+         II != IE;) {
+      Instruction *I = &*II++;
+      if (auto *AI = dyn_cast<AllocaInst>(I))
+        if (isa<ConstantInt>(AI->getArraySize()))
+          I->moveBefore(InsertPt);
+    }
+  }
+
+  for (auto &Operand : OperandsToInstrument)
+    instrumentMemAccess(Operand);
+
+  if (ClInstrumentMemIntrinsics && !IntrinToInstrument.empty()) {
+    for (auto Inst : IntrinToInstrument)
+      instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
+  }
+
   ShadowBase = nullptr;
-  StackBaseTag = nullptr; 
- 
-  return true; 
-} 
- 
-void HWAddressSanitizer::instrumentGlobal(GlobalVariable *GV, uint8_t Tag) { 
-  Constant *Initializer = GV->getInitializer(); 
-  uint64_t SizeInBytes = 
-      M.getDataLayout().getTypeAllocSize(Initializer->getType()); 
-  uint64_t NewSize = alignTo(SizeInBytes, Mapping.getObjectAlignment()); 
-  if (SizeInBytes != NewSize) { 
-    // Pad the initializer out to the next multiple of 16 bytes and add the 
-    // required short granule tag. 
-    std::vector<uint8_t> Init(NewSize - SizeInBytes, 0); 
-    Init.back() = Tag; 
-    Constant *Padding = ConstantDataArray::get(*C, Init); 
-    Initializer = ConstantStruct::getAnon({Initializer, Padding}); 
-  } 
- 
-  auto *NewGV = new GlobalVariable(M, Initializer->getType(), GV->isConstant(), 
-                                   GlobalValue::ExternalLinkage, Initializer, 
-                                   GV->getName() + ".hwasan"); 
-  NewGV->copyAttributesFrom(GV); 
-  NewGV->setLinkage(GlobalValue::PrivateLinkage); 
-  NewGV->copyMetadata(GV, 0); 
-  NewGV->setAlignment( 
-      MaybeAlign(std::max(GV->getAlignment(), Mapping.getObjectAlignment()))); 
- 
-  // It is invalid to ICF two globals that have different tags. In the case 
-  // where the size of the global is a multiple of the tag granularity the 
-  // contents of the globals may be the same but the tags (i.e. symbol values) 
-  // may be different, and the symbols are not considered during ICF. In the 
-  // case where the size is not a multiple of the granularity, the short granule 
-  // tags would discriminate two globals with different tags, but there would 
-  // otherwise be nothing stopping such a global from being incorrectly ICF'd 
-  // with an uninstrumented (i.e. tag 0) global that happened to have the short 
-  // granule tag in the last byte. 
-  NewGV->setUnnamedAddr(GlobalValue::UnnamedAddr::None); 
- 
-  // Descriptor format (assuming little-endian): 
-  // bytes 0-3: relative address of global 
-  // bytes 4-6: size of global (16MB ought to be enough for anyone, but in case 
-  // it isn't, we create multiple descriptors) 
-  // byte 7: tag 
-  auto *DescriptorTy = StructType::get(Int32Ty, Int32Ty); 
-  const uint64_t MaxDescriptorSize = 0xfffff0; 
-  for (uint64_t DescriptorPos = 0; DescriptorPos < SizeInBytes; 
-       DescriptorPos += MaxDescriptorSize) { 
-    auto *Descriptor = 
-        new GlobalVariable(M, DescriptorTy, true, GlobalValue::PrivateLinkage, 
-                           nullptr, GV->getName() + ".hwasan.descriptor"); 
-    auto *GVRelPtr = ConstantExpr::getTrunc( 
-        ConstantExpr::getAdd( 
-            ConstantExpr::getSub( 
-                ConstantExpr::getPtrToInt(NewGV, Int64Ty), 
-                ConstantExpr::getPtrToInt(Descriptor, Int64Ty)), 
-            ConstantInt::get(Int64Ty, DescriptorPos)), 
-        Int32Ty); 
-    uint32_t Size = std::min(SizeInBytes - DescriptorPos, MaxDescriptorSize); 
-    auto *SizeAndTag = ConstantInt::get(Int32Ty, Size | (uint32_t(Tag) << 24)); 
-    Descriptor->setComdat(NewGV->getComdat()); 
-    Descriptor->setInitializer(ConstantStruct::getAnon({GVRelPtr, SizeAndTag})); 
-    Descriptor->setSection("hwasan_globals"); 
-    Descriptor->setMetadata(LLVMContext::MD_associated, 
-                            MDNode::get(*C, ValueAsMetadata::get(NewGV))); 
-    appendToCompilerUsed(M, Descriptor); 
-  } 
- 
-  Constant *Aliasee = ConstantExpr::getIntToPtr( 
-      ConstantExpr::getAdd( 
-          ConstantExpr::getPtrToInt(NewGV, Int64Ty), 
-          ConstantInt::get(Int64Ty, uint64_t(Tag) << kPointerTagShift)), 
-      GV->getType()); 
-  auto *Alias = GlobalAlias::create(GV->getValueType(), GV->getAddressSpace(), 
-                                    GV->getLinkage(), "", Aliasee, &M); 
-  Alias->setVisibility(GV->getVisibility()); 
-  Alias->takeName(GV); 
-  GV->replaceAllUsesWith(Alias); 
-  GV->eraseFromParent(); 
-} 
- 
-void HWAddressSanitizer::instrumentGlobals() { 
-  std::vector<GlobalVariable *> Globals; 
-  for (GlobalVariable &GV : M.globals()) { 
-    if (GV.isDeclarationForLinker() || GV.getName().startswith("llvm.") || 
-        GV.isThreadLocal()) 
-      continue; 
- 
-    // Common symbols can't have aliases point to them, so they can't be tagged. 
-    if (GV.hasCommonLinkage()) 
-      continue; 
- 
-    // Globals with custom sections may be used in __start_/__stop_ enumeration, 
-    // which would be broken both by adding tags and potentially by the extra 
-    // padding/alignment that we insert. 
-    if (GV.hasSection()) 
-      continue; 
- 
-    Globals.push_back(&GV); 
-  } 
- 
-  MD5 Hasher; 
-  Hasher.update(M.getSourceFileName()); 
-  MD5::MD5Result Hash; 
-  Hasher.final(Hash); 
-  uint8_t Tag = Hash[0]; 
- 
-  for (GlobalVariable *GV : Globals) { 
-    // Skip tag 0 in order to avoid collisions with untagged memory. 
-    if (Tag == 0) 
-      Tag = 1; 
-    instrumentGlobal(GV, Tag++); 
-  } 
-} 
- 
-void HWAddressSanitizer::instrumentPersonalityFunctions() { 
-  // We need to untag stack frames as we unwind past them. That is the job of 
-  // the personality function wrapper, which either wraps an existing 
-  // personality function or acts as a personality function on its own. Each 
-  // function that has a personality function or that can be unwound past has 
-  // its personality function changed to a thunk that calls the personality 
-  // function wrapper in the runtime. 
-  MapVector<Constant *, std::vector<Function *>> PersonalityFns; 
-  for (Function &F : M) { 
-    if (F.isDeclaration() || !F.hasFnAttribute(Attribute::SanitizeHWAddress)) 
-      continue; 
- 
-    if (F.hasPersonalityFn()) { 
-      PersonalityFns[F.getPersonalityFn()->stripPointerCasts()].push_back(&F); 
-    } else if (!F.hasFnAttribute(Attribute::NoUnwind)) { 
-      PersonalityFns[nullptr].push_back(&F); 
-    } 
-  } 
- 
-  if (PersonalityFns.empty()) 
-    return; 
- 
-  FunctionCallee HwasanPersonalityWrapper = M.getOrInsertFunction( 
-      "__hwasan_personality_wrapper", Int32Ty, Int32Ty, Int32Ty, Int64Ty, 
-      Int8PtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy); 
-  FunctionCallee UnwindGetGR = M.getOrInsertFunction("_Unwind_GetGR", VoidTy); 
-  FunctionCallee UnwindGetCFA = M.getOrInsertFunction("_Unwind_GetCFA", VoidTy); 
- 
-  for (auto &P : PersonalityFns) { 
-    std::string ThunkName = kHwasanPersonalityThunkName; 
-    if (P.first) 
-      ThunkName += ("." + P.first->getName()).str(); 
-    FunctionType *ThunkFnTy = FunctionType::get( 
-        Int32Ty, {Int32Ty, Int32Ty, Int64Ty, Int8PtrTy, Int8PtrTy}, false); 
-    bool IsLocal = P.first && (!isa<GlobalValue>(P.first) || 
-                               cast<GlobalValue>(P.first)->hasLocalLinkage()); 
-    auto *ThunkFn = Function::Create(ThunkFnTy, 
-                                     IsLocal ? GlobalValue::InternalLinkage 
-                                             : GlobalValue::LinkOnceODRLinkage, 
-                                     ThunkName, &M); 
-    if (!IsLocal) { 
-      ThunkFn->setVisibility(GlobalValue::HiddenVisibility); 
-      ThunkFn->setComdat(M.getOrInsertComdat(ThunkName)); 
-    } 
- 
-    auto *BB = BasicBlock::Create(*C, "entry", ThunkFn); 
-    IRBuilder<> IRB(BB); 
-    CallInst *WrapperCall = IRB.CreateCall( 
-        HwasanPersonalityWrapper, 
-        {ThunkFn->getArg(0), ThunkFn->getArg(1), ThunkFn->getArg(2), 
-         ThunkFn->getArg(3), ThunkFn->getArg(4), 
-         P.first ? IRB.CreateBitCast(P.first, Int8PtrTy) 
-                 : Constant::getNullValue(Int8PtrTy), 
-         IRB.CreateBitCast(UnwindGetGR.getCallee(), Int8PtrTy), 
-         IRB.CreateBitCast(UnwindGetCFA.getCallee(), Int8PtrTy)}); 
-    WrapperCall->setTailCall(); 
-    IRB.CreateRet(WrapperCall); 
- 
-    for (Function *F : P.second) 
-      F->setPersonalityFn(ThunkFn); 
-  } 
-} 
- 
-void HWAddressSanitizer::ShadowMapping::init(Triple &TargetTriple) { 
-  Scale = kDefaultShadowScale; 
-  if (ClMappingOffset.getNumOccurrences() > 0) { 
-    InGlobal = false; 
-    InTls = false; 
-    Offset = ClMappingOffset; 
-  } else if (ClEnableKhwasan || ClInstrumentWithCalls) { 
-    InGlobal = false; 
-    InTls = false; 
-    Offset = 0; 
-  } else if (ClWithIfunc) { 
-    InGlobal = true; 
-    InTls = false; 
-    Offset = kDynamicShadowSentinel; 
-  } else if (ClWithTls) { 
-    InGlobal = false; 
-    InTls = true; 
-    Offset = kDynamicShadowSentinel; 
-  } else { 
-    InGlobal = false; 
-    InTls = false; 
-    Offset = kDynamicShadowSentinel; 
-  } 
-} 
+  StackBaseTag = nullptr;
+
+  return true;
+}
+
+void HWAddressSanitizer::instrumentGlobal(GlobalVariable *GV, uint8_t Tag) {
+  Constant *Initializer = GV->getInitializer();
+  uint64_t SizeInBytes =
+      M.getDataLayout().getTypeAllocSize(Initializer->getType());
+  uint64_t NewSize = alignTo(SizeInBytes, Mapping.getObjectAlignment());
+  if (SizeInBytes != NewSize) {
+    // Pad the initializer out to the next multiple of 16 bytes and add the
+    // required short granule tag.
+    std::vector<uint8_t> Init(NewSize - SizeInBytes, 0);
+    Init.back() = Tag;
+    Constant *Padding = ConstantDataArray::get(*C, Init);
+    Initializer = ConstantStruct::getAnon({Initializer, Padding});
+  }
+
+  auto *NewGV = new GlobalVariable(M, Initializer->getType(), GV->isConstant(),
+                                   GlobalValue::ExternalLinkage, Initializer,
+                                   GV->getName() + ".hwasan");
+  NewGV->copyAttributesFrom(GV);
+  NewGV->setLinkage(GlobalValue::PrivateLinkage);
+  NewGV->copyMetadata(GV, 0);
+  NewGV->setAlignment(
+      MaybeAlign(std::max(GV->getAlignment(), Mapping.getObjectAlignment())));
+
+  // It is invalid to ICF two globals that have different tags. In the case
+  // where the size of the global is a multiple of the tag granularity the
+  // contents of the globals may be the same but the tags (i.e. symbol values)
+  // may be different, and the symbols are not considered during ICF. In the
+  // case where the size is not a multiple of the granularity, the short granule
+  // tags would discriminate two globals with different tags, but there would
+  // otherwise be nothing stopping such a global from being incorrectly ICF'd
+  // with an uninstrumented (i.e. tag 0) global that happened to have the short
+  // granule tag in the last byte.
+  NewGV->setUnnamedAddr(GlobalValue::UnnamedAddr::None);
+
+  // Descriptor format (assuming little-endian):
+  // bytes 0-3: relative address of global
+  // bytes 4-6: size of global (16MB ought to be enough for anyone, but in case
+  // it isn't, we create multiple descriptors)
+  // byte 7: tag
+  auto *DescriptorTy = StructType::get(Int32Ty, Int32Ty);
+  const uint64_t MaxDescriptorSize = 0xfffff0;
+  for (uint64_t DescriptorPos = 0; DescriptorPos < SizeInBytes;
+       DescriptorPos += MaxDescriptorSize) {
+    auto *Descriptor =
+        new GlobalVariable(M, DescriptorTy, true, GlobalValue::PrivateLinkage,
+                           nullptr, GV->getName() + ".hwasan.descriptor");
+    auto *GVRelPtr = ConstantExpr::getTrunc(
+        ConstantExpr::getAdd(
+            ConstantExpr::getSub(
+                ConstantExpr::getPtrToInt(NewGV, Int64Ty),
+                ConstantExpr::getPtrToInt(Descriptor, Int64Ty)),
+            ConstantInt::get(Int64Ty, DescriptorPos)),
+        Int32Ty);
+    uint32_t Size = std::min(SizeInBytes - DescriptorPos, MaxDescriptorSize);
+    auto *SizeAndTag = ConstantInt::get(Int32Ty, Size | (uint32_t(Tag) << 24));
+    Descriptor->setComdat(NewGV->getComdat());
+    Descriptor->setInitializer(ConstantStruct::getAnon({GVRelPtr, SizeAndTag}));
+    Descriptor->setSection("hwasan_globals");
+    Descriptor->setMetadata(LLVMContext::MD_associated,
+                            MDNode::get(*C, ValueAsMetadata::get(NewGV)));
+    appendToCompilerUsed(M, Descriptor);
+  }
+
+  Constant *Aliasee = ConstantExpr::getIntToPtr(
+      ConstantExpr::getAdd(
+          ConstantExpr::getPtrToInt(NewGV, Int64Ty),
+          ConstantInt::get(Int64Ty, uint64_t(Tag) << kPointerTagShift)),
+      GV->getType());
+  auto *Alias = GlobalAlias::create(GV->getValueType(), GV->getAddressSpace(),
+                                    GV->getLinkage(), "", Aliasee, &M);
+  Alias->setVisibility(GV->getVisibility());
+  Alias->takeName(GV);
+  GV->replaceAllUsesWith(Alias);
+  GV->eraseFromParent();
+}
+
+void HWAddressSanitizer::instrumentGlobals() {
+  std::vector<GlobalVariable *> Globals;
+  for (GlobalVariable &GV : M.globals()) {
+    if (GV.isDeclarationForLinker() || GV.getName().startswith("llvm.") ||
+        GV.isThreadLocal())
+      continue;
+
+    // Common symbols can't have aliases point to them, so they can't be tagged.
+    if (GV.hasCommonLinkage())
+      continue;
+
+    // Globals with custom sections may be used in __start_/__stop_ enumeration,
+    // which would be broken both by adding tags and potentially by the extra
+    // padding/alignment that we insert.
+    if (GV.hasSection())
+      continue;
+
+    Globals.push_back(&GV);
+  }
+
+  MD5 Hasher;
+  Hasher.update(M.getSourceFileName());
+  MD5::MD5Result Hash;
+  Hasher.final(Hash);
+  uint8_t Tag = Hash[0];
+
+  for (GlobalVariable *GV : Globals) {
+    // Skip tag 0 in order to avoid collisions with untagged memory.
+    if (Tag == 0)
+      Tag = 1;
+    instrumentGlobal(GV, Tag++);
+  }
+}
+
+void HWAddressSanitizer::instrumentPersonalityFunctions() {
+  // We need to untag stack frames as we unwind past them. That is the job of
+  // the personality function wrapper, which either wraps an existing
+  // personality function or acts as a personality function on its own. Each
+  // function that has a personality function or that can be unwound past has
+  // its personality function changed to a thunk that calls the personality
+  // function wrapper in the runtime.
+  MapVector<Constant *, std::vector<Function *>> PersonalityFns;
+  for (Function &F : M) {
+    if (F.isDeclaration() || !F.hasFnAttribute(Attribute::SanitizeHWAddress))
+      continue;
+
+    if (F.hasPersonalityFn()) {
+      PersonalityFns[F.getPersonalityFn()->stripPointerCasts()].push_back(&F);
+    } else if (!F.hasFnAttribute(Attribute::NoUnwind)) {
+      PersonalityFns[nullptr].push_back(&F);
+    }
+  }
+
+  if (PersonalityFns.empty())
+    return;
+
+  FunctionCallee HwasanPersonalityWrapper = M.getOrInsertFunction(
+      "__hwasan_personality_wrapper", Int32Ty, Int32Ty, Int32Ty, Int64Ty,
+      Int8PtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy);
+  FunctionCallee UnwindGetGR = M.getOrInsertFunction("_Unwind_GetGR", VoidTy);
+  FunctionCallee UnwindGetCFA = M.getOrInsertFunction("_Unwind_GetCFA", VoidTy);
+
+  for (auto &P : PersonalityFns) {
+    std::string ThunkName = kHwasanPersonalityThunkName;
+    if (P.first)
+      ThunkName += ("." + P.first->getName()).str();
+    FunctionType *ThunkFnTy = FunctionType::get(
+        Int32Ty, {Int32Ty, Int32Ty, Int64Ty, Int8PtrTy, Int8PtrTy}, false);
+    bool IsLocal = P.first && (!isa<GlobalValue>(P.first) ||
+                               cast<GlobalValue>(P.first)->hasLocalLinkage());
+    auto *ThunkFn = Function::Create(ThunkFnTy,
+                                     IsLocal ? GlobalValue::InternalLinkage
+                                             : GlobalValue::LinkOnceODRLinkage,
+                                     ThunkName, &M);
+    if (!IsLocal) {
+      ThunkFn->setVisibility(GlobalValue::HiddenVisibility);
+      ThunkFn->setComdat(M.getOrInsertComdat(ThunkName));
+    }
+
+    auto *BB = BasicBlock::Create(*C, "entry", ThunkFn);
+    IRBuilder<> IRB(BB);
+    CallInst *WrapperCall = IRB.CreateCall(
+        HwasanPersonalityWrapper,
+        {ThunkFn->getArg(0), ThunkFn->getArg(1), ThunkFn->getArg(2),
+         ThunkFn->getArg(3), ThunkFn->getArg(4),
+         P.first ? IRB.CreateBitCast(P.first, Int8PtrTy)
+                 : Constant::getNullValue(Int8PtrTy),
+         IRB.CreateBitCast(UnwindGetGR.getCallee(), Int8PtrTy),
+         IRB.CreateBitCast(UnwindGetCFA.getCallee(), Int8PtrTy)});
+    WrapperCall->setTailCall();
+    IRB.CreateRet(WrapperCall);
+
+    for (Function *F : P.second)
+      F->setPersonalityFn(ThunkFn);
+  }
+}
+
+void HWAddressSanitizer::ShadowMapping::init(Triple &TargetTriple) {
+  Scale = kDefaultShadowScale;
+  if (ClMappingOffset.getNumOccurrences() > 0) {
+    InGlobal = false;
+    InTls = false;
+    Offset = ClMappingOffset;
+  } else if (ClEnableKhwasan || ClInstrumentWithCalls) {
+    InGlobal = false;
+    InTls = false;
+    Offset = 0;
+  } else if (ClWithIfunc) {
+    InGlobal = true;
+    InTls = false;
+    Offset = kDynamicShadowSentinel;
+  } else if (ClWithTls) {
+    InGlobal = false;
+    InTls = true;
+    Offset = kDynamicShadowSentinel;
+  } else {
+    InGlobal = false;
+    InTls = false;
+    Offset = kDynamicShadowSentinel;
+  }
+}
diff --git a/contrib/libs/llvm12/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/contrib/libs/llvm12/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index 6baf7e7dae..5b9557a9b3 100644
--- a/contrib/libs/llvm12/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -1,268 +1,268 @@
-//===- IndirectCallPromotion.cpp - Optimizations based on value profiling -===// 
-// 
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 
-// 
-//===----------------------------------------------------------------------===// 
-// 
-// This file implements the transformation that promotes indirect calls to 
-// conditional direct calls when the indirect-call value profile metadata is 
-// available. 
-// 
-//===----------------------------------------------------------------------===// 
- 
-#include "llvm/ADT/ArrayRef.h" 
-#include "llvm/ADT/STLExtras.h" 
-#include "llvm/ADT/SmallVector.h" 
-#include "llvm/ADT/Statistic.h" 
-#include "llvm/ADT/StringRef.h" 
-#include "llvm/Analysis/IndirectCallPromotionAnalysis.h" 
-#include "llvm/Analysis/IndirectCallVisitor.h" 
-#include "llvm/Analysis/OptimizationRemarkEmitter.h" 
-#include "llvm/Analysis/ProfileSummaryInfo.h" 
-#include "llvm/IR/Attributes.h" 
-#include "llvm/IR/BasicBlock.h" 
-#include "llvm/IR/DerivedTypes.h" 
-#include "llvm/IR/DiagnosticInfo.h" 
-#include "llvm/IR/Function.h" 
-#include "llvm/IR/IRBuilder.h" 
-#include "llvm/IR/InstrTypes.h" 
-#include "llvm/IR/Instruction.h" 
-#include "llvm/IR/Instructions.h" 
-#include "llvm/IR/LLVMContext.h" 
-#include "llvm/IR/MDBuilder.h" 
-#include "llvm/IR/PassManager.h" 
-#include "llvm/IR/Type.h" 
-#include "llvm/IR/Value.h" 
-#include "llvm/InitializePasses.h" 
-#include "llvm/Pass.h" 
-#include "llvm/ProfileData/InstrProf.h" 
-#include "llvm/Support/Casting.h" 
-#include "llvm/Support/CommandLine.h" 
-#include "llvm/Support/Debug.h" 
-#include "llvm/Support/Error.h" 
-#include "llvm/Support/raw_ostream.h" 
-#include "llvm/Transforms/Instrumentation.h" 
-#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" 
-#include "llvm/Transforms/Utils/BasicBlockUtils.h" 
-#include "llvm/Transforms/Utils/CallPromotionUtils.h" 
-#include <cassert> 
-#include <cstdint> 
-#include <memory> 
-#include <string> 
-#include <utility> 
-#include <vector> 
- 
-using namespace llvm; 
- 
-#define DEBUG_TYPE "pgo-icall-prom" 
- 
-STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions."); 
-STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites."); 
- 
-// Command line option to disable indirect-call promotion with the default as 
-// false. This is for debug purpose. 
-static cl::opt<bool> DisableICP("disable-icp", cl::init(false), cl::Hidden, 
-                                cl::desc("Disable indirect call promotion")); 
- 
-// Set the cutoff value for the promotion. If the value is other than 0, we 
-// stop the transformation once the total number of promotions equals the cutoff 
-// value. 
-// For debug use only. 
-static cl::opt<unsigned> 
-    ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden, cl::ZeroOrMore, 
-              cl::desc("Max number of promotions for this compilation")); 
- 
-// If ICPCSSkip is non zero, the first ICPCSSkip callsites will be skipped. 
-// For debug use only. 
-static cl::opt<unsigned> 
-    ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden, cl::ZeroOrMore, 
-              cl::desc("Skip Callsite up to this number for this compilation")); 
- 
-// Set if the pass is called in LTO optimization. The difference for LTO mode 
-// is the pass won't prefix the source module name to the internal linkage 
-// symbols. 
-static cl::opt<bool> ICPLTOMode("icp-lto", cl::init(false), cl::Hidden, 
-                                cl::desc("Run indirect-call promotion in LTO " 
-                                         "mode")); 
- 
-// Set if the pass is called in SamplePGO mode. The difference for SamplePGO 
-// mode is it will add prof metadatato the created direct call. 
-static cl::opt<bool> 
-    ICPSamplePGOMode("icp-samplepgo", cl::init(false), cl::Hidden, 
-                     cl::desc("Run indirect-call promotion in SamplePGO mode")); 
- 
-// If the option is set to true, only call instructions will be considered for 
-// transformation -- invoke instructions will be ignored. 
-static cl::opt<bool> 
-    ICPCallOnly("icp-call-only", cl::init(false), cl::Hidden, 
-                cl::desc("Run indirect-call promotion for call instructions " 
-                         "only")); 
- 
-// If the option is set to true, only invoke instructions will be considered for 
-// transformation -- call instructions will be ignored. 
-static cl::opt<bool> ICPInvokeOnly("icp-invoke-only", cl::init(false), 
-                                   cl::Hidden, 
-                                   cl::desc("Run indirect-call promotion for " 
-                                            "invoke instruction only")); 
- 
-// Dump the function level IR if the transformation happened in this 
-// function. For debug use only. 
-static cl::opt<bool> 
-    ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden, 
-                 cl::desc("Dump IR after transformation happens")); 
- 
-namespace { 
- 
-class PGOIndirectCallPromotionLegacyPass : public ModulePass { 
-public: 
-  static char ID; 
- 
-  PGOIndirectCallPromotionLegacyPass(bool InLTO = false, bool SamplePGO = false) 
-      : ModulePass(ID), InLTO(InLTO), SamplePGO(SamplePGO) { 
-    initializePGOIndirectCallPromotionLegacyPassPass( 
-        *PassRegistry::getPassRegistry()); 
-  } 
- 
-  void getAnalysisUsage(AnalysisUsage &AU) const override { 
-    AU.addRequired<ProfileSummaryInfoWrapperPass>(); 
-  } 
- 
-  StringRef getPassName() const override { return "PGOIndirectCallPromotion"; } 
- 
-private: 
-  bool runOnModule(Module &M) override; 
- 
-  // If this pass is called in LTO. We need to special handling the PGOFuncName 
-  // for the static variables due to LTO's internalization. 
-  bool InLTO; 
- 
-  // If this pass is called in SamplePGO. We need to add the prof metadata to 
-  // the promoted direct call. 
-  bool SamplePGO; 
-}; 
- 
-} // end anonymous namespace 
- 
-char PGOIndirectCallPromotionLegacyPass::ID = 0; 
- 
-INITIALIZE_PASS_BEGIN(PGOIndirectCallPromotionLegacyPass, "pgo-icall-prom", 
-                      "Use PGO instrumentation profile to promote indirect " 
-                      "calls to direct calls.", 
-                      false, false) 
-INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) 
-INITIALIZE_PASS_END(PGOIndirectCallPromotionLegacyPass, "pgo-icall-prom", 
-                    "Use PGO instrumentation profile to promote indirect " 
-                    "calls to direct calls.", 
-                    false, false) 
- 
-ModulePass *llvm::createPGOIndirectCallPromotionLegacyPass(bool InLTO, 
-                                                           bool SamplePGO) { 
-  return new PGOIndirectCallPromotionLegacyPass(InLTO, SamplePGO); 
-} 
- 
-namespace { 
- 
-// The class for main data structure to promote indirect calls to conditional 
-// direct calls. 
-class ICallPromotionFunc { 
-private: 
-  Function &F; 
-  Module *M; 
- 
-  // Symtab that maps indirect call profile values to function names and 
-  // defines. 
-  InstrProfSymtab *Symtab; 
- 
-  bool SamplePGO; 
- 
-  OptimizationRemarkEmitter &ORE; 
- 
-  // A struct that records the direct target and it's call count. 
-  struct PromotionCandidate { 
-    Function *TargetFunction; 
-    uint64_t Count; 
- 
-    PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {} 
-  }; 
- 
-  // Check if the indirect-call call site should be promoted. Return the number 
-  // of promotions. Inst is the candidate indirect call, ValueDataRef 
-  // contains the array of value profile data for profiled targets, 
-  // TotalCount is the total profiled count of call executions, and 
-  // NumCandidates is the number of candidate entries in ValueDataRef. 
-  std::vector<PromotionCandidate> getPromotionCandidatesForCallSite( 
-      const CallBase &CB, const ArrayRef<InstrProfValueData> &ValueDataRef, 
-      uint64_t TotalCount, uint32_t NumCandidates); 
- 
-  // Promote a list of targets for one indirect-call callsite. Return 
-  // the number of promotions. 
-  uint32_t tryToPromote(CallBase &CB, 
-                        const std::vector<PromotionCandidate> &Candidates, 
-                        uint64_t &TotalCount); 
- 
-public: 
-  ICallPromotionFunc(Function &Func, Module *Modu, InstrProfSymtab *Symtab, 
-                     bool SamplePGO, OptimizationRemarkEmitter &ORE) 
-      : F(Func), M(Modu), Symtab(Symtab), SamplePGO(SamplePGO), ORE(ORE) {} 
-  ICallPromotionFunc(const ICallPromotionFunc &) = delete; 
-  ICallPromotionFunc &operator=(const ICallPromotionFunc &) = delete; 
- 
-  bool processFunction(ProfileSummaryInfo *PSI); 
-}; 
- 
-} // end anonymous namespace 
- 
-// Indirect-call promotion heuristic. The direct targets are sorted based on 
-// the count. Stop at the first target that is not promoted. 
-std::vector<ICallPromotionFunc::PromotionCandidate> 
-ICallPromotionFunc::getPromotionCandidatesForCallSite( 
-    const CallBase &CB, const ArrayRef<InstrProfValueData> &ValueDataRef, 
-    uint64_t TotalCount, uint32_t NumCandidates) { 
-  std::vector<PromotionCandidate> Ret; 
- 
-  LLVM_DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << CB 
-                    << " Num_targets: " << ValueDataRef.size() 
-                    << " Num_candidates: " << NumCandidates << "\n"); 
-  NumOfPGOICallsites++; 
-  if (ICPCSSkip != 0 && NumOfPGOICallsites <= ICPCSSkip) { 
-    LLVM_DEBUG(dbgs() << " Skip: User options.\n"); 
-    return Ret; 
-  } 
- 
-  for (uint32_t I = 0; I < NumCandidates; I++) { 
-    uint64_t Count = ValueDataRef[I].Count; 
-    assert(Count <= TotalCount); 
-    uint64_t Target = ValueDataRef[I].Value; 
-    LLVM_DEBUG(dbgs() << " Candidate " << I << " Count=" << Count 
-                      << "  Target_func: " << Target << "\n"); 
- 
-    if (ICPInvokeOnly && isa<CallInst>(CB)) { 
-      LLVM_DEBUG(dbgs() << " Not promote: User options.\n"); 
-      ORE.emit([&]() { 
-        return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", &CB) 
-               << " Not promote: User options"; 
-      }); 
-      break; 
-    } 
-    if (ICPCallOnly && isa<InvokeInst>(CB)) { 
-      LLVM_DEBUG(dbgs() << " Not promote: User option.\n"); 
-      ORE.emit([&]() { 
-        return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", &CB) 
-               << " Not promote: User options"; 
-      }); 
-      break; 
-    } 
-    if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) { 
-      LLVM_DEBUG(dbgs() << " Not promote: Cutoff reached.\n"); 
-      ORE.emit([&]() { 
-        return OptimizationRemarkMissed(DEBUG_TYPE, "CutOffReached", &CB) 
-               << " Not promote: Cutoff reached"; 
-      }); 
-      break; 
-    } 
- 
+//===- IndirectCallPromotion.cpp - Optimizations based on value profiling -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the transformation that promotes indirect calls to
+// conditional direct calls when the indirect-call value profile metadata is
+// available.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
+#include "llvm/Analysis/IndirectCallVisitor.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/CallPromotionUtils.h"
+#include <cassert>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "pgo-icall-prom"
+
+STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions.");
+STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites.");
+
+// Command line option to disable indirect-call promotion with the default as
+// false. This is for debug purpose.
+static cl::opt<bool> DisableICP("disable-icp", cl::init(false), cl::Hidden,
+                                cl::desc("Disable indirect call promotion"));
+
+// Set the cutoff value for the promotion. If the value is other than 0, we
+// stop the transformation once the total number of promotions equals the cutoff
+// value.
+// For debug use only.
+static cl::opt<unsigned>
+    ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden, cl::ZeroOrMore,
+              cl::desc("Max number of promotions for this compilation"));
+
+// If ICPCSSkip is non zero, the first ICPCSSkip callsites will be skipped.
+// For debug use only.
+static cl::opt<unsigned>
+    ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden, cl::ZeroOrMore,
+              cl::desc("Skip Callsite up to this number for this compilation"));
+
+// Set if the pass is called in LTO optimization. The difference for LTO mode
+// is the pass won't prefix the source module name to the internal linkage
+// symbols.
+static cl::opt<bool> ICPLTOMode("icp-lto", cl::init(false), cl::Hidden,
+                                cl::desc("Run indirect-call promotion in LTO "
+                                         "mode"));
+
+// Set if the pass is called in SamplePGO mode. The difference for SamplePGO
+// mode is it will add prof metadatato the created direct call.
+static cl::opt<bool>
+    ICPSamplePGOMode("icp-samplepgo", cl::init(false), cl::Hidden,
+                     cl::desc("Run indirect-call promotion in SamplePGO mode"));
+
+// If the option is set to true, only call instructions will be considered for
+// transformation -- invoke instructions will be ignored.
+static cl::opt<bool>
+    ICPCallOnly("icp-call-only", cl::init(false), cl::Hidden,
+                cl::desc("Run indirect-call promotion for call instructions "
+                         "only"));
+
+// If the option is set to true, only invoke instructions will be considered for
+// transformation -- call instructions will be ignored.
+static cl::opt<bool> ICPInvokeOnly("icp-invoke-only", cl::init(false),
+                                   cl::Hidden,
+                                   cl::desc("Run indirect-call promotion for "
+                                            "invoke instruction only"));
+
+// Dump the function level IR if the transformation happened in this
+// function. For debug use only.
+static cl::opt<bool>
+    ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden,
+                 cl::desc("Dump IR after transformation happens"));
+
+namespace {
+
+class PGOIndirectCallPromotionLegacyPass : public ModulePass {
+public:
+  static char ID;
+
+  PGOIndirectCallPromotionLegacyPass(bool InLTO = false, bool SamplePGO = false)
+      : ModulePass(ID), InLTO(InLTO), SamplePGO(SamplePGO) {
+    initializePGOIndirectCallPromotionLegacyPassPass(
+        *PassRegistry::getPassRegistry());
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<ProfileSummaryInfoWrapperPass>();
+  }
+
+  StringRef getPassName() const override { return "PGOIndirectCallPromotion"; }
+
+private:
+  bool runOnModule(Module &M) override;
+
+  // If this pass is called in LTO. We need to special handling the PGOFuncName
+  // for the static variables due to LTO's internalization.
+  bool InLTO;
+
+  // If this pass is called in SamplePGO. We need to add the prof metadata to
+  // the promoted direct call.
+  bool SamplePGO;
+};
+
+} // end anonymous namespace
+
+char PGOIndirectCallPromotionLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(PGOIndirectCallPromotionLegacyPass, "pgo-icall-prom",
+                      "Use PGO instrumentation profile to promote indirect "
+                      "calls to direct calls.",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_END(PGOIndirectCallPromotionLegacyPass, "pgo-icall-prom",
+                    "Use PGO instrumentation profile to promote indirect "
+                    "calls to direct calls.",
+                    false, false)
+
+ModulePass *llvm::createPGOIndirectCallPromotionLegacyPass(bool InLTO,
+                                                           bool SamplePGO) {
+  return new PGOIndirectCallPromotionLegacyPass(InLTO, SamplePGO);
+}
+
+namespace {
+
+// The class for main data structure to promote indirect calls to conditional
+// direct calls.
+class ICallPromotionFunc {
+private:
+  Function &F;
+  Module *M;
+
+  // Symtab that maps indirect call profile values to function names and
+  // defines.
+  InstrProfSymtab *Symtab;
+
+  bool SamplePGO;
+
+  OptimizationRemarkEmitter &ORE;
+
+  // A struct that records the direct target and it's call count.
+  struct PromotionCandidate {
+    Function *TargetFunction;
+    uint64_t Count;
+
+    PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {}
+  };
+
+  // Check if the indirect-call call site should be promoted. Return the number
+  // of promotions. Inst is the candidate indirect call, ValueDataRef
+  // contains the array of value profile data for profiled targets,
+  // TotalCount is the total profiled count of call executions, and
+  // NumCandidates is the number of candidate entries in ValueDataRef.
+  std::vector<PromotionCandidate> getPromotionCandidatesForCallSite(
+      const CallBase &CB, const ArrayRef<InstrProfValueData> &ValueDataRef,
+      uint64_t TotalCount, uint32_t NumCandidates);
+
+  // Promote a list of targets for one indirect-call callsite. Return
+  // the number of promotions.
+  uint32_t tryToPromote(CallBase &CB,
+                        const std::vector<PromotionCandidate> &Candidates,
+                        uint64_t &TotalCount);
+
+public:
+  ICallPromotionFunc(Function &Func, Module *Modu, InstrProfSymtab *Symtab,
+                     bool SamplePGO, OptimizationRemarkEmitter &ORE)
+      : F(Func), M(Modu), Symtab(Symtab), SamplePGO(SamplePGO), ORE(ORE) {}
+  ICallPromotionFunc(const ICallPromotionFunc &) = delete;
+  ICallPromotionFunc &operator=(const ICallPromotionFunc &) = delete;
+
+  bool processFunction(ProfileSummaryInfo *PSI);
+};
+
+} // end anonymous namespace
+
+// Indirect-call promotion heuristic. The direct targets are sorted based on
+// the count. Stop at the first target that is not promoted.
+std::vector<ICallPromotionFunc::PromotionCandidate>
+ICallPromotionFunc::getPromotionCandidatesForCallSite(
+    const CallBase &CB, const ArrayRef<InstrProfValueData> &ValueDataRef,
+    uint64_t TotalCount, uint32_t NumCandidates) {
+  std::vector<PromotionCandidate> Ret;
+
+  LLVM_DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << CB
+                    << " Num_targets: " << ValueDataRef.size()
+                    << " Num_candidates: " << NumCandidates << "\n");
+  NumOfPGOICallsites++;
+  if (ICPCSSkip != 0 && NumOfPGOICallsites <= ICPCSSkip) {
+    LLVM_DEBUG(dbgs() << " Skip: User options.\n");
+    return Ret;
+  }
+
+  for (uint32_t I = 0; I < NumCandidates; I++) {
+    uint64_t Count = ValueDataRef[I].Count;
+    assert(Count <= TotalCount);
+    uint64_t Target = ValueDataRef[I].Value;
+    LLVM_DEBUG(dbgs() << " Candidate " << I << " Count=" << Count
+                      << "  Target_func: " << Target << "\n");
+
+    if (ICPInvokeOnly && isa<CallInst>(CB)) {
+      LLVM_DEBUG(dbgs() << " Not promote: User options.\n");
+      ORE.emit([&]() {
+        return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", &CB)
+               << " Not promote: User options";
+      });
+      break;
+    }
+    if (ICPCallOnly && isa<InvokeInst>(CB)) {
+      LLVM_DEBUG(dbgs() << " Not promote: User option.\n");
+      ORE.emit([&]() {
+        return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", &CB)
+               << " Not promote: User options";
+      });
+      break;
+    }
+    if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
+      LLVM_DEBUG(dbgs() << " Not promote: Cutoff reached.\n");
+      ORE.emit([&]() {
+        return OptimizationRemarkMissed(DEBUG_TYPE, "CutOffReached", &CB)
+               << " Not promote: Cutoff reached";
+      });
+      break;
+    }
+
     // Don't promote if the symbol is not defined in the module. This avoids
     // creating a reference to a symbol that doesn't exist in the module
     // This can happen when we compile with a sample profile collected from
@@ -270,180 +270,180 @@ ICallPromotionFunc::getPromotionCandidatesForCallSite(
     // aren't used in the new binary. We might have a declaration initially in
     // the case where the symbol is globally dead in the binary and removed by
     // ThinLTO.
-    Function *TargetFunction = Symtab->getFunction(Target); 
+    Function *TargetFunction = Symtab->getFunction(Target);
     if (TargetFunction == nullptr || TargetFunction->isDeclaration()) {
-      LLVM_DEBUG(dbgs() << " Not promote: Cannot find the target\n"); 
-      ORE.emit([&]() { 
-        return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", &CB) 
-               << "Cannot promote indirect call: target with md5sum " 
-               << ore::NV("target md5sum", Target) << " not found"; 
-      }); 
-      break; 
-    } 
- 
-    const char *Reason = nullptr; 
-    if (!isLegalToPromote(CB, TargetFunction, &Reason)) { 
-      using namespace ore; 
- 
-      ORE.emit([&]() { 
-        return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", &CB) 
-               << "Cannot promote indirect call to " 
-               << NV("TargetFunction", TargetFunction) << " with count of " 
-               << NV("Count", Count) << ": " << Reason; 
-      }); 
-      break; 
-    } 
- 
-    Ret.push_back(PromotionCandidate(TargetFunction, Count)); 
-    TotalCount -= Count; 
-  } 
-  return Ret; 
-} 
- 
-CallBase &llvm::pgo::promoteIndirectCall(CallBase &CB, Function *DirectCallee, 
-                                         uint64_t Count, uint64_t TotalCount, 
-                                         bool AttachProfToDirectCall, 
-                                         OptimizationRemarkEmitter *ORE) { 
- 
-  uint64_t ElseCount = TotalCount - Count; 
-  uint64_t MaxCount = (Count >= ElseCount ? Count : ElseCount); 
-  uint64_t Scale = calculateCountScale(MaxCount); 
-  MDBuilder MDB(CB.getContext()); 
-  MDNode *BranchWeights = MDB.createBranchWeights( 
-      scaleBranchCount(Count, Scale), scaleBranchCount(ElseCount, Scale)); 
- 
-  CallBase &NewInst = 
-      promoteCallWithIfThenElse(CB, DirectCallee, BranchWeights); 
- 
-  if (AttachProfToDirectCall) { 
-    MDBuilder MDB(NewInst.getContext()); 
-    NewInst.setMetadata( 
-        LLVMContext::MD_prof, 
-        MDB.createBranchWeights({static_cast<uint32_t>(Count)})); 
-  } 
- 
-  using namespace ore; 
- 
-  if (ORE) 
-    ORE->emit([&]() { 
-      return OptimizationRemark(DEBUG_TYPE, "Promoted", &CB) 
-             << "Promote indirect call to " << NV("DirectCallee", DirectCallee) 
-             << " with count " << NV("Count", Count) << " out of " 
-             << NV("TotalCount", TotalCount); 
-    }); 
-  return NewInst; 
-} 
- 
-// Promote indirect-call to conditional direct-call for one callsite. 
-uint32_t ICallPromotionFunc::tryToPromote( 
-    CallBase &CB, const std::vector<PromotionCandidate> &Candidates, 
-    uint64_t &TotalCount) { 
-  uint32_t NumPromoted = 0; 
- 
-  for (auto &C : Candidates) { 
-    uint64_t Count = C.Count; 
-    pgo::promoteIndirectCall(CB, C.TargetFunction, Count, TotalCount, SamplePGO, 
-                             &ORE); 
-    assert(TotalCount >= Count); 
-    TotalCount -= Count; 
-    NumOfPGOICallPromotion++; 
-    NumPromoted++; 
-  } 
-  return NumPromoted; 
-} 
- 
-// Traverse all the indirect-call callsite and get the value profile 
-// annotation to perform indirect-call promotion. 
-bool ICallPromotionFunc::processFunction(ProfileSummaryInfo *PSI) { 
-  bool Changed = false; 
-  ICallPromotionAnalysis ICallAnalysis; 
-  for (auto *CB : findIndirectCalls(F)) { 
-    uint32_t NumVals, NumCandidates; 
-    uint64_t TotalCount; 
-    auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction( 
-        CB, NumVals, TotalCount, NumCandidates); 
-    if (!NumCandidates || 
-        (PSI && PSI->hasProfileSummary() && !PSI->isHotCount(TotalCount))) 
-      continue; 
-    auto PromotionCandidates = getPromotionCandidatesForCallSite( 
-        *CB, ICallProfDataRef, TotalCount, NumCandidates); 
-    uint32_t NumPromoted = tryToPromote(*CB, PromotionCandidates, TotalCount); 
-    if (NumPromoted == 0) 
-      continue; 
- 
-    Changed = true; 
-    // Adjust the MD.prof metadata. First delete the old one. 
-    CB->setMetadata(LLVMContext::MD_prof, nullptr); 
-    // If all promoted, we don't need the MD.prof metadata. 
-    if (TotalCount == 0 || NumPromoted == NumVals) 
-      continue; 
-    // Otherwise we need update with the un-promoted records back. 
-    annotateValueSite(*M, *CB, ICallProfDataRef.slice(NumPromoted), TotalCount, 
-                      IPVK_IndirectCallTarget, NumCandidates); 
-  } 
-  return Changed; 
-} 
- 
-// A wrapper function that does the actual work. 
-static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, 
-                                 bool InLTO, bool SamplePGO, 
-                                 ModuleAnalysisManager *AM = nullptr) { 
-  if (DisableICP) 
-    return false; 
-  InstrProfSymtab Symtab; 
-  if (Error E = Symtab.create(M, InLTO)) { 
-    std::string SymtabFailure = toString(std::move(E)); 
-    LLVM_DEBUG(dbgs() << "Failed to create symtab: " << SymtabFailure << "\n"); 
-    (void)SymtabFailure; 
-    return false; 
-  } 
-  bool Changed = false; 
-  for (auto &F : M) { 
-    if (F.isDeclaration() || F.hasOptNone()) 
-      continue; 
- 
-    std::unique_ptr<OptimizationRemarkEmitter> OwnedORE; 
-    OptimizationRemarkEmitter *ORE; 
-    if (AM) { 
-      auto &FAM = 
-          AM->getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 
-      ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); 
-    } else { 
-      OwnedORE = std::make_unique<OptimizationRemarkEmitter>(&F); 
-      ORE = OwnedORE.get(); 
-    } 
- 
-    ICallPromotionFunc ICallPromotion(F, &M, &Symtab, SamplePGO, *ORE); 
-    bool FuncChanged = ICallPromotion.processFunction(PSI); 
-    if (ICPDUMPAFTER && FuncChanged) { 
-      LLVM_DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs())); 
-      LLVM_DEBUG(dbgs() << "\n"); 
-    } 
-    Changed |= FuncChanged; 
-    if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) { 
-      LLVM_DEBUG(dbgs() << " Stop: Cutoff reached.\n"); 
-      break; 
-    } 
-  } 
-  return Changed; 
-} 
- 
-bool PGOIndirectCallPromotionLegacyPass::runOnModule(Module &M) { 
-  ProfileSummaryInfo *PSI = 
-      &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 
- 
-  // Command-line option has the priority for InLTO. 
-  return promoteIndirectCalls(M, PSI, InLTO | ICPLTOMode, 
-                              SamplePGO | ICPSamplePGOMode); 
-} 
- 
-PreservedAnalyses PGOIndirectCallPromotion::run(Module &M, 
-                                                ModuleAnalysisManager &AM) { 
-  ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M); 
- 
-  if (!promoteIndirectCalls(M, PSI, InLTO | ICPLTOMode, 
-                            SamplePGO | ICPSamplePGOMode, &AM)) 
-    return PreservedAnalyses::all(); 
- 
-  return PreservedAnalyses::none(); 
-} 
+      LLVM_DEBUG(dbgs() << " Not promote: Cannot find the target\n");
+      ORE.emit([&]() {
+        return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", &CB)
+               << "Cannot promote indirect call: target with md5sum "
+               << ore::NV("target md5sum", Target) << " not found";
+      });
+      break;
+    }
+
+    const char *Reason = nullptr;
+    if (!isLegalToPromote(CB, TargetFunction, &Reason)) {
+      using namespace ore;
+
+      ORE.emit([&]() {
+        return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", &CB)
+               << "Cannot promote indirect call to "
+               << NV("TargetFunction", TargetFunction) << " with count of "
+               << NV("Count", Count) << ": " << Reason;
+      });
+      break;
+    }
+
+    Ret.push_back(PromotionCandidate(TargetFunction, Count));
+    TotalCount -= Count;
+  }
+  return Ret;
+}
+
+CallBase &llvm::pgo::promoteIndirectCall(CallBase &CB, Function *DirectCallee,
+                                         uint64_t Count, uint64_t TotalCount,
+                                         bool AttachProfToDirectCall,
+                                         OptimizationRemarkEmitter *ORE) {
+
+  uint64_t ElseCount = TotalCount - Count;
+  uint64_t MaxCount = (Count >= ElseCount ? Count : ElseCount);
+  uint64_t Scale = calculateCountScale(MaxCount);
+  MDBuilder MDB(CB.getContext());
+  MDNode *BranchWeights = MDB.createBranchWeights(
+      scaleBranchCount(Count, Scale), scaleBranchCount(ElseCount, Scale));
+
+  CallBase &NewInst =
+      promoteCallWithIfThenElse(CB, DirectCallee, BranchWeights);
+
+  if (AttachProfToDirectCall) {
+    MDBuilder MDB(NewInst.getContext());
+    NewInst.setMetadata(
+        LLVMContext::MD_prof,
+        MDB.createBranchWeights({static_cast<uint32_t>(Count)}));
+  }
+
+  using namespace ore;
+
+  if (ORE)
+    ORE->emit([&]() {
+      return OptimizationRemark(DEBUG_TYPE, "Promoted", &CB)
+             << "Promote indirect call to " << NV("DirectCallee", DirectCallee)
+             << " with count " << NV("Count", Count) << " out of "
+             << NV("TotalCount", TotalCount);
+    });
+  return NewInst;
+}
+
+// Promote indirect-call to conditional direct-call for one callsite.
+uint32_t ICallPromotionFunc::tryToPromote(
+    CallBase &CB, const std::vector<PromotionCandidate> &Candidates,
+    uint64_t &TotalCount) {
+  uint32_t NumPromoted = 0;
+
+  for (auto &C : Candidates) {
+    uint64_t Count = C.Count;
+    pgo::promoteIndirectCall(CB, C.TargetFunction, Count, TotalCount, SamplePGO,
+                             &ORE);
+    assert(TotalCount >= Count);
+    TotalCount -= Count;
+    NumOfPGOICallPromotion++;
+    NumPromoted++;
+  }
+  return NumPromoted;
+}
+
+// Traverse all the indirect-call callsite and get the value profile
+// annotation to perform indirect-call promotion.
+bool ICallPromotionFunc::processFunction(ProfileSummaryInfo *PSI) {
+  bool Changed = false;
+  ICallPromotionAnalysis ICallAnalysis;
+  for (auto *CB : findIndirectCalls(F)) {
+    uint32_t NumVals, NumCandidates;
+    uint64_t TotalCount;
+    auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction(
+        CB, NumVals, TotalCount, NumCandidates);
+    if (!NumCandidates ||
+        (PSI && PSI->hasProfileSummary() && !PSI->isHotCount(TotalCount)))
+      continue;
+    auto PromotionCandidates = getPromotionCandidatesForCallSite(
+        *CB, ICallProfDataRef, TotalCount, NumCandidates);
+    uint32_t NumPromoted = tryToPromote(*CB, PromotionCandidates, TotalCount);
+    if (NumPromoted == 0)
+      continue;
+
+    Changed = true;
+    // Adjust the MD.prof metadata. First delete the old one.
+    CB->setMetadata(LLVMContext::MD_prof, nullptr);
+    // If all promoted, we don't need the MD.prof metadata.
+    if (TotalCount == 0 || NumPromoted == NumVals)
+      continue;
+    // Otherwise we need update with the un-promoted records back.
+    annotateValueSite(*M, *CB, ICallProfDataRef.slice(NumPromoted), TotalCount,
+                      IPVK_IndirectCallTarget, NumCandidates);
+  }
+  return Changed;
+}
+
+// A wrapper function that does the actual work.
+static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI,
+                                 bool InLTO, bool SamplePGO,
+                                 ModuleAnalysisManager *AM = nullptr) {
+  if (DisableICP)
+    return false;
+  InstrProfSymtab Symtab;
+  if (Error E = Symtab.create(M, InLTO)) {
+    std::string SymtabFailure = toString(std::move(E));
+    LLVM_DEBUG(dbgs() << "Failed to create symtab: " << SymtabFailure << "\n");
+    (void)SymtabFailure;
+    return false;
+  }
+  bool Changed = false;
+  for (auto &F : M) {
+    if (F.isDeclaration() || F.hasOptNone())
+      continue;
+
+    std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
+    OptimizationRemarkEmitter *ORE;
+    if (AM) {
+      auto &FAM =
+          AM->getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+      ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+    } else {
+      OwnedORE = std::make_unique<OptimizationRemarkEmitter>(&F);
+      ORE = OwnedORE.get();
+    }
+
+    ICallPromotionFunc ICallPromotion(F, &M, &Symtab, SamplePGO, *ORE);
+    bool FuncChanged = ICallPromotion.processFunction(PSI);
+    if (ICPDUMPAFTER && FuncChanged) {
+      LLVM_DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));
+      LLVM_DEBUG(dbgs() << "\n");
+    }
+    Changed |= FuncChanged;
+    if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
+      LLVM_DEBUG(dbgs() << " Stop: Cutoff reached.\n");
+      break;
+    }
+  }
+  return Changed;
+}
+
+bool PGOIndirectCallPromotionLegacyPass::runOnModule(Module &M) {
+  ProfileSummaryInfo *PSI =
+      &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+
+  // Command-line option has the priority for InLTO.
+  return promoteIndirectCalls(M, PSI, InLTO | ICPLTOMode,
+                              SamplePGO | ICPSamplePGOMode);
+}
+
+PreservedAnalyses PGOIndirectCallPromotion::run(Module &M,
+                                                ModuleAnalysisManager &AM) {
+  ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
+
+  if (!promoteIndirectCalls(M, PSI, InLTO | ICPLTOMode,
+                            SamplePGO | ICPSamplePGOMode, &AM))
+    return PreservedAnalyses::all();
+
+  return PreservedAnalyses::none();
+}
diff --git a/contrib/libs/llvm12/lib/Transforms/Instrumentation/InstrOrderFile.cpp b/contrib/libs/llvm12/lib/Transforms/Instrumentation/InstrOrderFile.cpp
index 0addfb46b2..853385fbf8 100644
--- a/contrib/libs/llvm12/lib/Transforms/Instrumentation/InstrOrderFile.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Instrumentation/InstrOrderFile.cpp
@@ -1,212 +1,212 @@
-//===- InstrOrderFile.cpp ---- Late IR instrumentation for order file ----===// 
-// 
-//                     The LLVM Compiler Infrastructure 
-// 
-// This file is distributed under the University of Illinois Open Source 
-// License. See LICENSE.TXT for details. 
-// 
-//===----------------------------------------------------------------------===// 
-// 
-//===----------------------------------------------------------------------===// 
- 
-#include "llvm/Transforms/Instrumentation/InstrOrderFile.h" 
-#include "llvm/ADT/Statistic.h" 
-#include "llvm/IR/Constants.h" 
-#include "llvm/IR/Function.h" 
-#include "llvm/IR/GlobalValue.h" 
-#include "llvm/IR/IRBuilder.h" 
-#include "llvm/IR/Instruction.h" 
-#include "llvm/IR/Instructions.h" 
-#include "llvm/IR/Metadata.h" 
-#include "llvm/IR/Module.h" 
-#include "llvm/InitializePasses.h" 
-#include "llvm/Pass.h" 
-#include "llvm/PassRegistry.h" 
-#include "llvm/ProfileData/InstrProf.h" 
-#include "llvm/Support/CommandLine.h" 
-#include "llvm/Support/Debug.h" 
-#include "llvm/Support/FileSystem.h" 
-#include "llvm/Support/Path.h" 
-#include "llvm/Support/raw_ostream.h" 
-#include "llvm/Transforms/Instrumentation.h" 
-#include <fstream> 
-#include <map> 
-#include <mutex> 
-#include <set> 
-#include <sstream> 
- 
-using namespace llvm; 
-#define DEBUG_TYPE "instrorderfile" 
- 
-static cl::opt<std::string> ClOrderFileWriteMapping( 
-    "orderfile-write-mapping", cl::init(""), 
-    cl::desc( 
-        "Dump functions and their MD5 hash to deobfuscate profile data"), 
-    cl::Hidden); 
- 
-namespace { 
- 
-// We need a global bitmap to tell if a function is executed. We also 
-// need a global variable to save the order of functions. We can use a 
-// fixed-size buffer that saves the MD5 hash of the function. We need 
-// a global variable to save the index into the buffer. 
- 
-std::mutex MappingMutex; 
- 
-struct InstrOrderFile { 
-private: 
-  GlobalVariable *OrderFileBuffer; 
-  GlobalVariable *BufferIdx; 
-  GlobalVariable *BitMap; 
-  ArrayType *BufferTy; 
-  ArrayType *MapTy; 
- 
-public: 
-  InstrOrderFile() {} 
- 
-  void createOrderFileData(Module &M) { 
-    LLVMContext &Ctx = M.getContext(); 
-    int NumFunctions = 0; 
-    for (Function &F : M) { 
-      if (!F.isDeclaration()) 
-        NumFunctions++; 
-    } 
- 
-    BufferTy = 
-        ArrayType::get(Type::getInt64Ty(Ctx), INSTR_ORDER_FILE_BUFFER_SIZE); 
-    Type *IdxTy = Type::getInt32Ty(Ctx); 
-    MapTy = ArrayType::get(Type::getInt8Ty(Ctx), NumFunctions); 
- 
-    // Create the global variables. 
-    std::string SymbolName = INSTR_PROF_ORDERFILE_BUFFER_NAME_STR; 
-    OrderFileBuffer = new GlobalVariable(M, BufferTy, false, GlobalValue::LinkOnceODRLinkage, 
-                           Constant::getNullValue(BufferTy), SymbolName); 
-    Triple TT = Triple(M.getTargetTriple()); 
-    OrderFileBuffer->setSection( 
-        getInstrProfSectionName(IPSK_orderfile, TT.getObjectFormat())); 
- 
-    std::string IndexName = INSTR_PROF_ORDERFILE_BUFFER_IDX_NAME_STR; 
-    BufferIdx = new GlobalVariable(M, IdxTy, false, GlobalValue::LinkOnceODRLinkage, 
-                           Constant::getNullValue(IdxTy), IndexName); 
- 
-    std::string BitMapName = "bitmap_0"; 
-    BitMap = new GlobalVariable(M, MapTy, false, GlobalValue::PrivateLinkage, 
-                                Constant::getNullValue(MapTy), BitMapName); 
-  } 
- 
-  // Generate the code sequence in the entry block of each function to 
-  // update the buffer. 
-  void generateCodeSequence(Module &M, Function &F, int FuncId) { 
-    if (!ClOrderFileWriteMapping.empty()) { 
-      std::lock_guard<std::mutex> LogLock(MappingMutex); 
-      std::error_code EC; 
-      llvm::raw_fd_ostream OS(ClOrderFileWriteMapping, EC, 
-                              llvm::sys::fs::OF_Append); 
-      if (EC) { 
-        report_fatal_error(Twine("Failed to open ") + ClOrderFileWriteMapping + 
-                           " to save mapping file for order file instrumentation\n"); 
-      } else { 
-        std::stringstream stream; 
-        stream << std::hex << MD5Hash(F.getName()); 
-        std::string singleLine = "MD5 " + stream.str() + " " + 
-                                 std::string(F.getName()) + '\n'; 
-        OS << singleLine; 
-      } 
-    } 
- 
-    BasicBlock *OrigEntry = &F.getEntryBlock(); 
- 
-    LLVMContext &Ctx = M.getContext(); 
-    IntegerType *Int32Ty = Type::getInt32Ty(Ctx); 
-    IntegerType *Int8Ty = Type::getInt8Ty(Ctx); 
- 
-    // Create a new entry block for instrumentation. We will check the bitmap 
-    // in this basic block. 
-    BasicBlock *NewEntry = 
-        BasicBlock::Create(M.getContext(), "order_file_entry", &F, OrigEntry); 
-    IRBuilder<> entryB(NewEntry); 
-    // Create a basic block for updating the circular buffer. 
-    BasicBlock *UpdateOrderFileBB = 
-        BasicBlock::Create(M.getContext(), "order_file_set", &F, OrigEntry); 
-    IRBuilder<> updateB(UpdateOrderFileBB); 
- 
-    // Check the bitmap, if it is already 1, do nothing. 
-    // Otherwise, set the bit, grab the index, update the buffer. 
-    Value *IdxFlags[] = {ConstantInt::get(Int32Ty, 0), 
-                         ConstantInt::get(Int32Ty, FuncId)}; 
-    Value *MapAddr = entryB.CreateGEP(MapTy, BitMap, IdxFlags, ""); 
-    LoadInst *loadBitMap = entryB.CreateLoad(Int8Ty, MapAddr, ""); 
-    entryB.CreateStore(ConstantInt::get(Int8Ty, 1), MapAddr); 
-    Value *IsNotExecuted = 
-        entryB.CreateICmpEQ(loadBitMap, ConstantInt::get(Int8Ty, 0)); 
-    entryB.CreateCondBr(IsNotExecuted, UpdateOrderFileBB, OrigEntry); 
- 
-    // Fill up UpdateOrderFileBB: grab the index, update the buffer! 
-    Value *IdxVal = updateB.CreateAtomicRMW( 
-        AtomicRMWInst::Add, BufferIdx, ConstantInt::get(Int32Ty, 1), 
-        AtomicOrdering::SequentiallyConsistent); 
-    // We need to wrap around the index to fit it inside the buffer. 
-    Value *WrappedIdx = updateB.CreateAnd( 
-        IdxVal, ConstantInt::get(Int32Ty, INSTR_ORDER_FILE_BUFFER_MASK)); 
-    Value *BufferGEPIdx[] = {ConstantInt::get(Int32Ty, 0), WrappedIdx}; 
-    Value *BufferAddr = 
-        updateB.CreateGEP(BufferTy, OrderFileBuffer, BufferGEPIdx, ""); 
-    updateB.CreateStore(ConstantInt::get(Type::getInt64Ty(Ctx), MD5Hash(F.getName())), 
-                        BufferAddr); 
-    updateB.CreateBr(OrigEntry); 
-  } 
- 
-  bool run(Module &M) { 
-    createOrderFileData(M); 
- 
-    int FuncId = 0; 
-    for (Function &F : M) { 
-      if (F.isDeclaration()) 
-        continue; 
-      generateCodeSequence(M, F, FuncId); 
-      ++FuncId; 
-    } 
- 
-    return true; 
-  } 
- 
-}; // End of InstrOrderFile struct 
- 
-class InstrOrderFileLegacyPass : public ModulePass { 
-public: 
-  static char ID; 
- 
-  InstrOrderFileLegacyPass() : ModulePass(ID) { 
-    initializeInstrOrderFileLegacyPassPass( 
-        *PassRegistry::getPassRegistry()); 
-  } 
- 
-  bool runOnModule(Module &M) override; 
-}; 
- 
-} // End anonymous namespace 
- 
-bool InstrOrderFileLegacyPass::runOnModule(Module &M) { 
-  if (skipModule(M)) 
-    return false; 
- 
-  return InstrOrderFile().run(M); 
-} 
- 
-PreservedAnalyses 
-InstrOrderFilePass::run(Module &M, ModuleAnalysisManager &AM) { 
-  if (InstrOrderFile().run(M)) 
-    return PreservedAnalyses::none(); 
-  return PreservedAnalyses::all(); 
-} 
- 
-INITIALIZE_PASS_BEGIN(InstrOrderFileLegacyPass, "instrorderfile", 
-                      "Instrumentation for Order File", false, false) 
-INITIALIZE_PASS_END(InstrOrderFileLegacyPass, "instrorderfile", 
-                    "Instrumentation for Order File", false, false) 
- 
-char InstrOrderFileLegacyPass::ID = 0; 
- 
-ModulePass *llvm::createInstrOrderFilePass() { 
-  return new InstrOrderFileLegacyPass(); 
-} 
+//===- InstrOrderFile.cpp ---- Late IR instrumentation for order file ----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include <fstream>
+#include <map>
+#include <mutex>
+#include <set>
+#include <sstream>
+
+using namespace llvm;
+#define DEBUG_TYPE "instrorderfile"
+
+static cl::opt<std::string> ClOrderFileWriteMapping(
+    "orderfile-write-mapping", cl::init(""),
+    cl::desc(
+        "Dump functions and their MD5 hash to deobfuscate profile data"),
+    cl::Hidden);
+
+namespace {
+
+// We need a global bitmap to tell if a function is executed. We also
+// need a global variable to save the order of functions. We can use a
+// fixed-size buffer that saves the MD5 hash of the function. We need
+// a global variable to save the index into the buffer.
+
+std::mutex MappingMutex;
+
+struct InstrOrderFile {
+private:
+  GlobalVariable *OrderFileBuffer;
+  GlobalVariable *BufferIdx;
+  GlobalVariable *BitMap;
+  ArrayType *BufferTy;
+  ArrayType *MapTy;
+
+public:
+  InstrOrderFile() {}
+
+  void createOrderFileData(Module &M) {
+    LLVMContext &Ctx = M.getContext();
+    int NumFunctions = 0;
+    for (Function &F : M) {
+      if (!F.isDeclaration())
+        NumFunctions++;
+    }
+
+    BufferTy =
+        ArrayType::get(Type::getInt64Ty(Ctx), INSTR_ORDER_FILE_BUFFER_SIZE);
+    Type *IdxTy = Type::getInt32Ty(Ctx);
+    MapTy = ArrayType::get(Type::getInt8Ty(Ctx), NumFunctions);
+
+    // Create the global variables.
+    std::string SymbolName = INSTR_PROF_ORDERFILE_BUFFER_NAME_STR;
+    OrderFileBuffer = new GlobalVariable(M, BufferTy, false, GlobalValue::LinkOnceODRLinkage,
+                           Constant::getNullValue(BufferTy), SymbolName);
+    Triple TT = Triple(M.getTargetTriple());
+    OrderFileBuffer->setSection(
+        getInstrProfSectionName(IPSK_orderfile, TT.getObjectFormat()));
+
+    std::string IndexName = INSTR_PROF_ORDERFILE_BUFFER_IDX_NAME_STR;
+    BufferIdx = new GlobalVariable(M, IdxTy, false, GlobalValue::LinkOnceODRLinkage,
+                           Constant::getNullValue(IdxTy), IndexName);
+
+    std::string BitMapName = "bitmap_0";
+    BitMap = new GlobalVariable(M, MapTy, false, GlobalValue::PrivateLinkage,
+                                Constant::getNullValue(MapTy), BitMapName);
+  }
+
+  // Generate the code sequence in the entry block of each function to
+  // update the buffer.
+  void generateCodeSequence(Module &M, Function &F, int FuncId) {
+    if (!ClOrderFileWriteMapping.empty()) {
+      std::lock_guard<std::mutex> LogLock(MappingMutex);
+      std::error_code EC;
+      llvm::raw_fd_ostream OS(ClOrderFileWriteMapping, EC,
+                              llvm::sys::fs::OF_Append);
+      if (EC) {
+        report_fatal_error(Twine("Failed to open ") + ClOrderFileWriteMapping +
+                           " to save mapping file for order file instrumentation\n");
+      } else {
+        std::stringstream stream;
+        stream << std::hex << MD5Hash(F.getName());
+        std::string singleLine = "MD5 " + stream.str() + " " +
+                                 std::string(F.getName()) + '\n';
+        OS << singleLine;
+      }
+    }
+
+    BasicBlock *OrigEntry = &F.getEntryBlock();
+
+    LLVMContext &Ctx = M.getContext();
+    IntegerType *Int32Ty = Type::getInt32Ty(Ctx);
+    IntegerType *Int8Ty = Type::getInt8Ty(Ctx);
+
+    // Create a new entry block for instrumentation. We will check the bitmap
+    // in this basic block.
+    BasicBlock *NewEntry =
+        BasicBlock::Create(M.getContext(), "order_file_entry", &F, OrigEntry);
+    IRBuilder<> entryB(NewEntry);
+    // Create a basic block for updating the circular buffer.
+    BasicBlock *UpdateOrderFileBB =
+        BasicBlock::Create(M.getContext(), "order_file_set", &F, OrigEntry);
+    IRBuilder<> updateB(UpdateOrderFileBB);
+
+    // Check the bitmap, if it is already 1, do nothing.
+    // Otherwise, set the bit, grab the index, update the buffer.
+    Value *IdxFlags[] = {ConstantInt::get(Int32Ty, 0),
+                         ConstantInt::get(Int32Ty, FuncId)};
+    Value *MapAddr = entryB.CreateGEP(MapTy, BitMap, IdxFlags, "");
+    LoadInst *loadBitMap = entryB.CreateLoad(Int8Ty, MapAddr, "");
+    entryB.CreateStore(ConstantInt::get(Int8Ty, 1), MapAddr);
+    Value *IsNotExecuted =
+        entryB.CreateICmpEQ(loadBitMap, ConstantInt::get(Int8Ty, 0));
+    entryB.CreateCondBr(IsNotExecuted, UpdateOrderFileBB, OrigEntry);
+
+    // Fill up UpdateOrderFileBB: grab the index, update the buffer!
+    Value *IdxVal = updateB.CreateAtomicRMW(
+        AtomicRMWInst::Add, BufferIdx, ConstantInt::get(Int32Ty, 1),
+        AtomicOrdering::SequentiallyConsistent);
+    // We need to wrap around the index to fit it inside the buffer.
+    Value *WrappedIdx = updateB.CreateAnd(
+        IdxVal, ConstantInt::get(Int32Ty, INSTR_ORDER_FILE_BUFFER_MASK));
+    Value *BufferGEPIdx[] = {ConstantInt::get(Int32Ty, 0), WrappedIdx};
+    Value *BufferAddr =
+        updateB.CreateGEP(BufferTy, OrderFileBuffer, BufferGEPIdx, "");
+    updateB.CreateStore(ConstantInt::get(Type::getInt64Ty(Ctx), MD5Hash(F.getName())),
+                        BufferAddr);
+    updateB.CreateBr(OrigEntry);
+  }
+
+  bool run(Module &M) {
+    createOrderFileData(M);
+
+    int FuncId = 0;
+    for (Function &F : M) {
+      if (F.isDeclaration())
+        continue;
+      generateCodeSequence(M, F, FuncId);
+      ++FuncId;
+    }
+
+    return true;
+  }
+
+}; // End of InstrOrderFile struct
+
+class InstrOrderFileLegacyPass : public ModulePass {
+public:
+  static char ID;
+
+  InstrOrderFileLegacyPass() : ModulePass(ID) {
+    initializeInstrOrderFileLegacyPassPass(
+        *PassRegistry::getPassRegistry());
+  }
+
+  bool runOnModule(Module &M) override;
+};
+
+} // End anonymous namespace
+
+bool InstrOrderFileLegacyPass::runOnModule(Module &M) {
+  if (skipModule(M))
+    return false;
+
+  return InstrOrderFile().run(M);
+}
+
+PreservedAnalyses
+InstrOrderFilePass::run(Module &M, ModuleAnalysisManager &AM) {
+  if (InstrOrderFile().run(M))
+    return PreservedAnalyses::none();
+  return PreservedAnalyses::all();
+}
+
+INITIALIZE_PASS_BEGIN(InstrOrderFileLegacyPass, "instrorderfile",
+                      "Instrumentation for Order File", false, false)
+INITIALIZE_PASS_END(InstrOrderFileLegacyPass, "instrorderfile",
+                    "Instrumentation for Order File", false, false)
+
+char InstrOrderFileLegacyPass::ID = 0;
+
+ModulePass *llvm::createInstrOrderFilePass() {
+  return new InstrOrderFileLegacyPass();
+}
diff --git a/contrib/libs/llvm12/lib/Transforms/Instrumentation/InstrProfiling.cpp b/contrib/libs/llvm12/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 925c018135..9efc7d1ac5 100644
--- a/contrib/libs/llvm12/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -1,266 +1,266 @@
-//===-- InstrProfiling.cpp - Frontend instrumentation based profiling -----===// 
-// 
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 
-// 
-//===----------------------------------------------------------------------===// 
-// 
-// This pass lowers instrprof_* intrinsics emitted by a frontend for profiling. 
-// It also builds the data structures and initialization code needed for 
-// updating execution counts and emitting the profile at runtime. 
-// 
-//===----------------------------------------------------------------------===// 
- 
-#include "llvm/Transforms/Instrumentation/InstrProfiling.h" 
-#include "llvm/ADT/ArrayRef.h" 
-#include "llvm/ADT/SmallVector.h" 
-#include "llvm/ADT/StringRef.h" 
-#include "llvm/ADT/Triple.h" 
-#include "llvm/ADT/Twine.h" 
-#include "llvm/Analysis/BlockFrequencyInfo.h" 
-#include "llvm/Analysis/BranchProbabilityInfo.h" 
-#include "llvm/Analysis/LoopInfo.h" 
-#include "llvm/Analysis/TargetLibraryInfo.h" 
-#include "llvm/IR/Attributes.h" 
-#include "llvm/IR/BasicBlock.h" 
-#include "llvm/IR/Constant.h" 
-#include "llvm/IR/Constants.h" 
-#include "llvm/IR/DerivedTypes.h" 
-#include "llvm/IR/Dominators.h" 
-#include "llvm/IR/Function.h" 
-#include "llvm/IR/GlobalValue.h" 
-#include "llvm/IR/GlobalVariable.h" 
-#include "llvm/IR/IRBuilder.h" 
-#include "llvm/IR/Instruction.h" 
-#include "llvm/IR/Instructions.h" 
-#include "llvm/IR/IntrinsicInst.h" 
-#include "llvm/IR/Module.h" 
-#include "llvm/IR/Type.h" 
-#include "llvm/InitializePasses.h" 
-#include "llvm/Pass.h" 
-#include "llvm/ProfileData/InstrProf.h" 
-#include "llvm/Support/Casting.h" 
-#include "llvm/Support/CommandLine.h" 
-#include "llvm/Support/Error.h" 
-#include "llvm/Support/ErrorHandling.h" 
-#include "llvm/Transforms/Utils/BasicBlockUtils.h" 
-#include "llvm/Transforms/Utils/ModuleUtils.h" 
-#include "llvm/Transforms/Utils/SSAUpdater.h" 
-#include <algorithm> 
-#include <cassert> 
-#include <cstddef> 
-#include <cstdint> 
-#include <string> 
- 
-using namespace llvm; 
- 
-#define DEBUG_TYPE "instrprof" 
- 
-namespace { 
- 
-cl::opt<bool> DoHashBasedCounterSplit( 
-    "hash-based-counter-split", 
-    cl::desc("Rename counter variable of a comdat function based on cfg hash"), 
-    cl::init(true)); 
- 
-cl::opt<bool> RuntimeCounterRelocation( 
-    "runtime-counter-relocation", 
-    cl::desc("Enable relocating counters at runtime."), 
-    cl::init(false)); 
- 
-cl::opt<bool> ValueProfileStaticAlloc( 
-    "vp-static-alloc", 
-    cl::desc("Do static counter allocation for value profiler"), 
-    cl::init(true)); 
- 
-cl::opt<double> NumCountersPerValueSite( 
-    "vp-counters-per-site", 
-    cl::desc("The average number of profile counters allocated " 
-             "per value profiling site."), 
-    // This is set to a very small value because in real programs, only 
-    // a very small percentage of value sites have non-zero targets, e.g, 1/30. 
-    // For those sites with non-zero profile, the average number of targets 
-    // is usually smaller than 2. 
-    cl::init(1.0)); 
- 
-cl::opt<bool> AtomicCounterUpdateAll( 
-    "instrprof-atomic-counter-update-all", cl::ZeroOrMore, 
-    cl::desc("Make all profile counter updates atomic (for testing only)"), 
-    cl::init(false)); 
- 
-cl::opt<bool> AtomicCounterUpdatePromoted( 
-    "atomic-counter-update-promoted", cl::ZeroOrMore, 
-    cl::desc("Do counter update using atomic fetch add " 
-             " for promoted counters only"), 
-    cl::init(false)); 
- 
-cl::opt<bool> AtomicFirstCounter( 
-    "atomic-first-counter", cl::ZeroOrMore, 
-    cl::desc("Use atomic fetch add for first counter in a function (usually " 
-             "the entry counter)"), 
-    cl::init(false)); 
- 
-// If the option is not specified, the default behavior about whether 
-// counter promotion is done depends on how instrumentaiton lowering 
-// pipeline is setup, i.e., the default value of true of this option 
-// does not mean the promotion will be done by default. Explicitly 
-// setting this option can override the default behavior. 
-cl::opt<bool> DoCounterPromotion("do-counter-promotion", cl::ZeroOrMore, 
-                                 cl::desc("Do counter register promotion"), 
-                                 cl::init(false)); 
-cl::opt<unsigned> MaxNumOfPromotionsPerLoop( 
-    cl::ZeroOrMore, "max-counter-promotions-per-loop", cl::init(20), 
-    cl::desc("Max number counter promotions per loop to avoid" 
-             " increasing register pressure too much")); 
- 
-// A debug option 
-cl::opt<int> 
-    MaxNumOfPromotions(cl::ZeroOrMore, "max-counter-promotions", cl::init(-1), 
-                       cl::desc("Max number of allowed counter promotions")); 
- 
-cl::opt<unsigned> SpeculativeCounterPromotionMaxExiting( 
-    cl::ZeroOrMore, "speculative-counter-promotion-max-exiting", cl::init(3), 
-    cl::desc("The max number of exiting blocks of a loop to allow " 
-             " speculative counter promotion")); 
- 
-cl::opt<bool> SpeculativeCounterPromotionToLoop( 
-    cl::ZeroOrMore, "speculative-counter-promotion-to-loop", cl::init(false), 
-    cl::desc("When the option is false, if the target block is in a loop, " 
-             "the promotion will be disallowed unless the promoted counter " 
-             " update can be further/iteratively promoted into an acyclic " 
-             " region.")); 
- 
-cl::opt<bool> IterativeCounterPromotion( 
-    cl::ZeroOrMore, "iterative-counter-promotion", cl::init(true), 
-    cl::desc("Allow counter promotion across the whole loop nest.")); 
- 
+//===-- InstrProfiling.cpp - Frontend instrumentation based profiling -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers instrprof_* intrinsics emitted by a frontend for profiling.
+// It also builds the data structures and initialization code needed for
+// updating execution counts and emitting the profile at runtime.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <string>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "instrprof"
+
+namespace {
+
+cl::opt<bool> DoHashBasedCounterSplit(
+    "hash-based-counter-split",
+    cl::desc("Rename counter variable of a comdat function based on cfg hash"),
+    cl::init(true));
+
+cl::opt<bool> RuntimeCounterRelocation(
+    "runtime-counter-relocation",
+    cl::desc("Enable relocating counters at runtime."),
+    cl::init(false));
+
+cl::opt<bool> ValueProfileStaticAlloc(
+    "vp-static-alloc",
+    cl::desc("Do static counter allocation for value profiler"),
+    cl::init(true));
+
+cl::opt<double> NumCountersPerValueSite(
+    "vp-counters-per-site",
+    cl::desc("The average number of profile counters allocated "
+             "per value profiling site."),
+    // This is set to a very small value because in real programs, only
+    // a very small percentage of value sites have non-zero targets, e.g, 1/30.
+    // For those sites with non-zero profile, the average number of targets
+    // is usually smaller than 2.
+    cl::init(1.0));
+
+cl::opt<bool> AtomicCounterUpdateAll(
+    "instrprof-atomic-counter-update-all", cl::ZeroOrMore,
+    cl::desc("Make all profile counter updates atomic (for testing only)"),
+    cl::init(false));
+
+cl::opt<bool> AtomicCounterUpdatePromoted(
+    "atomic-counter-update-promoted", cl::ZeroOrMore,
+    cl::desc("Do counter update using atomic fetch add "
+             " for promoted counters only"),
+    cl::init(false));
+
+cl::opt<bool> AtomicFirstCounter(
+    "atomic-first-counter", cl::ZeroOrMore,
+    cl::desc("Use atomic fetch add for first counter in a function (usually "
+             "the entry counter)"),
+    cl::init(false));
+
+// If the option is not specified, the default behavior about whether
+// counter promotion is done depends on how instrumentaiton lowering
+// pipeline is setup, i.e., the default value of true of this option
+// does not mean the promotion will be done by default. Explicitly
+// setting this option can override the default behavior.
+cl::opt<bool> DoCounterPromotion("do-counter-promotion", cl::ZeroOrMore,
+                                 cl::desc("Do counter register promotion"),
+                                 cl::init(false));
+cl::opt<unsigned> MaxNumOfPromotionsPerLoop(
+    cl::ZeroOrMore, "max-counter-promotions-per-loop", cl::init(20),
+    cl::desc("Max number counter promotions per loop to avoid"
+             " increasing register pressure too much"));
+
+// A debug option
+cl::opt<int>
+    MaxNumOfPromotions(cl::ZeroOrMore, "max-counter-promotions", cl::init(-1),
+                       cl::desc("Max number of allowed counter promotions"));
+
+cl::opt<unsigned> SpeculativeCounterPromotionMaxExiting(
+    cl::ZeroOrMore, "speculative-counter-promotion-max-exiting", cl::init(3),
+    cl::desc("The max number of exiting blocks of a loop to allow "
+             " speculative counter promotion"));
+
+cl::opt<bool> SpeculativeCounterPromotionToLoop(
+    cl::ZeroOrMore, "speculative-counter-promotion-to-loop", cl::init(false),
+    cl::desc("When the option is false, if the target block is in a loop, "
+             "the promotion will be disallowed unless the promoted counter "
+             " update can be further/iteratively promoted into an acyclic "
+             " region."));
+
+cl::opt<bool> IterativeCounterPromotion(
+    cl::ZeroOrMore, "iterative-counter-promotion", cl::init(true),
+    cl::desc("Allow counter promotion across the whole loop nest."));
+
 cl::opt<bool> SkipRetExitBlock(
     cl::ZeroOrMore, "skip-ret-exit-block", cl::init(true),
     cl::desc("Suppress counter promotion if exit blocks contain ret."));
 
-class InstrProfilingLegacyPass : public ModulePass { 
-  InstrProfiling InstrProf; 
- 
-public: 
-  static char ID; 
- 
-  InstrProfilingLegacyPass() : ModulePass(ID) {} 
-  InstrProfilingLegacyPass(const InstrProfOptions &Options, bool IsCS = false) 
-      : ModulePass(ID), InstrProf(Options, IsCS) { 
-    initializeInstrProfilingLegacyPassPass(*PassRegistry::getPassRegistry()); 
-  } 
- 
-  StringRef getPassName() const override { 
-    return "Frontend instrumentation-based coverage lowering"; 
-  } 
- 
-  bool runOnModule(Module &M) override { 
-    auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { 
-      return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 
-    }; 
-    return InstrProf.run(M, GetTLI); 
-  } 
- 
-  void getAnalysisUsage(AnalysisUsage &AU) const override { 
-    AU.setPreservesCFG(); 
-    AU.addRequired<TargetLibraryInfoWrapperPass>(); 
-  } 
-}; 
- 
-/// 
-/// A helper class to promote one counter RMW operation in the loop 
-/// into register update. 
-/// 
-/// RWM update for the counter will be sinked out of the loop after 
-/// the transformation. 
-/// 
-class PGOCounterPromoterHelper : public LoadAndStorePromoter { 
-public: 
-  PGOCounterPromoterHelper( 
-      Instruction *L, Instruction *S, SSAUpdater &SSA, Value *Init, 
-      BasicBlock *PH, ArrayRef<BasicBlock *> ExitBlocks, 
-      ArrayRef<Instruction *> InsertPts, 
-      DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands, 
-      LoopInfo &LI) 
-      : LoadAndStorePromoter({L, S}, SSA), Store(S), ExitBlocks(ExitBlocks), 
-        InsertPts(InsertPts), LoopToCandidates(LoopToCands), LI(LI) { 
-    assert(isa<LoadInst>(L)); 
-    assert(isa<StoreInst>(S)); 
-    SSA.AddAvailableValue(PH, Init); 
-  } 
- 
-  void doExtraRewritesBeforeFinalDeletion() override { 
-    for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { 
-      BasicBlock *ExitBlock = ExitBlocks[i]; 
-      Instruction *InsertPos = InsertPts[i]; 
-      // Get LiveIn value into the ExitBlock. If there are multiple 
-      // predecessors, the value is defined by a PHI node in this 
-      // block. 
-      Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); 
-      Value *Addr = cast<StoreInst>(Store)->getPointerOperand(); 
-      Type *Ty = LiveInValue->getType(); 
-      IRBuilder<> Builder(InsertPos); 
-      if (AtomicCounterUpdatePromoted) 
-        // automic update currently can only be promoted across the current 
-        // loop, not the whole loop nest. 
-        Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, LiveInValue, 
-                                AtomicOrdering::SequentiallyConsistent); 
-      else { 
-        LoadInst *OldVal = Builder.CreateLoad(Ty, Addr, "pgocount.promoted"); 
-        auto *NewVal = Builder.CreateAdd(OldVal, LiveInValue); 
-        auto *NewStore = Builder.CreateStore(NewVal, Addr); 
- 
-        // Now update the parent loop's candidate list: 
-        if (IterativeCounterPromotion) { 
-          auto *TargetLoop = LI.getLoopFor(ExitBlock); 
-          if (TargetLoop) 
-            LoopToCandidates[TargetLoop].emplace_back(OldVal, NewStore); 
-        } 
-      } 
-    } 
-  } 
- 
-private: 
-  Instruction *Store; 
-  ArrayRef<BasicBlock *> ExitBlocks; 
-  ArrayRef<Instruction *> InsertPts; 
-  DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates; 
-  LoopInfo &LI; 
-}; 
- 
-/// A helper class to do register promotion for all profile counter 
-/// updates in a loop. 
-/// 
-class PGOCounterPromoter { 
-public: 
-  PGOCounterPromoter( 
-      DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands, 
-      Loop &CurLoop, LoopInfo &LI, BlockFrequencyInfo *BFI) 
-      : LoopToCandidates(LoopToCands), ExitBlocks(), InsertPts(), L(CurLoop), 
-        LI(LI), BFI(BFI) { 
- 
-    // Skip collection of ExitBlocks and InsertPts for loops that will not be 
-    // able to have counters promoted. 
-    SmallVector<BasicBlock *, 8> LoopExitBlocks; 
-    SmallPtrSet<BasicBlock *, 8> BlockSet; 
- 
-    L.getExitBlocks(LoopExitBlocks); 
-    if (!isPromotionPossible(&L, LoopExitBlocks)) 
-      return; 
- 
-    for (BasicBlock *ExitBlock : LoopExitBlocks) { 
-      if (BlockSet.insert(ExitBlock).second) { 
-        ExitBlocks.push_back(ExitBlock); 
-        InsertPts.push_back(&*ExitBlock->getFirstInsertionPt()); 
-      } 
-    } 
-  } 
- 
-  bool run(int64_t *NumPromoted) { 
-    // Skip 'infinite' loops: 
-    if (ExitBlocks.size() == 0) 
-      return false; 
+class InstrProfilingLegacyPass : public ModulePass { // legacy-PM adapter
+  InstrProfiling InstrProf; // the lowering implementation this pass forwards to
+
+public:
+  static char ID;
+  // Only the options-taking constructor calls the pass initializer.
+  InstrProfilingLegacyPass() : ModulePass(ID) {}
+  InstrProfilingLegacyPass(const InstrProfOptions &Options, bool IsCS = false)
+      : ModulePass(ID), InstrProf(Options, IsCS) {
+    initializeInstrProfilingLegacyPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override {
+    return "Frontend instrumentation-based coverage lowering";
+  }
+  // Runs InstrProfiling on M, resolving TargetLibraryInfo per function.
+  bool runOnModule(Module &M) override {
+    auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
+      return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+    };
+    return InstrProf.run(M, GetTLI);
+  }
+  // TargetLibraryInfo is required; the pass reports the CFG as preserved.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<TargetLibraryInfoWrapperPass>();
+  }
+};
+
+///
+/// A helper class that promotes one counter read-modify-write (RMW)
+/// operation inside a loop into a register update.
+///
+/// After the transformation the RMW update of the counter is sunk out of
+/// the loop, to the insertion points in the loop's exit blocks.
+///
+class PGOCounterPromoterHelper : public LoadAndStorePromoter {
+public:
+  PGOCounterPromoterHelper(
+      Instruction *L, Instruction *S, SSAUpdater &SSA, Value *Init,
+      BasicBlock *PH, ArrayRef<BasicBlock *> ExitBlocks,
+      ArrayRef<Instruction *> InsertPts,
+      DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
+      LoopInfo &LI)
+      : LoadAndStorePromoter({L, S}, SSA), Store(S), ExitBlocks(ExitBlocks),
+        InsertPts(InsertPts), LoopToCandidates(LoopToCands), LI(LI) {
+    assert(isa<LoadInst>(L));
+    assert(isa<StoreInst>(S));
+    SSA.AddAvailableValue(PH, Init); // Init is the value available in PH
+  }
+  // Adds the promoted value back to the counter at every exit insertion point.
+  void doExtraRewritesBeforeFinalDeletion() override {
+    for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+      BasicBlock *ExitBlock = ExitBlocks[i];
+      Instruction *InsertPos = InsertPts[i];
+      // Get the value of the promoted counter that is live into ExitBlock.
+      // If there are multiple predecessors, the value is defined by a PHI
+      // node in this block.
+      Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
+      Value *Addr = cast<StoreInst>(Store)->getPointerOperand();
+      Type *Ty = LiveInValue->getType();
+      IRBuilder<> Builder(InsertPos);
+      if (AtomicCounterUpdatePromoted)
+        // An atomic update can currently only be promoted across the current
+        // loop, not the whole loop nest, so no new candidate is recorded.
+        Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, LiveInValue,
+                                AtomicOrdering::SequentiallyConsistent);
+      else {
+        LoadInst *OldVal = Builder.CreateLoad(Ty, Addr, "pgocount.promoted");
+        auto *NewVal = Builder.CreateAdd(OldVal, LiveInValue);
+        auto *NewStore = Builder.CreateStore(NewVal, Addr);
+
+        // Offer the new load/store pair to the loop containing ExitBlock:
+        if (IterativeCounterPromotion) {
+          auto *TargetLoop = LI.getLoopFor(ExitBlock);
+          if (TargetLoop)
+            LoopToCandidates[TargetLoop].emplace_back(OldVal, NewStore);
+        }
+      }
+    }
+  }
+
+private:
+  Instruction *Store; // the counter store; supplies the counter address
+  ArrayRef<BasicBlock *> ExitBlocks; // indexed in step with InsertPts
+  ArrayRef<Instruction *> InsertPts;
+  DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
+  LoopInfo &LI;
+};
+
+/// A helper class to do register promotion for all profile counter
+/// updates in a loop.
+///
+class PGOCounterPromoter {
+public:
+  PGOCounterPromoter(
+      DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
+      Loop &CurLoop, LoopInfo &LI, BlockFrequencyInfo *BFI)
+      : LoopToCandidates(LoopToCands), ExitBlocks(), InsertPts(), L(CurLoop),
+        LI(LI), BFI(BFI) {
+    // Gather the loop's exit blocks first; when promotion is impossible the
+    // ExitBlocks and InsertPts members are deliberately left empty.
+    SmallVector<BasicBlock *, 8> AllExits;
+    L.getExitBlocks(AllExits);
+    if (!isPromotionPossible(&L, AllExits))
+      return;
+
+    // Record every distinct exit block once, together with the first
+    // insertion point inside it; repeated entries in the list are skipped.
+    SmallPtrSet<BasicBlock *, 8> Seen;
+    for (BasicBlock *Exit : AllExits) {
+      if (!Seen.insert(Exit).second)
+        continue;
+      ExitBlocks.push_back(Exit);
+      InsertPts.push_back(&*Exit->getFirstInsertionPt());
+    }
+  }
+
+  bool run(int64_t *NumPromoted) {
+    // Skip 'infinite' loops:
+    if (ExitBlocks.size() == 0)
+      return false;
 
     // Skip if any of the ExitBlocks contains a ret instruction.
     // This is to prevent dumping of incomplete profile -- if the
@@ -273,129 +273,129 @@ public:
           return false;
     }
 
-    unsigned MaxProm = getMaxNumOfPromotionsInLoop(&L); 
-    if (MaxProm == 0) 
-      return false; 
- 
-    unsigned Promoted = 0; 
-    for (auto &Cand : LoopToCandidates[&L]) { 
- 
-      SmallVector<PHINode *, 4> NewPHIs; 
-      SSAUpdater SSA(&NewPHIs); 
-      Value *InitVal = ConstantInt::get(Cand.first->getType(), 0); 
- 
-      // If BFI is set, we will use it to guide the promotions. 
-      if (BFI) { 
-        auto *BB = Cand.first->getParent(); 
-        auto InstrCount = BFI->getBlockProfileCount(BB); 
-        if (!InstrCount) 
-          continue; 
-        auto PreheaderCount = BFI->getBlockProfileCount(L.getLoopPreheader()); 
-        // If the average loop trip count is not greater than 1.5, we skip 
-        // promotion. 
-        if (PreheaderCount && 
-            (PreheaderCount.getValue() * 3) >= (InstrCount.getValue() * 2)) 
-          continue; 
-      } 
- 
-      PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal, 
-                                        L.getLoopPreheader(), ExitBlocks, 
-                                        InsertPts, LoopToCandidates, LI); 
-      Promoter.run(SmallVector<Instruction *, 2>({Cand.first, Cand.second})); 
-      Promoted++; 
-      if (Promoted >= MaxProm) 
-        break; 
- 
-      (*NumPromoted)++; 
-      if (MaxNumOfPromotions != -1 && *NumPromoted >= MaxNumOfPromotions) 
-        break; 
-    } 
- 
-    LLVM_DEBUG(dbgs() << Promoted << " counters promoted for loop (depth=" 
-                      << L.getLoopDepth() << ")\n"); 
-    return Promoted != 0; 
-  } 
- 
-private: 
-  bool allowSpeculativeCounterPromotion(Loop *LP) { 
-    SmallVector<BasicBlock *, 8> ExitingBlocks; 
-    L.getExitingBlocks(ExitingBlocks); 
-    // Not considierered speculative. 
-    if (ExitingBlocks.size() == 1) 
-      return true; 
-    if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting) 
-      return false; 
-    return true; 
-  } 
- 
-  // Check whether the loop satisfies the basic conditions needed to perform 
-  // Counter Promotions. 
-  bool isPromotionPossible(Loop *LP, 
-                           const SmallVectorImpl<BasicBlock *> &LoopExitBlocks) { 
-    // We can't insert into a catchswitch. 
-    if (llvm::any_of(LoopExitBlocks, [](BasicBlock *Exit) { 
-          return isa<CatchSwitchInst>(Exit->getTerminator()); 
-        })) 
-      return false; 
- 
-    if (!LP->hasDedicatedExits()) 
-      return false; 
- 
-    BasicBlock *PH = LP->getLoopPreheader(); 
-    if (!PH) 
-      return false; 
- 
-    return true; 
-  } 
- 
-  // Returns the max number of Counter Promotions for LP. 
-  unsigned getMaxNumOfPromotionsInLoop(Loop *LP) { 
-    SmallVector<BasicBlock *, 8> LoopExitBlocks; 
-    LP->getExitBlocks(LoopExitBlocks); 
-    if (!isPromotionPossible(LP, LoopExitBlocks)) 
-      return 0; 
- 
-    SmallVector<BasicBlock *, 8> ExitingBlocks; 
-    LP->getExitingBlocks(ExitingBlocks); 
- 
-    // If BFI is set, we do more aggressive promotions based on BFI. 
-    if (BFI) 
-      return (unsigned)-1; 
- 
-    // Not considierered speculative. 
-    if (ExitingBlocks.size() == 1) 
-      return MaxNumOfPromotionsPerLoop; 
- 
-    if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting) 
-      return 0; 
- 
-    // Whether the target block is in a loop does not matter: 
-    if (SpeculativeCounterPromotionToLoop) 
-      return MaxNumOfPromotionsPerLoop; 
- 
-    // Now check the target block: 
-    unsigned MaxProm = MaxNumOfPromotionsPerLoop; 
-    for (auto *TargetBlock : LoopExitBlocks) { 
-      auto *TargetLoop = LI.getLoopFor(TargetBlock); 
-      if (!TargetLoop) 
-        continue; 
-      unsigned MaxPromForTarget = getMaxNumOfPromotionsInLoop(TargetLoop); 
-      unsigned PendingCandsInTarget = LoopToCandidates[TargetLoop].size(); 
-      MaxProm = 
-          std::min(MaxProm, std::max(MaxPromForTarget, PendingCandsInTarget) - 
-                                PendingCandsInTarget); 
-    } 
-    return MaxProm; 
-  } 
- 
-  DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates; 
-  SmallVector<BasicBlock *, 8> ExitBlocks; 
-  SmallVector<Instruction *, 8> InsertPts; 
-  Loop &L; 
-  LoopInfo &LI; 
-  BlockFrequencyInfo *BFI; 
-}; 
- 
+    unsigned MaxProm = getMaxNumOfPromotionsInLoop(&L);
+    if (MaxProm == 0)
+      return false;
+
+    unsigned Promoted = 0;
+    for (auto &Cand : LoopToCandidates[&L]) {
+
+      SmallVector<PHINode *, 4> NewPHIs;
+      SSAUpdater SSA(&NewPHIs);
+      Value *InitVal = ConstantInt::get(Cand.first->getType(), 0);
+
+      // If BFI is set, we will use it to guide the promotions.
+      if (BFI) {
+        auto *BB = Cand.first->getParent();
+        auto InstrCount = BFI->getBlockProfileCount(BB);
+        if (!InstrCount)
+          continue;
+        auto PreheaderCount = BFI->getBlockProfileCount(L.getLoopPreheader());
+        // If the average loop trip count is not greater than 1.5, we skip
+        // promotion.
+        if (PreheaderCount &&
+            (PreheaderCount.getValue() * 3) >= (InstrCount.getValue() * 2))
+          continue;
+      }
+
+      PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal,
+                                        L.getLoopPreheader(), ExitBlocks,
+                                        InsertPts, LoopToCandidates, LI);
+      Promoter.run(SmallVector<Instruction *, 2>({Cand.first, Cand.second}));
+      Promoted++;
+      if (Promoted >= MaxProm)
+        break;
+
+      (*NumPromoted)++;
+      if (MaxNumOfPromotions != -1 && *NumPromoted >= MaxNumOfPromotions)
+        break;
+    }
+
+    LLVM_DEBUG(dbgs() << Promoted << " counters promoted for loop (depth="
+                      << L.getLoopDepth() << ")\n");
+    return Promoted != 0;
+  }
+
+private:
+  // Returns true when promoting counters out of LP is acceptable: either LP
+  // has a single exiting block (not considered speculative) or its number of
+  // exiting blocks does not exceed SpeculativeCounterPromotionMaxExiting.
+  bool allowSpeculativeCounterPromotion(Loop *LP) {
+    SmallVector<BasicBlock *, 8> ExitingBlocks;
+    // Query the loop that was passed in, not the member loop L.
+    LP->getExitingBlocks(ExitingBlocks);
+    return ExitingBlocks.size() == 1 ||
+           ExitingBlocks.size() <= SpeculativeCounterPromotionMaxExiting;
+  }
+
+  // Basic feasibility test for counter promotion out of LP: no exit block may
+  // end in a catchswitch, LP needs dedicated exits, and LP needs a preheader.
+  bool isPromotionPossible(Loop *LP,
+                           const SmallVectorImpl<BasicBlock *> &LoopExitBlocks) {
+    // We can't insert into a block that is terminated by a catchswitch.
+    auto EndsInCatchSwitch = [](BasicBlock *BB) {
+      return isa<CatchSwitchInst>(BB->getTerminator());
+    };
+    if (llvm::any_of(LoopExitBlocks, EndsInCatchSwitch))
+      return false;
+
+    // The two remaining requirements are properties of the loop itself; they
+    // are checked in the same order as before (dedicated exits, preheader).
+    if (!LP->hasDedicatedExits())
+      return false;
+
+    // A non-null preheader is the last condition.
+    return LP->getLoopPreheader() != nullptr;
+  }
+
+  // Returns the max number of counter promotions allowed for LP; 0 means none.
+  unsigned getMaxNumOfPromotionsInLoop(Loop *LP) {
+    SmallVector<BasicBlock *, 8> LoopExitBlocks;
+    LP->getExitBlocks(LoopExitBlocks);
+    if (!isPromotionPossible(LP, LoopExitBlocks))
+      return 0;
+
+    SmallVector<BasicBlock *, 8> ExitingBlocks;
+    LP->getExitingBlocks(ExitingBlocks);
+
+    // If BFI is set, we do more aggressive promotions based on BFI: no cap is
+    if (BFI) // imposed here, the largest unsigned value is returned.
+      return (unsigned)-1;
+
+    // A single exiting block is not considered speculative.
+    if (ExitingBlocks.size() == 1)
+      return MaxNumOfPromotionsPerLoop;
+    // More exiting blocks than the speculative limit: no promotion at all.
+    if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
+      return 0;
+
+    // Whether the target block is in a loop does not matter:
+    if (SpeculativeCounterPromotionToLoop)
+      return MaxNumOfPromotionsPerLoop;
+
+    // Now check the target blocks (the exit blocks that lie inside a loop):
+    unsigned MaxProm = MaxNumOfPromotionsPerLoop;
+    for (auto *TargetBlock : LoopExitBlocks) {
+      auto *TargetLoop = LI.getLoopFor(TargetBlock);
+      if (!TargetLoop)
+        continue; // this exit block is not inside any loop
+      unsigned MaxPromForTarget = getMaxNumOfPromotionsInLoop(TargetLoop);
+      unsigned PendingCandsInTarget = LoopToCandidates[TargetLoop].size();
+      MaxProm = // max(a, b) - b yields a - b, clamped at zero
+          std::min(MaxProm, std::max(MaxPromForTarget, PendingCandsInTarget) -
+                                PendingCandsInTarget);
+    }
+    return MaxProm;
+  }
+
+  DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
+  SmallVector<BasicBlock *, 8> ExitBlocks;
+  SmallVector<Instruction *, 8> InsertPts;
+  Loop &L;
+  LoopInfo &LI;
+  BlockFrequencyInfo *BFI;
+};
+
 enum class ValueProfilingCallType {
   // Individual values are tracked. Currently used for indiret call target
   // profiling.
@@ -405,204 +405,204 @@ enum class ValueProfilingCallType {
   MemOp
 };
 
-} // end anonymous namespace 
- 
-PreservedAnalyses InstrProfiling::run(Module &M, ModuleAnalysisManager &AM) { 
-  FunctionAnalysisManager &FAM = 
-      AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 
-  auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 
-    return FAM.getResult<TargetLibraryAnalysis>(F); 
-  }; 
-  if (!run(M, GetTLI)) 
-    return PreservedAnalyses::all(); 
- 
-  return PreservedAnalyses::none(); 
-} 
- 
-char InstrProfilingLegacyPass::ID = 0; 
-INITIALIZE_PASS_BEGIN( 
-    InstrProfilingLegacyPass, "instrprof", 
-    "Frontend instrumentation-based coverage lowering.", false, false) 
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 
-INITIALIZE_PASS_END( 
-    InstrProfilingLegacyPass, "instrprof", 
-    "Frontend instrumentation-based coverage lowering.", false, false) 
- 
-ModulePass * 
-llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options, 
-                                     bool IsCS) { 
-  return new InstrProfilingLegacyPass(Options, IsCS); 
-} 
- 
-static InstrProfIncrementInst *castToIncrementInst(Instruction *Instr) { 
-  InstrProfIncrementInst *Inc = dyn_cast<InstrProfIncrementInstStep>(Instr); 
-  if (Inc) 
-    return Inc; 
-  return dyn_cast<InstrProfIncrementInst>(Instr); 
-} 
- 
-bool InstrProfiling::lowerIntrinsics(Function *F) { 
-  bool MadeChange = false; 
-  PromotionCandidates.clear(); 
-  for (BasicBlock &BB : *F) { 
-    for (auto I = BB.begin(), E = BB.end(); I != E;) { 
-      auto Instr = I++; 
-      InstrProfIncrementInst *Inc = castToIncrementInst(&*Instr); 
-      if (Inc) { 
-        lowerIncrement(Inc); 
-        MadeChange = true; 
-      } else if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(Instr)) { 
-        lowerValueProfileInst(Ind); 
-        MadeChange = true; 
-      } 
-    } 
-  } 
- 
-  if (!MadeChange) 
-    return false; 
- 
-  promoteCounterLoadStores(F); 
-  return true; 
-} 
- 
-bool InstrProfiling::isRuntimeCounterRelocationEnabled() const { 
-  if (RuntimeCounterRelocation.getNumOccurrences() > 0) 
-    return RuntimeCounterRelocation; 
- 
-  return TT.isOSFuchsia(); 
-} 
- 
-bool InstrProfiling::isCounterPromotionEnabled() const { 
-  if (DoCounterPromotion.getNumOccurrences() > 0) 
-    return DoCounterPromotion; 
- 
-  return Options.DoCounterPromotion; 
-} 
- 
-void InstrProfiling::promoteCounterLoadStores(Function *F) { 
-  if (!isCounterPromotionEnabled()) 
-    return; 
- 
-  DominatorTree DT(*F); 
-  LoopInfo LI(DT); 
-  DenseMap<Loop *, SmallVector<LoadStorePair, 8>> LoopPromotionCandidates; 
- 
-  std::unique_ptr<BlockFrequencyInfo> BFI; 
-  if (Options.UseBFIInPromotion) { 
-    std::unique_ptr<BranchProbabilityInfo> BPI; 
-    BPI.reset(new BranchProbabilityInfo(*F, LI, &GetTLI(*F))); 
-    BFI.reset(new BlockFrequencyInfo(*F, *BPI, LI)); 
-  } 
- 
-  for (const auto &LoadStore : PromotionCandidates) { 
-    auto *CounterLoad = LoadStore.first; 
-    auto *CounterStore = LoadStore.second; 
-    BasicBlock *BB = CounterLoad->getParent(); 
-    Loop *ParentLoop = LI.getLoopFor(BB); 
-    if (!ParentLoop) 
-      continue; 
-    LoopPromotionCandidates[ParentLoop].emplace_back(CounterLoad, CounterStore); 
-  } 
- 
-  SmallVector<Loop *, 4> Loops = LI.getLoopsInPreorder(); 
- 
-  // Do a post-order traversal of the loops so that counter updates can be 
-  // iteratively hoisted outside the loop nest. 
-  for (auto *Loop : llvm::reverse(Loops)) { 
-    PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI, BFI.get()); 
-    Promoter.run(&TotalCountersPromoted); 
-  } 
-} 
- 
-/// Check if the module contains uses of any profiling intrinsics. 
-static bool containsProfilingIntrinsics(Module &M) { 
-  if (auto *F = M.getFunction( 
-          Intrinsic::getName(llvm::Intrinsic::instrprof_increment))) 
-    if (!F->use_empty()) 
-      return true; 
-  if (auto *F = M.getFunction( 
-          Intrinsic::getName(llvm::Intrinsic::instrprof_increment_step))) 
-    if (!F->use_empty()) 
-      return true; 
-  if (auto *F = M.getFunction( 
-          Intrinsic::getName(llvm::Intrinsic::instrprof_value_profile))) 
-    if (!F->use_empty()) 
-      return true; 
-  return false; 
-} 
- 
-bool InstrProfiling::run( 
-    Module &M, std::function<const TargetLibraryInfo &(Function &F)> GetTLI) { 
-  this->M = &M; 
-  this->GetTLI = std::move(GetTLI); 
-  NamesVar = nullptr; 
-  NamesSize = 0; 
-  ProfileDataMap.clear(); 
-  UsedVars.clear(); 
-  TT = Triple(M.getTargetTriple()); 
- 
-  // Emit the runtime hook even if no counters are present. 
-  bool MadeChange = emitRuntimeHook(); 
- 
-  // Improve compile time by avoiding linear scans when there is no work. 
-  GlobalVariable *CoverageNamesVar = 
-      M.getNamedGlobal(getCoverageUnusedNamesVarName()); 
-  if (!containsProfilingIntrinsics(M) && !CoverageNamesVar) 
-    return MadeChange; 
- 
-  // We did not know how many value sites there would be inside 
-  // the instrumented function. This is counting the number of instrumented 
-  // target value sites to enter it as field in the profile data variable. 
-  for (Function &F : M) { 
-    InstrProfIncrementInst *FirstProfIncInst = nullptr; 
-    for (BasicBlock &BB : F) 
-      for (auto I = BB.begin(), E = BB.end(); I != E; I++) 
-        if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(I)) 
-          computeNumValueSiteCounts(Ind); 
-        else if (FirstProfIncInst == nullptr) 
-          FirstProfIncInst = dyn_cast<InstrProfIncrementInst>(I); 
- 
-    // Value profiling intrinsic lowering requires per-function profile data 
-    // variable to be created first. 
-    if (FirstProfIncInst != nullptr) 
-      static_cast<void>(getOrCreateRegionCounters(FirstProfIncInst)); 
-  } 
- 
-  for (Function &F : M) 
-    MadeChange |= lowerIntrinsics(&F); 
- 
-  if (CoverageNamesVar) { 
-    lowerCoverageData(CoverageNamesVar); 
-    MadeChange = true; 
-  } 
- 
-  if (!MadeChange) 
-    return false; 
- 
-  emitVNodes(); 
-  emitNameData(); 
-  emitRegistration(); 
-  emitUses(); 
-  emitInitialization(); 
-  return true; 
-} 
- 
+} // end anonymous namespace
+
+PreservedAnalyses InstrProfiling::run(Module &M, ModuleAnalysisManager &AM) {
+  // Analysis-manager entry point: resolve TLI per function through the FAM.
+  FunctionAnalysisManager &FuncAM =
+      AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+  auto LookupTLI = [&FuncAM](Function &Fn) -> TargetLibraryInfo & {
+    return FuncAM.getResult<TargetLibraryAnalysis>(Fn);
+  };
+  // No change keeps all analyses; any change preserves none of them.
+  const bool Changed = run(M, LookupTLI);
+  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+
+char InstrProfilingLegacyPass::ID = 0; // definition of the class's static ID
+INITIALIZE_PASS_BEGIN(
+    InstrProfilingLegacyPass, "instrprof",
+    "Frontend instrumentation-based coverage lowering.", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) // addRequired'd above
+INITIALIZE_PASS_END(
+    InstrProfilingLegacyPass, "instrprof",
+    "Frontend instrumentation-based coverage lowering.", false, false)
+
+ModulePass * // factory: returns a legacy pass object allocated with new
+llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options,
+                                     bool IsCS) {
+  return new InstrProfilingLegacyPass(Options, IsCS);
+}
+
+static InstrProfIncrementInst *castToIncrementInst(Instruction *Instr) {
+  // Try the step variant first, then the plain increment; null if neither.
+  if (auto *Step = dyn_cast<InstrProfIncrementInstStep>(Instr))
+    return Step;
+  return dyn_cast<InstrProfIncrementInst>(Instr);
+}
+
+bool InstrProfiling::lowerIntrinsics(Function *F) { // true if F was changed
+  bool MadeChange = false;
+  PromotionCandidates.clear(); // start each function with an empty list
+  for (BasicBlock &BB : *F) {
+    for (auto I = BB.begin(), E = BB.end(); I != E;) {
+      auto Instr = I++; // step past Instr before it is lowered
+      InstrProfIncrementInst *Inc = castToIncrementInst(&*Instr);
+      if (Inc) {
+        lowerIncrement(Inc);
+        MadeChange = true;
+      } else if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(Instr)) {
+        lowerValueProfileInst(Ind);
+        MadeChange = true;
+      }
+    }
+  }
+  // Counter promotion is only attempted when something was lowered.
+  if (!MadeChange)
+    return false;
+
+  promoteCounterLoadStores(F);
+  return true;
+}
+
+bool InstrProfiling::isRuntimeCounterRelocationEnabled() const {
+  // An explicit RuntimeCounterRelocation option wins; else ask the target OS.
+  if (RuntimeCounterRelocation.getNumOccurrences() <= 0)
+    return TT.isOSFuchsia();
+  return RuntimeCounterRelocation;
+}
+
+bool InstrProfiling::isCounterPromotionEnabled() const {
+  // An explicit DoCounterPromotion option wins; else use the Options field.
+  if (DoCounterPromotion.getNumOccurrences() <= 0)
+    return Options.DoCounterPromotion;
+  return DoCounterPromotion;
+}
+
+void InstrProfiling::promoteCounterLoadStores(Function *F) {
+  if (!isCounterPromotionEnabled())
+    return;
+  // Build the dominator tree and loop info for F locally.
+  DominatorTree DT(*F);
+  LoopInfo LI(DT);
+  DenseMap<Loop *, SmallVector<LoadStorePair, 8>> LoopPromotionCandidates;
+  // BFI stays null unless Options.UseBFIInPromotion is set.
+  std::unique_ptr<BlockFrequencyInfo> BFI;
+  if (Options.UseBFIInPromotion) {
+    std::unique_ptr<BranchProbabilityInfo> BPI;
+    BPI.reset(new BranchProbabilityInfo(*F, LI, &GetTLI(*F)));
+    BFI.reset(new BlockFrequencyInfo(*F, *BPI, LI));
+  }
+  // Group the candidates by the loop LoopInfo reports for the load's block.
+  for (const auto &LoadStore : PromotionCandidates) {
+    auto *CounterLoad = LoadStore.first;
+    auto *CounterStore = LoadStore.second;
+    BasicBlock *BB = CounterLoad->getParent();
+    Loop *ParentLoop = LI.getLoopFor(BB);
+    if (!ParentLoop)
+      continue; // not inside a loop: nothing to promote
+    LoopPromotionCandidates[ParentLoop].emplace_back(CounterLoad, CounterStore);
+  }
+
+  SmallVector<Loop *, 4> Loops = LI.getLoopsInPreorder();
+
+  // Walk the preorder list in reverse (a post-order style traversal) so that
+  // counter updates can be iteratively hoisted outside the loop nest.
+  for (auto *Loop : llvm::reverse(Loops)) {
+    PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI, BFI.get());
+    Promoter.run(&TotalCountersPromoted);
+  }
+}
+
+/// Report whether any of the profiling intrinsics handled here
+/// (instrprof_increment, instrprof_increment_step, instrprof_value_profile)
+/// is declared in the module and has at least one use.
+static bool containsProfilingIntrinsics(Module &M) {
+  const Intrinsic::ID ProfilingIDs[] = {
+      llvm::Intrinsic::instrprof_increment,
+      llvm::Intrinsic::instrprof_increment_step,
+      llvm::Intrinsic::instrprof_value_profile,
+  };
+  for (Intrinsic::ID IID : ProfilingIDs) {
+    Function *Decl = M.getFunction(Intrinsic::getName(IID));
+    if (Decl && !Decl->use_empty())
+      return true;
+  }
+  return false;
+}
+
+bool InstrProfiling::run(
+    Module &M, std::function<const TargetLibraryInfo &(Function &F)> GetTLI) {
+  this->M = &M; // (re)initialise the per-run state below
+  this->GetTLI = std::move(GetTLI);
+  NamesVar = nullptr;
+  NamesSize = 0;
+  ProfileDataMap.clear();
+  UsedVars.clear();
+  TT = Triple(M.getTargetTriple());
+
+  // Emit the runtime hook even if no counters are present.
+  bool MadeChange = emitRuntimeHook();
+
+  // Improve compile time by avoiding linear scans when there is no work.
+  GlobalVariable *CoverageNamesVar =
+      M.getNamedGlobal(getCoverageUnusedNamesVarName());
+  if (!containsProfilingIntrinsics(M) && !CoverageNamesVar)
+    return MadeChange;
+
+  // The number of value sites inside each instrumented function is not known
+  // up front. Count the instrumented target value sites here so the count
+  // can be entered as a field in the profile data variable.
+  for (Function &F : M) {
+    InstrProfIncrementInst *FirstProfIncInst = nullptr;
+    for (BasicBlock &BB : F)
+      for (auto I = BB.begin(), E = BB.end(); I != E; I++)
+        if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(I))
+          computeNumValueSiteCounts(Ind);
+        else if (FirstProfIncInst == nullptr)
+          FirstProfIncInst = dyn_cast<InstrProfIncrementInst>(I);
+
+    // Value profiling intrinsic lowering requires the per-function profile
+    // data variable to be created first; the returned value is not needed.
+    if (FirstProfIncInst != nullptr)
+      static_cast<void>(getOrCreateRegionCounters(FirstProfIncInst));
+  }
+  // Second walk over the module: lower the intrinsics function by function.
+  for (Function &F : M)
+    MadeChange |= lowerIntrinsics(&F);
+
+  if (CoverageNamesVar) {
+    lowerCoverageData(CoverageNamesVar);
+    MadeChange = true;
+  }
+
+  if (!MadeChange)
+    return false;
+  // Something changed: run the module-level emit* steps in this fixed order.
+  emitVNodes();
+  emitNameData();
+  emitRegistration();
+  emitUses();
+  emitInitialization();
+  return true;
+}
+
 static FunctionCallee getOrInsertValueProfilingCall(
     Module &M, const TargetLibraryInfo &TLI,
     ValueProfilingCallType CallType = ValueProfilingCallType::Default) {
-  LLVMContext &Ctx = M.getContext(); 
-  auto *ReturnTy = Type::getVoidTy(M.getContext()); 
- 
-  AttributeList AL; 
-  if (auto AK = TLI.getExtAttrForI32Param(false)) 
-    AL = AL.addParamAttribute(M.getContext(), 2, AK); 
- 
+  LLVMContext &Ctx = M.getContext();
+  auto *ReturnTy = Type::getVoidTy(M.getContext());
+
+  AttributeList AL;
+  if (auto AK = TLI.getExtAttrForI32Param(false))
+    AL = AL.addParamAttribute(M.getContext(), 2, AK);
+
   assert((CallType == ValueProfilingCallType::Default ||
           CallType == ValueProfilingCallType::MemOp) &&
          "Must be Default or MemOp");
   Type *ParamTypes[] = {
-#define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType 
-#include "llvm/ProfileData/InstrProfData.inc" 
+#define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
+#include "llvm/ProfileData/InstrProfData.inc"
   };
   auto *ValueProfilingCallTy =
       FunctionType::get(ReturnTy, makeArrayRef(ParamTypes), false);
@@ -610,501 +610,501 @@ static FunctionCallee getOrInsertValueProfilingCall(
                            ? getInstrProfValueProfFuncName()
                            : getInstrProfValueProfMemOpFuncName();
   return M.getOrInsertFunction(FuncName, ValueProfilingCallTy, AL);
-} 
- 
-void InstrProfiling::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) { 
-  GlobalVariable *Name = Ind->getName(); 
-  uint64_t ValueKind = Ind->getValueKind()->getZExtValue(); 
-  uint64_t Index = Ind->getIndex()->getZExtValue(); 
-  auto It = ProfileDataMap.find(Name); 
-  if (It == ProfileDataMap.end()) { 
-    PerFunctionProfileData PD; 
-    PD.NumValueSites[ValueKind] = Index + 1; 
-    ProfileDataMap[Name] = PD; 
-  } else if (It->second.NumValueSites[ValueKind] <= Index) 
-    It->second.NumValueSites[ValueKind] = Index + 1; 
-} 
- 
-void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) { 
-  GlobalVariable *Name = Ind->getName(); 
-  auto It = ProfileDataMap.find(Name); 
-  assert(It != ProfileDataMap.end() && It->second.DataVar && 
-         "value profiling detected in function with no counter incerement"); 
- 
-  GlobalVariable *DataVar = It->second.DataVar; 
-  uint64_t ValueKind = Ind->getValueKind()->getZExtValue(); 
-  uint64_t Index = Ind->getIndex()->getZExtValue(); 
-  for (uint32_t Kind = IPVK_First; Kind < ValueKind; ++Kind) 
-    Index += It->second.NumValueSites[Kind]; 
- 
-  IRBuilder<> Builder(Ind); 
+}
+
+void InstrProfiling::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) {
+  GlobalVariable *Name = Ind->getName();
+  uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
+  uint64_t Index = Ind->getIndex()->getZExtValue();
+  auto It = ProfileDataMap.find(Name);
+  if (It == ProfileDataMap.end()) {
+    PerFunctionProfileData PD;
+    PD.NumValueSites[ValueKind] = Index + 1;
+    ProfileDataMap[Name] = PD;
+  } else if (It->second.NumValueSites[ValueKind] <= Index)
+    It->second.NumValueSites[ValueKind] = Index + 1;
+}
+
+void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
+  GlobalVariable *Name = Ind->getName();
+  auto It = ProfileDataMap.find(Name);
+  assert(It != ProfileDataMap.end() && It->second.DataVar &&
+         "value profiling detected in function with no counter incerement");
+
+  GlobalVariable *DataVar = It->second.DataVar;
+  uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
+  uint64_t Index = Ind->getIndex()->getZExtValue();
+  for (uint32_t Kind = IPVK_First; Kind < ValueKind; ++Kind)
+    Index += It->second.NumValueSites[Kind];
+
+  IRBuilder<> Builder(Ind);
   bool IsMemOpSize = (Ind->getValueKind()->getZExtValue() ==
                       llvm::InstrProfValueKind::IPVK_MemOPSize);
-  CallInst *Call = nullptr; 
-  auto *TLI = &GetTLI(*Ind->getFunction()); 
- 
-  // To support value profiling calls within Windows exception handlers, funclet 
-  // information contained within operand bundles needs to be copied over to 
-  // the library call. This is required for the IR to be processed by the 
-  // WinEHPrepare pass. 
-  SmallVector<OperandBundleDef, 1> OpBundles; 
-  Ind->getOperandBundlesAsDefs(OpBundles); 
+  CallInst *Call = nullptr;
+  auto *TLI = &GetTLI(*Ind->getFunction());
+
+  // To support value profiling calls within Windows exception handlers, funclet
+  // information contained within operand bundles needs to be copied over to
+  // the library call. This is required for the IR to be processed by the
+  // WinEHPrepare pass.
+  SmallVector<OperandBundleDef, 1> OpBundles;
+  Ind->getOperandBundlesAsDefs(OpBundles);
   if (!IsMemOpSize) {
-    Value *Args[3] = {Ind->getTargetValue(), 
-                      Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()), 
-                      Builder.getInt32(Index)}; 
-    Call = Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI), Args, 
-                              OpBundles); 
-  } else { 
+    Value *Args[3] = {Ind->getTargetValue(),
+                      Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()),
+                      Builder.getInt32(Index)};
+    Call = Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI), Args,
+                              OpBundles);
+  } else {
     Value *Args[3] = {Ind->getTargetValue(),
                       Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()),
                       Builder.getInt32(Index)};
     Call = Builder.CreateCall(
         getOrInsertValueProfilingCall(*M, *TLI, ValueProfilingCallType::MemOp),
         Args, OpBundles);
-  } 
-  if (auto AK = TLI->getExtAttrForI32Param(false)) 
-    Call->addParamAttr(2, AK); 
-  Ind->replaceAllUsesWith(Call); 
-  Ind->eraseFromParent(); 
-} 
- 
-void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) { 
-  GlobalVariable *Counters = getOrCreateRegionCounters(Inc); 
- 
-  IRBuilder<> Builder(Inc); 
-  uint64_t Index = Inc->getIndex()->getZExtValue(); 
-  Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters->getValueType(), 
-                                                   Counters, 0, Index); 
- 
-  if (isRuntimeCounterRelocationEnabled()) { 
-    Type *Int64Ty = Type::getInt64Ty(M->getContext()); 
-    Type *Int64PtrTy = Type::getInt64PtrTy(M->getContext()); 
-    Function *Fn = Inc->getParent()->getParent(); 
-    Instruction &I = Fn->getEntryBlock().front(); 
-    LoadInst *LI = dyn_cast<LoadInst>(&I); 
-    if (!LI) { 
-      IRBuilder<> Builder(&I); 
-      Type *Int64Ty = Type::getInt64Ty(M->getContext()); 
-      GlobalVariable *Bias = M->getGlobalVariable(getInstrProfCounterBiasVarName()); 
-      if (!Bias) { 
-        Bias = new GlobalVariable(*M, Int64Ty, false, GlobalValue::LinkOnceODRLinkage, 
-                                  Constant::getNullValue(Int64Ty), 
-                                  getInstrProfCounterBiasVarName()); 
-        Bias->setVisibility(GlobalVariable::HiddenVisibility); 
-      } 
-      LI = Builder.CreateLoad(Int64Ty, Bias); 
-    } 
-    auto *Add = Builder.CreateAdd(Builder.CreatePtrToInt(Addr, Int64Ty), LI); 
-    Addr = Builder.CreateIntToPtr(Add, Int64PtrTy); 
-  } 
- 
-  if (Options.Atomic || AtomicCounterUpdateAll || 
-      (Index == 0 && AtomicFirstCounter)) { 
-    Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, Inc->getStep(), 
-                            AtomicOrdering::Monotonic); 
-  } else { 
-    Value *IncStep = Inc->getStep(); 
-    Value *Load = Builder.CreateLoad(IncStep->getType(), Addr, "pgocount"); 
-    auto *Count = Builder.CreateAdd(Load, Inc->getStep()); 
-    auto *Store = Builder.CreateStore(Count, Addr); 
-    if (isCounterPromotionEnabled()) 
-      PromotionCandidates.emplace_back(cast<Instruction>(Load), Store); 
-  } 
-  Inc->eraseFromParent(); 
-} 
- 
-void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageNamesVar) { 
-  ConstantArray *Names = 
-      cast<ConstantArray>(CoverageNamesVar->getInitializer()); 
-  for (unsigned I = 0, E = Names->getNumOperands(); I < E; ++I) { 
-    Constant *NC = Names->getOperand(I); 
-    Value *V = NC->stripPointerCasts(); 
-    assert(isa<GlobalVariable>(V) && "Missing reference to function name"); 
-    GlobalVariable *Name = cast<GlobalVariable>(V); 
- 
-    Name->setLinkage(GlobalValue::PrivateLinkage); 
-    ReferencedNames.push_back(Name); 
-    NC->dropAllReferences(); 
-  } 
-  CoverageNamesVar->eraseFromParent(); 
-} 
- 
-/// Get the name of a profiling variable for a particular function. 
-static std::string getVarName(InstrProfIncrementInst *Inc, StringRef Prefix) { 
-  StringRef NamePrefix = getInstrProfNameVarPrefix(); 
-  StringRef Name = Inc->getName()->getName().substr(NamePrefix.size()); 
-  Function *F = Inc->getParent()->getParent(); 
-  Module *M = F->getParent(); 
-  if (!DoHashBasedCounterSplit || !isIRPGOFlagSet(M) || 
-      !canRenameComdatFunc(*F)) 
-    return (Prefix + Name).str(); 
-  uint64_t FuncHash = Inc->getHash()->getZExtValue(); 
-  SmallVector<char, 24> HashPostfix; 
-  if (Name.endswith((Twine(".") + Twine(FuncHash)).toStringRef(HashPostfix))) 
-    return (Prefix + Name).str(); 
-  return (Prefix + Name + "." + Twine(FuncHash)).str(); 
-} 
- 
-static inline bool shouldRecordFunctionAddr(Function *F) { 
-  // Check the linkage 
-  bool HasAvailableExternallyLinkage = F->hasAvailableExternallyLinkage(); 
-  if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() && 
-      !HasAvailableExternallyLinkage) 
-    return true; 
- 
-  // A function marked 'alwaysinline' with available_externally linkage can't 
-  // have its address taken. Doing so would create an undefined external ref to 
-  // the function, which would fail to link. 
-  if (HasAvailableExternallyLinkage && 
-      F->hasFnAttribute(Attribute::AlwaysInline)) 
-    return false; 
- 
-  // Prohibit function address recording if the function is both internal and 
-  // COMDAT. This avoids the profile data variable referencing internal symbols 
-  // in COMDAT. 
-  if (F->hasLocalLinkage() && F->hasComdat()) 
-    return false; 
- 
-  // Check uses of this function for other than direct calls or invokes to it. 
-  // Inline virtual functions have linkeOnceODR linkage. When a key method 
-  // exists, the vtable will only be emitted in the TU where the key method 
-  // is defined. In a TU where vtable is not available, the function won't 
-  // be 'addresstaken'. If its address is not recorded here, the profile data 
-  // with missing address may be picked by the linker leading  to missing 
-  // indirect call target info. 
-  return F->hasAddressTaken() || F->hasLinkOnceLinkage(); 
-} 
- 
-static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) { 
-  // Don't do this for Darwin.  compiler-rt uses linker magic. 
-  if (TT.isOSDarwin()) 
-    return false; 
-  // Use linker script magic to get data/cnts/name start/end. 
-  if (TT.isOSLinux() || TT.isOSFreeBSD() || TT.isOSNetBSD() || 
-      TT.isOSSolaris() || TT.isOSFuchsia() || TT.isPS4CPU() || 
-      TT.isOSWindows()) 
-    return false; 
- 
-  return true; 
-} 
- 
-GlobalVariable * 
-InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) { 
-  GlobalVariable *NamePtr = Inc->getName(); 
-  auto It = ProfileDataMap.find(NamePtr); 
-  PerFunctionProfileData PD; 
-  if (It != ProfileDataMap.end()) { 
-    if (It->second.RegionCounters) 
-      return It->second.RegionCounters; 
-    PD = It->second; 
-  } 
- 
-  // Match the linkage and visibility of the name global. COFF supports using 
-  // comdats with internal symbols, so do that if we can. 
-  Function *Fn = Inc->getParent()->getParent(); 
-  GlobalValue::LinkageTypes Linkage = NamePtr->getLinkage(); 
-  GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility(); 
-  if (TT.isOSBinFormatCOFF()) { 
-    Linkage = GlobalValue::InternalLinkage; 
-    Visibility = GlobalValue::DefaultVisibility; 
-  } 
- 
-  // Move the name variable to the right section. Place them in a COMDAT group 
-  // if the associated function is a COMDAT. This will make sure that only one 
-  // copy of counters of the COMDAT function will be emitted after linking. Keep 
-  // in mind that this pass may run before the inliner, so we need to create a 
-  // new comdat group for the counters and profiling data. If we use the comdat 
-  // of the parent function, that will result in relocations against discarded 
-  // sections. 
-  bool NeedComdat = needsComdatForCounter(*Fn, *M); 
-  if (NeedComdat) { 
-    if (TT.isOSBinFormatCOFF()) { 
-      // For COFF, put the counters, data, and values each into their own 
-      // comdats. We can't use a group because the Visual C++ linker will 
-      // report duplicate symbol errors if there are multiple external symbols 
-      // with the same name marked IMAGE_COMDAT_SELECT_ASSOCIATIVE. 
-      Linkage = GlobalValue::LinkOnceODRLinkage; 
-      Visibility = GlobalValue::HiddenVisibility; 
-    } 
-  } 
+  }
+  if (auto AK = TLI->getExtAttrForI32Param(false))
+    Call->addParamAttr(2, AK);
+  Ind->replaceAllUsesWith(Call);
+  Ind->eraseFromParent();
+}
+
+void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
+  GlobalVariable *Counters = getOrCreateRegionCounters(Inc);
+
+  IRBuilder<> Builder(Inc);
+  uint64_t Index = Inc->getIndex()->getZExtValue();
+  Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters->getValueType(),
+                                                   Counters, 0, Index);
+
+  if (isRuntimeCounterRelocationEnabled()) {
+    Type *Int64Ty = Type::getInt64Ty(M->getContext());
+    Type *Int64PtrTy = Type::getInt64PtrTy(M->getContext());
+    Function *Fn = Inc->getParent()->getParent();
+    Instruction &I = Fn->getEntryBlock().front();
+    LoadInst *LI = dyn_cast<LoadInst>(&I);
+    if (!LI) {
+      IRBuilder<> Builder(&I);
+      Type *Int64Ty = Type::getInt64Ty(M->getContext());
+      GlobalVariable *Bias = M->getGlobalVariable(getInstrProfCounterBiasVarName());
+      if (!Bias) {
+        Bias = new GlobalVariable(*M, Int64Ty, false, GlobalValue::LinkOnceODRLinkage,
+                                  Constant::getNullValue(Int64Ty),
+                                  getInstrProfCounterBiasVarName());
+        Bias->setVisibility(GlobalVariable::HiddenVisibility);
+      }
+      LI = Builder.CreateLoad(Int64Ty, Bias);
+    }
+    auto *Add = Builder.CreateAdd(Builder.CreatePtrToInt(Addr, Int64Ty), LI);
+    Addr = Builder.CreateIntToPtr(Add, Int64PtrTy);
+  }
+
+  if (Options.Atomic || AtomicCounterUpdateAll ||
+      (Index == 0 && AtomicFirstCounter)) {
+    Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, Inc->getStep(),
+                            AtomicOrdering::Monotonic);
+  } else {
+    Value *IncStep = Inc->getStep();
+    Value *Load = Builder.CreateLoad(IncStep->getType(), Addr, "pgocount");
+    auto *Count = Builder.CreateAdd(Load, Inc->getStep());
+    auto *Store = Builder.CreateStore(Count, Addr);
+    if (isCounterPromotionEnabled())
+      PromotionCandidates.emplace_back(cast<Instruction>(Load), Store);
+  }
+  Inc->eraseFromParent();
+}
+
+void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageNamesVar) {
+  ConstantArray *Names =
+      cast<ConstantArray>(CoverageNamesVar->getInitializer());
+  for (unsigned I = 0, E = Names->getNumOperands(); I < E; ++I) {
+    Constant *NC = Names->getOperand(I);
+    Value *V = NC->stripPointerCasts();
+    assert(isa<GlobalVariable>(V) && "Missing reference to function name");
+    GlobalVariable *Name = cast<GlobalVariable>(V);
+
+    Name->setLinkage(GlobalValue::PrivateLinkage);
+    ReferencedNames.push_back(Name);
+    NC->dropAllReferences();
+  }
+  CoverageNamesVar->eraseFromParent();
+}
+
+/// Get the name of a profiling variable for a particular function.
+static std::string getVarName(InstrProfIncrementInst *Inc, StringRef Prefix) {
+  StringRef NamePrefix = getInstrProfNameVarPrefix();
+  StringRef Name = Inc->getName()->getName().substr(NamePrefix.size());
+  Function *F = Inc->getParent()->getParent();
+  Module *M = F->getParent();
+  if (!DoHashBasedCounterSplit || !isIRPGOFlagSet(M) ||
+      !canRenameComdatFunc(*F))
+    return (Prefix + Name).str();
+  uint64_t FuncHash = Inc->getHash()->getZExtValue();
+  SmallVector<char, 24> HashPostfix;
+  if (Name.endswith((Twine(".") + Twine(FuncHash)).toStringRef(HashPostfix)))
+    return (Prefix + Name).str();
+  return (Prefix + Name + "." + Twine(FuncHash)).str();
+}
+
+static inline bool shouldRecordFunctionAddr(Function *F) {
+  // Check the linkage
+  bool HasAvailableExternallyLinkage = F->hasAvailableExternallyLinkage();
+  if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() &&
+      !HasAvailableExternallyLinkage)
+    return true;
+
+  // A function marked 'alwaysinline' with available_externally linkage can't
+  // have its address taken. Doing so would create an undefined external ref to
+  // the function, which would fail to link.
+  if (HasAvailableExternallyLinkage &&
+      F->hasFnAttribute(Attribute::AlwaysInline))
+    return false;
+
+  // Prohibit function address recording if the function is both internal and
+  // COMDAT. This avoids the profile data variable referencing internal symbols
+  // in COMDAT.
+  if (F->hasLocalLinkage() && F->hasComdat())
+    return false;
+
+  // Check uses of this function for other than direct calls or invokes to it.
+  // Inline virtual functions have linkeOnceODR linkage. When a key method
+  // exists, the vtable will only be emitted in the TU where the key method
+  // is defined. In a TU where vtable is not available, the function won't
+  // be 'addresstaken'. If its address is not recorded here, the profile data
+  // with missing address may be picked by the linker leading  to missing
+  // indirect call target info.
+  return F->hasAddressTaken() || F->hasLinkOnceLinkage();
+}
+
+static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) {
+  // Don't do this for Darwin.  compiler-rt uses linker magic.
+  if (TT.isOSDarwin())
+    return false;
+  // Use linker script magic to get data/cnts/name start/end.
+  if (TT.isOSLinux() || TT.isOSFreeBSD() || TT.isOSNetBSD() ||
+      TT.isOSSolaris() || TT.isOSFuchsia() || TT.isPS4CPU() ||
+      TT.isOSWindows())
+    return false;
+
+  return true;
+}
+
+GlobalVariable *
+InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
+  GlobalVariable *NamePtr = Inc->getName();
+  auto It = ProfileDataMap.find(NamePtr);
+  PerFunctionProfileData PD;
+  if (It != ProfileDataMap.end()) {
+    if (It->second.RegionCounters)
+      return It->second.RegionCounters;
+    PD = It->second;
+  }
+
+  // Match the linkage and visibility of the name global. COFF supports using
+  // comdats with internal symbols, so do that if we can.
+  Function *Fn = Inc->getParent()->getParent();
+  GlobalValue::LinkageTypes Linkage = NamePtr->getLinkage();
+  GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility();
+  if (TT.isOSBinFormatCOFF()) {
+    Linkage = GlobalValue::InternalLinkage;
+    Visibility = GlobalValue::DefaultVisibility;
+  }
+
+  // Move the name variable to the right section. Place them in a COMDAT group
+  // if the associated function is a COMDAT. This will make sure that only one
+  // copy of counters of the COMDAT function will be emitted after linking. Keep
+  // in mind that this pass may run before the inliner, so we need to create a
+  // new comdat group for the counters and profiling data. If we use the comdat
+  // of the parent function, that will result in relocations against discarded
+  // sections.
+  bool NeedComdat = needsComdatForCounter(*Fn, *M);
+  if (NeedComdat) {
+    if (TT.isOSBinFormatCOFF()) {
+      // For COFF, put the counters, data, and values each into their own
+      // comdats. We can't use a group because the Visual C++ linker will
+      // report duplicate symbol errors if there are multiple external symbols
+      // with the same name marked IMAGE_COMDAT_SELECT_ASSOCIATIVE.
+      Linkage = GlobalValue::LinkOnceODRLinkage;
+      Visibility = GlobalValue::HiddenVisibility;
+    }
+  }
   std::string DataVarName = getVarName(Inc, getInstrProfDataVarPrefix());
-  auto MaybeSetComdat = [=](GlobalVariable *GV) { 
-    if (NeedComdat) 
+  auto MaybeSetComdat = [=](GlobalVariable *GV) {
+    if (NeedComdat)
       GV->setComdat(M->getOrInsertComdat(TT.isOSBinFormatCOFF() ? GV->getName()
                                                                 : DataVarName));
-  }; 
- 
-  uint64_t NumCounters = Inc->getNumCounters()->getZExtValue(); 
-  LLVMContext &Ctx = M->getContext(); 
-  ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters); 
- 
-  // Create the counters variable. 
-  auto *CounterPtr = 
-      new GlobalVariable(*M, CounterTy, false, Linkage, 
-                         Constant::getNullValue(CounterTy), 
-                         getVarName(Inc, getInstrProfCountersVarPrefix())); 
-  CounterPtr->setVisibility(Visibility); 
-  CounterPtr->setSection( 
-      getInstrProfSectionName(IPSK_cnts, TT.getObjectFormat())); 
-  CounterPtr->setAlignment(Align(8)); 
-  MaybeSetComdat(CounterPtr); 
-  CounterPtr->setLinkage(Linkage); 
- 
-  auto *Int8PtrTy = Type::getInt8PtrTy(Ctx); 
-  // Allocate statically the array of pointers to value profile nodes for 
-  // the current function. 
-  Constant *ValuesPtrExpr = ConstantPointerNull::get(Int8PtrTy); 
-  if (ValueProfileStaticAlloc && !needsRuntimeRegistrationOfSectionRange(TT)) { 
-    uint64_t NS = 0; 
-    for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 
-      NS += PD.NumValueSites[Kind]; 
-    if (NS) { 
-      ArrayType *ValuesTy = ArrayType::get(Type::getInt64Ty(Ctx), NS); 
- 
-      auto *ValuesVar = 
-          new GlobalVariable(*M, ValuesTy, false, Linkage, 
-                             Constant::getNullValue(ValuesTy), 
-                             getVarName(Inc, getInstrProfValuesVarPrefix())); 
-      ValuesVar->setVisibility(Visibility); 
-      ValuesVar->setSection( 
-          getInstrProfSectionName(IPSK_vals, TT.getObjectFormat())); 
-      ValuesVar->setAlignment(Align(8)); 
-      MaybeSetComdat(ValuesVar); 
-      ValuesPtrExpr = 
-          ConstantExpr::getBitCast(ValuesVar, Type::getInt8PtrTy(Ctx)); 
-    } 
-  } 
- 
-  // Create data variable. 
-  auto *Int16Ty = Type::getInt16Ty(Ctx); 
-  auto *Int16ArrayTy = ArrayType::get(Int16Ty, IPVK_Last + 1); 
-  Type *DataTypes[] = { 
-#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) LLVMType, 
-#include "llvm/ProfileData/InstrProfData.inc" 
-  }; 
-  auto *DataTy = StructType::get(Ctx, makeArrayRef(DataTypes)); 
- 
-  Constant *FunctionAddr = shouldRecordFunctionAddr(Fn) 
-                               ? ConstantExpr::getBitCast(Fn, Int8PtrTy) 
-                               : ConstantPointerNull::get(Int8PtrTy); 
- 
-  Constant *Int16ArrayVals[IPVK_Last + 1]; 
-  for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 
-    Int16ArrayVals[Kind] = ConstantInt::get(Int16Ty, PD.NumValueSites[Kind]); 
- 
-  Constant *DataVals[] = { 
-#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init, 
-#include "llvm/ProfileData/InstrProfData.inc" 
-  }; 
+  };
+
+  uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
+  LLVMContext &Ctx = M->getContext();
+  ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters);
+
+  // Create the counters variable.
+  auto *CounterPtr =
+      new GlobalVariable(*M, CounterTy, false, Linkage,
+                         Constant::getNullValue(CounterTy),
+                         getVarName(Inc, getInstrProfCountersVarPrefix()));
+  CounterPtr->setVisibility(Visibility);
+  CounterPtr->setSection(
+      getInstrProfSectionName(IPSK_cnts, TT.getObjectFormat()));
+  CounterPtr->setAlignment(Align(8));
+  MaybeSetComdat(CounterPtr);
+  CounterPtr->setLinkage(Linkage);
+
+  auto *Int8PtrTy = Type::getInt8PtrTy(Ctx);
+  // Allocate statically the array of pointers to value profile nodes for
+  // the current function.
+  Constant *ValuesPtrExpr = ConstantPointerNull::get(Int8PtrTy);
+  if (ValueProfileStaticAlloc && !needsRuntimeRegistrationOfSectionRange(TT)) {
+    uint64_t NS = 0;
+    for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
+      NS += PD.NumValueSites[Kind];
+    if (NS) {
+      ArrayType *ValuesTy = ArrayType::get(Type::getInt64Ty(Ctx), NS);
+
+      auto *ValuesVar =
+          new GlobalVariable(*M, ValuesTy, false, Linkage,
+                             Constant::getNullValue(ValuesTy),
+                             getVarName(Inc, getInstrProfValuesVarPrefix()));
+      ValuesVar->setVisibility(Visibility);
+      ValuesVar->setSection(
+          getInstrProfSectionName(IPSK_vals, TT.getObjectFormat()));
+      ValuesVar->setAlignment(Align(8));
+      MaybeSetComdat(ValuesVar);
+      ValuesPtrExpr =
+          ConstantExpr::getBitCast(ValuesVar, Type::getInt8PtrTy(Ctx));
+    }
+  }
+
+  // Create data variable.
+  auto *Int16Ty = Type::getInt16Ty(Ctx);
+  auto *Int16ArrayTy = ArrayType::get(Int16Ty, IPVK_Last + 1);
+  Type *DataTypes[] = {
+#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) LLVMType,
+#include "llvm/ProfileData/InstrProfData.inc"
+  };
+  auto *DataTy = StructType::get(Ctx, makeArrayRef(DataTypes));
+
+  Constant *FunctionAddr = shouldRecordFunctionAddr(Fn)
+                               ? ConstantExpr::getBitCast(Fn, Int8PtrTy)
+                               : ConstantPointerNull::get(Int8PtrTy);
+
+  Constant *Int16ArrayVals[IPVK_Last + 1];
+  for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
+    Int16ArrayVals[Kind] = ConstantInt::get(Int16Ty, PD.NumValueSites[Kind]);
+
+  Constant *DataVals[] = {
+#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,
+#include "llvm/ProfileData/InstrProfData.inc"
+  };
   auto *Data =
       new GlobalVariable(*M, DataTy, false, Linkage,
                          ConstantStruct::get(DataTy, DataVals), DataVarName);
-  Data->setVisibility(Visibility); 
-  Data->setSection(getInstrProfSectionName(IPSK_data, TT.getObjectFormat())); 
-  Data->setAlignment(Align(INSTR_PROF_DATA_ALIGNMENT)); 
-  MaybeSetComdat(Data); 
-  Data->setLinkage(Linkage); 
- 
-  PD.RegionCounters = CounterPtr; 
-  PD.DataVar = Data; 
-  ProfileDataMap[NamePtr] = PD; 
- 
-  // Mark the data variable as used so that it isn't stripped out. 
-  UsedVars.push_back(Data); 
-  // Now that the linkage set by the FE has been passed to the data and counter 
-  // variables, reset Name variable's linkage and visibility to private so that 
-  // it can be removed later by the compiler. 
-  NamePtr->setLinkage(GlobalValue::PrivateLinkage); 
-  // Collect the referenced names to be used by emitNameData. 
-  ReferencedNames.push_back(NamePtr); 
- 
-  return CounterPtr; 
-} 
- 
-void InstrProfiling::emitVNodes() { 
-  if (!ValueProfileStaticAlloc) 
-    return; 
- 
-  // For now only support this on platforms that do 
-  // not require runtime registration to discover 
-  // named section start/end. 
-  if (needsRuntimeRegistrationOfSectionRange(TT)) 
-    return; 
- 
-  size_t TotalNS = 0; 
-  for (auto &PD : ProfileDataMap) { 
-    for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 
-      TotalNS += PD.second.NumValueSites[Kind]; 
-  } 
- 
-  if (!TotalNS) 
-    return; 
- 
-  uint64_t NumCounters = TotalNS * NumCountersPerValueSite; 
-// Heuristic for small programs with very few total value sites. 
-// The default value of vp-counters-per-site is chosen based on 
-// the observation that large apps usually have a low percentage 
-// of value sites that actually have any profile data, and thus 
-// the average number of counters per site is low. For small 
-// apps with very few sites, this may not be true. Bump up the 
-// number of counters in this case. 
-#define INSTR_PROF_MIN_VAL_COUNTS 10 
-  if (NumCounters < INSTR_PROF_MIN_VAL_COUNTS) 
-    NumCounters = std::max(INSTR_PROF_MIN_VAL_COUNTS, (int)NumCounters * 2); 
- 
-  auto &Ctx = M->getContext(); 
-  Type *VNodeTypes[] = { 
-#define INSTR_PROF_VALUE_NODE(Type, LLVMType, Name, Init) LLVMType, 
-#include "llvm/ProfileData/InstrProfData.inc" 
-  }; 
-  auto *VNodeTy = StructType::get(Ctx, makeArrayRef(VNodeTypes)); 
- 
-  ArrayType *VNodesTy = ArrayType::get(VNodeTy, NumCounters); 
-  auto *VNodesVar = new GlobalVariable( 
-      *M, VNodesTy, false, GlobalValue::PrivateLinkage, 
-      Constant::getNullValue(VNodesTy), getInstrProfVNodesVarName()); 
-  VNodesVar->setSection( 
-      getInstrProfSectionName(IPSK_vnodes, TT.getObjectFormat())); 
-  UsedVars.push_back(VNodesVar); 
-} 
- 
-void InstrProfiling::emitNameData() { 
-  std::string UncompressedData; 
- 
-  if (ReferencedNames.empty()) 
-    return; 
- 
-  std::string CompressedNameStr; 
-  if (Error E = collectPGOFuncNameStrings(ReferencedNames, CompressedNameStr, 
-                                          DoInstrProfNameCompression)) { 
-    report_fatal_error(toString(std::move(E)), false); 
-  } 
- 
-  auto &Ctx = M->getContext(); 
-  auto *NamesVal = ConstantDataArray::getString( 
-      Ctx, StringRef(CompressedNameStr), false); 
-  NamesVar = new GlobalVariable(*M, NamesVal->getType(), true, 
-                                GlobalValue::PrivateLinkage, NamesVal, 
-                                getInstrProfNamesVarName()); 
-  NamesSize = CompressedNameStr.size(); 
-  NamesVar->setSection( 
-      getInstrProfSectionName(IPSK_name, TT.getObjectFormat())); 
-  // On COFF, it's important to reduce the alignment down to 1 to prevent the 
-  // linker from inserting padding before the start of the names section or 
-  // between names entries. 
-  NamesVar->setAlignment(Align(1)); 
-  UsedVars.push_back(NamesVar); 
- 
-  for (auto *NamePtr : ReferencedNames) 
-    NamePtr->eraseFromParent(); 
-} 
- 
-void InstrProfiling::emitRegistration() { 
-  if (!needsRuntimeRegistrationOfSectionRange(TT)) 
-    return; 
- 
-  // Construct the function. 
-  auto *VoidTy = Type::getVoidTy(M->getContext()); 
-  auto *VoidPtrTy = Type::getInt8PtrTy(M->getContext()); 
-  auto *Int64Ty = Type::getInt64Ty(M->getContext()); 
-  auto *RegisterFTy = FunctionType::get(VoidTy, false); 
-  auto *RegisterF = Function::Create(RegisterFTy, GlobalValue::InternalLinkage, 
-                                     getInstrProfRegFuncsName(), M); 
-  RegisterF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); 
-  if (Options.NoRedZone) 
-    RegisterF->addFnAttr(Attribute::NoRedZone); 
- 
-  auto *RuntimeRegisterTy = FunctionType::get(VoidTy, VoidPtrTy, false); 
-  auto *RuntimeRegisterF = 
-      Function::Create(RuntimeRegisterTy, GlobalVariable::ExternalLinkage, 
-                       getInstrProfRegFuncName(), M); 
- 
-  IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", RegisterF)); 
-  for (Value *Data : UsedVars) 
-    if (Data != NamesVar && !isa<Function>(Data)) 
-      IRB.CreateCall(RuntimeRegisterF, IRB.CreateBitCast(Data, VoidPtrTy)); 
- 
-  if (NamesVar) { 
-    Type *ParamTypes[] = {VoidPtrTy, Int64Ty}; 
-    auto *NamesRegisterTy = 
-        FunctionType::get(VoidTy, makeArrayRef(ParamTypes), false); 
-    auto *NamesRegisterF = 
-        Function::Create(NamesRegisterTy, GlobalVariable::ExternalLinkage, 
-                         getInstrProfNamesRegFuncName(), M); 
-    IRB.CreateCall(NamesRegisterF, {IRB.CreateBitCast(NamesVar, VoidPtrTy), 
-                                    IRB.getInt64(NamesSize)}); 
-  } 
- 
-  IRB.CreateRetVoid(); 
-} 
- 
-bool InstrProfiling::emitRuntimeHook() { 
-  // We expect the linker to be invoked with -u<hook_var> flag for Linux or 
-  // Fuchsia, in which case there is no need to emit the user function. 
-  if (TT.isOSLinux() || TT.isOSFuchsia()) 
-    return false; 
- 
-  // If the module's provided its own runtime, we don't need to do anything. 
-  if (M->getGlobalVariable(getInstrProfRuntimeHookVarName())) 
-    return false; 
- 
-  // Declare an external variable that will pull in the runtime initialization. 
-  auto *Int32Ty = Type::getInt32Ty(M->getContext()); 
-  auto *Var = 
-      new GlobalVariable(*M, Int32Ty, false, GlobalValue::ExternalLinkage, 
-                         nullptr, getInstrProfRuntimeHookVarName()); 
- 
-  // Make a function that uses it. 
-  auto *User = Function::Create(FunctionType::get(Int32Ty, false), 
-                                GlobalValue::LinkOnceODRLinkage, 
-                                getInstrProfRuntimeHookVarUseFuncName(), M); 
-  User->addFnAttr(Attribute::NoInline); 
-  if (Options.NoRedZone) 
-    User->addFnAttr(Attribute::NoRedZone); 
-  User->setVisibility(GlobalValue::HiddenVisibility); 
-  if (TT.supportsCOMDAT()) 
-    User->setComdat(M->getOrInsertComdat(User->getName())); 
- 
-  IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", User)); 
-  auto *Load = IRB.CreateLoad(Int32Ty, Var); 
-  IRB.CreateRet(Load); 
- 
-  // Mark the user variable as used so that it isn't stripped out. 
-  UsedVars.push_back(User); 
-  return true; 
-} 
- 
-void InstrProfiling::emitUses() { 
-  if (!UsedVars.empty()) 
-    appendToUsed(*M, UsedVars); 
-} 
- 
-void InstrProfiling::emitInitialization() { 
-  // Create ProfileFileName variable. Don't don't this for the 
-  // context-sensitive instrumentation lowering: This lowering is after 
-  // LTO/ThinLTO linking. Pass PGOInstrumentationGenCreateVar should 
-  // have already create the variable before LTO/ThinLTO linking. 
-  if (!IsCS) 
-    createProfileFileNameVar(*M, Options.InstrProfileOutput); 
-  Function *RegisterF = M->getFunction(getInstrProfRegFuncsName()); 
-  if (!RegisterF) 
-    return; 
- 
-  // Create the initialization function. 
-  auto *VoidTy = Type::getVoidTy(M->getContext()); 
-  auto *F = Function::Create(FunctionType::get(VoidTy, false), 
-                             GlobalValue::InternalLinkage, 
-                             getInstrProfInitFuncName(), M); 
-  F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); 
-  F->addFnAttr(Attribute::NoInline); 
-  if (Options.NoRedZone) 
-    F->addFnAttr(Attribute::NoRedZone); 
- 
-  // Add the basic block and the necessary calls. 
-  IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", F)); 
-  IRB.CreateCall(RegisterF, {}); 
-  IRB.CreateRetVoid(); 
- 
-  appendToGlobalCtors(*M, F, 0); 
-} 
+  Data->setVisibility(Visibility);
+  Data->setSection(getInstrProfSectionName(IPSK_data, TT.getObjectFormat()));
+  Data->setAlignment(Align(INSTR_PROF_DATA_ALIGNMENT));
+  MaybeSetComdat(Data);
+  Data->setLinkage(Linkage);
+
+  PD.RegionCounters = CounterPtr;
+  PD.DataVar = Data;
+  ProfileDataMap[NamePtr] = PD;
+
+  // Mark the data variable as used so that it isn't stripped out.
+  UsedVars.push_back(Data);
+  // Now that the linkage set by the FE has been passed to the data and counter
+  // variables, reset Name variable's linkage and visibility to private so that
+  // it can be removed later by the compiler.
+  NamePtr->setLinkage(GlobalValue::PrivateLinkage);
+  // Collect the referenced names to be used by emitNameData.
+  ReferencedNames.push_back(NamePtr);
+
+  return CounterPtr;
+}
+
+void InstrProfiling::emitVNodes() {
+  if (!ValueProfileStaticAlloc)
+    return;
+
+  // For now only support this on platforms that do
+  // not require runtime registration to discover
+  // named section start/end.
+  if (needsRuntimeRegistrationOfSectionRange(TT))
+    return;
+
+  size_t TotalNS = 0;
+  for (auto &PD : ProfileDataMap) {
+    for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
+      TotalNS += PD.second.NumValueSites[Kind];
+  }
+
+  if (!TotalNS)
+    return;
+
+  uint64_t NumCounters = TotalNS * NumCountersPerValueSite;
+// Heuristic for small programs with very few total value sites.
+// The default value of vp-counters-per-site is chosen based on
+// the observation that large apps usually have a low percentage
+// of value sites that actually have any profile data, and thus
+// the average number of counters per site is low. For small
+// apps with very few sites, this may not be true. Bump up the
+// number of counters in this case.
+#define INSTR_PROF_MIN_VAL_COUNTS 10
+  if (NumCounters < INSTR_PROF_MIN_VAL_COUNTS)
+    NumCounters = std::max(INSTR_PROF_MIN_VAL_COUNTS, (int)NumCounters * 2);
+
+  auto &Ctx = M->getContext();
+  Type *VNodeTypes[] = {
+#define INSTR_PROF_VALUE_NODE(Type, LLVMType, Name, Init) LLVMType,
+#include "llvm/ProfileData/InstrProfData.inc"
+  };
+  auto *VNodeTy = StructType::get(Ctx, makeArrayRef(VNodeTypes));
+
+  ArrayType *VNodesTy = ArrayType::get(VNodeTy, NumCounters);
+  auto *VNodesVar = new GlobalVariable(
+      *M, VNodesTy, false, GlobalValue::PrivateLinkage,
+      Constant::getNullValue(VNodesTy), getInstrProfVNodesVarName());
+  VNodesVar->setSection(
+      getInstrProfSectionName(IPSK_vnodes, TT.getObjectFormat()));
+  UsedVars.push_back(VNodesVar);
+}
+
+void InstrProfiling::emitNameData() {
+  std::string UncompressedData;
+
+  if (ReferencedNames.empty())
+    return;
+
+  std::string CompressedNameStr;
+  if (Error E = collectPGOFuncNameStrings(ReferencedNames, CompressedNameStr,
+                                          DoInstrProfNameCompression)) {
+    report_fatal_error(toString(std::move(E)), false);
+  }
+
+  auto &Ctx = M->getContext();
+  auto *NamesVal = ConstantDataArray::getString(
+      Ctx, StringRef(CompressedNameStr), false);
+  NamesVar = new GlobalVariable(*M, NamesVal->getType(), true,
+                                GlobalValue::PrivateLinkage, NamesVal,
+                                getInstrProfNamesVarName());
+  NamesSize = CompressedNameStr.size();
+  NamesVar->setSection(
+      getInstrProfSectionName(IPSK_name, TT.getObjectFormat()));
+  // On COFF, it's important to reduce the alignment down to 1 to prevent the
+  // linker from inserting padding before the start of the names section or
+  // between names entries.
+  NamesVar->setAlignment(Align(1));
+  UsedVars.push_back(NamesVar);
+
+  for (auto *NamePtr : ReferencedNames)
+    NamePtr->eraseFromParent();
+}
+
+void InstrProfiling::emitRegistration() {
+  if (!needsRuntimeRegistrationOfSectionRange(TT))
+    return;
+
+  // Construct the function.
+  auto *VoidTy = Type::getVoidTy(M->getContext());
+  auto *VoidPtrTy = Type::getInt8PtrTy(M->getContext());
+  auto *Int64Ty = Type::getInt64Ty(M->getContext());
+  auto *RegisterFTy = FunctionType::get(VoidTy, false);
+  auto *RegisterF = Function::Create(RegisterFTy, GlobalValue::InternalLinkage,
+                                     getInstrProfRegFuncsName(), M);
+  RegisterF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+  if (Options.NoRedZone)
+    RegisterF->addFnAttr(Attribute::NoRedZone);
+
+  auto *RuntimeRegisterTy = FunctionType::get(VoidTy, VoidPtrTy, false);
+  auto *RuntimeRegisterF =
+      Function::Create(RuntimeRegisterTy, GlobalVariable::ExternalLinkage,
+                       getInstrProfRegFuncName(), M);
+
+  IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", RegisterF));
+  for (Value *Data : UsedVars)
+    if (Data != NamesVar && !isa<Function>(Data))
+      IRB.CreateCall(RuntimeRegisterF, IRB.CreateBitCast(Data, VoidPtrTy));
+
+  if (NamesVar) {
+    Type *ParamTypes[] = {VoidPtrTy, Int64Ty};
+    auto *NamesRegisterTy =
+        FunctionType::get(VoidTy, makeArrayRef(ParamTypes), false);
+    auto *NamesRegisterF =
+        Function::Create(NamesRegisterTy, GlobalVariable::ExternalLinkage,
+                         getInstrProfNamesRegFuncName(), M);
+    IRB.CreateCall(NamesRegisterF, {IRB.CreateBitCast(NamesVar, VoidPtrTy),
+                                    IRB.getInt64(NamesSize)});
+  }
+
+  IRB.CreateRetVoid();
+}
+
+bool InstrProfiling::emitRuntimeHook() {
+  // We expect the linker to be invoked with -u<hook_var> flag for Linux or
+  // Fuchsia, in which case there is no need to emit the user function.
+  if (TT.isOSLinux() || TT.isOSFuchsia())
+    return false;
+
+  // If the module's provided its own runtime, we don't need to do anything.
+  if (M->getGlobalVariable(getInstrProfRuntimeHookVarName()))
+    return false;
+
+  // Declare an external variable that will pull in the runtime initialization.
+  auto *Int32Ty = Type::getInt32Ty(M->getContext());
+  auto *Var =
+      new GlobalVariable(*M, Int32Ty, false, GlobalValue::ExternalLinkage,
+                         nullptr, getInstrProfRuntimeHookVarName());
+
+  // Make a function that uses it.
+  auto *User = Function::Create(FunctionType::get(Int32Ty, false),
+                                GlobalValue::LinkOnceODRLinkage,
+                                getInstrProfRuntimeHookVarUseFuncName(), M);
+  User->addFnAttr(Attribute::NoInline);
+  if (Options.NoRedZone)
+    User->addFnAttr(Attribute::NoRedZone);
+  User->setVisibility(GlobalValue::HiddenVisibility);
+  if (TT.supportsCOMDAT())
+    User->setComdat(M->getOrInsertComdat(User->getName()));
+
+  IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", User));
+  auto *Load = IRB.CreateLoad(Int32Ty, Var);
+  IRB.CreateRet(Load);
+
+  // Mark the user variable as used so that it isn't stripped out.
+  UsedVars.push_back(User);
+  return true;
+}
+
+void InstrProfiling::emitUses() {
+  if (!UsedVars.empty())
+    appendToUsed(*M, UsedVars);
+}
+
+void InstrProfiling::emitInitialization() {
+  // Create ProfileFileName variable. Don't don't this for the
+  // context-sensitive instrumentation lowering: This lowering is after
+  // LTO/ThinLTO linking. Pass PGOInstrumentationGenCreateVar should
+  // have already create the variable before LTO/ThinLTO linking.
+  if (!IsCS)
+    createProfileFileNameVar(*M, Options.InstrProfileOutput);
+  Function *RegisterF = M->getFunction(getInstrProfRegFuncsName());
+  if (!RegisterF)
+    return;
+
+  // Create the initialization function.
+  auto *VoidTy = Type::getVoidTy(M->getContext());
+  auto *F = Function::Create(FunctionType::get(VoidTy, false),
+                             GlobalValue::InternalLinkage,
+                             getInstrProfInitFuncName(), M);
+  F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+  F->addFnAttr(Attribute::NoInline);
+  if (Options.NoRedZone)
+    F->addFnAttr(Attribute::NoRedZone);
+
+  // Add the basic block and the necessary calls.
+  IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", F));
+  IRB.CreateCall(RegisterF, {});
+  IRB.CreateRetVoid();
+
+  appendToGlobalCtors(*M, F, 0);
+}
diff --git a/contrib/libs/llvm12/lib/Transforms/Instrumentation/Instrumentation.cpp b/contrib/libs/llvm12/lib/Transforms/Instrumentation/Instrumentation.cpp
index 08137cf836..cfdf3cad97 100644
--- a/contrib/libs/llvm12/lib/Transforms/Instrumentation/Instrumentation.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -1,131 +1,131 @@
-//===-- Instrumentation.cpp - TransformUtils Infrastructure ---------------===// 
-// 
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 
-// 
-//===----------------------------------------------------------------------===// 
-// 
-// This file defines the common initialization infrastructure for the 
-// Instrumentation library. 
-// 
-//===----------------------------------------------------------------------===// 
- 
-#include "llvm/Transforms/Instrumentation.h" 
-#include "llvm-c/Initialization.h" 
-#include "llvm/ADT/Triple.h" 
-#include "llvm/IR/IntrinsicInst.h" 
-#include "llvm/IR/Module.h" 
-#include "llvm/InitializePasses.h" 
-#include "llvm/PassRegistry.h" 
- 
-using namespace llvm; 
- 
-/// Moves I before IP. Returns new insert point. 
-static BasicBlock::iterator moveBeforeInsertPoint(BasicBlock::iterator I, BasicBlock::iterator IP) { 
-  // If I is IP, move the insert point down. 
-  if (I == IP) { 
-    ++IP; 
-  } else { 
-    // Otherwise, move I before IP and return IP. 
-    I->moveBefore(&*IP); 
-  } 
-  return IP; 
-} 
- 
-/// Instrumentation passes often insert conditional checks into entry blocks. 
-/// Call this function before splitting the entry block to move instructions 
-/// that must remain in the entry block up before the split point. Static 
-/// allocas and llvm.localescape calls, for example, must remain in the entry 
-/// block. 
-BasicBlock::iterator llvm::PrepareToSplitEntryBlock(BasicBlock &BB, 
-                                                    BasicBlock::iterator IP) { 
-  assert(&BB.getParent()->getEntryBlock() == &BB); 
-  for (auto I = IP, E = BB.end(); I != E; ++I) { 
-    bool KeepInEntry = false; 
-    if (auto *AI = dyn_cast<AllocaInst>(I)) { 
-      if (AI->isStaticAlloca()) 
-        KeepInEntry = true; 
-    } else if (auto *II = dyn_cast<IntrinsicInst>(I)) { 
-      if (II->getIntrinsicID() == llvm::Intrinsic::localescape) 
-        KeepInEntry = true; 
-    } 
-    if (KeepInEntry) 
-      IP = moveBeforeInsertPoint(I, IP); 
-  } 
-  return IP; 
-} 
- 
-// Create a constant for Str so that we can pass it to the run-time lib. 
-GlobalVariable *llvm::createPrivateGlobalForString(Module &M, StringRef Str, 
-                                                   bool AllowMerging, 
-                                                   const char *NamePrefix) { 
-  Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str); 
-  // We use private linkage for module-local strings. If they can be merged 
-  // with another one, we set the unnamed_addr attribute. 
-  GlobalVariable *GV = 
-      new GlobalVariable(M, StrConst->getType(), true, 
-                         GlobalValue::PrivateLinkage, StrConst, NamePrefix); 
-  if (AllowMerging) 
-    GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); 
-  GV->setAlignment(Align(1)); // Strings may not be merged w/o setting 
-                              // alignment explicitly. 
-  return GV; 
-} 
- 
-Comdat *llvm::GetOrCreateFunctionComdat(Function &F, Triple &T, 
-                                        const std::string &ModuleId) { 
-  if (auto Comdat = F.getComdat()) return Comdat; 
-  assert(F.hasName()); 
-  Module *M = F.getParent(); 
-  std::string Name = std::string(F.getName()); 
- 
-  // Make a unique comdat name for internal linkage things on ELF. On COFF, the 
-  // name of the comdat group identifies the leader symbol of the comdat group. 
-  // The linkage of the leader symbol is considered during comdat resolution, 
-  // and internal symbols with the same name from different objects will not be 
-  // merged. 
-  if (T.isOSBinFormatELF() && F.hasLocalLinkage()) { 
-    if (ModuleId.empty()) 
-      return nullptr; 
-    Name += ModuleId; 
-  } 
- 
-  // Make a new comdat for the function. Use the "no duplicates" selection kind 
-  // for non-weak symbols if the object file format supports it. 
-  Comdat *C = M->getOrInsertComdat(Name); 
-  if (T.isOSBinFormatCOFF() && !F.isWeakForLinker()) 
-    C->setSelectionKind(Comdat::NoDuplicates); 
-  F.setComdat(C); 
-  return C; 
-} 
- 
-/// initializeInstrumentation - Initialize all passes in the TransformUtils 
-/// library. 
-void llvm::initializeInstrumentation(PassRegistry &Registry) { 
-  initializeAddressSanitizerLegacyPassPass(Registry); 
-  initializeModuleAddressSanitizerLegacyPassPass(Registry); 
+//===-- Instrumentation.cpp - TransformUtils Infrastructure ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the common initialization infrastructure for the
+// Instrumentation library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm-c/Initialization.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/PassRegistry.h"
+
+using namespace llvm;
+
+/// Moves I before IP. Returns new insert point.
+static BasicBlock::iterator moveBeforeInsertPoint(BasicBlock::iterator I, BasicBlock::iterator IP) {
+  // If I is IP, move the insert point down.
+  if (I == IP) {
+    ++IP;
+  } else {
+    // Otherwise, move I before IP and return IP.
+    I->moveBefore(&*IP);
+  }
+  return IP;
+}
+
+/// Instrumentation passes often insert conditional checks into entry blocks.
+/// Call this function before splitting the entry block to move instructions
+/// that must remain in the entry block up before the split point. Static
+/// allocas and llvm.localescape calls, for example, must remain in the entry
+/// block.
+BasicBlock::iterator llvm::PrepareToSplitEntryBlock(BasicBlock &BB,
+                                                    BasicBlock::iterator IP) {
+  assert(&BB.getParent()->getEntryBlock() == &BB);
+  for (auto I = IP, E = BB.end(); I != E; ++I) {
+    bool KeepInEntry = false;
+    if (auto *AI = dyn_cast<AllocaInst>(I)) {
+      if (AI->isStaticAlloca())
+        KeepInEntry = true;
+    } else if (auto *II = dyn_cast<IntrinsicInst>(I)) {
+      if (II->getIntrinsicID() == llvm::Intrinsic::localescape)
+        KeepInEntry = true;
+    }
+    if (KeepInEntry)
+      IP = moveBeforeInsertPoint(I, IP);
+  }
+  return IP;
+}
+
+// Create a constant for Str so that we can pass it to the run-time lib.
+GlobalVariable *llvm::createPrivateGlobalForString(Module &M, StringRef Str,
+                                                   bool AllowMerging,
+                                                   const char *NamePrefix) {
+  Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
+  // We use private linkage for module-local strings. If they can be merged
+  // with another one, we set the unnamed_addr attribute.
+  GlobalVariable *GV =
+      new GlobalVariable(M, StrConst->getType(), true,
+                         GlobalValue::PrivateLinkage, StrConst, NamePrefix);
+  if (AllowMerging)
+    GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+  GV->setAlignment(Align(1)); // Strings may not be merged w/o setting
+                              // alignment explicitly.
+  return GV;
+}
+
+Comdat *llvm::GetOrCreateFunctionComdat(Function &F, Triple &T,
+                                        const std::string &ModuleId) {
+  if (auto Comdat = F.getComdat()) return Comdat;
+  assert(F.hasName());
+  Module *M = F.getParent();
+  std::string Name = std::string(F.getName());
+
+  // Make a unique comdat name for internal linkage things on ELF. On COFF, the
+  // name of the comdat group identifies the leader symbol of the comdat group.
+  // The linkage of the leader symbol is considered during comdat resolution,
+  // and internal symbols with the same name from different objects will not be
+  // merged.
+  if (T.isOSBinFormatELF() && F.hasLocalLinkage()) {
+    if (ModuleId.empty())
+      return nullptr;
+    Name += ModuleId;
+  }
+
+  // Make a new comdat for the function. Use the "no duplicates" selection kind
+  // for non-weak symbols if the object file format supports it.
+  Comdat *C = M->getOrInsertComdat(Name);
+  if (T.isOSBinFormatCOFF() && !F.isWeakForLinker())
+    C->setSelectionKind(Comdat::NoDuplicates);
+  F.setComdat(C);
+  return C;
+}
+
+/// initializeInstrumentation - Initialize all passes in the TransformUtils
+/// library.
+void llvm::initializeInstrumentation(PassRegistry &Registry) {
+  initializeAddressSanitizerLegacyPassPass(Registry);
+  initializeModuleAddressSanitizerLegacyPassPass(Registry);
   initializeMemProfilerLegacyPassPass(Registry);
   initializeModuleMemProfilerLegacyPassPass(Registry);
-  initializeBoundsCheckingLegacyPassPass(Registry); 
-  initializeControlHeightReductionLegacyPassPass(Registry); 
-  initializeGCOVProfilerLegacyPassPass(Registry); 
-  initializePGOInstrumentationGenLegacyPassPass(Registry); 
-  initializePGOInstrumentationUseLegacyPassPass(Registry); 
-  initializePGOIndirectCallPromotionLegacyPassPass(Registry); 
-  initializePGOMemOPSizeOptLegacyPassPass(Registry); 
-  initializeCGProfileLegacyPassPass(Registry); 
-  initializeInstrOrderFileLegacyPassPass(Registry); 
-  initializeInstrProfilingLegacyPassPass(Registry); 
-  initializeMemorySanitizerLegacyPassPass(Registry); 
-  initializeHWAddressSanitizerLegacyPassPass(Registry); 
-  initializeThreadSanitizerLegacyPassPass(Registry); 
-  initializeModuleSanitizerCoverageLegacyPassPass(Registry); 
+  initializeBoundsCheckingLegacyPassPass(Registry);
+  initializeControlHeightReductionLegacyPassPass(Registry);
+  initializeGCOVProfilerLegacyPassPass(Registry);
+  initializePGOInstrumentationGenLegacyPassPass(Registry);
+  initializePGOInstrumentationUseLegacyPassPass(Registry);
+  initializePGOIndirectCallPromotionLegacyPassPass(Registry);
+  initializePGOMemOPSizeOptLegacyPassPass(Registry);
+  initializeCGProfileLegacyPassPass(Registry);
+  initializeInstrOrderFileLegacyPassPass(Registry);
+  initializeInstrProfilingLegacyPassPass(Registry);
+  initializeMemorySanitizerLegacyPassPass(Registry);
+  initializeHWAddressSanitizerLegacyPassPass(Registry);
+  initializeThreadSanitizerLegacyPassPass(Registry);
+  initializeModuleSanitizerCoverageLegacyPassPass(Registry);
   initializeDataFlowSanitizerLegacyPassPass(Registry);
-} 
- 
-/// LLVMInitializeInstrumentation - C binding for 
-/// initializeInstrumentation. 
-void LLVMInitializeInstrumentation(LLVMPassRegistryRef R) { 
-  initializeInstrumentation(*unwrap(R)); 
-} 
+}
+
+/// LLVMInitializeInstrumentation - C binding for
+/// initializeInstrumentation.
+void LLVMInitializeInstrumentation(LLVMPassRegistryRef R) {
+  initializeInstrumentation(*unwrap(R));
+}
diff --git a/contrib/libs/llvm12/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/contrib/libs/llvm12/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 4159f82db5..7a6874584d 100644
--- a/contrib/libs/llvm12/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -1,1176 +1,1176 @@
-//===- MemorySanitizer.cpp - detector of uninitialized reads --------------===// 
-// 
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 
-// 
-//===----------------------------------------------------------------------===// 
-// 
-/// \file 
-/// This file is a part of MemorySanitizer, a detector of uninitialized 
-/// reads. 
-/// 
-/// The algorithm of the tool is similar to Memcheck 
-/// (http://goo.gl/QKbem). We associate a few shadow bits with every 
-/// byte of the application memory, poison the shadow of the malloc-ed 
-/// or alloca-ed memory, load the shadow bits on every memory read, 
-/// propagate the shadow bits through some of the arithmetic 
-/// instruction (including MOV), store the shadow bits on every memory 
-/// write, report a bug on some other instructions (e.g. JMP) if the 
-/// associated shadow is poisoned. 
-/// 
-/// But there are differences too. The first and the major one: 
-/// compiler instrumentation instead of binary instrumentation. This 
-/// gives us much better register allocation, possible compiler 
-/// optimizations and a fast start-up. But this brings the major issue 
-/// as well: msan needs to see all program events, including system 
-/// calls and reads/writes in system libraries, so we either need to 
-/// compile *everything* with msan or use a binary translation 
-/// component (e.g. DynamoRIO) to instrument pre-built libraries. 
-/// Another difference from Memcheck is that we use 8 shadow bits per 
-/// byte of application memory and use a direct shadow mapping. This 
-/// greatly simplifies the instrumentation code and avoids races on 
-/// shadow updates (Memcheck is single-threaded so races are not a 
-/// concern there. Memcheck uses 2 shadow bits per byte with a slow 
-/// path storage that uses 8 bits per byte). 
-/// 
-/// The default value of shadow is 0, which means "clean" (not poisoned). 
-/// 
-/// Every module initializer should call __msan_init to ensure that the 
-/// shadow memory is ready. On error, __msan_warning is called. Since 
-/// parameters and return values may be passed via registers, we have a 
-/// specialized thread-local shadow for return values 
-/// (__msan_retval_tls) and parameters (__msan_param_tls). 
-/// 
-///                           Origin tracking. 
-/// 
-/// MemorySanitizer can track origins (allocation points) of all uninitialized 
-/// values. This behavior is controlled with a flag (msan-track-origins) and is 
-/// disabled by default. 
-/// 
-/// Origins are 4-byte values created and interpreted by the runtime library. 
-/// They are stored in a second shadow mapping, one 4-byte value for 4 bytes 
-/// of application memory. Propagation of origins is basically a bunch of 
-/// "select" instructions that pick the origin of a dirty argument, if an 
-/// instruction has one. 
-/// 
-/// Every 4 aligned, consecutive bytes of application memory have one origin 
-/// value associated with them. If these bytes contain uninitialized data 
-/// coming from 2 different allocations, the last store wins. Because of this, 
-/// MemorySanitizer reports can show unrelated origins, but this is unlikely in 
-/// practice. 
-/// 
-/// Origins are meaningless for fully initialized values, so MemorySanitizer 
-/// avoids storing origin to memory when a fully initialized value is stored. 
-/// This way it avoids needless overwriting origin of the 4-byte region on 
-/// a short (i.e. 1 byte) clean store, and it is also good for performance. 
-/// 
-///                            Atomic handling. 
-/// 
-/// Ideally, every atomic store of application value should update the 
-/// corresponding shadow location in an atomic way. Unfortunately, atomic store 
-/// of two disjoint locations can not be done without severe slowdown. 
-/// 
-/// Therefore, we implement an approximation that may err on the safe side. 
-/// In this implementation, every atomically accessed location in the program 
-/// may only change from (partially) uninitialized to fully initialized, but 
-/// not the other way around. We load the shadow _after_ the application load, 
-/// and we store the shadow _before_ the app store. Also, we always store clean 
-/// shadow (if the application store is atomic). This way, if the store-load 
-/// pair constitutes a happens-before arc, shadow store and load are correctly 
-/// ordered such that the load will get either the value that was stored, or 
-/// some later value (which is always clean). 
-/// 
-/// This does not work very well with Compare-And-Swap (CAS) and 
-/// Read-Modify-Write (RMW) operations. To follow the above logic, CAS and RMW 
-/// must store the new shadow before the app operation, and load the shadow 
-/// after the app operation. Computers don't work this way. Current 
-/// implementation ignores the load aspect of CAS/RMW, always returning a clean 
-/// value. It implements the store part as a simple atomic store by storing a 
-/// clean shadow. 
-/// 
-///                      Instrumenting inline assembly. 
-/// 
-/// For inline assembly code LLVM has little idea about which memory locations 
-/// become initialized depending on the arguments. It can be possible to figure 
-/// out which arguments are meant to point to inputs and outputs, but the 
-/// actual semantics can be only visible at runtime. In the Linux kernel it's 
-/// also possible that the arguments only indicate the offset for a base taken 
-/// from a segment register, so it's dangerous to treat any asm() arguments as 
-/// pointers. We take a conservative approach generating calls to 
-///   __msan_instrument_asm_store(ptr, size) 
-/// , which defer the memory unpoisoning to the runtime library. 
-/// The latter can perform more complex address checks to figure out whether 
-/// it's safe to touch the shadow memory. 
-/// Like with atomic operations, we call __msan_instrument_asm_store() before 
-/// the assembly call, so that changes to the shadow memory will be seen by 
-/// other threads together with main memory initialization. 
-/// 
-///                  KernelMemorySanitizer (KMSAN) implementation. 
-/// 
-/// The major differences between KMSAN and MSan instrumentation are: 
-///  - KMSAN always tracks the origins and implies msan-keep-going=true; 
-///  - KMSAN allocates shadow and origin memory for each page separately, so 
-///    there are no explicit accesses to shadow and origin in the 
-///    instrumentation. 
-///    Shadow and origin values for a particular X-byte memory location 
-///    (X=1,2,4,8) are accessed through pointers obtained via the 
-///      __msan_metadata_ptr_for_load_X(ptr) 
-///      __msan_metadata_ptr_for_store_X(ptr) 
-///    functions. The corresponding functions check that the X-byte accesses 
-///    are possible and returns the pointers to shadow and origin memory. 
-///    Arbitrary sized accesses are handled with: 
-///      __msan_metadata_ptr_for_load_n(ptr, size) 
-///      __msan_metadata_ptr_for_store_n(ptr, size); 
-///  - TLS variables are stored in a single per-task struct. A call to a 
-///    function __msan_get_context_state() returning a pointer to that struct 
-///    is inserted into every instrumented function before the entry block; 
-///  - __msan_warning() takes a 32-bit origin parameter; 
-///  - local variables are poisoned with __msan_poison_alloca() upon function 
-///    entry and unpoisoned with __msan_unpoison_alloca() before leaving the 
-///    function; 
-///  - the pass doesn't declare any global variables or add global constructors 
-///    to the translation unit. 
-/// 
-/// Also, KMSAN currently ignores uninitialized memory passed into inline asm 
-/// calls, making sure we're on the safe side wrt. possible false positives. 
-/// 
-///  KernelMemorySanitizer only supports X86_64 at the moment. 
-/// 
-// 
-// FIXME: This sanitizer does not yet handle scalable vectors 
-// 
-//===----------------------------------------------------------------------===// 
- 
-#include "llvm/Transforms/Instrumentation/MemorySanitizer.h" 
-#include "llvm/ADT/APInt.h" 
-#include "llvm/ADT/ArrayRef.h" 
-#include "llvm/ADT/DepthFirstIterator.h" 
-#include "llvm/ADT/SmallSet.h" 
-#include "llvm/ADT/SmallString.h" 
-#include "llvm/ADT/SmallVector.h" 
-#include "llvm/ADT/StringExtras.h" 
-#include "llvm/ADT/StringRef.h" 
-#include "llvm/ADT/Triple.h" 
-#include "llvm/Analysis/TargetLibraryInfo.h" 
+//===- MemorySanitizer.cpp - detector of uninitialized reads --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file is a part of MemorySanitizer, a detector of uninitialized
+/// reads.
+///
+/// The algorithm of the tool is similar to Memcheck
+/// (http://goo.gl/QKbem). We associate a few shadow bits with every
+/// byte of the application memory, poison the shadow of the malloc-ed
+/// or alloca-ed memory, load the shadow bits on every memory read,
+/// propagate the shadow bits through some of the arithmetic
+/// instruction (including MOV), store the shadow bits on every memory
+/// write, report a bug on some other instructions (e.g. JMP) if the
+/// associated shadow is poisoned.
+///
+/// But there are differences too. The first and the major one:
+/// compiler instrumentation instead of binary instrumentation. This
+/// gives us much better register allocation, possible compiler
+/// optimizations and a fast start-up. But this brings the major issue
+/// as well: msan needs to see all program events, including system
+/// calls and reads/writes in system libraries, so we either need to
+/// compile *everything* with msan or use a binary translation
+/// component (e.g. DynamoRIO) to instrument pre-built libraries.
+/// Another difference from Memcheck is that we use 8 shadow bits per
+/// byte of application memory and use a direct shadow mapping. This
+/// greatly simplifies the instrumentation code and avoids races on
+/// shadow updates (Memcheck is single-threaded so races are not a
+/// concern there. Memcheck uses 2 shadow bits per byte with a slow
+/// path storage that uses 8 bits per byte).
+///
+/// The default value of shadow is 0, which means "clean" (not poisoned).
+///
+/// Every module initializer should call __msan_init to ensure that the
+/// shadow memory is ready. On error, __msan_warning is called. Since
+/// parameters and return values may be passed via registers, we have a
+/// specialized thread-local shadow for return values
+/// (__msan_retval_tls) and parameters (__msan_param_tls).
+///
+///                           Origin tracking.
+///
+/// MemorySanitizer can track origins (allocation points) of all uninitialized
+/// values. This behavior is controlled with a flag (msan-track-origins) and is
+/// disabled by default.
+///
+/// Origins are 4-byte values created and interpreted by the runtime library.
+/// They are stored in a second shadow mapping, one 4-byte value for 4 bytes
+/// of application memory. Propagation of origins is basically a bunch of
+/// "select" instructions that pick the origin of a dirty argument, if an
+/// instruction has one.
+///
+/// Every 4 aligned, consecutive bytes of application memory have one origin
+/// value associated with them. If these bytes contain uninitialized data
+/// coming from 2 different allocations, the last store wins. Because of this,
+/// MemorySanitizer reports can show unrelated origins, but this is unlikely in
+/// practice.
+///
+/// Origins are meaningless for fully initialized values, so MemorySanitizer
+/// avoids storing origin to memory when a fully initialized value is stored.
+/// This way it avoids needless overwriting origin of the 4-byte region on
+/// a short (i.e. 1 byte) clean store, and it is also good for performance.
+///
+///                            Atomic handling.
+///
+/// Ideally, every atomic store of application value should update the
+/// corresponding shadow location in an atomic way. Unfortunately, atomic store
+/// of two disjoint locations can not be done without severe slowdown.
+///
+/// Therefore, we implement an approximation that may err on the safe side.
+/// In this implementation, every atomically accessed location in the program
+/// may only change from (partially) uninitialized to fully initialized, but
+/// not the other way around. We load the shadow _after_ the application load,
+/// and we store the shadow _before_ the app store. Also, we always store clean
+/// shadow (if the application store is atomic). This way, if the store-load
+/// pair constitutes a happens-before arc, shadow store and load are correctly
+/// ordered such that the load will get either the value that was stored, or
+/// some later value (which is always clean).
+///
+/// This does not work very well with Compare-And-Swap (CAS) and
+/// Read-Modify-Write (RMW) operations. To follow the above logic, CAS and RMW
+/// must store the new shadow before the app operation, and load the shadow
+/// after the app operation. Computers don't work this way. Current
+/// implementation ignores the load aspect of CAS/RMW, always returning a clean
+/// value. It implements the store part as a simple atomic store by storing a
+/// clean shadow.
+///
+///                      Instrumenting inline assembly.
+///
+/// For inline assembly code LLVM has little idea about which memory locations
+/// become initialized depending on the arguments. It can be possible to figure
+/// out which arguments are meant to point to inputs and outputs, but the
+/// actual semantics can be only visible at runtime. In the Linux kernel it's
+/// also possible that the arguments only indicate the offset for a base taken
+/// from a segment register, so it's dangerous to treat any asm() arguments as
+/// pointers. We take a conservative approach generating calls to
+///   __msan_instrument_asm_store(ptr, size)
+/// , which defer the memory unpoisoning to the runtime library.
+/// The latter can perform more complex address checks to figure out whether
+/// it's safe to touch the shadow memory.
+/// Like with atomic operations, we call __msan_instrument_asm_store() before
+/// the assembly call, so that changes to the shadow memory will be seen by
+/// other threads together with main memory initialization.
+///
+///                  KernelMemorySanitizer (KMSAN) implementation.
+///
+/// The major differences between KMSAN and MSan instrumentation are:
+///  - KMSAN always tracks the origins and implies msan-keep-going=true;
+///  - KMSAN allocates shadow and origin memory for each page separately, so
+///    there are no explicit accesses to shadow and origin in the
+///    instrumentation.
+///    Shadow and origin values for a particular X-byte memory location
+///    (X=1,2,4,8) are accessed through pointers obtained via the
+///      __msan_metadata_ptr_for_load_X(ptr)
+///      __msan_metadata_ptr_for_store_X(ptr)
+///    functions. The corresponding functions check that the X-byte accesses
+///    are possible and return the pointers to shadow and origin memory.
+///    Arbitrary sized accesses are handled with:
+///      __msan_metadata_ptr_for_load_n(ptr, size)
+///      __msan_metadata_ptr_for_store_n(ptr, size);
+///  - TLS variables are stored in a single per-task struct. A call to a
+///    function __msan_get_context_state() returning a pointer to that struct
+///    is inserted into every instrumented function before the entry block;
+///  - __msan_warning() takes a 32-bit origin parameter;
+///  - local variables are poisoned with __msan_poison_alloca() upon function
+///    entry and unpoisoned with __msan_unpoison_alloca() before leaving the
+///    function;
+///  - the pass doesn't declare any global variables or add global constructors
+///    to the translation unit.
+///
+/// Also, KMSAN currently ignores uninitialized memory passed into inline asm
+/// calls, making sure we're on the safe side wrt. possible false positives.
+///
+///  KernelMemorySanitizer only supports X86_64 at the moment.
+///
+//
+// FIXME: This sanitizer does not yet handle scalable vectors
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/Argument.h" 
-#include "llvm/IR/Attributes.h" 
-#include "llvm/IR/BasicBlock.h" 
-#include "llvm/IR/CallingConv.h" 
-#include "llvm/IR/Constant.h" 
-#include "llvm/IR/Constants.h" 
-#include "llvm/IR/DataLayout.h" 
-#include "llvm/IR/DerivedTypes.h" 
-#include "llvm/IR/Function.h" 
-#include "llvm/IR/GlobalValue.h" 
-#include "llvm/IR/GlobalVariable.h" 
-#include "llvm/IR/IRBuilder.h" 
-#include "llvm/IR/InlineAsm.h" 
-#include "llvm/IR/InstVisitor.h" 
-#include "llvm/IR/InstrTypes.h" 
-#include "llvm/IR/Instruction.h" 
-#include "llvm/IR/Instructions.h" 
-#include "llvm/IR/IntrinsicInst.h" 
-#include "llvm/IR/Intrinsics.h" 
-#include "llvm/IR/IntrinsicsX86.h" 
-#include "llvm/IR/LLVMContext.h" 
-#include "llvm/IR/MDBuilder.h" 
-#include "llvm/IR/Module.h" 
-#include "llvm/IR/Type.h" 
-#include "llvm/IR/Value.h" 
-#include "llvm/IR/ValueMap.h" 
-#include "llvm/InitializePasses.h" 
-#include "llvm/Pass.h" 
-#include "llvm/Support/AtomicOrdering.h" 
-#include "llvm/Support/Casting.h" 
-#include "llvm/Support/CommandLine.h" 
-#include "llvm/Support/Compiler.h" 
-#include "llvm/Support/Debug.h" 
-#include "llvm/Support/ErrorHandling.h" 
-#include "llvm/Support/MathExtras.h" 
-#include "llvm/Support/raw_ostream.h" 
-#include "llvm/Transforms/Instrumentation.h" 
-#include "llvm/Transforms/Utils/BasicBlockUtils.h" 
-#include "llvm/Transforms/Utils/Local.h" 
-#include "llvm/Transforms/Utils/ModuleUtils.h" 
-#include <algorithm> 
-#include <cassert> 
-#include <cstddef> 
-#include <cstdint> 
-#include <memory> 
-#include <string> 
-#include <tuple> 
- 
-using namespace llvm; 
- 
-#define DEBUG_TYPE "msan" 
- 
-static const unsigned kOriginSize = 4; 
-static const Align kMinOriginAlignment = Align(4); 
-static const Align kShadowTLSAlignment = Align(8); 
- 
-// These constants must be kept in sync with the ones in msan.h. 
-static const unsigned kParamTLSSize = 800; 
-static const unsigned kRetvalTLSSize = 800; 
- 
-// Accesses sizes are powers of two: 1, 2, 4, 8. 
-static const size_t kNumberOfAccessSizes = 4; 
- 
-/// Track origins of uninitialized values. 
-/// 
-/// Adds a section to MemorySanitizer report that points to the allocation 
-/// (stack or heap) the uninitialized bits came from originally. 
-static cl::opt<int> ClTrackOrigins("msan-track-origins", 
-       cl::desc("Track origins (allocation sites) of poisoned memory"), 
-       cl::Hidden, cl::init(0)); 
- 
-static cl::opt<bool> ClKeepGoing("msan-keep-going", 
-       cl::desc("keep going after reporting a UMR"), 
-       cl::Hidden, cl::init(false)); 
- 
-static cl::opt<bool> ClPoisonStack("msan-poison-stack", 
-       cl::desc("poison uninitialized stack variables"), 
-       cl::Hidden, cl::init(true)); 
- 
-static cl::opt<bool> ClPoisonStackWithCall("msan-poison-stack-with-call", 
-       cl::desc("poison uninitialized stack variables with a call"), 
-       cl::Hidden, cl::init(false)); 
- 
-static cl::opt<int> ClPoisonStackPattern("msan-poison-stack-pattern", 
-       cl::desc("poison uninitialized stack variables with the given pattern"), 
-       cl::Hidden, cl::init(0xff)); 
- 
-static cl::opt<bool> ClPoisonUndef("msan-poison-undef", 
-       cl::desc("poison undef temps"), 
-       cl::Hidden, cl::init(true)); 
- 
-static cl::opt<bool> ClHandleICmp("msan-handle-icmp", 
-       cl::desc("propagate shadow through ICmpEQ and ICmpNE"), 
-       cl::Hidden, cl::init(true)); 
- 
-static cl::opt<bool> ClHandleICmpExact("msan-handle-icmp-exact", 
-       cl::desc("exact handling of relational integer ICmp"), 
-       cl::Hidden, cl::init(false)); 
- 
-static cl::opt<bool> ClHandleLifetimeIntrinsics( 
-    "msan-handle-lifetime-intrinsics", 
-    cl::desc( 
-        "when possible, poison scoped variables at the beginning of the scope " 
-        "(slower, but more precise)"), 
-    cl::Hidden, cl::init(true)); 
- 
-// When compiling the Linux kernel, we sometimes see false positives related to 
-// MSan being unable to understand that inline assembly calls may initialize 
-// local variables. 
-// This flag makes the compiler conservatively unpoison every memory location 
-// passed into an assembly call. Note that this may cause false positives. 
-// Because it's impossible to figure out the array sizes, we can only unpoison 
-// the first sizeof(type) bytes for each type* pointer. 
-// The instrumentation is only enabled in KMSAN builds, and only if 
-// -msan-handle-asm-conservative is on. This is done because we may want to 
-// quickly disable assembly instrumentation when it breaks. 
-static cl::opt<bool> ClHandleAsmConservative( 
-    "msan-handle-asm-conservative", 
-    cl::desc("conservative handling of inline assembly"), cl::Hidden, 
-    cl::init(true)); 
- 
-// This flag controls whether we check the shadow of the address 
-// operand of load or store. Such bugs are very rare, since load from 
-// a garbage address typically results in SEGV, but still happen 
-// (e.g. only lower bits of address are garbage, or the access happens 
-// early at program startup where malloc-ed memory is more likely to 
-// be zeroed. As of 2012-08-28 this flag adds 20% slowdown. 
-static cl::opt<bool> ClCheckAccessAddress("msan-check-access-address", 
-       cl::desc("report accesses through a pointer which has poisoned shadow"), 
-       cl::Hidden, cl::init(true)); 
- 
-static cl::opt<bool> ClEagerChecks( 
-    "msan-eager-checks", 
-    cl::desc("check arguments and return values at function call boundaries"), 
-    cl::Hidden, cl::init(false)); 
- 
-static cl::opt<bool> ClDumpStrictInstructions("msan-dump-strict-instructions", 
-       cl::desc("print out instructions with default strict semantics"), 
-       cl::Hidden, cl::init(false)); 
- 
-static cl::opt<int> ClInstrumentationWithCallThreshold( 
-    "msan-instrumentation-with-call-threshold", 
-    cl::desc( 
-        "If the function being instrumented requires more than " 
-        "this number of checks and origin stores, use callbacks instead of " 
-        "inline checks (-1 means never use callbacks)."), 
-    cl::Hidden, cl::init(3500)); 
- 
-static cl::opt<bool> 
-    ClEnableKmsan("msan-kernel", 
-                  cl::desc("Enable KernelMemorySanitizer instrumentation"), 
-                  cl::Hidden, cl::init(false)); 
- 
-// This is an experiment to enable handling of cases where shadow is a non-zero 
-// compile-time constant. For some unexplainable reason they were silently 
-// ignored in the instrumentation. 
-static cl::opt<bool> ClCheckConstantShadow("msan-check-constant-shadow", 
-       cl::desc("Insert checks for constant shadow values"), 
-       cl::Hidden, cl::init(false)); 
- 
-// This is off by default because of a bug in gold: 
-// https://sourceware.org/bugzilla/show_bug.cgi?id=19002 
-static cl::opt<bool> ClWithComdat("msan-with-comdat", 
-       cl::desc("Place MSan constructors in comdat sections"), 
-       cl::Hidden, cl::init(false)); 
- 
-// These options allow to specify custom memory map parameters 
-// See MemoryMapParams for details. 
-static cl::opt<uint64_t> ClAndMask("msan-and-mask", 
-                                   cl::desc("Define custom MSan AndMask"), 
-                                   cl::Hidden, cl::init(0)); 
- 
-static cl::opt<uint64_t> ClXorMask("msan-xor-mask", 
-                                   cl::desc("Define custom MSan XorMask"), 
-                                   cl::Hidden, cl::init(0)); 
- 
-static cl::opt<uint64_t> ClShadowBase("msan-shadow-base", 
-                                      cl::desc("Define custom MSan ShadowBase"), 
-                                      cl::Hidden, cl::init(0)); 
- 
-static cl::opt<uint64_t> ClOriginBase("msan-origin-base", 
-                                      cl::desc("Define custom MSan OriginBase"), 
-                                      cl::Hidden, cl::init(0)); 
- 
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsX86.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/IR/ValueMap.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <tuple>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "msan"
+
+static const unsigned kOriginSize = 4;
+static const Align kMinOriginAlignment = Align(4);
+static const Align kShadowTLSAlignment = Align(8);
+
+// These constants must be kept in sync with the ones in msan.h.
+static const unsigned kParamTLSSize = 800;
+static const unsigned kRetvalTLSSize = 800;
+
+// Access sizes are powers of two: 1, 2, 4, 8.
+static const size_t kNumberOfAccessSizes = 4;
+
+/// Track origins of uninitialized values.
+///
+/// Adds a section to MemorySanitizer report that points to the allocation
+/// (stack or heap) the uninitialized bits came from originally.
+static cl::opt<int> ClTrackOrigins("msan-track-origins",
+       cl::desc("Track origins (allocation sites) of poisoned memory"),
+       cl::Hidden, cl::init(0));
+
+static cl::opt<bool> ClKeepGoing("msan-keep-going",
+       cl::desc("keep going after reporting a UMR"),
+       cl::Hidden, cl::init(false));
+
+static cl::opt<bool> ClPoisonStack("msan-poison-stack",
+       cl::desc("poison uninitialized stack variables"),
+       cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClPoisonStackWithCall("msan-poison-stack-with-call",
+       cl::desc("poison uninitialized stack variables with a call"),
+       cl::Hidden, cl::init(false));
+
+static cl::opt<int> ClPoisonStackPattern("msan-poison-stack-pattern",
+       cl::desc("poison uninitialized stack variables with the given pattern"),
+       cl::Hidden, cl::init(0xff));
+
+static cl::opt<bool> ClPoisonUndef("msan-poison-undef",
+       cl::desc("poison undef temps"),
+       cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClHandleICmp("msan-handle-icmp",
+       cl::desc("propagate shadow through ICmpEQ and ICmpNE"),
+       cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClHandleICmpExact("msan-handle-icmp-exact",
+       cl::desc("exact handling of relational integer ICmp"),
+       cl::Hidden, cl::init(false));
+
+static cl::opt<bool> ClHandleLifetimeIntrinsics(
+    "msan-handle-lifetime-intrinsics",
+    cl::desc(
+        "when possible, poison scoped variables at the beginning of the scope "
+        "(slower, but more precise)"),
+    cl::Hidden, cl::init(true));
+
+// When compiling the Linux kernel, we sometimes see false positives related to
+// MSan being unable to understand that inline assembly calls may initialize
+// local variables.
+// This flag makes the compiler conservatively unpoison every memory location
+// passed into an assembly call. Note that this may cause false positives.
+// Because it's impossible to figure out the array sizes, we can only unpoison
+// the first sizeof(type) bytes for each type* pointer.
+// The instrumentation is only enabled in KMSAN builds, and only if
+// -msan-handle-asm-conservative is on. This is done because we may want to
+// quickly disable assembly instrumentation when it breaks.
+static cl::opt<bool> ClHandleAsmConservative(
+    "msan-handle-asm-conservative",
+    cl::desc("conservative handling of inline assembly"), cl::Hidden,
+    cl::init(true));
+
+// This flag controls whether we check the shadow of the address
+// operand of load or store. Such bugs are very rare, since load from
+// a garbage address typically results in SEGV, but still happen
+// (e.g. only lower bits of address are garbage, or the access happens
+// early at program startup where malloc-ed memory is more likely to
+// be zeroed). As of 2012-08-28 this flag adds 20% slowdown.
+static cl::opt<bool> ClCheckAccessAddress("msan-check-access-address",
+       cl::desc("report accesses through a pointer which has poisoned shadow"),
+       cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClEagerChecks(
+    "msan-eager-checks",
+    cl::desc("check arguments and return values at function call boundaries"),
+    cl::Hidden, cl::init(false));
+
+static cl::opt<bool> ClDumpStrictInstructions("msan-dump-strict-instructions",
+       cl::desc("print out instructions with default strict semantics"),
+       cl::Hidden, cl::init(false));
+
+static cl::opt<int> ClInstrumentationWithCallThreshold(
+    "msan-instrumentation-with-call-threshold",
+    cl::desc(
+        "If the function being instrumented requires more than "
+        "this number of checks and origin stores, use callbacks instead of "
+        "inline checks (-1 means never use callbacks)."),
+    cl::Hidden, cl::init(3500));
+
+static cl::opt<bool>
+    ClEnableKmsan("msan-kernel",
+                  cl::desc("Enable KernelMemorySanitizer instrumentation"),
+                  cl::Hidden, cl::init(false));
+
+// This is an experiment to enable handling of cases where shadow is a non-zero
+// compile-time constant. For some unexplainable reason they were silently
+// ignored in the instrumentation.
+static cl::opt<bool> ClCheckConstantShadow("msan-check-constant-shadow",
+       cl::desc("Insert checks for constant shadow values"),
+       cl::Hidden, cl::init(false));
+
+// This is off by default because of a bug in gold:
+// https://sourceware.org/bugzilla/show_bug.cgi?id=19002
+static cl::opt<bool> ClWithComdat("msan-with-comdat",
+       cl::desc("Place MSan constructors in comdat sections"),
+       cl::Hidden, cl::init(false));
+
+// These options allow specifying custom memory map parameters.
+// See MemoryMapParams for details.
+static cl::opt<uint64_t> ClAndMask("msan-and-mask",
+                                   cl::desc("Define custom MSan AndMask"),
+                                   cl::Hidden, cl::init(0));
+
+static cl::opt<uint64_t> ClXorMask("msan-xor-mask",
+                                   cl::desc("Define custom MSan XorMask"),
+                                   cl::Hidden, cl::init(0));
+
+static cl::opt<uint64_t> ClShadowBase("msan-shadow-base",
+                                      cl::desc("Define custom MSan ShadowBase"),
+                                      cl::Hidden, cl::init(0));
+
+static cl::opt<uint64_t> ClOriginBase("msan-origin-base",
+                                      cl::desc("Define custom MSan OriginBase"),
+                                      cl::Hidden, cl::init(0));
+
 const char kMsanModuleCtorName[] = "msan.module_ctor";
 const char kMsanInitName[] = "__msan_init";
- 
-namespace { 
- 
-// Memory map parameters used in application-to-shadow address calculation. 
-// Offset = (Addr & ~AndMask) ^ XorMask 
-// Shadow = ShadowBase + Offset 
-// Origin = OriginBase + Offset 
-struct MemoryMapParams { 
-  uint64_t AndMask; 
-  uint64_t XorMask; 
-  uint64_t ShadowBase; 
-  uint64_t OriginBase; 
-}; 
- 
-struct PlatformMemoryMapParams { 
-  const MemoryMapParams *bits32; 
-  const MemoryMapParams *bits64; 
-}; 
- 
-} // end anonymous namespace 
- 
-// i386 Linux 
-static const MemoryMapParams Linux_I386_MemoryMapParams = { 
-  0x000080000000,  // AndMask 
-  0,               // XorMask (not used) 
-  0,               // ShadowBase (not used) 
-  0x000040000000,  // OriginBase 
-}; 
- 
-// x86_64 Linux 
-static const MemoryMapParams Linux_X86_64_MemoryMapParams = { 
-#ifdef MSAN_LINUX_X86_64_OLD_MAPPING 
-  0x400000000000,  // AndMask 
-  0,               // XorMask (not used) 
-  0,               // ShadowBase (not used) 
-  0x200000000000,  // OriginBase 
-#else 
-  0,               // AndMask (not used) 
-  0x500000000000,  // XorMask 
-  0,               // ShadowBase (not used) 
-  0x100000000000,  // OriginBase 
-#endif 
-}; 
- 
-// mips64 Linux 
-static const MemoryMapParams Linux_MIPS64_MemoryMapParams = { 
-  0,               // AndMask (not used) 
-  0x008000000000,  // XorMask 
-  0,               // ShadowBase (not used) 
-  0x002000000000,  // OriginBase 
-}; 
- 
-// ppc64 Linux 
-static const MemoryMapParams Linux_PowerPC64_MemoryMapParams = { 
-  0xE00000000000,  // AndMask 
-  0x100000000000,  // XorMask 
-  0x080000000000,  // ShadowBase 
-  0x1C0000000000,  // OriginBase 
-}; 
- 
-// s390x Linux 
-static const MemoryMapParams Linux_S390X_MemoryMapParams = { 
-    0xC00000000000, // AndMask 
-    0,              // XorMask (not used) 
-    0x080000000000, // ShadowBase 
-    0x1C0000000000, // OriginBase 
-}; 
- 
-// aarch64 Linux 
-static const MemoryMapParams Linux_AArch64_MemoryMapParams = { 
-  0,               // AndMask (not used) 
-  0x06000000000,   // XorMask 
-  0,               // ShadowBase (not used) 
-  0x01000000000,   // OriginBase 
-}; 
- 
-// i386 FreeBSD 
-static const MemoryMapParams FreeBSD_I386_MemoryMapParams = { 
-  0x000180000000,  // AndMask 
-  0x000040000000,  // XorMask 
-  0x000020000000,  // ShadowBase 
-  0x000700000000,  // OriginBase 
-}; 
- 
-// x86_64 FreeBSD 
-static const MemoryMapParams FreeBSD_X86_64_MemoryMapParams = { 
-  0xc00000000000,  // AndMask 
-  0x200000000000,  // XorMask 
-  0x100000000000,  // ShadowBase 
-  0x380000000000,  // OriginBase 
-}; 
- 
-// x86_64 NetBSD 
-static const MemoryMapParams NetBSD_X86_64_MemoryMapParams = { 
-  0,               // AndMask 
-  0x500000000000,  // XorMask 
-  0,               // ShadowBase 
-  0x100000000000,  // OriginBase 
-}; 
- 
-static const PlatformMemoryMapParams Linux_X86_MemoryMapParams = { 
-  &Linux_I386_MemoryMapParams, 
-  &Linux_X86_64_MemoryMapParams, 
-}; 
- 
-static const PlatformMemoryMapParams Linux_MIPS_MemoryMapParams = { 
-  nullptr, 
-  &Linux_MIPS64_MemoryMapParams, 
-}; 
- 
-static const PlatformMemoryMapParams Linux_PowerPC_MemoryMapParams = { 
-  nullptr, 
-  &Linux_PowerPC64_MemoryMapParams, 
-}; 
- 
-static const PlatformMemoryMapParams Linux_S390_MemoryMapParams = { 
-    nullptr, 
-    &Linux_S390X_MemoryMapParams, 
-}; 
- 
-static const PlatformMemoryMapParams Linux_ARM_MemoryMapParams = { 
-  nullptr, 
-  &Linux_AArch64_MemoryMapParams, 
-}; 
- 
-static const PlatformMemoryMapParams FreeBSD_X86_MemoryMapParams = { 
-  &FreeBSD_I386_MemoryMapParams, 
-  &FreeBSD_X86_64_MemoryMapParams, 
-}; 
- 
-static const PlatformMemoryMapParams NetBSD_X86_MemoryMapParams = { 
-  nullptr, 
-  &NetBSD_X86_64_MemoryMapParams, 
-}; 
- 
-namespace { 
- 
-/// Instrument functions of a module to detect uninitialized reads. 
-/// 
-/// Instantiating MemorySanitizer inserts the msan runtime library API function 
-/// declarations into the module if they don't exist already. Instantiating 
-/// ensures the __msan_init function is in the list of global constructors for 
-/// the module. 
-class MemorySanitizer { 
-public: 
-  MemorySanitizer(Module &M, MemorySanitizerOptions Options) 
-      : CompileKernel(Options.Kernel), TrackOrigins(Options.TrackOrigins), 
-        Recover(Options.Recover) { 
-    initializeModule(M); 
-  } 
- 
-  // MSan cannot be moved or copied because of MapParams. 
-  MemorySanitizer(MemorySanitizer &&) = delete; 
-  MemorySanitizer &operator=(MemorySanitizer &&) = delete; 
-  MemorySanitizer(const MemorySanitizer &) = delete; 
-  MemorySanitizer &operator=(const MemorySanitizer &) = delete; 
- 
-  bool sanitizeFunction(Function &F, TargetLibraryInfo &TLI); 
- 
-private: 
-  friend struct MemorySanitizerVisitor; 
-  friend struct VarArgAMD64Helper; 
-  friend struct VarArgMIPS64Helper; 
-  friend struct VarArgAArch64Helper; 
-  friend struct VarArgPowerPC64Helper; 
-  friend struct VarArgSystemZHelper; 
- 
-  void initializeModule(Module &M); 
-  void initializeCallbacks(Module &M); 
-  void createKernelApi(Module &M); 
-  void createUserspaceApi(Module &M); 
- 
-  /// True if we're compiling the Linux kernel. 
-  bool CompileKernel; 
-  /// Track origins (allocation points) of uninitialized values. 
-  int TrackOrigins; 
-  bool Recover; 
- 
-  LLVMContext *C; 
-  Type *IntptrTy; 
-  Type *OriginTy; 
- 
-  // XxxTLS variables represent the per-thread state in MSan and per-task state 
-  // in KMSAN. 
-  // For the userspace these point to thread-local globals. In the kernel land 
-  // they point to the members of a per-task struct obtained via a call to 
-  // __msan_get_context_state(). 
- 
-  /// Thread-local shadow storage for function parameters. 
-  Value *ParamTLS; 
- 
-  /// Thread-local origin storage for function parameters. 
-  Value *ParamOriginTLS; 
- 
-  /// Thread-local shadow storage for function return value. 
-  Value *RetvalTLS; 
- 
-  /// Thread-local origin storage for function return value. 
-  Value *RetvalOriginTLS; 
- 
-  /// Thread-local shadow storage for in-register va_arg function 
-  /// parameters (x86_64-specific). 
-  Value *VAArgTLS; 
- 
-  /// Thread-local shadow storage for in-register va_arg function 
-  /// parameters (x86_64-specific). 
-  Value *VAArgOriginTLS; 
- 
-  /// Thread-local shadow storage for va_arg overflow area 
-  /// (x86_64-specific). 
-  Value *VAArgOverflowSizeTLS; 
- 
-  /// Are the instrumentation callbacks set up? 
-  bool CallbacksInitialized = false; 
- 
-  /// The run-time callback to print a warning. 
-  FunctionCallee WarningFn; 
- 
-  // These arrays are indexed by log2(AccessSize). 
-  FunctionCallee MaybeWarningFn[kNumberOfAccessSizes]; 
-  FunctionCallee MaybeStoreOriginFn[kNumberOfAccessSizes]; 
- 
-  /// Run-time helper that generates a new origin value for a stack 
-  /// allocation. 
-  FunctionCallee MsanSetAllocaOrigin4Fn; 
- 
-  /// Run-time helper that poisons stack on function entry. 
-  FunctionCallee MsanPoisonStackFn; 
- 
-  /// Run-time helper that records a store (or any event) of an 
-  /// uninitialized value and returns an updated origin id encoding this info. 
-  FunctionCallee MsanChainOriginFn; 
- 
+
+namespace {
+
+// Memory map parameters used in application-to-shadow address calculation.
+// Offset = (Addr & ~AndMask) ^ XorMask
+// Shadow = ShadowBase + Offset
+// Origin = OriginBase + Offset
+struct MemoryMapParams {
+  uint64_t AndMask;
+  uint64_t XorMask;
+  uint64_t ShadowBase;
+  uint64_t OriginBase;
+};
+
+struct PlatformMemoryMapParams {
+  const MemoryMapParams *bits32;
+  const MemoryMapParams *bits64;
+};
+
+} // end anonymous namespace
+
+// i386 Linux
+static const MemoryMapParams Linux_I386_MemoryMapParams = {
+  0x000080000000,  // AndMask
+  0,               // XorMask (not used)
+  0,               // ShadowBase (not used)
+  0x000040000000,  // OriginBase
+};
+
+// x86_64 Linux
+static const MemoryMapParams Linux_X86_64_MemoryMapParams = {
+#ifdef MSAN_LINUX_X86_64_OLD_MAPPING
+  0x400000000000,  // AndMask
+  0,               // XorMask (not used)
+  0,               // ShadowBase (not used)
+  0x200000000000,  // OriginBase
+#else
+  0,               // AndMask (not used)
+  0x500000000000,  // XorMask
+  0,               // ShadowBase (not used)
+  0x100000000000,  // OriginBase
+#endif
+};
+
+// mips64 Linux
+static const MemoryMapParams Linux_MIPS64_MemoryMapParams = {
+  0,               // AndMask (not used)
+  0x008000000000,  // XorMask
+  0,               // ShadowBase (not used)
+  0x002000000000,  // OriginBase
+};
+
+// ppc64 Linux
+static const MemoryMapParams Linux_PowerPC64_MemoryMapParams = {
+  0xE00000000000,  // AndMask
+  0x100000000000,  // XorMask
+  0x080000000000,  // ShadowBase
+  0x1C0000000000,  // OriginBase
+};
+
+// s390x Linux
+static const MemoryMapParams Linux_S390X_MemoryMapParams = {
+    0xC00000000000, // AndMask
+    0,              // XorMask (not used)
+    0x080000000000, // ShadowBase
+    0x1C0000000000, // OriginBase
+};
+
+// aarch64 Linux
+static const MemoryMapParams Linux_AArch64_MemoryMapParams = {
+  0,               // AndMask (not used)
+  0x06000000000,   // XorMask
+  0,               // ShadowBase (not used)
+  0x01000000000,   // OriginBase
+};
+
+// i386 FreeBSD
+static const MemoryMapParams FreeBSD_I386_MemoryMapParams = {
+  0x000180000000,  // AndMask
+  0x000040000000,  // XorMask
+  0x000020000000,  // ShadowBase
+  0x000700000000,  // OriginBase
+};
+
+// x86_64 FreeBSD
+static const MemoryMapParams FreeBSD_X86_64_MemoryMapParams = {
+  0xc00000000000,  // AndMask
+  0x200000000000,  // XorMask
+  0x100000000000,  // ShadowBase
+  0x380000000000,  // OriginBase
+};
+
+// x86_64 NetBSD
+static const MemoryMapParams NetBSD_X86_64_MemoryMapParams = {
+  0,               // AndMask
+  0x500000000000,  // XorMask
+  0,               // ShadowBase
+  0x100000000000,  // OriginBase
+};
+
+static const PlatformMemoryMapParams Linux_X86_MemoryMapParams = {
+  &Linux_I386_MemoryMapParams,
+  &Linux_X86_64_MemoryMapParams,
+};
+
+static const PlatformMemoryMapParams Linux_MIPS_MemoryMapParams = {
+  nullptr,
+  &Linux_MIPS64_MemoryMapParams,
+};
+
+// Linux on PowerPC: the first slot is null; only the ppc64 parameter set is
+// provided.
+static const PlatformMemoryMapParams Linux_PowerPC_MemoryMapParams = {
+  nullptr,
+  &Linux_PowerPC64_MemoryMapParams,
+};
+
+// Linux on S390: the first slot is null; only the s390x parameter set is
+// provided.
+static const PlatformMemoryMapParams Linux_S390_MemoryMapParams = {
+    nullptr,
+    &Linux_S390X_MemoryMapParams,
+};
+
+// Linux on ARM: the first slot is null; only the aarch64 parameter set is
+// provided.
+static const PlatformMemoryMapParams Linux_ARM_MemoryMapParams = {
+  nullptr,
+  &Linux_AArch64_MemoryMapParams,
+};
+
+// FreeBSD on x86: the i386 parameter set first, the x86_64 parameter set
+// second.
+static const PlatformMemoryMapParams FreeBSD_X86_MemoryMapParams = {
+  &FreeBSD_I386_MemoryMapParams,
+  &FreeBSD_X86_64_MemoryMapParams,
+};
+
+// NetBSD on x86: the first slot is null; only the x86_64 parameter set is
+// provided.
+static const PlatformMemoryMapParams NetBSD_X86_MemoryMapParams = {
+  nullptr,
+  &NetBSD_X86_64_MemoryMapParams,
+};
+
+namespace {
+
+/// Instrument functions of a module to detect uninitialized reads.
+///
+/// Constructing a MemorySanitizer runs initializeModule() on the module, which
+/// selects the memory map parameters for the target. The runtime library API
+/// declarations are inserted by initializeCallbacks() if they don't exist
+/// already; the module constructor is set up separately by insertModuleCtor().
+class MemorySanitizer {
+public:
+  MemorySanitizer(Module &M, MemorySanitizerOptions Options)
+      : CompileKernel(Options.Kernel), TrackOrigins(Options.TrackOrigins),
+        Recover(Options.Recover) {
+    initializeModule(M);
+  }
+
+  // MSan cannot be moved or copied because of MapParams.
+  MemorySanitizer(MemorySanitizer &&) = delete;
+  MemorySanitizer &operator=(MemorySanitizer &&) = delete;
+  MemorySanitizer(const MemorySanitizer &) = delete;
+  MemorySanitizer &operator=(const MemorySanitizer &) = delete;
+
+  bool sanitizeFunction(Function &F, TargetLibraryInfo &TLI);
+
+private:
+  friend struct MemorySanitizerVisitor;
+  friend struct VarArgAMD64Helper;
+  friend struct VarArgMIPS64Helper;
+  friend struct VarArgAArch64Helper;
+  friend struct VarArgPowerPC64Helper;
+  friend struct VarArgSystemZHelper;
+
+  void initializeModule(Module &M);
+  void initializeCallbacks(Module &M);
+  void createKernelApi(Module &M);
+  void createUserspaceApi(Module &M);
+
+  /// True if we're compiling the Linux kernel.
+  bool CompileKernel;
+  /// Track origins (allocation points) of uninitialized values.
+  int TrackOrigins;
+  bool Recover;
+
+  LLVMContext *C;
+  Type *IntptrTy;
+  Type *OriginTy;
+
+  // XxxTLS variables represent the per-thread state in MSan and per-task state
+  // in KMSAN.
+  // For the userspace these point to thread-local globals. In the kernel land
+  // they point to the members of a per-task struct obtained via a call to
+  // __msan_get_context_state().
+
+  /// Thread-local shadow storage for function parameters.
+  Value *ParamTLS;
+
+  /// Thread-local origin storage for function parameters.
+  Value *ParamOriginTLS;
+
+  /// Thread-local shadow storage for function return value.
+  Value *RetvalTLS;
+
+  /// Thread-local origin storage for function return value.
+  Value *RetvalOriginTLS;
+
+  /// Thread-local shadow storage for in-register va_arg function
+  /// parameters (x86_64-specific).
+  Value *VAArgTLS;
+
+  /// Thread-local origin storage for in-register va_arg function
+  /// parameters (x86_64-specific).
+  Value *VAArgOriginTLS;
+
+  /// Thread-local storage for the size of the va_arg overflow area
+  /// (x86_64-specific).
+  Value *VAArgOverflowSizeTLS;
+
+  /// Are the instrumentation callbacks set up?
+  bool CallbacksInitialized = false;
+
+  /// The run-time callback to print a warning.
+  FunctionCallee WarningFn;
+
+  // These arrays are indexed by log2(AccessSize).
+  FunctionCallee MaybeWarningFn[kNumberOfAccessSizes];
+  FunctionCallee MaybeStoreOriginFn[kNumberOfAccessSizes];
+
+  /// Run-time helper that generates a new origin value for a stack
+  /// allocation.
+  FunctionCallee MsanSetAllocaOrigin4Fn;
+
+  /// Run-time helper that poisons stack on function entry.
+  FunctionCallee MsanPoisonStackFn;
+
+  /// Run-time helper that records a store (or any event) of an
+  /// uninitialized value and returns an updated origin id encoding this info.
+  FunctionCallee MsanChainOriginFn;
+
   /// Run-time helper that paints an origin over a region.
   FunctionCallee MsanSetOriginFn;
 
-  /// MSan runtime replacements for memmove, memcpy and memset. 
-  FunctionCallee MemmoveFn, MemcpyFn, MemsetFn; 
- 
-  /// KMSAN callback for task-local function argument shadow. 
-  StructType *MsanContextStateTy; 
-  FunctionCallee MsanGetContextStateFn; 
- 
-  /// Functions for poisoning/unpoisoning local variables 
-  FunctionCallee MsanPoisonAllocaFn, MsanUnpoisonAllocaFn; 
- 
-  /// Each of the MsanMetadataPtrXxx functions returns a pair of shadow/origin 
-  /// pointers. 
-  FunctionCallee MsanMetadataPtrForLoadN, MsanMetadataPtrForStoreN; 
-  FunctionCallee MsanMetadataPtrForLoad_1_8[4]; 
-  FunctionCallee MsanMetadataPtrForStore_1_8[4]; 
-  FunctionCallee MsanInstrumentAsmStoreFn; 
- 
-  /// Helper to choose between different MsanMetadataPtrXxx(). 
-  FunctionCallee getKmsanShadowOriginAccessFn(bool isStore, int size); 
- 
-  /// Memory map parameters used in application-to-shadow calculation. 
-  const MemoryMapParams *MapParams; 
- 
-  /// Custom memory map parameters used when -msan-shadow-base or 
-  // -msan-origin-base is provided. 
-  MemoryMapParams CustomMapParams; 
- 
-  MDNode *ColdCallWeights; 
- 
-  /// Branch weights for origin store. 
-  MDNode *OriginStoreWeights; 
-}; 
- 
-void insertModuleCtor(Module &M) { 
-  getOrCreateSanitizerCtorAndInitFunctions( 
-      M, kMsanModuleCtorName, kMsanInitName, 
-      /*InitArgTypes=*/{}, 
-      /*InitArgs=*/{}, 
-      // This callback is invoked when the functions are created the first 
-      // time. Hook them into the global ctors list in that case: 
-      [&](Function *Ctor, FunctionCallee) { 
-        if (!ClWithComdat) { 
-          appendToGlobalCtors(M, Ctor, 0); 
-          return; 
-        } 
-        Comdat *MsanCtorComdat = M.getOrInsertComdat(kMsanModuleCtorName); 
-        Ctor->setComdat(MsanCtorComdat); 
-        appendToGlobalCtors(M, Ctor, 0, Ctor); 
-      }); 
-} 
- 
-/// A legacy function pass for msan instrumentation. 
-/// 
-/// Instruments functions to detect uninitialized reads. 
-struct MemorySanitizerLegacyPass : public FunctionPass { 
-  // Pass identification, replacement for typeid. 
-  static char ID; 
- 
-  MemorySanitizerLegacyPass(MemorySanitizerOptions Options = {}) 
-      : FunctionPass(ID), Options(Options) { 
-    initializeMemorySanitizerLegacyPassPass(*PassRegistry::getPassRegistry()); 
-  } 
-  StringRef getPassName() const override { return "MemorySanitizerLegacyPass"; } 
- 
-  void getAnalysisUsage(AnalysisUsage &AU) const override { 
-    AU.addRequired<TargetLibraryInfoWrapperPass>(); 
-  } 
- 
-  bool runOnFunction(Function &F) override { 
-    return MSan->sanitizeFunction( 
-        F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F)); 
-  } 
-  bool doInitialization(Module &M) override; 
- 
-  Optional<MemorySanitizer> MSan; 
-  MemorySanitizerOptions Options; 
-}; 
- 
-template <class T> T getOptOrDefault(const cl::opt<T> &Opt, T Default) { 
-  return (Opt.getNumOccurrences() > 0) ? Opt : Default; 
-} 
- 
-} // end anonymous namespace 
- 
-MemorySanitizerOptions::MemorySanitizerOptions(int TO, bool R, bool K) 
-    : Kernel(getOptOrDefault(ClEnableKmsan, K)), 
-      TrackOrigins(getOptOrDefault(ClTrackOrigins, Kernel ? 2 : TO)), 
-      Recover(getOptOrDefault(ClKeepGoing, Kernel || R)) {} 
- 
-PreservedAnalyses MemorySanitizerPass::run(Function &F, 
-                                           FunctionAnalysisManager &FAM) { 
-  MemorySanitizer Msan(*F.getParent(), Options); 
-  if (Msan.sanitizeFunction(F, FAM.getResult<TargetLibraryAnalysis>(F))) 
-    return PreservedAnalyses::none(); 
-  return PreservedAnalyses::all(); 
-} 
- 
-PreservedAnalyses MemorySanitizerPass::run(Module &M, 
-                                           ModuleAnalysisManager &AM) { 
-  if (Options.Kernel) 
-    return PreservedAnalyses::all(); 
-  insertModuleCtor(M); 
-  return PreservedAnalyses::none(); 
-} 
- 
-char MemorySanitizerLegacyPass::ID = 0; 
- 
-INITIALIZE_PASS_BEGIN(MemorySanitizerLegacyPass, "msan", 
-                      "MemorySanitizer: detects uninitialized reads.", false, 
-                      false) 
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 
-INITIALIZE_PASS_END(MemorySanitizerLegacyPass, "msan", 
-                    "MemorySanitizer: detects uninitialized reads.", false, 
-                    false) 
- 
-FunctionPass * 
-llvm::createMemorySanitizerLegacyPassPass(MemorySanitizerOptions Options) { 
-  return new MemorySanitizerLegacyPass(Options); 
-} 
- 
-/// Create a non-const global initialized with the given string. 
-/// 
-/// Creates a writable global for Str so that we can pass it to the 
-/// run-time lib. Runtime uses first 4 bytes of the string to store the 
-/// frame ID, so the string needs to be mutable. 
-static GlobalVariable *createPrivateNonConstGlobalForString(Module &M, 
-                                                            StringRef Str) { 
-  Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str); 
-  return new GlobalVariable(M, StrConst->getType(), /*isConstant=*/false, 
-                            GlobalValue::PrivateLinkage, StrConst, ""); 
-} 
- 
-/// Create KMSAN API callbacks. 
-void MemorySanitizer::createKernelApi(Module &M) { 
-  IRBuilder<> IRB(*C); 
- 
-  // These will be initialized in insertKmsanPrologue(). 
-  RetvalTLS = nullptr; 
-  RetvalOriginTLS = nullptr; 
-  ParamTLS = nullptr; 
-  ParamOriginTLS = nullptr; 
-  VAArgTLS = nullptr; 
-  VAArgOriginTLS = nullptr; 
-  VAArgOverflowSizeTLS = nullptr; 
- 
-  WarningFn = M.getOrInsertFunction("__msan_warning", IRB.getVoidTy(), 
-                                    IRB.getInt32Ty()); 
-  // Requests the per-task context state (kmsan_context_state*) from the 
-  // runtime library. 
-  MsanContextStateTy = StructType::get( 
-      ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8), 
-      ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8), 
-      ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8), 
-      ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8), /* va_arg_origin */ 
-      IRB.getInt64Ty(), ArrayType::get(OriginTy, kParamTLSSize / 4), OriginTy, 
-      OriginTy); 
-  MsanGetContextStateFn = M.getOrInsertFunction( 
-      "__msan_get_context_state", PointerType::get(MsanContextStateTy, 0)); 
- 
-  Type *RetTy = StructType::get(PointerType::get(IRB.getInt8Ty(), 0), 
-                                PointerType::get(IRB.getInt32Ty(), 0)); 
- 
-  for (int ind = 0, size = 1; ind < 4; ind++, size <<= 1) { 
-    std::string name_load = 
-        "__msan_metadata_ptr_for_load_" + std::to_string(size); 
-    std::string name_store = 
-        "__msan_metadata_ptr_for_store_" + std::to_string(size); 
-    MsanMetadataPtrForLoad_1_8[ind] = M.getOrInsertFunction( 
-        name_load, RetTy, PointerType::get(IRB.getInt8Ty(), 0)); 
-    MsanMetadataPtrForStore_1_8[ind] = M.getOrInsertFunction( 
-        name_store, RetTy, PointerType::get(IRB.getInt8Ty(), 0)); 
-  } 
- 
-  MsanMetadataPtrForLoadN = M.getOrInsertFunction( 
-      "__msan_metadata_ptr_for_load_n", RetTy, 
-      PointerType::get(IRB.getInt8Ty(), 0), IRB.getInt64Ty()); 
-  MsanMetadataPtrForStoreN = M.getOrInsertFunction( 
-      "__msan_metadata_ptr_for_store_n", RetTy, 
-      PointerType::get(IRB.getInt8Ty(), 0), IRB.getInt64Ty()); 
- 
-  // Functions for poisoning and unpoisoning memory. 
-  MsanPoisonAllocaFn = 
-      M.getOrInsertFunction("__msan_poison_alloca", IRB.getVoidTy(), 
-                            IRB.getInt8PtrTy(), IntptrTy, IRB.getInt8PtrTy()); 
-  MsanUnpoisonAllocaFn = M.getOrInsertFunction( 
-      "__msan_unpoison_alloca", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy); 
-} 
- 
-static Constant *getOrInsertGlobal(Module &M, StringRef Name, Type *Ty) { 
-  return M.getOrInsertGlobal(Name, Ty, [&] { 
-    return new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage, 
-                              nullptr, Name, nullptr, 
-                              GlobalVariable::InitialExecTLSModel); 
-  }); 
-} 
- 
-/// Insert declarations for userspace-specific functions and globals. 
-void MemorySanitizer::createUserspaceApi(Module &M) { 
-  IRBuilder<> IRB(*C); 
- 
-  // Create the callback. 
-  // FIXME: this function should have "Cold" calling conv, 
-  // which is not yet implemented. 
-  StringRef WarningFnName = Recover ? "__msan_warning_with_origin" 
-                                    : "__msan_warning_with_origin_noreturn"; 
-  WarningFn = 
-      M.getOrInsertFunction(WarningFnName, IRB.getVoidTy(), IRB.getInt32Ty()); 
- 
-  // Create the global TLS variables. 
-  RetvalTLS = 
-      getOrInsertGlobal(M, "__msan_retval_tls", 
-                        ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8)); 
- 
-  RetvalOriginTLS = getOrInsertGlobal(M, "__msan_retval_origin_tls", OriginTy); 
- 
-  ParamTLS = 
-      getOrInsertGlobal(M, "__msan_param_tls", 
-                        ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8)); 
- 
-  ParamOriginTLS = 
-      getOrInsertGlobal(M, "__msan_param_origin_tls", 
-                        ArrayType::get(OriginTy, kParamTLSSize / 4)); 
- 
-  VAArgTLS = 
-      getOrInsertGlobal(M, "__msan_va_arg_tls", 
-                        ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8)); 
- 
-  VAArgOriginTLS = 
-      getOrInsertGlobal(M, "__msan_va_arg_origin_tls", 
-                        ArrayType::get(OriginTy, kParamTLSSize / 4)); 
- 
-  VAArgOverflowSizeTLS = 
-      getOrInsertGlobal(M, "__msan_va_arg_overflow_size_tls", IRB.getInt64Ty()); 
- 
-  for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes; 
-       AccessSizeIndex++) { 
-    unsigned AccessSize = 1 << AccessSizeIndex; 
-    std::string FunctionName = "__msan_maybe_warning_" + itostr(AccessSize); 
-    SmallVector<std::pair<unsigned, Attribute>, 2> MaybeWarningFnAttrs; 
-    MaybeWarningFnAttrs.push_back(std::make_pair( 
-        AttributeList::FirstArgIndex, Attribute::get(*C, Attribute::ZExt))); 
-    MaybeWarningFnAttrs.push_back(std::make_pair( 
-        AttributeList::FirstArgIndex + 1, Attribute::get(*C, Attribute::ZExt))); 
-    MaybeWarningFn[AccessSizeIndex] = M.getOrInsertFunction( 
-        FunctionName, AttributeList::get(*C, MaybeWarningFnAttrs), 
-        IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), IRB.getInt32Ty()); 
- 
-    FunctionName = "__msan_maybe_store_origin_" + itostr(AccessSize); 
-    SmallVector<std::pair<unsigned, Attribute>, 2> MaybeStoreOriginFnAttrs; 
-    MaybeStoreOriginFnAttrs.push_back(std::make_pair( 
-        AttributeList::FirstArgIndex, Attribute::get(*C, Attribute::ZExt))); 
-    MaybeStoreOriginFnAttrs.push_back(std::make_pair( 
-        AttributeList::FirstArgIndex + 2, Attribute::get(*C, Attribute::ZExt))); 
-    MaybeStoreOriginFn[AccessSizeIndex] = M.getOrInsertFunction( 
-        FunctionName, AttributeList::get(*C, MaybeStoreOriginFnAttrs), 
-        IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), IRB.getInt8PtrTy(), 
-        IRB.getInt32Ty()); 
-  } 
- 
-  MsanSetAllocaOrigin4Fn = M.getOrInsertFunction( 
-    "__msan_set_alloca_origin4", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy, 
-    IRB.getInt8PtrTy(), IntptrTy); 
-  MsanPoisonStackFn = 
-      M.getOrInsertFunction("__msan_poison_stack", IRB.getVoidTy(), 
-                            IRB.getInt8PtrTy(), IntptrTy); 
-} 
- 
-/// Insert extern declaration of runtime-provided functions and globals. 
-void MemorySanitizer::initializeCallbacks(Module &M) { 
-  // Only do this once. 
-  if (CallbacksInitialized) 
-    return; 
- 
-  IRBuilder<> IRB(*C); 
-  // Initialize callbacks that are common for kernel and userspace 
-  // instrumentation. 
-  MsanChainOriginFn = M.getOrInsertFunction( 
-    "__msan_chain_origin", IRB.getInt32Ty(), IRB.getInt32Ty()); 
+  /// MSan runtime replacements for memmove, memcpy and memset.
+  FunctionCallee MemmoveFn, MemcpyFn, MemsetFn;
+
+  /// KMSAN callback for task-local function argument shadow.
+  StructType *MsanContextStateTy;
+  FunctionCallee MsanGetContextStateFn;
+
+  /// Functions for poisoning/unpoisoning local variables
+  FunctionCallee MsanPoisonAllocaFn, MsanUnpoisonAllocaFn;
+
+  /// Each of the MsanMetadataPtrXxx functions returns a pair of shadow/origin
+  /// pointers.
+  FunctionCallee MsanMetadataPtrForLoadN, MsanMetadataPtrForStoreN;
+  FunctionCallee MsanMetadataPtrForLoad_1_8[4];
+  FunctionCallee MsanMetadataPtrForStore_1_8[4];
+  FunctionCallee MsanInstrumentAsmStoreFn;
+
+  /// Helper to choose between different MsanMetadataPtrXxx().
+  FunctionCallee getKmsanShadowOriginAccessFn(bool isStore, int size);
+
+  /// Memory map parameters used in application-to-shadow calculation.
+  const MemoryMapParams *MapParams;
+
+  /// Custom memory map parameters used when -msan-shadow-base or
+  /// -msan-origin-base is provided.
+  MemoryMapParams CustomMapParams;
+
+  MDNode *ColdCallWeights;
+
+  /// Branch weights for origin store.
+  MDNode *OriginStoreWeights;
+};
+
+/// Set up the MSan module constructor for module M.
+///
+/// Delegates to getOrCreateSanitizerCtorAndInitFunctions() using the
+/// kMsanModuleCtorName / kMsanInitName names and empty init argument lists.
+/// The lambda passed as the last argument hooks the constructor into the
+/// module's global ctors list, placing it in a comdat first when ClWithComdat
+/// is set.
+void insertModuleCtor(Module &M) {
+  getOrCreateSanitizerCtorAndInitFunctions(
+      M, kMsanModuleCtorName, kMsanInitName,
+      /*InitArgTypes=*/{},
+      /*InitArgs=*/{},
+      // This callback is invoked when the functions are created the first
+      // time. Hook them into the global ctors list in that case:
+      [&](Function *Ctor, FunctionCallee) {
+        // Without comdat support requested, a plain registration is enough.
+        if (!ClWithComdat) {
+          appendToGlobalCtors(M, Ctor, 0);
+          return;
+        }
+        // Otherwise put the constructor in a comdat named after it and pass
+        // the constructor along as the final appendToGlobalCtors() argument.
+        Comdat *MsanCtorComdat = M.getOrInsertComdat(kMsanModuleCtorName);
+        Ctor->setComdat(MsanCtorComdat);
+        appendToGlobalCtors(M, Ctor, 0, Ctor);
+      });
+}
+
+/// A legacy function pass for msan instrumentation.
+///
+/// Instruments functions to detect uninitialized reads. The actual work is
+/// forwarded to a MemorySanitizer instance held in the MSan member.
+struct MemorySanitizerLegacyPass : public FunctionPass {
+  // Pass identification, replacement for typeid.
+  static char ID;
+
+  /// Stores the options and registers the pass with the global pass registry.
+  MemorySanitizerLegacyPass(MemorySanitizerOptions Options = {})
+      : FunctionPass(ID), Options(Options) {
+    initializeMemorySanitizerLegacyPassPass(*PassRegistry::getPassRegistry());
+  }
+  StringRef getPassName() const override { return "MemorySanitizerLegacyPass"; }
+
+  /// The pass needs TargetLibraryInfo, which runOnFunction() hands to
+  /// sanitizeFunction().
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<TargetLibraryInfoWrapperPass>();
+  }
+
+  /// Instruments F and returns whatever sanitizeFunction() reports.
+  /// Dereferences MSan, so MSan must already hold a value at this point.
+  bool runOnFunction(Function &F) override {
+    return MSan->sanitizeFunction(
+        F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F));
+  }
+  bool doInitialization(Module &M) override;
+
+  /// The sanitizer instance; empty until it is populated (presumably by
+  /// doInitialization() -- its definition is outside this struct).
+  Optional<MemorySanitizer> MSan;
+  /// Options captured at construction time.
+  MemorySanitizerOptions Options;
+};
+
+/// Returns the value of the command-line option Opt if it occurred at least
+/// once on the command line, and Default otherwise.
+template <class T> T getOptOrDefault(const cl::opt<T> &Opt, T Default) {
+  return (Opt.getNumOccurrences() > 0) ? Opt : Default;
+}
+
+} // end anonymous namespace
+
+/// Builds the option set from the constructor arguments, letting explicitly
+/// passed command-line flags win over them:
+///  - Kernel:       ClEnableKmsan if given, otherwise K.
+///  - TrackOrigins: ClTrackOrigins if given, otherwise 2 for kernel builds and
+///                  TO for everything else.
+///  - Recover:      ClKeepGoing if given, otherwise Kernel || R.
+///
+/// NOTE(review): the TrackOrigins and Recover initializers read the Kernel
+/// member, so they depend on Kernel being initialized before them -- confirm
+/// the member declaration order in the header.
+MemorySanitizerOptions::MemorySanitizerOptions(int TO, bool R, bool K)
+    : Kernel(getOptOrDefault(ClEnableKmsan, K)),
+      TrackOrigins(getOptOrDefault(ClTrackOrigins, Kernel ? 2 : TO)),
+      Recover(getOptOrDefault(ClKeepGoing, Kernel || R)) {}
+
+/// Function-level entry point of the new pass manager pass.
+///
+/// Constructs a MemorySanitizer for F's parent module on every invocation and
+/// instruments F with it. Reports no analyses as preserved when
+/// sanitizeFunction() returns true, and all analyses as preserved otherwise.
+PreservedAnalyses MemorySanitizerPass::run(Function &F,
+                                           FunctionAnalysisManager &FAM) {
+  MemorySanitizer Msan(*F.getParent(), Options);
+  if (Msan.sanitizeFunction(F, FAM.getResult<TargetLibraryAnalysis>(F)))
+    return PreservedAnalyses::none();
+  return PreservedAnalyses::all();
+}
+
+/// Module-level entry point of the new pass manager pass.
+///
+/// For kernel instrumentation the module is left untouched and all analyses
+/// are reported as preserved. Otherwise the module constructor is set up via
+/// insertModuleCtor() and no analyses are reported as preserved.
+PreservedAnalyses MemorySanitizerPass::run(Module &M,
+                                           ModuleAnalysisManager &AM) {
+  if (Options.Kernel)
+    return PreservedAnalyses::all();
+  insertModuleCtor(M);
+  return PreservedAnalyses::none();
+}
+
+// Definition of the pass identification member declared in
+// MemorySanitizerLegacyPass.
+char MemorySanitizerLegacyPass::ID = 0;
+
+// Register the legacy pass under the name "msan" and declare
+// TargetLibraryInfoWrapperPass as a dependency of it.
+INITIALIZE_PASS_BEGIN(MemorySanitizerLegacyPass, "msan",
+                      "MemorySanitizer: detects uninitialized reads.", false,
+                      false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(MemorySanitizerLegacyPass, "msan",
+                    "MemorySanitizer: detects uninitialized reads.", false,
+                    false)
+
+/// Factory for the legacy pass: returns a newly allocated
+/// MemorySanitizerLegacyPass configured with Options.
+FunctionPass *
+llvm::createMemorySanitizerLegacyPassPass(MemorySanitizerOptions Options) {
+  return new MemorySanitizerLegacyPass(Options);
+}
+
+/// Create a non-const global initialized with the given string.
+///
+/// Creates a writable global for Str so that we can pass it to the
+/// run-time lib. Runtime uses first 4 bytes of the string to store the
+/// frame ID, so the string needs to be mutable.
+///
+/// The global is added to module M with private linkage and an empty name,
+/// and its type is taken from the string constant built from Str.
+static GlobalVariable *createPrivateNonConstGlobalForString(Module &M,
+                                                            StringRef Str) {
+  Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
+  return new GlobalVariable(M, StrConst->getType(), /*isConstant=*/false,
+                            GlobalValue::PrivateLinkage, StrConst, "");
+}
+
+/// Create KMSAN API callbacks.
+///
+/// Clears the XxxTLS pointers, then declares in module M the kernel flavour of
+/// the runtime interface: the warning function, the context-state getter, the
+/// metadata pointer getters for loads and stores, and the alloca
+/// poison/unpoison helpers.
+void MemorySanitizer::createKernelApi(Module &M) {
+  IRBuilder<> IRB(*C);
+
+  // These will be initialized in insertKmsanPrologue().
+  RetvalTLS = nullptr;
+  RetvalOriginTLS = nullptr;
+  ParamTLS = nullptr;
+  ParamOriginTLS = nullptr;
+  VAArgTLS = nullptr;
+  VAArgOriginTLS = nullptr;
+  VAArgOverflowSizeTLS = nullptr;
+
+  // Kernel warning callback: void __msan_warning(i32).
+  WarningFn = M.getOrInsertFunction("__msan_warning", IRB.getVoidTy(),
+                                    IRB.getInt32Ty());
+  // Requests the per-task context state (kmsan_context_state*) from the
+  // runtime library.
+  // NOTE(review): the eight members below presumably mirror the layout of the
+  // runtime's kmsan_context_state struct -- confirm against the KMSAN runtime.
+  MsanContextStateTy = StructType::get(
+      ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
+      ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8),
+      ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
+      ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8), /* va_arg_origin */
+      IRB.getInt64Ty(), ArrayType::get(OriginTy, kParamTLSSize / 4), OriginTy,
+      OriginTy);
+  MsanGetContextStateFn = M.getOrInsertFunction(
+      "__msan_get_context_state", PointerType::get(MsanContextStateTy, 0));
+
+  // Return type shared by all metadata getters: a struct of an i8* and an
+  // i32*.
+  Type *RetTy = StructType::get(PointerType::get(IRB.getInt8Ty(), 0),
+                                PointerType::get(IRB.getInt32Ty(), 0));
+
+  // Fixed-size getters: ind runs over 0..3 while size takes 1, 2, 4 and 8, so
+  // slot ind holds the functions whose name ends in 2^ind.
+  for (int ind = 0, size = 1; ind < 4; ind++, size <<= 1) {
+    std::string name_load =
+        "__msan_metadata_ptr_for_load_" + std::to_string(size);
+    std::string name_store =
+        "__msan_metadata_ptr_for_store_" + std::to_string(size);
+    MsanMetadataPtrForLoad_1_8[ind] = M.getOrInsertFunction(
+        name_load, RetTy, PointerType::get(IRB.getInt8Ty(), 0));
+    MsanMetadataPtrForStore_1_8[ind] = M.getOrInsertFunction(
+        name_store, RetTy, PointerType::get(IRB.getInt8Ty(), 0));
+  }
+
+  // Variable-size getters take an additional i64 argument.
+  MsanMetadataPtrForLoadN = M.getOrInsertFunction(
+      "__msan_metadata_ptr_for_load_n", RetTy,
+      PointerType::get(IRB.getInt8Ty(), 0), IRB.getInt64Ty());
+  MsanMetadataPtrForStoreN = M.getOrInsertFunction(
+      "__msan_metadata_ptr_for_store_n", RetTy,
+      PointerType::get(IRB.getInt8Ty(), 0), IRB.getInt64Ty());
+
+  // Functions for poisoning and unpoisoning memory.
+  MsanPoisonAllocaFn =
+      M.getOrInsertFunction("__msan_poison_alloca", IRB.getVoidTy(),
+                            IRB.getInt8PtrTy(), IntptrTy, IRB.getInt8PtrTy());
+  MsanUnpoisonAllocaFn = M.getOrInsertFunction(
+      "__msan_unpoison_alloca", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy);
+}
+
+/// Looks up the global named Name of type Ty in M via Module::getOrInsertGlobal.
+/// The lambda supplies the variable to create: non-constant, with external
+/// linkage, no initializer and the initial-exec TLS model.
+static Constant *getOrInsertGlobal(Module &M, StringRef Name, Type *Ty) {
+  return M.getOrInsertGlobal(Name, Ty, [&] {
+    return new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage,
+                              nullptr, Name, nullptr,
+                              GlobalVariable::InitialExecTLSModel);
+  });
+}
+
+/// Insert declarations for userspace-specific functions and globals.
+///
+/// Declares in module M the warning callback, the thread-local globals behind
+/// the XxxTLS members, the per-access-size __msan_maybe_warning_N /
+/// __msan_maybe_store_origin_N helpers, and the stack poisoning helpers.
+void MemorySanitizer::createUserspaceApi(Module &M) {
+  IRBuilder<> IRB(*C);
+
+  // Create the callback.
+  // FIXME: this function should have "Cold" calling conv,
+  // which is not yet implemented.
+  // The noreturn variant is used unless Recover is set.
+  StringRef WarningFnName = Recover ? "__msan_warning_with_origin"
+                                    : "__msan_warning_with_origin_noreturn";
+  WarningFn =
+      M.getOrInsertFunction(WarningFnName, IRB.getVoidTy(), IRB.getInt32Ty());
+
+  // Create the global TLS variables.
+  // Shadow arrays are made of i64 elements (byte size / 8); origin arrays are
+  // made of OriginTy elements (byte size / 4).
+  RetvalTLS =
+      getOrInsertGlobal(M, "__msan_retval_tls",
+                        ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8));
+
+  RetvalOriginTLS = getOrInsertGlobal(M, "__msan_retval_origin_tls", OriginTy);
+
+  ParamTLS =
+      getOrInsertGlobal(M, "__msan_param_tls",
+                        ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8));
+
+  ParamOriginTLS =
+      getOrInsertGlobal(M, "__msan_param_origin_tls",
+                        ArrayType::get(OriginTy, kParamTLSSize / 4));
+
+  VAArgTLS =
+      getOrInsertGlobal(M, "__msan_va_arg_tls",
+                        ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8));
+
+  VAArgOriginTLS =
+      getOrInsertGlobal(M, "__msan_va_arg_origin_tls",
+                        ArrayType::get(OriginTy, kParamTLSSize / 4));
+
+  VAArgOverflowSizeTLS =
+      getOrInsertGlobal(M, "__msan_va_arg_overflow_size_tls", IRB.getInt64Ty());
+
+  // One pair of helpers per access size; slot AccessSizeIndex corresponds to
+  // an access of 2^AccessSizeIndex bytes.
+  for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
+       AccessSizeIndex++) {
+    unsigned AccessSize = 1 << AccessSizeIndex;
+    // void __msan_maybe_warning_N(iN*8, i32), zext on both arguments.
+    std::string FunctionName = "__msan_maybe_warning_" + itostr(AccessSize);
+    SmallVector<std::pair<unsigned, Attribute>, 2> MaybeWarningFnAttrs;
+    MaybeWarningFnAttrs.push_back(std::make_pair(
+        AttributeList::FirstArgIndex, Attribute::get(*C, Attribute::ZExt)));
+    MaybeWarningFnAttrs.push_back(std::make_pair(
+        AttributeList::FirstArgIndex + 1, Attribute::get(*C, Attribute::ZExt)));
+    MaybeWarningFn[AccessSizeIndex] = M.getOrInsertFunction(
+        FunctionName, AttributeList::get(*C, MaybeWarningFnAttrs),
+        IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), IRB.getInt32Ty());
+
+    // void __msan_maybe_store_origin_N(iN*8, i8*, i32), zext on the first and
+    // third arguments (the pointer argument in between gets no attribute).
+    FunctionName = "__msan_maybe_store_origin_" + itostr(AccessSize);
+    SmallVector<std::pair<unsigned, Attribute>, 2> MaybeStoreOriginFnAttrs;
+    MaybeStoreOriginFnAttrs.push_back(std::make_pair(
+        AttributeList::FirstArgIndex, Attribute::get(*C, Attribute::ZExt)));
+    MaybeStoreOriginFnAttrs.push_back(std::make_pair(
+        AttributeList::FirstArgIndex + 2, Attribute::get(*C, Attribute::ZExt)));
+    MaybeStoreOriginFn[AccessSizeIndex] = M.getOrInsertFunction(
+        FunctionName, AttributeList::get(*C, MaybeStoreOriginFnAttrs),
+        IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), IRB.getInt8PtrTy(),
+        IRB.getInt32Ty());
+  }
+
+  // Stack helpers: void __msan_set_alloca_origin4(i8*, intptr, i8*, intptr)
+  // and void __msan_poison_stack(i8*, intptr).
+  MsanSetAllocaOrigin4Fn = M.getOrInsertFunction(
+    "__msan_set_alloca_origin4", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy,
+    IRB.getInt8PtrTy(), IntptrTy);
+  MsanPoisonStackFn =
+      M.getOrInsertFunction("__msan_poison_stack", IRB.getVoidTy(),
+                            IRB.getInt8PtrTy(), IntptrTy);
+}
+
+/// Insert extern declaration of runtime-provided functions and globals.
+void MemorySanitizer::initializeCallbacks(Module &M) {
+  // Only do this once.
+  if (CallbacksInitialized)
+    return;
+
+  IRBuilder<> IRB(*C);
+  // Initialize callbacks that are common for kernel and userspace
+  // instrumentation.
+  MsanChainOriginFn = M.getOrInsertFunction(
+    "__msan_chain_origin", IRB.getInt32Ty(), IRB.getInt32Ty());
   MsanSetOriginFn =
       M.getOrInsertFunction("__msan_set_origin", IRB.getVoidTy(),
                             IRB.getInt8PtrTy(), IntptrTy, IRB.getInt32Ty());
-  MemmoveFn = M.getOrInsertFunction( 
-    "__msan_memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), 
-    IRB.getInt8PtrTy(), IntptrTy); 
-  MemcpyFn = M.getOrInsertFunction( 
-    "__msan_memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), 
-    IntptrTy); 
-  MemsetFn = M.getOrInsertFunction( 
-    "__msan_memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(), 
-    IntptrTy); 
- 
-  MsanInstrumentAsmStoreFn = 
-      M.getOrInsertFunction("__msan_instrument_asm_store", IRB.getVoidTy(), 
-                            PointerType::get(IRB.getInt8Ty(), 0), IntptrTy); 
- 
-  if (CompileKernel) { 
-    createKernelApi(M); 
-  } else { 
-    createUserspaceApi(M); 
-  } 
-  CallbacksInitialized = true; 
-} 
- 
-FunctionCallee MemorySanitizer::getKmsanShadowOriginAccessFn(bool isStore, 
-                                                             int size) { 
-  FunctionCallee *Fns = 
-      isStore ? MsanMetadataPtrForStore_1_8 : MsanMetadataPtrForLoad_1_8; 
-  switch (size) { 
-  case 1: 
-    return Fns[0]; 
-  case 2: 
-    return Fns[1]; 
-  case 4: 
-    return Fns[2]; 
-  case 8: 
-    return Fns[3]; 
-  default: 
-    return nullptr; 
-  } 
-} 
- 
-/// Module-level initialization. 
-/// 
-/// inserts a call to __msan_init to the module's constructor list. 
-void MemorySanitizer::initializeModule(Module &M) { 
-  auto &DL = M.getDataLayout(); 
- 
-  bool ShadowPassed = ClShadowBase.getNumOccurrences() > 0; 
-  bool OriginPassed = ClOriginBase.getNumOccurrences() > 0; 
-  // Check the overrides first 
-  if (ShadowPassed || OriginPassed) { 
-    CustomMapParams.AndMask = ClAndMask; 
-    CustomMapParams.XorMask = ClXorMask; 
-    CustomMapParams.ShadowBase = ClShadowBase; 
-    CustomMapParams.OriginBase = ClOriginBase; 
-    MapParams = &CustomMapParams; 
-  } else { 
-    Triple TargetTriple(M.getTargetTriple()); 
-    switch (TargetTriple.getOS()) { 
-      case Triple::FreeBSD: 
-        switch (TargetTriple.getArch()) { 
-          case Triple::x86_64: 
-            MapParams = FreeBSD_X86_MemoryMapParams.bits64; 
-            break; 
-          case Triple::x86: 
-            MapParams = FreeBSD_X86_MemoryMapParams.bits32; 
-            break; 
-          default: 
-            report_fatal_error("unsupported architecture"); 
-        } 
-        break; 
-      case Triple::NetBSD: 
-        switch (TargetTriple.getArch()) { 
-          case Triple::x86_64: 
-            MapParams = NetBSD_X86_MemoryMapParams.bits64; 
-            break; 
-          default: 
-            report_fatal_error("unsupported architecture"); 
-        } 
-        break; 
-      case Triple::Linux: 
-        switch (TargetTriple.getArch()) { 
-          case Triple::x86_64: 
-            MapParams = Linux_X86_MemoryMapParams.bits64; 
-            break; 
-          case Triple::x86: 
-            MapParams = Linux_X86_MemoryMapParams.bits32; 
-            break; 
-          case Triple::mips64: 
-          case Triple::mips64el: 
-            MapParams = Linux_MIPS_MemoryMapParams.bits64; 
-            break; 
-          case Triple::ppc64: 
-          case Triple::ppc64le: 
-            MapParams = Linux_PowerPC_MemoryMapParams.bits64; 
-            break; 
-          case Triple::systemz: 
-            MapParams = Linux_S390_MemoryMapParams.bits64; 
-            break; 
-          case Triple::aarch64: 
-          case Triple::aarch64_be: 
-            MapParams = Linux_ARM_MemoryMapParams.bits64; 
-            break; 
-          default: 
-            report_fatal_error("unsupported architecture"); 
-        } 
-        break; 
-      default: 
-        report_fatal_error("unsupported operating system"); 
-    } 
-  } 
- 
-  C = &(M.getContext()); 
-  IRBuilder<> IRB(*C); 
-  IntptrTy = IRB.getIntPtrTy(DL); 
-  OriginTy = IRB.getInt32Ty(); 
- 
-  ColdCallWeights = MDBuilder(*C).createBranchWeights(1, 1000); 
-  OriginStoreWeights = MDBuilder(*C).createBranchWeights(1, 1000); 
- 
-  if (!CompileKernel) { 
-    if (TrackOrigins) 
-      M.getOrInsertGlobal("__msan_track_origins", IRB.getInt32Ty(), [&] { 
-        return new GlobalVariable( 
-            M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage, 
-            IRB.getInt32(TrackOrigins), "__msan_track_origins"); 
-      }); 
- 
-    if (Recover) 
-      M.getOrInsertGlobal("__msan_keep_going", IRB.getInt32Ty(), [&] { 
-        return new GlobalVariable(M, IRB.getInt32Ty(), true, 
-                                  GlobalValue::WeakODRLinkage, 
-                                  IRB.getInt32(Recover), "__msan_keep_going"); 
-      }); 
-} 
-} 
- 
-bool MemorySanitizerLegacyPass::doInitialization(Module &M) { 
-  if (!Options.Kernel) 
-    insertModuleCtor(M); 
-  MSan.emplace(M, Options); 
-  return true; 
-} 
- 
-namespace { 
- 
-/// A helper class that handles instrumentation of VarArg 
-/// functions on a particular platform. 
-/// 
-/// Implementations are expected to insert the instrumentation 
-/// necessary to propagate argument shadow through VarArg function 
-/// calls. Visit* methods are called during an InstVisitor pass over 
-/// the function, and should avoid creating new basic blocks. A new 
-/// instance of this class is created for each instrumented function. 
-struct VarArgHelper { 
-  virtual ~VarArgHelper() = default; 
- 
-  /// Visit a CallBase. 
-  virtual void visitCallBase(CallBase &CB, IRBuilder<> &IRB) = 0; 
- 
-  /// Visit a va_start call. 
-  virtual void visitVAStartInst(VAStartInst &I) = 0; 
- 
-  /// Visit a va_copy call. 
-  virtual void visitVACopyInst(VACopyInst &I) = 0; 
- 
-  /// Finalize function instrumentation. 
-  /// 
-  /// This method is called after visiting all interesting (see above) 
-  /// instructions in a function. 
-  virtual void finalizeInstrumentation() = 0; 
-}; 
- 
-struct MemorySanitizerVisitor; 
- 
-} // end anonymous namespace 
- 
-static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan, 
-                                        MemorySanitizerVisitor &Visitor); 
- 
-static unsigned TypeSizeToSizeIndex(unsigned TypeSize) { 
-  if (TypeSize <= 8) return 0; 
-  return Log2_32_Ceil((TypeSize + 7) / 8); 
-} 
- 
-namespace { 
- 
-/// This class does all the work for a given function. Store and Load 
-/// instructions store and load corresponding shadow and origin 
-/// values. Most instructions propagate shadow from arguments to their 
-/// return values. Certain instructions (most importantly, BranchInst) 
-/// test their argument shadow and print reports (with a runtime call) if it's 
-/// non-zero. 
-struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { 
-  Function &F; 
-  MemorySanitizer &MS; 
-  SmallVector<PHINode *, 16> ShadowPHINodes, OriginPHINodes; 
-  ValueMap<Value*, Value*> ShadowMap, OriginMap; 
-  std::unique_ptr<VarArgHelper> VAHelper; 
-  const TargetLibraryInfo *TLI; 
+  MemmoveFn = M.getOrInsertFunction(
+    "__msan_memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+    IRB.getInt8PtrTy(), IntptrTy);
+  MemcpyFn = M.getOrInsertFunction(
+    "__msan_memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+    IntptrTy);
+  MemsetFn = M.getOrInsertFunction(
+    "__msan_memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(),
+    IntptrTy);
+
+  MsanInstrumentAsmStoreFn =
+      M.getOrInsertFunction("__msan_instrument_asm_store", IRB.getVoidTy(),
+                            PointerType::get(IRB.getInt8Ty(), 0), IntptrTy);
+
+  if (CompileKernel) {
+    createKernelApi(M);
+  } else {
+    createUserspaceApi(M);
+  }
+  CallbacksInitialized = true;
+}
+
+FunctionCallee MemorySanitizer::getKmsanShadowOriginAccessFn(bool isStore,
+                                                             int size) {
+  FunctionCallee *Fns =
+      isStore ? MsanMetadataPtrForStore_1_8 : MsanMetadataPtrForLoad_1_8;
+  switch (size) {
+  case 1:
+    return Fns[0];
+  case 2:
+    return Fns[1];
+  case 4:
+    return Fns[2];
+  case 8:
+    return Fns[3];
+  default:
+    return nullptr;
+  }
+}
+
+/// Module-level initialization.
+///
+/// inserts a call to __msan_init to the module's constructor list.
+void MemorySanitizer::initializeModule(Module &M) {
+  auto &DL = M.getDataLayout();
+
+  bool ShadowPassed = ClShadowBase.getNumOccurrences() > 0;
+  bool OriginPassed = ClOriginBase.getNumOccurrences() > 0;
+  // Check the overrides first
+  if (ShadowPassed || OriginPassed) {
+    CustomMapParams.AndMask = ClAndMask;
+    CustomMapParams.XorMask = ClXorMask;
+    CustomMapParams.ShadowBase = ClShadowBase;
+    CustomMapParams.OriginBase = ClOriginBase;
+    MapParams = &CustomMapParams;
+  } else {
+    Triple TargetTriple(M.getTargetTriple());
+    switch (TargetTriple.getOS()) {
+      case Triple::FreeBSD:
+        switch (TargetTriple.getArch()) {
+          case Triple::x86_64:
+            MapParams = FreeBSD_X86_MemoryMapParams.bits64;
+            break;
+          case Triple::x86:
+            MapParams = FreeBSD_X86_MemoryMapParams.bits32;
+            break;
+          default:
+            report_fatal_error("unsupported architecture");
+        }
+        break;
+      case Triple::NetBSD:
+        switch (TargetTriple.getArch()) {
+          case Triple::x86_64:
+            MapParams = NetBSD_X86_MemoryMapParams.bits64;
+            break;
+          default:
+            report_fatal_error("unsupported architecture");
+        }
+        break;
+      case Triple::Linux:
+        switch (TargetTriple.getArch()) {
+          case Triple::x86_64:
+            MapParams = Linux_X86_MemoryMapParams.bits64;
+            break;
+          case Triple::x86:
+            MapParams = Linux_X86_MemoryMapParams.bits32;
+            break;
+          case Triple::mips64:
+          case Triple::mips64el:
+            MapParams = Linux_MIPS_MemoryMapParams.bits64;
+            break;
+          case Triple::ppc64:
+          case Triple::ppc64le:
+            MapParams = Linux_PowerPC_MemoryMapParams.bits64;
+            break;
+          case Triple::systemz:
+            MapParams = Linux_S390_MemoryMapParams.bits64;
+            break;
+          case Triple::aarch64:
+          case Triple::aarch64_be:
+            MapParams = Linux_ARM_MemoryMapParams.bits64;
+            break;
+          default:
+            report_fatal_error("unsupported architecture");
+        }
+        break;
+      default:
+        report_fatal_error("unsupported operating system");
+    }
+  }
+
+  C = &(M.getContext());
+  IRBuilder<> IRB(*C);
+  IntptrTy = IRB.getIntPtrTy(DL);
+  OriginTy = IRB.getInt32Ty();
+
+  ColdCallWeights = MDBuilder(*C).createBranchWeights(1, 1000);
+  OriginStoreWeights = MDBuilder(*C).createBranchWeights(1, 1000);
+
+  if (!CompileKernel) {
+    if (TrackOrigins)
+      M.getOrInsertGlobal("__msan_track_origins", IRB.getInt32Ty(), [&] {
+        return new GlobalVariable(
+            M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
+            IRB.getInt32(TrackOrigins), "__msan_track_origins");
+      });
+
+    if (Recover)
+      M.getOrInsertGlobal("__msan_keep_going", IRB.getInt32Ty(), [&] {
+        return new GlobalVariable(M, IRB.getInt32Ty(), true,
+                                  GlobalValue::WeakODRLinkage,
+                                  IRB.getInt32(Recover), "__msan_keep_going");
+      });
+}
+}
+
+bool MemorySanitizerLegacyPass::doInitialization(Module &M) {
+  if (!Options.Kernel)
+    insertModuleCtor(M);
+  MSan.emplace(M, Options);
+  return true;
+}
+
+namespace {
+
+/// A helper class that handles instrumentation of VarArg
+/// functions on a particular platform.
+///
+/// Implementations are expected to insert the instrumentation
+/// necessary to propagate argument shadow through VarArg function
+/// calls. Visit* methods are called during an InstVisitor pass over
+/// the function, and should avoid creating new basic blocks. A new
+/// instance of this class is created for each instrumented function.
+struct VarArgHelper {
+  virtual ~VarArgHelper() = default;
+
+  /// Visit a CallBase.
+  virtual void visitCallBase(CallBase &CB, IRBuilder<> &IRB) = 0;
+
+  /// Visit a va_start call.
+  virtual void visitVAStartInst(VAStartInst &I) = 0;
+
+  /// Visit a va_copy call.
+  virtual void visitVACopyInst(VACopyInst &I) = 0;
+
+  /// Finalize function instrumentation.
+  ///
+  /// This method is called after visiting all interesting (see above)
+  /// instructions in a function.
+  virtual void finalizeInstrumentation() = 0;
+};
+
+struct MemorySanitizerVisitor;
+
+} // end anonymous namespace
+
+static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
+                                        MemorySanitizerVisitor &Visitor);
+
+static unsigned TypeSizeToSizeIndex(unsigned TypeSize) {
+  if (TypeSize <= 8) return 0;
+  return Log2_32_Ceil((TypeSize + 7) / 8);
+}
+
+namespace {
+
+/// This class does all the work for a given function. Store and Load
+/// instructions store and load corresponding shadow and origin
+/// values. Most instructions propagate shadow from arguments to their
+/// return values. Certain instructions (most importantly, BranchInst)
+/// test their argument shadow and print reports (with a runtime call) if it's
+/// non-zero.
+struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
+  Function &F;
+  MemorySanitizer &MS;
+  SmallVector<PHINode *, 16> ShadowPHINodes, OriginPHINodes;
+  ValueMap<Value*, Value*> ShadowMap, OriginMap;
+  std::unique_ptr<VarArgHelper> VAHelper;
+  const TargetLibraryInfo *TLI;
   Instruction *FnPrologueEnd;
- 
-  // The following flags disable parts of MSan instrumentation based on 
-  // exclusion list contents and command-line options. 
-  bool InsertChecks; 
-  bool PropagateShadow; 
-  bool PoisonStack; 
-  bool PoisonUndef; 
- 
-  struct ShadowOriginAndInsertPoint { 
-    Value *Shadow; 
-    Value *Origin; 
-    Instruction *OrigIns; 
- 
-    ShadowOriginAndInsertPoint(Value *S, Value *O, Instruction *I) 
-      : Shadow(S), Origin(O), OrigIns(I) {} 
-  }; 
-  SmallVector<ShadowOriginAndInsertPoint, 16> InstrumentationList; 
-  bool InstrumentLifetimeStart = ClHandleLifetimeIntrinsics; 
-  SmallSet<AllocaInst *, 16> AllocaSet; 
-  SmallVector<std::pair<IntrinsicInst *, AllocaInst *>, 16> LifetimeStartList; 
-  SmallVector<StoreInst *, 16> StoreList; 
- 
-  MemorySanitizerVisitor(Function &F, MemorySanitizer &MS, 
-                         const TargetLibraryInfo &TLI) 
-      : F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)), TLI(&TLI) { 
-    bool SanitizeFunction = F.hasFnAttribute(Attribute::SanitizeMemory); 
-    InsertChecks = SanitizeFunction; 
-    PropagateShadow = SanitizeFunction; 
-    PoisonStack = SanitizeFunction && ClPoisonStack; 
-    PoisonUndef = SanitizeFunction && ClPoisonUndef; 
- 
+
+  // The following flags disable parts of MSan instrumentation based on
+  // exclusion list contents and command-line options.
+  bool InsertChecks;
+  bool PropagateShadow;
+  bool PoisonStack;
+  bool PoisonUndef;
+
+  struct ShadowOriginAndInsertPoint {
+    Value *Shadow;
+    Value *Origin;
+    Instruction *OrigIns;
+
+    ShadowOriginAndInsertPoint(Value *S, Value *O, Instruction *I)
+      : Shadow(S), Origin(O), OrigIns(I) {}
+  };
+  SmallVector<ShadowOriginAndInsertPoint, 16> InstrumentationList;
+  bool InstrumentLifetimeStart = ClHandleLifetimeIntrinsics;
+  SmallSet<AllocaInst *, 16> AllocaSet;
+  SmallVector<std::pair<IntrinsicInst *, AllocaInst *>, 16> LifetimeStartList;
+  SmallVector<StoreInst *, 16> StoreList;
+
+  MemorySanitizerVisitor(Function &F, MemorySanitizer &MS,
+                         const TargetLibraryInfo &TLI)
+      : F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)), TLI(&TLI) {
+    bool SanitizeFunction = F.hasFnAttribute(Attribute::SanitizeMemory);
+    InsertChecks = SanitizeFunction;
+    PropagateShadow = SanitizeFunction;
+    PoisonStack = SanitizeFunction && ClPoisonStack;
+    PoisonUndef = SanitizeFunction && ClPoisonUndef;
+
     // In the presence of unreachable blocks, we may see Phi nodes with
     // incoming nodes from such blocks. Since InstVisitor skips unreachable
     // blocks, such nodes will not have any shadow value associated with them.
     // It's easier to remove unreachable blocks than deal with missing shadow.
     removeUnreachableBlocks(F);
 
-    MS.initializeCallbacks(*F.getParent()); 
+    MS.initializeCallbacks(*F.getParent());
     FnPrologueEnd = IRBuilder<>(F.getEntryBlock().getFirstNonPHI())
                         .CreateIntrinsic(Intrinsic::donothing, {}, {});
- 
+
     if (MS.CompileKernel) {
       IRBuilder<> IRB(FnPrologueEnd);
       insertKmsanPrologue(IRB);
     }
 
-    LLVM_DEBUG(if (!InsertChecks) dbgs() 
-               << "MemorySanitizer is not inserting checks into '" 
-               << F.getName() << "'\n"); 
-  } 
- 
+    LLVM_DEBUG(if (!InsertChecks) dbgs()
+               << "MemorySanitizer is not inserting checks into '"
+               << F.getName() << "'\n");
+  }
+
   bool isInPrologue(Instruction &I) {
     return I.getParent() == FnPrologueEnd->getParent() &&
            (&I == FnPrologueEnd || I.comesBefore(FnPrologueEnd));
   }
 
-  Value *updateOrigin(Value *V, IRBuilder<> &IRB) { 
-    if (MS.TrackOrigins <= 1) return V; 
-    return IRB.CreateCall(MS.MsanChainOriginFn, V); 
-  } 
- 
-  Value *originToIntptr(IRBuilder<> &IRB, Value *Origin) { 
-    const DataLayout &DL = F.getParent()->getDataLayout(); 
-    unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy); 
-    if (IntptrSize == kOriginSize) return Origin; 
-    assert(IntptrSize == kOriginSize * 2); 
-    Origin = IRB.CreateIntCast(Origin, MS.IntptrTy, /* isSigned */ false); 
-    return IRB.CreateOr(Origin, IRB.CreateShl(Origin, kOriginSize * 8)); 
-  } 
- 
-  /// Fill memory range with the given origin value. 
-  void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *OriginPtr, 
-                   unsigned Size, Align Alignment) { 
-    const DataLayout &DL = F.getParent()->getDataLayout(); 
-    const Align IntptrAlignment = DL.getABITypeAlign(MS.IntptrTy); 
-    unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy); 
-    assert(IntptrAlignment >= kMinOriginAlignment); 
-    assert(IntptrSize >= kOriginSize); 
- 
-    unsigned Ofs = 0; 
-    Align CurrentAlignment = Alignment; 
-    if (Alignment >= IntptrAlignment && IntptrSize > kOriginSize) { 
-      Value *IntptrOrigin = originToIntptr(IRB, Origin); 
-      Value *IntptrOriginPtr = 
-          IRB.CreatePointerCast(OriginPtr, PointerType::get(MS.IntptrTy, 0)); 
-      for (unsigned i = 0; i < Size / IntptrSize; ++i) { 
-        Value *Ptr = i ? IRB.CreateConstGEP1_32(MS.IntptrTy, IntptrOriginPtr, i) 
-                       : IntptrOriginPtr; 
-        IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment); 
-        Ofs += IntptrSize / kOriginSize; 
-        CurrentAlignment = IntptrAlignment; 
-      } 
-    } 
- 
-    for (unsigned i = Ofs; i < (Size + kOriginSize - 1) / kOriginSize; ++i) { 
-      Value *GEP = 
-          i ? IRB.CreateConstGEP1_32(MS.OriginTy, OriginPtr, i) : OriginPtr; 
-      IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment); 
-      CurrentAlignment = kMinOriginAlignment; 
-    } 
-  } 
- 
-  void storeOrigin(IRBuilder<> &IRB, Value *Addr, Value *Shadow, Value *Origin, 
-                   Value *OriginPtr, Align Alignment, bool AsCall) { 
-    const DataLayout &DL = F.getParent()->getDataLayout(); 
-    const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment); 
-    unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType()); 
+  Value *updateOrigin(Value *V, IRBuilder<> &IRB) {
+    if (MS.TrackOrigins <= 1) return V;
+    return IRB.CreateCall(MS.MsanChainOriginFn, V);
+  }
+
+  Value *originToIntptr(IRBuilder<> &IRB, Value *Origin) {
+    const DataLayout &DL = F.getParent()->getDataLayout();
+    unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
+    if (IntptrSize == kOriginSize) return Origin;
+    assert(IntptrSize == kOriginSize * 2);
+    Origin = IRB.CreateIntCast(Origin, MS.IntptrTy, /* isSigned */ false);
+    return IRB.CreateOr(Origin, IRB.CreateShl(Origin, kOriginSize * 8));
+  }
+
+  /// Fill memory range with the given origin value.
+  void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *OriginPtr,
+                   unsigned Size, Align Alignment) {
+    const DataLayout &DL = F.getParent()->getDataLayout();
+    const Align IntptrAlignment = DL.getABITypeAlign(MS.IntptrTy);
+    unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
+    assert(IntptrAlignment >= kMinOriginAlignment);
+    assert(IntptrSize >= kOriginSize);
+
+    unsigned Ofs = 0;
+    Align CurrentAlignment = Alignment;
+    if (Alignment >= IntptrAlignment && IntptrSize > kOriginSize) {
+      Value *IntptrOrigin = originToIntptr(IRB, Origin);
+      Value *IntptrOriginPtr =
+          IRB.CreatePointerCast(OriginPtr, PointerType::get(MS.IntptrTy, 0));
+      for (unsigned i = 0; i < Size / IntptrSize; ++i) {
+        Value *Ptr = i ? IRB.CreateConstGEP1_32(MS.IntptrTy, IntptrOriginPtr, i)
+                       : IntptrOriginPtr;
+        IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment);
+        Ofs += IntptrSize / kOriginSize;
+        CurrentAlignment = IntptrAlignment;
+      }
+    }
+
+    for (unsigned i = Ofs; i < (Size + kOriginSize - 1) / kOriginSize; ++i) {
+      Value *GEP =
+          i ? IRB.CreateConstGEP1_32(MS.OriginTy, OriginPtr, i) : OriginPtr;
+      IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment);
+      CurrentAlignment = kMinOriginAlignment;
+    }
+  }
+
+  void storeOrigin(IRBuilder<> &IRB, Value *Addr, Value *Shadow, Value *Origin,
+                   Value *OriginPtr, Align Alignment, bool AsCall) {
+    const DataLayout &DL = F.getParent()->getDataLayout();
+    const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment);
+    unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
     Value *ConvertedShadow = convertShadowToScalar(Shadow, IRB);
     if (auto *ConstantShadow = dyn_cast<Constant>(ConvertedShadow)) {
       if (ClCheckConstantShadow && !ConstantShadow->isZeroValue())
         paintOrigin(IRB, updateOrigin(Origin, IRB), OriginPtr, StoreSize,
-                    OriginAlignment); 
+                    OriginAlignment);
       return;
-    } 
+    }
 
     unsigned TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType());
     unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
@@ -1189,206 +1189,206 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       paintOrigin(IRBNew, updateOrigin(Origin, IRBNew), OriginPtr, StoreSize,
                   OriginAlignment);
     }
-  } 
- 
-  void materializeStores(bool InstrumentWithCalls) { 
-    for (StoreInst *SI : StoreList) { 
-      IRBuilder<> IRB(SI); 
-      Value *Val = SI->getValueOperand(); 
-      Value *Addr = SI->getPointerOperand(); 
-      Value *Shadow = SI->isAtomic() ? getCleanShadow(Val) : getShadow(Val); 
-      Value *ShadowPtr, *OriginPtr; 
-      Type *ShadowTy = Shadow->getType(); 
-      const Align Alignment = assumeAligned(SI->getAlignment()); 
-      const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment); 
-      std::tie(ShadowPtr, OriginPtr) = 
-          getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ true); 
- 
-      StoreInst *NewSI = IRB.CreateAlignedStore(Shadow, ShadowPtr, Alignment); 
-      LLVM_DEBUG(dbgs() << "  STORE: " << *NewSI << "\n"); 
-      (void)NewSI; 
- 
-      if (SI->isAtomic()) 
-        SI->setOrdering(addReleaseOrdering(SI->getOrdering())); 
- 
-      if (MS.TrackOrigins && !SI->isAtomic()) 
-        storeOrigin(IRB, Addr, Shadow, getOrigin(Val), OriginPtr, 
-                    OriginAlignment, InstrumentWithCalls); 
-    } 
-  } 
- 
-  /// Helper function to insert a warning at IRB's current insert point. 
-  void insertWarningFn(IRBuilder<> &IRB, Value *Origin) { 
-    if (!Origin) 
-      Origin = (Value *)IRB.getInt32(0); 
-    assert(Origin->getType()->isIntegerTy()); 
-    IRB.CreateCall(MS.WarningFn, Origin)->setCannotMerge(); 
-    // FIXME: Insert UnreachableInst if !MS.Recover? 
-    // This may invalidate some of the following checks and needs to be done 
-    // at the very end. 
-  } 
- 
-  void materializeOneCheck(Instruction *OrigIns, Value *Shadow, Value *Origin, 
-                           bool AsCall) { 
-    IRBuilder<> IRB(OrigIns); 
-    LLVM_DEBUG(dbgs() << "  SHAD0 : " << *Shadow << "\n"); 
+  }
+
+  void materializeStores(bool InstrumentWithCalls) {
+    for (StoreInst *SI : StoreList) {
+      IRBuilder<> IRB(SI);
+      Value *Val = SI->getValueOperand();
+      Value *Addr = SI->getPointerOperand();
+      Value *Shadow = SI->isAtomic() ? getCleanShadow(Val) : getShadow(Val);
+      Value *ShadowPtr, *OriginPtr;
+      Type *ShadowTy = Shadow->getType();
+      const Align Alignment = assumeAligned(SI->getAlignment());
+      const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment);
+      std::tie(ShadowPtr, OriginPtr) =
+          getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ true);
+
+      StoreInst *NewSI = IRB.CreateAlignedStore(Shadow, ShadowPtr, Alignment);
+      LLVM_DEBUG(dbgs() << "  STORE: " << *NewSI << "\n");
+      (void)NewSI;
+
+      if (SI->isAtomic())
+        SI->setOrdering(addReleaseOrdering(SI->getOrdering()));
+
+      if (MS.TrackOrigins && !SI->isAtomic())
+        storeOrigin(IRB, Addr, Shadow, getOrigin(Val), OriginPtr,
+                    OriginAlignment, InstrumentWithCalls);
+    }
+  }
+
+  /// Helper function to insert a warning at IRB's current insert point.
+  void insertWarningFn(IRBuilder<> &IRB, Value *Origin) {
+    if (!Origin)
+      Origin = (Value *)IRB.getInt32(0);
+    assert(Origin->getType()->isIntegerTy());
+    IRB.CreateCall(MS.WarningFn, Origin)->setCannotMerge();
+    // FIXME: Insert UnreachableInst if !MS.Recover?
+    // This may invalidate some of the following checks and needs to be done
+    // at the very end.
+  }
+
+  void materializeOneCheck(Instruction *OrigIns, Value *Shadow, Value *Origin,
+                           bool AsCall) {
+    IRBuilder<> IRB(OrigIns);
+    LLVM_DEBUG(dbgs() << "  SHAD0 : " << *Shadow << "\n");
     Value *ConvertedShadow = convertShadowToScalar(Shadow, IRB);
-    LLVM_DEBUG(dbgs() << "  SHAD1 : " << *ConvertedShadow << "\n"); 
- 
-    if (auto *ConstantShadow = dyn_cast<Constant>(ConvertedShadow)) { 
-      if (ClCheckConstantShadow && !ConstantShadow->isZeroValue()) { 
-        insertWarningFn(IRB, Origin); 
-      } 
-      return; 
-    } 
- 
-    const DataLayout &DL = OrigIns->getModule()->getDataLayout(); 
- 
-    unsigned TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType()); 
-    unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits); 
-    if (AsCall && SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) { 
-      FunctionCallee Fn = MS.MaybeWarningFn[SizeIndex]; 
-      Value *ConvertedShadow2 = 
-          IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex))); 
-      IRB.CreateCall(Fn, {ConvertedShadow2, MS.TrackOrigins && Origin 
-                                                ? Origin 
-                                                : (Value *)IRB.getInt32(0)}); 
-    } else { 
+    LLVM_DEBUG(dbgs() << "  SHAD1 : " << *ConvertedShadow << "\n");
+
+    if (auto *ConstantShadow = dyn_cast<Constant>(ConvertedShadow)) {
+      if (ClCheckConstantShadow && !ConstantShadow->isZeroValue()) {
+        insertWarningFn(IRB, Origin);
+      }
+      return;
+    }
+
+    const DataLayout &DL = OrigIns->getModule()->getDataLayout();
+
+    unsigned TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType());
+    unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
+    if (AsCall && SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
+      FunctionCallee Fn = MS.MaybeWarningFn[SizeIndex];
+      Value *ConvertedShadow2 =
+          IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
+      IRB.CreateCall(Fn, {ConvertedShadow2, MS.TrackOrigins && Origin
+                                                ? Origin
+                                                : (Value *)IRB.getInt32(0)});
+    } else {
       Value *Cmp = convertToBool(ConvertedShadow, IRB, "_mscmp");
-      Instruction *CheckTerm = SplitBlockAndInsertIfThen( 
-          Cmp, OrigIns, 
-          /* Unreachable */ !MS.Recover, MS.ColdCallWeights); 
- 
-      IRB.SetInsertPoint(CheckTerm); 
-      insertWarningFn(IRB, Origin); 
-      LLVM_DEBUG(dbgs() << "  CHECK: " << *Cmp << "\n"); 
-    } 
-  } 
- 
-  void materializeChecks(bool InstrumentWithCalls) { 
-    for (const auto &ShadowData : InstrumentationList) { 
-      Instruction *OrigIns = ShadowData.OrigIns; 
-      Value *Shadow = ShadowData.Shadow; 
-      Value *Origin = ShadowData.Origin; 
-      materializeOneCheck(OrigIns, Shadow, Origin, InstrumentWithCalls); 
-    } 
-    LLVM_DEBUG(dbgs() << "DONE:\n" << F); 
-  } 
- 
+      Instruction *CheckTerm = SplitBlockAndInsertIfThen(
+          Cmp, OrigIns,
+          /* Unreachable */ !MS.Recover, MS.ColdCallWeights);
+
+      IRB.SetInsertPoint(CheckTerm);
+      insertWarningFn(IRB, Origin);
+      LLVM_DEBUG(dbgs() << "  CHECK: " << *Cmp << "\n");
+    }
+  }
+
+  void materializeChecks(bool InstrumentWithCalls) {
+    for (const auto &ShadowData : InstrumentationList) {
+      Instruction *OrigIns = ShadowData.OrigIns;
+      Value *Shadow = ShadowData.Shadow;
+      Value *Origin = ShadowData.Origin;
+      materializeOneCheck(OrigIns, Shadow, Origin, InstrumentWithCalls);
+    }
+    LLVM_DEBUG(dbgs() << "DONE:\n" << F);
+  }
+
   // Returns the last instruction in the new prologue
   void insertKmsanPrologue(IRBuilder<> &IRB) {
-    Value *ContextState = IRB.CreateCall(MS.MsanGetContextStateFn, {}); 
-    Constant *Zero = IRB.getInt32(0); 
-    MS.ParamTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState, 
-                                {Zero, IRB.getInt32(0)}, "param_shadow"); 
-    MS.RetvalTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState, 
-                                 {Zero, IRB.getInt32(1)}, "retval_shadow"); 
-    MS.VAArgTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState, 
-                                {Zero, IRB.getInt32(2)}, "va_arg_shadow"); 
-    MS.VAArgOriginTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState, 
-                                      {Zero, IRB.getInt32(3)}, "va_arg_origin"); 
-    MS.VAArgOverflowSizeTLS = 
-        IRB.CreateGEP(MS.MsanContextStateTy, ContextState, 
-                      {Zero, IRB.getInt32(4)}, "va_arg_overflow_size"); 
-    MS.ParamOriginTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState, 
-                                      {Zero, IRB.getInt32(5)}, "param_origin"); 
-    MS.RetvalOriginTLS = 
-        IRB.CreateGEP(MS.MsanContextStateTy, ContextState, 
-                      {Zero, IRB.getInt32(6)}, "retval_origin"); 
-  } 
- 
-  /// Add MemorySanitizer instrumentation to a function. 
-  bool runOnFunction() { 
-    // Iterate all BBs in depth-first order and create shadow instructions 
-    // for all instructions (where applicable). 
-    // For PHI nodes we create dummy shadow PHIs which will be finalized later. 
+    Value *ContextState = IRB.CreateCall(MS.MsanGetContextStateFn, {});
+    Constant *Zero = IRB.getInt32(0);
+    MS.ParamTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
+                                {Zero, IRB.getInt32(0)}, "param_shadow");
+    MS.RetvalTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
+                                 {Zero, IRB.getInt32(1)}, "retval_shadow");
+    MS.VAArgTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
+                                {Zero, IRB.getInt32(2)}, "va_arg_shadow");
+    MS.VAArgOriginTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
+                                      {Zero, IRB.getInt32(3)}, "va_arg_origin");
+    MS.VAArgOverflowSizeTLS =
+        IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
+                      {Zero, IRB.getInt32(4)}, "va_arg_overflow_size");
+    MS.ParamOriginTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
+                                      {Zero, IRB.getInt32(5)}, "param_origin");
+    MS.RetvalOriginTLS =
+        IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
+                      {Zero, IRB.getInt32(6)}, "retval_origin");
+  }
+
+  /// Add MemorySanitizer instrumentation to a function.
+  bool runOnFunction() {
+    // Iterate all BBs in depth-first order and create shadow instructions
+    // for all instructions (where applicable).
+    // For PHI nodes we create dummy shadow PHIs which will be finalized later.
     for (BasicBlock *BB : depth_first(FnPrologueEnd->getParent()))
-      visit(*BB); 
- 
-    // Finalize PHI nodes. 
-    for (PHINode *PN : ShadowPHINodes) { 
-      PHINode *PNS = cast<PHINode>(getShadow(PN)); 
-      PHINode *PNO = MS.TrackOrigins ? cast<PHINode>(getOrigin(PN)) : nullptr; 
-      size_t NumValues = PN->getNumIncomingValues(); 
-      for (size_t v = 0; v < NumValues; v++) { 
-        PNS->addIncoming(getShadow(PN, v), PN->getIncomingBlock(v)); 
-        if (PNO) PNO->addIncoming(getOrigin(PN, v), PN->getIncomingBlock(v)); 
-      } 
-    } 
- 
-    VAHelper->finalizeInstrumentation(); 
- 
-    // Poison llvm.lifetime.start intrinsics, if we haven't fallen back to 
-    // instrumenting only allocas. 
-    if (InstrumentLifetimeStart) { 
-      for (auto Item : LifetimeStartList) { 
-        instrumentAlloca(*Item.second, Item.first); 
-        AllocaSet.erase(Item.second); 
-      } 
-    } 
-    // Poison the allocas for which we didn't instrument the corresponding 
-    // lifetime intrinsics. 
-    for (AllocaInst *AI : AllocaSet) 
-      instrumentAlloca(*AI); 
- 
-    bool InstrumentWithCalls = ClInstrumentationWithCallThreshold >= 0 && 
-                               InstrumentationList.size() + StoreList.size() > 
-                                   (unsigned)ClInstrumentationWithCallThreshold; 
- 
-    // Insert shadow value checks. 
-    materializeChecks(InstrumentWithCalls); 
- 
-    // Delayed instrumentation of StoreInst. 
-    // This may not add new address checks. 
-    materializeStores(InstrumentWithCalls); 
- 
-    return true; 
-  } 
- 
-  /// Compute the shadow type that corresponds to a given Value. 
-  Type *getShadowTy(Value *V) { 
-    return getShadowTy(V->getType()); 
-  } 
- 
-  /// Compute the shadow type that corresponds to a given Type. 
-  Type *getShadowTy(Type *OrigTy) { 
-    if (!OrigTy->isSized()) { 
-      return nullptr; 
-    } 
-    // For integer type, shadow is the same as the original type. 
-    // This may return weird-sized types like i1. 
-    if (IntegerType *IT = dyn_cast<IntegerType>(OrigTy)) 
-      return IT; 
-    const DataLayout &DL = F.getParent()->getDataLayout(); 
-    if (VectorType *VT = dyn_cast<VectorType>(OrigTy)) { 
-      uint32_t EltSize = DL.getTypeSizeInBits(VT->getElementType()); 
-      return FixedVectorType::get(IntegerType::get(*MS.C, EltSize), 
-                                  cast<FixedVectorType>(VT)->getNumElements()); 
-    } 
-    if (ArrayType *AT = dyn_cast<ArrayType>(OrigTy)) { 
-      return ArrayType::get(getShadowTy(AT->getElementType()), 
-                            AT->getNumElements()); 
-    } 
-    if (StructType *ST = dyn_cast<StructType>(OrigTy)) { 
-      SmallVector<Type*, 4> Elements; 
-      for (unsigned i = 0, n = ST->getNumElements(); i < n; i++) 
-        Elements.push_back(getShadowTy(ST->getElementType(i))); 
-      StructType *Res = StructType::get(*MS.C, Elements, ST->isPacked()); 
-      LLVM_DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n"); 
-      return Res; 
-    } 
-    uint32_t TypeSize = DL.getTypeSizeInBits(OrigTy); 
-    return IntegerType::get(*MS.C, TypeSize); 
-  } 
- 
-  /// Flatten a vector type. 
-  Type *getShadowTyNoVec(Type *ty) { 
-    if (VectorType *vt = dyn_cast<VectorType>(ty)) 
-      return IntegerType::get(*MS.C, 
-                              vt->getPrimitiveSizeInBits().getFixedSize()); 
-    return ty; 
-  } 
- 
+      visit(*BB);
+
+    // Finalize PHI nodes.
+    for (PHINode *PN : ShadowPHINodes) {
+      PHINode *PNS = cast<PHINode>(getShadow(PN));
+      PHINode *PNO = MS.TrackOrigins ? cast<PHINode>(getOrigin(PN)) : nullptr;
+      size_t NumValues = PN->getNumIncomingValues();
+      for (size_t v = 0; v < NumValues; v++) {
+        PNS->addIncoming(getShadow(PN, v), PN->getIncomingBlock(v));
+        if (PNO) PNO->addIncoming(getOrigin(PN, v), PN->getIncomingBlock(v));
+      }
+    }
+
+    VAHelper->finalizeInstrumentation();
+
+    // Poison llvm.lifetime.start intrinsics, if we haven't fallen back to
+    // instrumenting only allocas.
+    if (InstrumentLifetimeStart) {
+      for (auto Item : LifetimeStartList) {
+        instrumentAlloca(*Item.second, Item.first);
+        AllocaSet.erase(Item.second);
+      }
+    }
+    // Poison the allocas for which we didn't instrument the corresponding
+    // lifetime intrinsics.
+    for (AllocaInst *AI : AllocaSet)
+      instrumentAlloca(*AI);
+
+    bool InstrumentWithCalls = ClInstrumentationWithCallThreshold >= 0 &&
+                               InstrumentationList.size() + StoreList.size() >
+                                   (unsigned)ClInstrumentationWithCallThreshold;
+
+    // Insert shadow value checks.
+    materializeChecks(InstrumentWithCalls);
+
+    // Delayed instrumentation of StoreInst.
+    // This may not add new address checks.
+    materializeStores(InstrumentWithCalls);
+
+    return true;
+  }
+
+  /// Compute the shadow type that corresponds to a given Value.
+  Type *getShadowTy(Value *V) {
+    return getShadowTy(V->getType());
+  }
+
+  /// Compute the shadow type that corresponds to a given Type.
+  Type *getShadowTy(Type *OrigTy) {
+    if (!OrigTy->isSized()) {
+      return nullptr;
+    }
+    // For integer type, shadow is the same as the original type.
+    // This may return weird-sized types like i1.
+    if (IntegerType *IT = dyn_cast<IntegerType>(OrigTy))
+      return IT;
+    const DataLayout &DL = F.getParent()->getDataLayout();
+    if (VectorType *VT = dyn_cast<VectorType>(OrigTy)) {
+      uint32_t EltSize = DL.getTypeSizeInBits(VT->getElementType());
+      return FixedVectorType::get(IntegerType::get(*MS.C, EltSize),
+                                  cast<FixedVectorType>(VT)->getNumElements());
+    }
+    if (ArrayType *AT = dyn_cast<ArrayType>(OrigTy)) {
+      return ArrayType::get(getShadowTy(AT->getElementType()),
+                            AT->getNumElements());
+    }
+    if (StructType *ST = dyn_cast<StructType>(OrigTy)) {
+      SmallVector<Type*, 4> Elements;
+      for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
+        Elements.push_back(getShadowTy(ST->getElementType(i)));
+      StructType *Res = StructType::get(*MS.C, Elements, ST->isPacked());
+      LLVM_DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n");
+      return Res;
+    }
+    uint32_t TypeSize = DL.getTypeSizeInBits(OrigTy);
+    return IntegerType::get(*MS.C, TypeSize);
+  }
+
+  /// Flatten a vector type.
+  Type *getShadowTyNoVec(Type *ty) {
+    if (VectorType *vt = dyn_cast<VectorType>(ty))
+      return IntegerType::get(*MS.C,
+                              vt->getPrimitiveSizeInBits().getFixedSize());
+    return ty;
+  }
+
   /// Extract combined shadow of struct elements as a bool
   Value *collapseStructShadow(StructType *Struct, Value *Shadow,
                               IRBuilder<> &IRB) {
@@ -1435,12 +1435,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       return collapseStructShadow(Struct, V, IRB);
     if (ArrayType *Array = dyn_cast<ArrayType>(V->getType()))
       return collapseArrayShadow(Array, V, IRB);
-    Type *Ty = V->getType(); 
-    Type *NoVecTy = getShadowTyNoVec(Ty); 
-    if (Ty == NoVecTy) return V; 
-    return IRB.CreateBitCast(V, NoVecTy); 
-  } 
- 
+    Type *Ty = V->getType();
+    Type *NoVecTy = getShadowTyNoVec(Ty);
+    if (Ty == NoVecTy) return V;
+    return IRB.CreateBitCast(V, NoVecTy);
+  }
+
   // Convert a scalar value to an i1 by comparing with 0
   Value *convertToBool(Value *V, IRBuilder<> &IRB, const Twine &name = "") {
     Type *VTy = V->getType();
@@ -1451,386 +1451,386 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     return IRB.CreateICmpNE(V, ConstantInt::get(VTy, 0), name);
   }
 
-  /// Compute the integer shadow offset that corresponds to a given 
-  /// application address. 
-  /// 
-  /// Offset = (Addr & ~AndMask) ^ XorMask 
-  Value *getShadowPtrOffset(Value *Addr, IRBuilder<> &IRB) { 
-    Value *OffsetLong = IRB.CreatePointerCast(Addr, MS.IntptrTy); 
- 
-    uint64_t AndMask = MS.MapParams->AndMask; 
-    if (AndMask) 
-      OffsetLong = 
-          IRB.CreateAnd(OffsetLong, ConstantInt::get(MS.IntptrTy, ~AndMask)); 
- 
-    uint64_t XorMask = MS.MapParams->XorMask; 
-    if (XorMask) 
-      OffsetLong = 
-          IRB.CreateXor(OffsetLong, ConstantInt::get(MS.IntptrTy, XorMask)); 
-    return OffsetLong; 
-  } 
- 
-  /// Compute the shadow and origin addresses corresponding to a given 
-  /// application address. 
-  /// 
-  /// Shadow = ShadowBase + Offset 
-  /// Origin = (OriginBase + Offset) & ~3ULL 
-  std::pair<Value *, Value *> 
-  getShadowOriginPtrUserspace(Value *Addr, IRBuilder<> &IRB, Type *ShadowTy, 
-                              MaybeAlign Alignment) { 
-    Value *ShadowOffset = getShadowPtrOffset(Addr, IRB); 
-    Value *ShadowLong = ShadowOffset; 
-    uint64_t ShadowBase = MS.MapParams->ShadowBase; 
-    if (ShadowBase != 0) { 
-      ShadowLong = 
-        IRB.CreateAdd(ShadowLong, 
-                      ConstantInt::get(MS.IntptrTy, ShadowBase)); 
-    } 
-    Value *ShadowPtr = 
-        IRB.CreateIntToPtr(ShadowLong, PointerType::get(ShadowTy, 0)); 
-    Value *OriginPtr = nullptr; 
-    if (MS.TrackOrigins) { 
-      Value *OriginLong = ShadowOffset; 
-      uint64_t OriginBase = MS.MapParams->OriginBase; 
-      if (OriginBase != 0) 
-        OriginLong = IRB.CreateAdd(OriginLong, 
-                                   ConstantInt::get(MS.IntptrTy, OriginBase)); 
-      if (!Alignment || *Alignment < kMinOriginAlignment) { 
-        uint64_t Mask = kMinOriginAlignment.value() - 1; 
-        OriginLong = 
-            IRB.CreateAnd(OriginLong, ConstantInt::get(MS.IntptrTy, ~Mask)); 
-      } 
-      OriginPtr = 
-          IRB.CreateIntToPtr(OriginLong, PointerType::get(MS.OriginTy, 0)); 
-    } 
-    return std::make_pair(ShadowPtr, OriginPtr); 
-  } 
- 
-  std::pair<Value *, Value *> getShadowOriginPtrKernel(Value *Addr, 
-                                                       IRBuilder<> &IRB, 
-                                                       Type *ShadowTy, 
-                                                       bool isStore) { 
-    Value *ShadowOriginPtrs; 
-    const DataLayout &DL = F.getParent()->getDataLayout(); 
-    int Size = DL.getTypeStoreSize(ShadowTy); 
- 
-    FunctionCallee Getter = MS.getKmsanShadowOriginAccessFn(isStore, Size); 
-    Value *AddrCast = 
-        IRB.CreatePointerCast(Addr, PointerType::get(IRB.getInt8Ty(), 0)); 
-    if (Getter) { 
-      ShadowOriginPtrs = IRB.CreateCall(Getter, AddrCast); 
-    } else { 
-      Value *SizeVal = ConstantInt::get(MS.IntptrTy, Size); 
-      ShadowOriginPtrs = IRB.CreateCall(isStore ? MS.MsanMetadataPtrForStoreN 
-                                                : MS.MsanMetadataPtrForLoadN, 
-                                        {AddrCast, SizeVal}); 
-    } 
-    Value *ShadowPtr = IRB.CreateExtractValue(ShadowOriginPtrs, 0); 
-    ShadowPtr = IRB.CreatePointerCast(ShadowPtr, PointerType::get(ShadowTy, 0)); 
-    Value *OriginPtr = IRB.CreateExtractValue(ShadowOriginPtrs, 1); 
- 
-    return std::make_pair(ShadowPtr, OriginPtr); 
-  } 
- 
-  std::pair<Value *, Value *> getShadowOriginPtr(Value *Addr, IRBuilder<> &IRB, 
-                                                 Type *ShadowTy, 
-                                                 MaybeAlign Alignment, 
-                                                 bool isStore) { 
-    if (MS.CompileKernel) 
-      return getShadowOriginPtrKernel(Addr, IRB, ShadowTy, isStore); 
-    return getShadowOriginPtrUserspace(Addr, IRB, ShadowTy, Alignment); 
-  } 
- 
-  /// Compute the shadow address for a given function argument. 
-  /// 
-  /// Shadow = ParamTLS+ArgOffset. 
-  Value *getShadowPtrForArgument(Value *A, IRBuilder<> &IRB, 
-                                 int ArgOffset) { 
-    Value *Base = IRB.CreatePointerCast(MS.ParamTLS, MS.IntptrTy); 
-    if (ArgOffset) 
-      Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset)); 
-    return IRB.CreateIntToPtr(Base, PointerType::get(getShadowTy(A), 0), 
-                              "_msarg"); 
-  } 
- 
-  /// Compute the origin address for a given function argument. 
-  Value *getOriginPtrForArgument(Value *A, IRBuilder<> &IRB, 
-                                 int ArgOffset) { 
-    if (!MS.TrackOrigins) 
-      return nullptr; 
-    Value *Base = IRB.CreatePointerCast(MS.ParamOriginTLS, MS.IntptrTy); 
-    if (ArgOffset) 
-      Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset)); 
-    return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0), 
-                              "_msarg_o"); 
-  } 
- 
-  /// Compute the shadow address for a retval. 
-  Value *getShadowPtrForRetval(Value *A, IRBuilder<> &IRB) { 
-    return IRB.CreatePointerCast(MS.RetvalTLS, 
-                                 PointerType::get(getShadowTy(A), 0), 
-                                 "_msret"); 
-  } 
- 
-  /// Compute the origin address for a retval. 
-  Value *getOriginPtrForRetval(IRBuilder<> &IRB) { 
-    // We keep a single origin for the entire retval. Might be too optimistic. 
-    return MS.RetvalOriginTLS; 
-  } 
- 
-  /// Set SV to be the shadow value for V. 
-  void setShadow(Value *V, Value *SV) { 
-    assert(!ShadowMap.count(V) && "Values may only have one shadow"); 
-    ShadowMap[V] = PropagateShadow ? SV : getCleanShadow(V); 
-  } 
- 
-  /// Set Origin to be the origin value for V. 
-  void setOrigin(Value *V, Value *Origin) { 
-    if (!MS.TrackOrigins) return; 
-    assert(!OriginMap.count(V) && "Values may only have one origin"); 
-    LLVM_DEBUG(dbgs() << "ORIGIN: " << *V << "  ==> " << *Origin << "\n"); 
-    OriginMap[V] = Origin; 
-  } 
- 
-  Constant *getCleanShadow(Type *OrigTy) { 
-    Type *ShadowTy = getShadowTy(OrigTy); 
-    if (!ShadowTy) 
-      return nullptr; 
-    return Constant::getNullValue(ShadowTy); 
-  } 
- 
-  /// Create a clean shadow value for a given value. 
-  /// 
-  /// Clean shadow (all zeroes) means all bits of the value are defined 
-  /// (initialized). 
-  Constant *getCleanShadow(Value *V) { 
-    return getCleanShadow(V->getType()); 
-  } 
- 
-  /// Create a dirty shadow of a given shadow type. 
-  Constant *getPoisonedShadow(Type *ShadowTy) { 
-    assert(ShadowTy); 
-    if (isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy)) 
-      return Constant::getAllOnesValue(ShadowTy); 
-    if (ArrayType *AT = dyn_cast<ArrayType>(ShadowTy)) { 
-      SmallVector<Constant *, 4> Vals(AT->getNumElements(), 
-                                      getPoisonedShadow(AT->getElementType())); 
-      return ConstantArray::get(AT, Vals); 
-    } 
-    if (StructType *ST = dyn_cast<StructType>(ShadowTy)) { 
-      SmallVector<Constant *, 4> Vals; 
-      for (unsigned i = 0, n = ST->getNumElements(); i < n; i++) 
-        Vals.push_back(getPoisonedShadow(ST->getElementType(i))); 
-      return ConstantStruct::get(ST, Vals); 
-    } 
-    llvm_unreachable("Unexpected shadow type"); 
-  } 
- 
-  /// Create a dirty shadow for a given value. 
-  Constant *getPoisonedShadow(Value *V) { 
-    Type *ShadowTy = getShadowTy(V); 
-    if (!ShadowTy) 
-      return nullptr; 
-    return getPoisonedShadow(ShadowTy); 
-  } 
- 
-  /// Create a clean (zero) origin. 
-  Value *getCleanOrigin() { 
-    return Constant::getNullValue(MS.OriginTy); 
-  } 
- 
-  /// Get the shadow value for a given Value. 
-  /// 
-  /// This function either returns the value set earlier with setShadow, 
-  /// or extracts if from ParamTLS (for function arguments). 
-  Value *getShadow(Value *V) { 
-    if (!PropagateShadow) return getCleanShadow(V); 
-    if (Instruction *I = dyn_cast<Instruction>(V)) { 
-      if (I->getMetadata("nosanitize")) 
-        return getCleanShadow(V); 
-      // For instructions the shadow is already stored in the map. 
-      Value *Shadow = ShadowMap[V]; 
-      if (!Shadow) { 
-        LLVM_DEBUG(dbgs() << "No shadow: " << *V << "\n" << *(I->getParent())); 
-        (void)I; 
-        assert(Shadow && "No shadow for a value"); 
-      } 
-      return Shadow; 
-    } 
-    if (UndefValue *U = dyn_cast<UndefValue>(V)) { 
-      Value *AllOnes = PoisonUndef ? getPoisonedShadow(V) : getCleanShadow(V); 
-      LLVM_DEBUG(dbgs() << "Undef: " << *U << " ==> " << *AllOnes << "\n"); 
-      (void)U; 
-      return AllOnes; 
-    } 
-    if (Argument *A = dyn_cast<Argument>(V)) { 
-      // For arguments we compute the shadow on demand and store it in the map. 
-      Value **ShadowPtr = &ShadowMap[V]; 
-      if (*ShadowPtr) 
-        return *ShadowPtr; 
-      Function *F = A->getParent(); 
+  /// Emit IR computing the integer shadow offset of application address
+  /// \p Addr, as a value of type MS.IntptrTy.
+  ///
+  /// Offset = (Addr & ~AndMask) ^ XorMask; a zero mask emits no instruction.
+  Value *getShadowPtrOffset(Value *Addr, IRBuilder<> &IRB) {
+    Value *OffsetLong = IRB.CreatePointerCast(Addr, MS.IntptrTy);
+
+    uint64_t AndMask = MS.MapParams->AndMask;
+    if (AndMask)
+      OffsetLong =
+          IRB.CreateAnd(OffsetLong, ConstantInt::get(MS.IntptrTy, ~AndMask));
+
+    uint64_t XorMask = MS.MapParams->XorMask;
+    if (XorMask)
+      OffsetLong =
+          IRB.CreateXor(OffsetLong, ConstantInt::get(MS.IntptrTy, XorMask));
+    return OffsetLong;
+  }
+
+  /// Emit IR computing the shadow and origin addresses for application
+  /// address \p Addr. The origin pointer is nullptr unless MS.TrackOrigins.
+  ///
+  /// Shadow = ShadowBase + Offset
+  /// Origin = (OriginBase + Offset), low bits cleared if under-aligned
+  std::pair<Value *, Value *>
+  getShadowOriginPtrUserspace(Value *Addr, IRBuilder<> &IRB, Type *ShadowTy,
+                              MaybeAlign Alignment) {
+    Value *ShadowOffset = getShadowPtrOffset(Addr, IRB);
+    Value *ShadowLong = ShadowOffset;
+    uint64_t ShadowBase = MS.MapParams->ShadowBase;
+    if (ShadowBase != 0) {
+      ShadowLong =
+        IRB.CreateAdd(ShadowLong,
+                      ConstantInt::get(MS.IntptrTy, ShadowBase));
+    }
+    Value *ShadowPtr =
+        IRB.CreateIntToPtr(ShadowLong, PointerType::get(ShadowTy, 0));
+    Value *OriginPtr = nullptr;
+    if (MS.TrackOrigins) {
+      Value *OriginLong = ShadowOffset; // origin shares the shadow offset
+      uint64_t OriginBase = MS.MapParams->OriginBase;
+      if (OriginBase != 0)
+        OriginLong = IRB.CreateAdd(OriginLong,
+                                   ConstantInt::get(MS.IntptrTy, OriginBase));
+      if (!Alignment || *Alignment < kMinOriginAlignment) { // unknown or small
+        uint64_t Mask = kMinOriginAlignment.value() - 1;
+        OriginLong =
+            IRB.CreateAnd(OriginLong, ConstantInt::get(MS.IntptrTy, ~Mask));
+      }
+      OriginPtr =
+          IRB.CreateIntToPtr(OriginLong, PointerType::get(MS.OriginTy, 0));
+    }
+    return std::make_pair(ShadowPtr, OriginPtr);
+  }
+
+  std::pair<Value *, Value *> getShadowOriginPtrKernel(Value *Addr,
+                                                       IRBuilder<> &IRB,
+                                                       Type *ShadowTy,
+                                                       bool isStore) {
+    Value *ShadowOriginPtrs; // call result holding {shadow ptr, origin ptr}
+    const DataLayout &DL = F.getParent()->getDataLayout();
+    int Size = DL.getTypeStoreSize(ShadowTy); // store size of the shadow type
+
+    FunctionCallee Getter = MS.getKmsanShadowOriginAccessFn(isStore, Size);
+    Value *AddrCast =
+        IRB.CreatePointerCast(Addr, PointerType::get(IRB.getInt8Ty(), 0));
+    if (Getter) { // accessor chosen by (isStore, Size): address argument only
+      ShadowOriginPtrs = IRB.CreateCall(Getter, AddrCast);
+    } else { // otherwise the N-variant, which takes the size explicitly
+      Value *SizeVal = ConstantInt::get(MS.IntptrTy, Size);
+      ShadowOriginPtrs = IRB.CreateCall(isStore ? MS.MsanMetadataPtrForStoreN
+                                                : MS.MsanMetadataPtrForLoadN,
+                                        {AddrCast, SizeVal});
+    }
+    Value *ShadowPtr = IRB.CreateExtractValue(ShadowOriginPtrs, 0);
+    ShadowPtr = IRB.CreatePointerCast(ShadowPtr, PointerType::get(ShadowTy, 0));
+    Value *OriginPtr = IRB.CreateExtractValue(ShadowOriginPtrs, 1);
+
+    return std::make_pair(ShadowPtr, OriginPtr);
+  }
+
+  /// Dispatch to the kernel or userspace shadow/origin address computation.
+  std::pair<Value *, Value *>
+  getShadowOriginPtr(Value *Addr, IRBuilder<> &IRB, Type *ShadowTy,
+                     MaybeAlign Alignment, bool isStore) {
+    return MS.CompileKernel
+               ? getShadowOriginPtrKernel(Addr, IRB, ShadowTy, isStore)
+               : getShadowOriginPtrUserspace(Addr, IRB, ShadowTy, Alignment);
+  }
+
+  /// Emit IR computing the shadow address for function argument \p A.
+  ///
+  /// Shadow = ParamTLS + ArgOffset, typed as a pointer to A's shadow type.
+  Value *getShadowPtrForArgument(Value *A, IRBuilder<> &IRB,
+                                 int ArgOffset) {
+    Value *Base = IRB.CreatePointerCast(MS.ParamTLS, MS.IntptrTy);
+    if (ArgOffset) // a zero offset needs no add
+      Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+    return IRB.CreateIntToPtr(Base, PointerType::get(getShadowTy(A), 0),
+                              "_msarg");
+  }
+
+  /// Origin address of an argument: ParamOriginTLS + ArgOffset (A is unused).
+  Value *getOriginPtrForArgument(Value *A, IRBuilder<> &IRB,
+                                 int ArgOffset) {
+    if (!MS.TrackOrigins) // no origin tracking: nothing to address
+      return nullptr;
+    Value *Base = IRB.CreatePointerCast(MS.ParamOriginTLS, MS.IntptrTy);
+    if (ArgOffset) // a zero offset needs no add
+      Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+    return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
+                              "_msarg_o");
+  }
+
+  /// Shadow address of a retval: RetvalTLS cast to a pointer to A's shadow type.
+  Value *getShadowPtrForRetval(Value *A, IRBuilder<> &IRB) {
+    // Pointer (address space 0) to the shadow type of A.
+    PointerType *ShadowPtrTy = PointerType::get(getShadowTy(A), 0);
+    return IRB.CreatePointerCast(MS.RetvalTLS, ShadowPtrTy, "_msret");
+  }
+
+  /// Return the origin address for a retval; emits no IR (\p IRB is unused).
+  Value *getOriginPtrForRetval(IRBuilder<> &IRB) {
+    // We keep a single origin for the entire retval. Might be too optimistic.
+    return MS.RetvalOriginTLS;
+  }
+
+  /// Record SV as the shadow of V (a clean shadow if propagation is off).
+  void setShadow(Value *V, Value *SV) {
+    assert(!ShadowMap.count(V) && "Values may only have one shadow");
+    ShadowMap[V] = PropagateShadow ? SV : getCleanShadow(V);
+  }
+
+  /// Record Origin as the origin of V; a no-op unless MS.TrackOrigins is set.
+  void setOrigin(Value *V, Value *Origin) {
+    if (!MS.TrackOrigins) return;
+    assert(!OriginMap.count(V) && "Values may only have one origin");
+    LLVM_DEBUG(dbgs() << "ORIGIN: " << *V << "  ==> " << *Origin << "\n");
+    OriginMap[V] = Origin;
+  }
+
+  /// All-zero shadow constant for \p OrigTy; nullptr if it has no shadow type.
+  Constant *getCleanShadow(Type *OrigTy) {
+    if (Type *ShadowTy = getShadowTy(OrigTy))
+      return Constant::getNullValue(ShadowTy);
+    return nullptr;
+  }
+
+  /// Create a clean shadow value for \p V, based on the type of V.
+  ///
+  /// Clean shadow (all zeroes) means all bits of the value are defined
+  /// (initialized).
+  Constant *getCleanShadow(Value *V) {
+    return getCleanShadow(V->getType());
+  }
+
+  /// Create a dirty (all-ones) shadow constant of shadow type \p ShadowTy.
+  Constant *getPoisonedShadow(Type *ShadowTy) {
+    assert(ShadowTy);
+    if (isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy))
+      return Constant::getAllOnesValue(ShadowTy);
+    if (ArrayType *AT = dyn_cast<ArrayType>(ShadowTy)) { // repeat one element
+      SmallVector<Constant *, 4> Vals(AT->getNumElements(),
+                                      getPoisonedShadow(AT->getElementType()));
+      return ConstantArray::get(AT, Vals);
+    }
+    if (StructType *ST = dyn_cast<StructType>(ShadowTy)) { // poison each field
+      SmallVector<Constant *, 4> Vals;
+      for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
+        Vals.push_back(getPoisonedShadow(ST->getElementType(i)));
+      return ConstantStruct::get(ST, Vals);
+    }
+    llvm_unreachable("Unexpected shadow type");
+  }
+
+  /// Fully poisoned shadow constant for \p V.
+  /// Returns nullptr when V has no shadow type.
+  Constant *getPoisonedShadow(Value *V) {
+    if (Type *ShadowTy = getShadowTy(V))
+      return getPoisonedShadow(ShadowTy);
+    return nullptr;
+  }
+
+  /// Create a clean (zero) origin constant of type MS.OriginTy.
+  Value *getCleanOrigin() {
+    return Constant::getNullValue(MS.OriginTy);
+  }
+
+  /// Get the shadow value for a given Value.
+  ///
+  /// This function either returns the value set earlier with setShadow,
+  /// or extracts it from ParamTLS (for function arguments).
+  Value *getShadow(Value *V) {
+    if (!PropagateShadow) return getCleanShadow(V);
+    if (Instruction *I = dyn_cast<Instruction>(V)) {
+      if (I->getMetadata("nosanitize"))
+        return getCleanShadow(V);
+      // For instructions the shadow is already stored in the map.
+      Value *Shadow = ShadowMap[V];
+      if (!Shadow) {
+        LLVM_DEBUG(dbgs() << "No shadow: " << *V << "\n" << *(I->getParent()));
+        (void)I;
+        assert(Shadow && "No shadow for a value");
+      }
+      return Shadow;
+    }
+    if (UndefValue *U = dyn_cast<UndefValue>(V)) {
+      Value *AllOnes = PoisonUndef ? getPoisonedShadow(V) : getCleanShadow(V);
+      LLVM_DEBUG(dbgs() << "Undef: " << *U << " ==> " << *AllOnes << "\n");
+      (void)U;
+      return AllOnes;
+    }
+    if (Argument *A = dyn_cast<Argument>(V)) {
+      // For arguments we compute the shadow on demand and store it in the map.
+      Value **ShadowPtr = &ShadowMap[V];
+      if (*ShadowPtr)
+        return *ShadowPtr;
+      Function *F = A->getParent();
       IRBuilder<> EntryIRB(FnPrologueEnd);
-      unsigned ArgOffset = 0; 
-      const DataLayout &DL = F->getParent()->getDataLayout(); 
-      for (auto &FArg : F->args()) { 
-        if (!FArg.getType()->isSized()) { 
-          LLVM_DEBUG(dbgs() << "Arg is not sized\n"); 
-          continue; 
-        } 
- 
-        bool FArgByVal = FArg.hasByValAttr(); 
-        bool FArgNoUndef = FArg.hasAttribute(Attribute::NoUndef); 
-        bool FArgEagerCheck = ClEagerChecks && !FArgByVal && FArgNoUndef; 
-        unsigned Size = 
-            FArg.hasByValAttr() 
-                ? DL.getTypeAllocSize(FArg.getParamByValType()) 
-                : DL.getTypeAllocSize(FArg.getType()); 
- 
-        if (A == &FArg) { 
-          bool Overflow = ArgOffset + Size > kParamTLSSize; 
-          if (FArgEagerCheck) { 
-            *ShadowPtr = getCleanShadow(V); 
-            setOrigin(A, getCleanOrigin()); 
-            continue; 
-          } else if (FArgByVal) { 
-            Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset); 
-            // ByVal pointer itself has clean shadow. We copy the actual 
-            // argument shadow to the underlying memory. 
-            // Figure out maximal valid memcpy alignment. 
-            const Align ArgAlign = DL.getValueOrABITypeAlignment( 
-                MaybeAlign(FArg.getParamAlignment()), FArg.getParamByValType()); 
-            Value *CpShadowPtr = 
-                getShadowOriginPtr(V, EntryIRB, EntryIRB.getInt8Ty(), ArgAlign, 
-                                   /*isStore*/ true) 
-                    .first; 
-            // TODO(glider): need to copy origins. 
-            if (Overflow) { 
-              // ParamTLS overflow. 
-              EntryIRB.CreateMemSet( 
-                  CpShadowPtr, Constant::getNullValue(EntryIRB.getInt8Ty()), 
-                  Size, ArgAlign); 
-            } else { 
-              const Align CopyAlign = std::min(ArgAlign, kShadowTLSAlignment); 
-              Value *Cpy = EntryIRB.CreateMemCpy(CpShadowPtr, CopyAlign, Base, 
-                                                 CopyAlign, Size); 
-              LLVM_DEBUG(dbgs() << "  ByValCpy: " << *Cpy << "\n"); 
-              (void)Cpy; 
-            } 
-            *ShadowPtr = getCleanShadow(V); 
-          } else { 
-            // Shadow over TLS 
-            Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset); 
-            if (Overflow) { 
-              // ParamTLS overflow. 
-              *ShadowPtr = getCleanShadow(V); 
-            } else { 
-              *ShadowPtr = EntryIRB.CreateAlignedLoad(getShadowTy(&FArg), Base, 
-                                                      kShadowTLSAlignment); 
-            } 
-          } 
-          LLVM_DEBUG(dbgs() 
-                     << "  ARG:    " << FArg << " ==> " << **ShadowPtr << "\n"); 
-          if (MS.TrackOrigins && !Overflow) { 
-            Value *OriginPtr = 
-                getOriginPtrForArgument(&FArg, EntryIRB, ArgOffset); 
-            setOrigin(A, EntryIRB.CreateLoad(MS.OriginTy, OriginPtr)); 
-          } else { 
-            setOrigin(A, getCleanOrigin()); 
-          } 
+      unsigned ArgOffset = 0;
+      const DataLayout &DL = F->getParent()->getDataLayout();
+      for (auto &FArg : F->args()) {
+        if (!FArg.getType()->isSized()) {
+          LLVM_DEBUG(dbgs() << "Arg is not sized\n");
+          continue;
+        }
+
+        bool FArgByVal = FArg.hasByValAttr();
+        bool FArgNoUndef = FArg.hasAttribute(Attribute::NoUndef);
+        bool FArgEagerCheck = ClEagerChecks && !FArgByVal && FArgNoUndef;
+        unsigned Size =
+            FArg.hasByValAttr()
+                ? DL.getTypeAllocSize(FArg.getParamByValType())
+                : DL.getTypeAllocSize(FArg.getType());
+
+        if (A == &FArg) {
+          bool Overflow = ArgOffset + Size > kParamTLSSize;
+          if (FArgEagerCheck) {
+            *ShadowPtr = getCleanShadow(V);
+            setOrigin(A, getCleanOrigin());
+            continue;
+          } else if (FArgByVal) {
+            Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset);
+            // ByVal pointer itself has clean shadow. We copy the actual
+            // argument shadow to the underlying memory.
+            // Figure out maximal valid memcpy alignment.
+            const Align ArgAlign = DL.getValueOrABITypeAlignment(
+                MaybeAlign(FArg.getParamAlignment()), FArg.getParamByValType());
+            Value *CpShadowPtr =
+                getShadowOriginPtr(V, EntryIRB, EntryIRB.getInt8Ty(), ArgAlign,
+                                   /*isStore*/ true)
+                    .first;
+            // TODO(glider): need to copy origins.
+            if (Overflow) {
+              // ParamTLS overflow.
+              EntryIRB.CreateMemSet(
+                  CpShadowPtr, Constant::getNullValue(EntryIRB.getInt8Ty()),
+                  Size, ArgAlign);
+            } else {
+              const Align CopyAlign = std::min(ArgAlign, kShadowTLSAlignment);
+              Value *Cpy = EntryIRB.CreateMemCpy(CpShadowPtr, CopyAlign, Base,
+                                                 CopyAlign, Size);
+              LLVM_DEBUG(dbgs() << "  ByValCpy: " << *Cpy << "\n");
+              (void)Cpy;
+            }
+            *ShadowPtr = getCleanShadow(V);
+          } else {
+            // Shadow over TLS
+            Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset);
+            if (Overflow) {
+              // ParamTLS overflow.
+              *ShadowPtr = getCleanShadow(V);
+            } else {
+              *ShadowPtr = EntryIRB.CreateAlignedLoad(getShadowTy(&FArg), Base,
+                                                      kShadowTLSAlignment);
+            }
+          }
+          LLVM_DEBUG(dbgs()
+                     << "  ARG:    " << FArg << " ==> " << **ShadowPtr << "\n");
+          if (MS.TrackOrigins && !Overflow) {
+            Value *OriginPtr =
+                getOriginPtrForArgument(&FArg, EntryIRB, ArgOffset);
+            setOrigin(A, EntryIRB.CreateLoad(MS.OriginTy, OriginPtr));
+          } else {
+            setOrigin(A, getCleanOrigin());
+          }
 
           break;
-        } 
- 
-        if (!FArgEagerCheck) 
-          ArgOffset += alignTo(Size, kShadowTLSAlignment); 
-      } 
-      assert(*ShadowPtr && "Could not find shadow for an argument"); 
-      return *ShadowPtr; 
-    } 
-    // For everything else the shadow is zero. 
-    return getCleanShadow(V); 
-  } 
- 
-  /// Get the shadow for i-th argument of the instruction I. 
-  Value *getShadow(Instruction *I, int i) { 
-    return getShadow(I->getOperand(i)); 
-  } 
- 
-  /// Get the origin for a value. 
-  Value *getOrigin(Value *V) { 
-    if (!MS.TrackOrigins) return nullptr; 
-    if (!PropagateShadow) return getCleanOrigin(); 
-    if (isa<Constant>(V)) return getCleanOrigin(); 
-    assert((isa<Instruction>(V) || isa<Argument>(V)) && 
-           "Unexpected value type in getOrigin()"); 
-    if (Instruction *I = dyn_cast<Instruction>(V)) { 
-      if (I->getMetadata("nosanitize")) 
-        return getCleanOrigin(); 
-    } 
-    Value *Origin = OriginMap[V]; 
-    assert(Origin && "Missing origin"); 
-    return Origin; 
-  } 
- 
-  /// Get the origin for i-th argument of the instruction I. 
-  Value *getOrigin(Instruction *I, int i) { 
-    return getOrigin(I->getOperand(i)); 
-  } 
- 
-  /// Remember the place where a shadow check should be inserted. 
-  /// 
-  /// This location will be later instrumented with a check that will print a 
-  /// UMR warning in runtime if the shadow value is not 0. 
-  void insertShadowCheck(Value *Shadow, Value *Origin, Instruction *OrigIns) { 
-    assert(Shadow); 
-    if (!InsertChecks) return; 
-#ifndef NDEBUG 
-    Type *ShadowTy = Shadow->getType(); 
+        }
+
+        if (!FArgEagerCheck)
+          ArgOffset += alignTo(Size, kShadowTLSAlignment);
+      }
+      assert(*ShadowPtr && "Could not find shadow for an argument");
+      return *ShadowPtr;
+    }
+    // For everything else the shadow is zero.
+    return getCleanShadow(V);
+  }
+
+  /// Get the shadow for the i-th operand of the instruction I.
+  Value *getShadow(Instruction *I, int i) {
+    return getShadow(I->getOperand(i));
+  }
+
+  /// Look up the origin recorded for \p V.
+  /// Returns nullptr when origin tracking is off, and a clean origin when
+  /// shadow propagation is off, V is a constant, or V is a "nosanitize"
+  /// instruction.
+  Value *getOrigin(Value *V) {
+    if (!MS.TrackOrigins) return nullptr;
+    if (!PropagateShadow || isa<Constant>(V)) return getCleanOrigin();
+    assert((isa<Instruction>(V) || isa<Argument>(V)) &&
+           "Unexpected value type in getOrigin()");
+    Instruction *Inst = dyn_cast<Instruction>(V);
+    if (Inst && Inst->getMetadata("nosanitize")) return getCleanOrigin();
+    Value *Origin = OriginMap[V];
+    assert(Origin && "Missing origin");
+    return Origin;
+  }
+
+  /// Get the origin for the i-th operand of the instruction I.
+  Value *getOrigin(Instruction *I, int i) {
+    return getOrigin(I->getOperand(i));
+  }
+
+  /// Remember the place where a shadow check should be inserted.
+  ///
+  /// This location will be later instrumented with a check that will print a
+  /// UMR warning in runtime if the shadow value is not 0.
+  void insertShadowCheck(Value *Shadow, Value *Origin, Instruction *OrigIns) {
+    assert(Shadow);
+    if (!InsertChecks) return;
+#ifndef NDEBUG
+    Type *ShadowTy = Shadow->getType();
     assert((isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy) ||
             isa<StructType>(ShadowTy) || isa<ArrayType>(ShadowTy)) &&
            "Can only insert checks for integer, vector, and aggregate shadow "
            "types");
-#endif 
-    InstrumentationList.push_back( 
-        ShadowOriginAndInsertPoint(Shadow, Origin, OrigIns)); 
-  } 
- 
-  /// Remember the place where a shadow check should be inserted. 
-  /// 
-  /// This location will be later instrumented with a check that will print a 
-  /// UMR warning in runtime if the value is not fully defined. 
-  void insertShadowCheck(Value *Val, Instruction *OrigIns) { 
-    assert(Val); 
-    Value *Shadow, *Origin; 
-    if (ClCheckConstantShadow) { 
-      Shadow = getShadow(Val); 
-      if (!Shadow) return; 
-      Origin = getOrigin(Val); 
-    } else { 
-      Shadow = dyn_cast_or_null<Instruction>(getShadow(Val)); 
-      if (!Shadow) return; 
-      Origin = dyn_cast_or_null<Instruction>(getOrigin(Val)); 
-    } 
-    insertShadowCheck(Shadow, Origin, OrigIns); 
-  } 
- 
-  AtomicOrdering addReleaseOrdering(AtomicOrdering a) { 
-    switch (a) { 
-      case AtomicOrdering::NotAtomic: 
-        return AtomicOrdering::NotAtomic; 
-      case AtomicOrdering::Unordered: 
-      case AtomicOrdering::Monotonic: 
-      case AtomicOrdering::Release: 
-        return AtomicOrdering::Release; 
-      case AtomicOrdering::Acquire: 
-      case AtomicOrdering::AcquireRelease: 
-        return AtomicOrdering::AcquireRelease; 
-      case AtomicOrdering::SequentiallyConsistent: 
-        return AtomicOrdering::SequentiallyConsistent; 
-    } 
-    llvm_unreachable("Unknown ordering"); 
-  } 
- 
+#endif
+    InstrumentationList.push_back(
+        ShadowOriginAndInsertPoint(Shadow, Origin, OrigIns));
+  }
+
+  /// Remember the place where a shadow check should be inserted.
+  ///
+  /// The check, instrumented later, prints a UMR warning at run time if
+  /// \p Val is not fully defined. Nothing is queued without a usable shadow.
+  void insertShadowCheck(Value *Val, Instruction *OrigIns) {
+    assert(Val);
+    // Without ClCheckConstantShadow only instruction shadows are checked,
+    // and only instruction origins are passed along.
+    const bool InstOnly = !ClCheckConstantShadow;
+    Value *Shadow = getShadow(Val);
+    if (InstOnly)
+      Shadow = dyn_cast_or_null<Instruction>(Shadow);
+    if (!Shadow) return;
+    Value *Origin = getOrigin(Val);
+    if (InstOnly)
+      Origin = dyn_cast_or_null<Instruction>(Origin);
+    insertShadowCheck(Shadow, Origin, OrigIns);
+  }
+
+  /// Strengthen an atomic ordering so that it includes release semantics.
+  AtomicOrdering addReleaseOrdering(AtomicOrdering a) {
+    switch (a) {
+      case AtomicOrdering::NotAtomic:
+      case AtomicOrdering::Release:
+      case AtomicOrdering::AcquireRelease:
+      case AtomicOrdering::SequentiallyConsistent:
+        return a; // Not atomic, or release is already included.
+      case AtomicOrdering::Unordered:
+      case AtomicOrdering::Monotonic:
+        return AtomicOrdering::Release;
+      case AtomicOrdering::Acquire:
+        return AtomicOrdering::AcquireRelease;
+    }
+    llvm_unreachable("Unknown ordering");
+  }
+
   Value *makeAddReleaseOrderingTable(IRBuilder<> &IRB) {
     constexpr int NumOrderings = (int)AtomicOrderingCABI::seq_cst + 1;
     uint32_t OrderingTable[NumOrderings] = {};
@@ -1849,23 +1849,23 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
                                    makeArrayRef(OrderingTable, NumOrderings));
   }
 
-  AtomicOrdering addAcquireOrdering(AtomicOrdering a) { 
-    switch (a) { 
-      case AtomicOrdering::NotAtomic: 
-        return AtomicOrdering::NotAtomic; 
-      case AtomicOrdering::Unordered: 
-      case AtomicOrdering::Monotonic: 
-      case AtomicOrdering::Acquire: 
-        return AtomicOrdering::Acquire; 
-      case AtomicOrdering::Release: 
-      case AtomicOrdering::AcquireRelease: 
-        return AtomicOrdering::AcquireRelease; 
-      case AtomicOrdering::SequentiallyConsistent: 
-        return AtomicOrdering::SequentiallyConsistent; 
-    } 
-    llvm_unreachable("Unknown ordering"); 
-  } 
- 
+  AtomicOrdering addAcquireOrdering(AtomicOrdering a) {
+    switch (a) {
+      case AtomicOrdering::NotAtomic:
+        return AtomicOrdering::NotAtomic;
+      case AtomicOrdering::Unordered:
+      case AtomicOrdering::Monotonic:
+      case AtomicOrdering::Acquire:
+        return AtomicOrdering::Acquire;
+      case AtomicOrdering::Release:
+      case AtomicOrdering::AcquireRelease:
+        return AtomicOrdering::AcquireRelease;
+      case AtomicOrdering::SequentiallyConsistent:
+        return AtomicOrdering::SequentiallyConsistent;
+    }
+    llvm_unreachable("Unknown ordering");
+  }
+
   Value *makeAddAcquireOrderingTable(IRBuilder<> &IRB) {
     constexpr int NumOrderings = (int)AtomicOrderingCABI::seq_cst + 1;
     uint32_t OrderingTable[NumOrderings] = {};
@@ -1884,1353 +1884,1353 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
                                    makeArrayRef(OrderingTable, NumOrderings));
   }
 
-  // ------------------- Visitors. 
-  using InstVisitor<MemorySanitizerVisitor>::visit; 
-  void visit(Instruction &I) { 
+  // ------------------- Visitors.
+  using InstVisitor<MemorySanitizerVisitor>::visit;
+  void visit(Instruction &I) {
     if (I.getMetadata("nosanitize"))
       return;
     // Don't want to visit if we're in the prologue
     if (isInPrologue(I))
       return;
     InstVisitor<MemorySanitizerVisitor>::visit(I);
-  } 
- 
-  /// Instrument LoadInst 
-  /// 
-  /// Loads the corresponding shadow and (optionally) origin. 
-  /// Optionally, checks that the load address is fully defined. 
-  void visitLoadInst(LoadInst &I) { 
-    assert(I.getType()->isSized() && "Load type must have size"); 
-    assert(!I.getMetadata("nosanitize")); 
-    IRBuilder<> IRB(I.getNextNode()); 
-    Type *ShadowTy = getShadowTy(&I); 
-    Value *Addr = I.getPointerOperand(); 
-    Value *ShadowPtr = nullptr, *OriginPtr = nullptr; 
-    const Align Alignment = assumeAligned(I.getAlignment()); 
-    if (PropagateShadow) { 
-      std::tie(ShadowPtr, OriginPtr) = 
-          getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false); 
-      setShadow(&I, 
-                IRB.CreateAlignedLoad(ShadowTy, ShadowPtr, Alignment, "_msld")); 
-    } else { 
-      setShadow(&I, getCleanShadow(&I)); 
-    } 
- 
-    if (ClCheckAccessAddress) 
-      insertShadowCheck(I.getPointerOperand(), &I); 
- 
-    if (I.isAtomic()) 
-      I.setOrdering(addAcquireOrdering(I.getOrdering())); 
- 
-    if (MS.TrackOrigins) { 
-      if (PropagateShadow) { 
-        const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment); 
-        setOrigin( 
-            &I, IRB.CreateAlignedLoad(MS.OriginTy, OriginPtr, OriginAlignment)); 
-      } else { 
-        setOrigin(&I, getCleanOrigin()); 
-      } 
-    } 
-  } 
- 
-  /// Instrument StoreInst 
-  /// 
-  /// Stores the corresponding shadow and (optionally) origin. 
-  /// Optionally, checks that the store address is fully defined. 
-  void visitStoreInst(StoreInst &I) { 
-    StoreList.push_back(&I); 
-    if (ClCheckAccessAddress) 
-      insertShadowCheck(I.getPointerOperand(), &I); 
-  } 
- 
-  void handleCASOrRMW(Instruction &I) { 
-    assert(isa<AtomicRMWInst>(I) || isa<AtomicCmpXchgInst>(I)); 
- 
-    IRBuilder<> IRB(&I); 
-    Value *Addr = I.getOperand(0); 
-    Value *ShadowPtr = getShadowOriginPtr(Addr, IRB, I.getType(), Align(1), 
-                                          /*isStore*/ true) 
-                           .first; 
- 
-    if (ClCheckAccessAddress) 
-      insertShadowCheck(Addr, &I); 
- 
-    // Only test the conditional argument of cmpxchg instruction. 
-    // The other argument can potentially be uninitialized, but we can not 
-    // detect this situation reliably without possible false positives. 
-    if (isa<AtomicCmpXchgInst>(I)) 
-      insertShadowCheck(I.getOperand(1), &I); 
- 
-    IRB.CreateStore(getCleanShadow(&I), ShadowPtr); 
- 
-    setShadow(&I, getCleanShadow(&I)); 
-    setOrigin(&I, getCleanOrigin()); 
-  } 
- 
-  void visitAtomicRMWInst(AtomicRMWInst &I) { 
-    handleCASOrRMW(I); 
-    I.setOrdering(addReleaseOrdering(I.getOrdering())); 
-  } 
- 
-  void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) { 
-    handleCASOrRMW(I); 
-    I.setSuccessOrdering(addReleaseOrdering(I.getSuccessOrdering())); 
-  } 
- 
-  // Vector manipulation. 
-  void visitExtractElementInst(ExtractElementInst &I) { 
-    insertShadowCheck(I.getOperand(1), &I); 
-    IRBuilder<> IRB(&I); 
-    setShadow(&I, IRB.CreateExtractElement(getShadow(&I, 0), I.getOperand(1), 
-              "_msprop")); 
-    setOrigin(&I, getOrigin(&I, 0)); 
-  } 
- 
-  void visitInsertElementInst(InsertElementInst &I) { 
-    insertShadowCheck(I.getOperand(2), &I); 
-    IRBuilder<> IRB(&I); 
-    setShadow(&I, IRB.CreateInsertElement(getShadow(&I, 0), getShadow(&I, 1), 
-              I.getOperand(2), "_msprop")); 
-    setOriginForNaryOp(I); 
-  } 
- 
-  void visitShuffleVectorInst(ShuffleVectorInst &I) { 
-    IRBuilder<> IRB(&I); 
-    setShadow(&I, IRB.CreateShuffleVector(getShadow(&I, 0), getShadow(&I, 1), 
-                                          I.getShuffleMask(), "_msprop")); 
-    setOriginForNaryOp(I); 
-  } 
- 
-  // Casts. 
-  void visitSExtInst(SExtInst &I) { 
-    IRBuilder<> IRB(&I); 
-    setShadow(&I, IRB.CreateSExt(getShadow(&I, 0), I.getType(), "_msprop")); 
-    setOrigin(&I, getOrigin(&I, 0)); 
-  } 
- 
-  void visitZExtInst(ZExtInst &I) { 
-    IRBuilder<> IRB(&I); 
-    setShadow(&I, IRB.CreateZExt(getShadow(&I, 0), I.getType(), "_msprop")); 
-    setOrigin(&I, getOrigin(&I, 0)); 
-  } 
- 
-  void visitTruncInst(TruncInst &I) { 
-    IRBuilder<> IRB(&I); 
-    setShadow(&I, IRB.CreateTrunc(getShadow(&I, 0), I.getType(), "_msprop")); 
-    setOrigin(&I, getOrigin(&I, 0)); 
-  } 
- 
-  void visitBitCastInst(BitCastInst &I) { 
-    // Special case: if this is the bitcast (there is exactly 1 allowed) between 
-    // a musttail call and a ret, don't instrument. New instructions are not 
-    // allowed after a musttail call. 
-    if (auto *CI = dyn_cast<CallInst>(I.getOperand(0))) 
-      if (CI->isMustTailCall()) 
-        return; 
-    IRBuilder<> IRB(&I); 
-    setShadow(&I, IRB.CreateBitCast(getShadow(&I, 0), getShadowTy(&I))); 
-    setOrigin(&I, getOrigin(&I, 0)); 
-  } 
- 
-  void visitPtrToIntInst(PtrToIntInst &I) { 
-    IRBuilder<> IRB(&I); 
-    setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false, 
-             "_msprop_ptrtoint")); 
-    setOrigin(&I, getOrigin(&I, 0)); 
-  } 
- 
-  void visitIntToPtrInst(IntToPtrInst &I) { 
-    IRBuilder<> IRB(&I); 
-    setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false, 
-             "_msprop_inttoptr")); 
-    setOrigin(&I, getOrigin(&I, 0)); 
-  } 
- 
-  void visitFPToSIInst(CastInst& I) { handleShadowOr(I); } 
-  void visitFPToUIInst(CastInst& I) { handleShadowOr(I); } 
-  void visitSIToFPInst(CastInst& I) { handleShadowOr(I); } 
-  void visitUIToFPInst(CastInst& I) { handleShadowOr(I); } 
-  void visitFPExtInst(CastInst& I) { handleShadowOr(I); } 
-  void visitFPTruncInst(CastInst& I) { handleShadowOr(I); } 
- 
-  /// Propagate shadow for bitwise AND. 
-  /// 
-  /// This code is exact, i.e. if, for example, a bit in the left argument 
-  /// is defined and 0, then neither the value not definedness of the 
-  /// corresponding bit in B don't affect the resulting shadow. 
-  void visitAnd(BinaryOperator &I) { 
-    IRBuilder<> IRB(&I); 
-    //  "And" of 0 and a poisoned value results in unpoisoned value. 
-    //  1&1 => 1;     0&1 => 0;     p&1 => p; 
-    //  1&0 => 0;     0&0 => 0;     p&0 => 0; 
-    //  1&p => p;     0&p => 0;     p&p => p; 
-    //  S = (S1 & S2) | (V1 & S2) | (S1 & V2) 
-    Value *S1 = getShadow(&I, 0); 
-    Value *S2 = getShadow(&I, 1); 
-    Value *V1 = I.getOperand(0); 
-    Value *V2 = I.getOperand(1); 
-    if (V1->getType() != S1->getType()) { 
-      V1 = IRB.CreateIntCast(V1, S1->getType(), false); 
-      V2 = IRB.CreateIntCast(V2, S2->getType(), false); 
-    } 
-    Value *S1S2 = IRB.CreateAnd(S1, S2); 
-    Value *V1S2 = IRB.CreateAnd(V1, S2); 
-    Value *S1V2 = IRB.CreateAnd(S1, V2); 
-    setShadow(&I, IRB.CreateOr({S1S2, V1S2, S1V2})); 
-    setOriginForNaryOp(I); 
-  } 
- 
-  void visitOr(BinaryOperator &I) { 
-    IRBuilder<> IRB(&I); 
-    //  "Or" of 1 and a poisoned value results in unpoisoned value. 
-    //  1|1 => 1;     0|1 => 1;     p|1 => 1; 
-    //  1|0 => 1;     0|0 => 0;     p|0 => p; 
-    //  1|p => 1;     0|p => p;     p|p => p; 
-    //  S = (S1 & S2) | (~V1 & S2) | (S1 & ~V2) 
-    Value *S1 = getShadow(&I, 0); 
-    Value *S2 = getShadow(&I, 1); 
-    Value *V1 = IRB.CreateNot(I.getOperand(0)); 
-    Value *V2 = IRB.CreateNot(I.getOperand(1)); 
-    if (V1->getType() != S1->getType()) { 
-      V1 = IRB.CreateIntCast(V1, S1->getType(), false); 
-      V2 = IRB.CreateIntCast(V2, S2->getType(), false); 
-    } 
-    Value *S1S2 = IRB.CreateAnd(S1, S2); 
-    Value *V1S2 = IRB.CreateAnd(V1, S2); 
-    Value *S1V2 = IRB.CreateAnd(S1, V2); 
-    setShadow(&I, IRB.CreateOr({S1S2, V1S2, S1V2})); 
-    setOriginForNaryOp(I); 
-  } 
- 
-  /// Default propagation of shadow and/or origin. 
-  /// 
-  /// This class implements the general case of shadow propagation, used in all 
-  /// cases where we don't know and/or don't care about what the operation 
-  /// actually does. It converts all input shadow values to a common type 
-  /// (extending or truncating as necessary), and bitwise OR's them. 
-  /// 
-  /// This is much cheaper than inserting checks (i.e. requiring inputs to be 
-  /// fully initialized), and less prone to false positives. 
-  /// 
-  /// This class also implements the general case of origin propagation. For a 
-  /// Nary operation, result origin is set to the origin of an argument that is 
-  /// not entirely initialized. If there is more than one such arguments, the 
-  /// rightmost of them is picked. It does not matter which one is picked if all 
-  /// arguments are initialized. 
-  template <bool CombineShadow> 
-  class Combiner { 
-    Value *Shadow = nullptr; 
-    Value *Origin = nullptr; 
-    IRBuilder<> &IRB; 
-    MemorySanitizerVisitor *MSV; 
- 
-  public: 
-    Combiner(MemorySanitizerVisitor *MSV, IRBuilder<> &IRB) 
-        : IRB(IRB), MSV(MSV) {} 
- 
-    /// Add a pair of shadow and origin values to the mix. 
-    Combiner &Add(Value *OpShadow, Value *OpOrigin) { 
-      if (CombineShadow) { 
-        assert(OpShadow); 
-        if (!Shadow) 
-          Shadow = OpShadow; 
-        else { 
-          OpShadow = MSV->CreateShadowCast(IRB, OpShadow, Shadow->getType()); 
-          Shadow = IRB.CreateOr(Shadow, OpShadow, "_msprop"); 
-        } 
-      } 
- 
-      if (MSV->MS.TrackOrigins) { 
-        assert(OpOrigin); 
-        if (!Origin) { 
-          Origin = OpOrigin; 
-        } else { 
-          Constant *ConstOrigin = dyn_cast<Constant>(OpOrigin); 
-          // No point in adding something that might result in 0 origin value. 
-          if (!ConstOrigin || !ConstOrigin->isNullValue()) { 
+  }
+
+  /// Instrument LoadInst
+  ///
+  /// Loads the corresponding shadow and (optionally) origin.
+  /// Optionally, checks that the load address is fully defined.
+  void visitLoadInst(LoadInst &I) {
+    assert(I.getType()->isSized() && "Load type must have size");
+    assert(!I.getMetadata("nosanitize"));
+    IRBuilder<> IRB(I.getNextNode());
+    Type *ShadowTy = getShadowTy(&I);
+    Value *Addr = I.getPointerOperand();
+    Value *ShadowPtr = nullptr, *OriginPtr = nullptr;
+    const Align Alignment = assumeAligned(I.getAlignment());
+    if (PropagateShadow) {
+      std::tie(ShadowPtr, OriginPtr) =
+          getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
+      setShadow(&I,
+                IRB.CreateAlignedLoad(ShadowTy, ShadowPtr, Alignment, "_msld"));
+    } else {
+      setShadow(&I, getCleanShadow(&I));
+    }
+
+    if (ClCheckAccessAddress)
+      insertShadowCheck(I.getPointerOperand(), &I);
+
+    if (I.isAtomic())
+      I.setOrdering(addAcquireOrdering(I.getOrdering()));
+
+    if (MS.TrackOrigins) {
+      if (PropagateShadow) {
+        const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment);
+        setOrigin(
+            &I, IRB.CreateAlignedLoad(MS.OriginTy, OriginPtr, OriginAlignment));
+      } else {
+        setOrigin(&I, getCleanOrigin());
+      }
+    }
+  }
+
+  /// Instrument StoreInst
+  ///
+  /// Stores the corresponding shadow and (optionally) origin.
+  /// Optionally, checks that the store address is fully defined.
+  void visitStoreInst(StoreInst &I) {
+    StoreList.push_back(&I);
+    if (ClCheckAccessAddress)
+      insertShadowCheck(I.getPointerOperand(), &I);
+  }
+
+  void handleCASOrRMW(Instruction &I) {
+    assert(isa<AtomicRMWInst>(I) || isa<AtomicCmpXchgInst>(I));
+
+    IRBuilder<> IRB(&I);
+    Value *Addr = I.getOperand(0);
+    Value *ShadowPtr = getShadowOriginPtr(Addr, IRB, I.getType(), Align(1),
+                                          /*isStore*/ true)
+                           .first;
+
+    if (ClCheckAccessAddress)
+      insertShadowCheck(Addr, &I);
+
+    // Only test the conditional argument of cmpxchg instruction.
+    // The other argument can potentially be uninitialized, but we can not
+    // detect this situation reliably without possible false positives.
+    if (isa<AtomicCmpXchgInst>(I))
+      insertShadowCheck(I.getOperand(1), &I);
+
+    IRB.CreateStore(getCleanShadow(&I), ShadowPtr);
+
+    setShadow(&I, getCleanShadow(&I));
+    setOrigin(&I, getCleanOrigin());
+  }
+
+  void visitAtomicRMWInst(AtomicRMWInst &I) {
+    handleCASOrRMW(I);
+    I.setOrdering(addReleaseOrdering(I.getOrdering()));
+  }
+
+  void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
+    handleCASOrRMW(I);
+    I.setSuccessOrdering(addReleaseOrdering(I.getSuccessOrdering()));
+  }
+
+  // Vector manipulation.
+  void visitExtractElementInst(ExtractElementInst &I) {
+    insertShadowCheck(I.getOperand(1), &I);
+    IRBuilder<> IRB(&I);
+    setShadow(&I, IRB.CreateExtractElement(getShadow(&I, 0), I.getOperand(1),
+              "_msprop"));
+    setOrigin(&I, getOrigin(&I, 0));
+  }
+
+  void visitInsertElementInst(InsertElementInst &I) {
+    insertShadowCheck(I.getOperand(2), &I);
+    IRBuilder<> IRB(&I);
+    setShadow(&I, IRB.CreateInsertElement(getShadow(&I, 0), getShadow(&I, 1),
+              I.getOperand(2), "_msprop"));
+    setOriginForNaryOp(I);
+  }
+
+  void visitShuffleVectorInst(ShuffleVectorInst &I) {
+    IRBuilder<> IRB(&I);
+    setShadow(&I, IRB.CreateShuffleVector(getShadow(&I, 0), getShadow(&I, 1),
+                                          I.getShuffleMask(), "_msprop"));
+    setOriginForNaryOp(I);
+  }
+
+  // Casts.
+  void visitSExtInst(SExtInst &I) {
+    IRBuilder<> IRB(&I);
+    setShadow(&I, IRB.CreateSExt(getShadow(&I, 0), I.getType(), "_msprop"));
+    setOrigin(&I, getOrigin(&I, 0));
+  }
+
+  void visitZExtInst(ZExtInst &I) {
+    IRBuilder<> IRB(&I);
+    setShadow(&I, IRB.CreateZExt(getShadow(&I, 0), I.getType(), "_msprop"));
+    setOrigin(&I, getOrigin(&I, 0));
+  }
+
+  void visitTruncInst(TruncInst &I) {
+    IRBuilder<> IRB(&I);
+    setShadow(&I, IRB.CreateTrunc(getShadow(&I, 0), I.getType(), "_msprop"));
+    setOrigin(&I, getOrigin(&I, 0));
+  }
+
+  void visitBitCastInst(BitCastInst &I) {
+    // Special case: if this is the bitcast (there is exactly 1 allowed) between
+    // a musttail call and a ret, don't instrument. New instructions are not
+    // allowed after a musttail call.
+    if (auto *CI = dyn_cast<CallInst>(I.getOperand(0)))
+      if (CI->isMustTailCall())
+        return;
+    IRBuilder<> IRB(&I);
+    setShadow(&I, IRB.CreateBitCast(getShadow(&I, 0), getShadowTy(&I)));
+    setOrigin(&I, getOrigin(&I, 0));
+  }
+
+  void visitPtrToIntInst(PtrToIntInst &I) {
+    IRBuilder<> IRB(&I);
+    setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
+             "_msprop_ptrtoint"));
+    setOrigin(&I, getOrigin(&I, 0));
+  }
+
+  void visitIntToPtrInst(IntToPtrInst &I) {
+    IRBuilder<> IRB(&I);
+    setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
+             "_msprop_inttoptr"));
+    setOrigin(&I, getOrigin(&I, 0));
+  }
+
+  void visitFPToSIInst(CastInst& I) { handleShadowOr(I); }
+  void visitFPToUIInst(CastInst& I) { handleShadowOr(I); }
+  void visitSIToFPInst(CastInst& I) { handleShadowOr(I); }
+  void visitUIToFPInst(CastInst& I) { handleShadowOr(I); }
+  void visitFPExtInst(CastInst& I) { handleShadowOr(I); }
+  void visitFPTruncInst(CastInst& I) { handleShadowOr(I); }
+
+  /// Propagate shadow for bitwise AND.
+  ///
+  /// This code is exact, i.e. if, for example, a bit in the left argument
+  /// is defined and 0, then neither the value nor the definedness of the
+  /// corresponding bit in the right argument affects the resulting shadow.
+  void visitAnd(BinaryOperator &I) {
+    IRBuilder<> IRB(&I);
+    //  "And" of 0 and a poisoned value results in an unpoisoned value.
+    //  1&1 => 1;     0&1 => 0;     p&1 => p;
+    //  1&0 => 0;     0&0 => 0;     p&0 => 0;
+    //  1&p => p;     0&p => 0;     p&p => p;
+    //  S = (S1 & S2) | (V1 & S2) | (S1 & V2)  (Sn = shadow, Vn = operand value)
+    Value *S1 = getShadow(&I, 0);
+    Value *S2 = getShadow(&I, 1);
+    Value *V1 = I.getOperand(0);
+    Value *V2 = I.getOperand(1);
+    if (V1->getType() != S1->getType()) {
+      V1 = IRB.CreateIntCast(V1, S1->getType(), false);
+      V2 = IRB.CreateIntCast(V2, S2->getType(), false);
+    }
+    Value *S1S2 = IRB.CreateAnd(S1, S2);
+    Value *V1S2 = IRB.CreateAnd(V1, S2);
+    Value *S1V2 = IRB.CreateAnd(S1, V2);
+    setShadow(&I, IRB.CreateOr({S1S2, V1S2, S1V2}));
+    setOriginForNaryOp(I);
+  }
+
+  void visitOr(BinaryOperator &I) {
+    IRBuilder<> IRB(&I);
+    //  "Or" of 1 and a poisoned value results in an unpoisoned value.
+    //  1|1 => 1;     0|1 => 1;     p|1 => 1;
+    //  1|0 => 1;     0|0 => 0;     p|0 => p;
+    //  1|p => 1;     0|p => p;     p|p => p;
+    //  S = (S1 & S2) | (~V1 & S2) | (S1 & ~V2)  (V1/V2 below are pre-negated)
+    Value *S1 = getShadow(&I, 0);
+    Value *S2 = getShadow(&I, 1);
+    Value *V1 = IRB.CreateNot(I.getOperand(0));
+    Value *V2 = IRB.CreateNot(I.getOperand(1));
+    if (V1->getType() != S1->getType()) {
+      V1 = IRB.CreateIntCast(V1, S1->getType(), false);
+      V2 = IRB.CreateIntCast(V2, S2->getType(), false);
+    }
+    Value *S1S2 = IRB.CreateAnd(S1, S2);
+    Value *V1S2 = IRB.CreateAnd(V1, S2);
+    Value *S1V2 = IRB.CreateAnd(S1, V2);
+    setShadow(&I, IRB.CreateOr({S1S2, V1S2, S1V2}));
+    setOriginForNaryOp(I);
+  }
+
+  /// Default propagation of shadow and/or origin.
+  ///
+  /// This class implements the general case of shadow propagation, used in all
+  /// cases where we don't know and/or don't care about what the operation
+  /// actually does. It converts all input shadow values to a common type
+  /// (extending or truncating as necessary), and bitwise OR's them.
+  ///
+  /// This is much cheaper than inserting checks (i.e. requiring inputs to be
+  /// fully initialized), and less prone to false positives.
+  ///
+  /// This class also implements the general case of origin propagation. For a
+  /// Nary operation, result origin is set to the origin of an argument that is
+  /// not entirely initialized. If there is more than one such argument, the
+  /// rightmost of them is picked. It does not matter which one is picked if all
+  /// arguments are initialized.
+  template <bool CombineShadow>
+  class Combiner {
+    Value *Shadow = nullptr;
+    Value *Origin = nullptr;
+    IRBuilder<> &IRB;
+    MemorySanitizerVisitor *MSV;
+
+  public:
+    Combiner(MemorySanitizerVisitor *MSV, IRBuilder<> &IRB)
+        : IRB(IRB), MSV(MSV) {}
+
+    /// Add a pair of shadow and origin values to the mix.
+    Combiner &Add(Value *OpShadow, Value *OpOrigin) {
+      if (CombineShadow) {
+        assert(OpShadow);
+        if (!Shadow)
+          Shadow = OpShadow;
+        else {
+          OpShadow = MSV->CreateShadowCast(IRB, OpShadow, Shadow->getType());
+          Shadow = IRB.CreateOr(Shadow, OpShadow, "_msprop");
+        }
+      }
+
+      if (MSV->MS.TrackOrigins) {
+        assert(OpOrigin);
+        if (!Origin) {
+          Origin = OpOrigin;
+        } else {
+          Constant *ConstOrigin = dyn_cast<Constant>(OpOrigin);
+          // No point in adding something that might result in 0 origin value.
+          if (!ConstOrigin || !ConstOrigin->isNullValue()) {
             Value *FlatShadow = MSV->convertShadowToScalar(OpShadow, IRB);
-            Value *Cond = 
-                IRB.CreateICmpNE(FlatShadow, MSV->getCleanShadow(FlatShadow)); 
-            Origin = IRB.CreateSelect(Cond, OpOrigin, Origin); 
-          } 
-        } 
-      } 
-      return *this; 
-    } 
- 
-    /// Add an application value to the mix. 
-    Combiner &Add(Value *V) { 
-      Value *OpShadow = MSV->getShadow(V); 
-      Value *OpOrigin = MSV->MS.TrackOrigins ? MSV->getOrigin(V) : nullptr; 
-      return Add(OpShadow, OpOrigin); 
-    } 
- 
-    /// Set the current combined values as the given instruction's shadow 
-    /// and origin. 
-    void Done(Instruction *I) { 
-      if (CombineShadow) { 
-        assert(Shadow); 
-        Shadow = MSV->CreateShadowCast(IRB, Shadow, MSV->getShadowTy(I)); 
-        MSV->setShadow(I, Shadow); 
-      } 
-      if (MSV->MS.TrackOrigins) { 
-        assert(Origin); 
-        MSV->setOrigin(I, Origin); 
-      } 
-    } 
-  }; 
- 
-  using ShadowAndOriginCombiner = Combiner<true>; 
-  using OriginCombiner = Combiner<false>; 
- 
-  /// Propagate origin for arbitrary operation. 
-  void setOriginForNaryOp(Instruction &I) { 
-    if (!MS.TrackOrigins) return; 
-    IRBuilder<> IRB(&I); 
-    OriginCombiner OC(this, IRB); 
-    for (Instruction::op_iterator OI = I.op_begin(); OI != I.op_end(); ++OI) 
-      OC.Add(OI->get()); 
-    OC.Done(&I); 
-  } 
- 
-  size_t VectorOrPrimitiveTypeSizeInBits(Type *Ty) { 
-    assert(!(Ty->isVectorTy() && Ty->getScalarType()->isPointerTy()) && 
-           "Vector of pointers is not a valid shadow type"); 
-    return Ty->isVectorTy() ? cast<FixedVectorType>(Ty)->getNumElements() * 
-                                  Ty->getScalarSizeInBits() 
-                            : Ty->getPrimitiveSizeInBits(); 
-  } 
- 
-  /// Cast between two shadow types, extending or truncating as 
-  /// necessary. 
-  Value *CreateShadowCast(IRBuilder<> &IRB, Value *V, Type *dstTy, 
-                          bool Signed = false) { 
-    Type *srcTy = V->getType(); 
-    size_t srcSizeInBits = VectorOrPrimitiveTypeSizeInBits(srcTy); 
-    size_t dstSizeInBits = VectorOrPrimitiveTypeSizeInBits(dstTy); 
-    if (srcSizeInBits > 1 && dstSizeInBits == 1) 
-      return IRB.CreateICmpNE(V, getCleanShadow(V)); 
- 
-    if (dstTy->isIntegerTy() && srcTy->isIntegerTy()) 
-      return IRB.CreateIntCast(V, dstTy, Signed); 
-    if (dstTy->isVectorTy() && srcTy->isVectorTy() && 
-        cast<FixedVectorType>(dstTy)->getNumElements() == 
-            cast<FixedVectorType>(srcTy)->getNumElements()) 
-      return IRB.CreateIntCast(V, dstTy, Signed); 
-    Value *V1 = IRB.CreateBitCast(V, Type::getIntNTy(*MS.C, srcSizeInBits)); 
-    Value *V2 = 
-      IRB.CreateIntCast(V1, Type::getIntNTy(*MS.C, dstSizeInBits), Signed); 
-    return IRB.CreateBitCast(V2, dstTy); 
-    // TODO: handle struct types. 
-  } 
- 
-  /// Cast an application value to the type of its own shadow. 
-  Value *CreateAppToShadowCast(IRBuilder<> &IRB, Value *V) { 
-    Type *ShadowTy = getShadowTy(V); 
-    if (V->getType() == ShadowTy) 
-      return V; 
-    if (V->getType()->isPtrOrPtrVectorTy()) 
-      return IRB.CreatePtrToInt(V, ShadowTy); 
-    else 
-      return IRB.CreateBitCast(V, ShadowTy); 
-  } 
- 
-  /// Propagate shadow for arbitrary operation. 
-  void handleShadowOr(Instruction &I) { 
-    IRBuilder<> IRB(&I); 
-    ShadowAndOriginCombiner SC(this, IRB); 
-    for (Instruction::op_iterator OI = I.op_begin(); OI != I.op_end(); ++OI) 
-      SC.Add(OI->get()); 
-    SC.Done(&I); 
-  } 
- 
-  void visitFNeg(UnaryOperator &I) { handleShadowOr(I); } 
- 
-  // Handle multiplication by constant. 
-  // 
-  // Handle a special case of multiplication by constant that may have one or 
-  // more zeros in the lower bits. This makes corresponding number of lower bits 
-  // of the result zero as well. We model it by shifting the other operand 
-  // shadow left by the required number of bits. Effectively, we transform 
-  // (X * (A * 2**B)) to ((X << B) * A) and instrument (X << B) as (Sx << B). 
-  // We use multiplication by 2**N instead of shift to cover the case of 
-  // multiplication by 0, which may occur in some elements of a vector operand. 
-  void handleMulByConstant(BinaryOperator &I, Constant *ConstArg, 
-                           Value *OtherArg) { 
-    Constant *ShadowMul; 
-    Type *Ty = ConstArg->getType(); 
-    if (auto *VTy = dyn_cast<VectorType>(Ty)) { 
-      unsigned NumElements = cast<FixedVectorType>(VTy)->getNumElements(); 
-      Type *EltTy = VTy->getElementType(); 
-      SmallVector<Constant *, 16> Elements; 
-      for (unsigned Idx = 0; Idx < NumElements; ++Idx) { 
-        if (ConstantInt *Elt = 
-                dyn_cast<ConstantInt>(ConstArg->getAggregateElement(Idx))) { 
-          const APInt &V = Elt->getValue(); 
-          APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros(); 
-          Elements.push_back(ConstantInt::get(EltTy, V2)); 
-        } else { 
-          Elements.push_back(ConstantInt::get(EltTy, 1)); 
-        } 
-      } 
-      ShadowMul = ConstantVector::get(Elements); 
-    } else { 
-      if (ConstantInt *Elt = dyn_cast<ConstantInt>(ConstArg)) { 
-        const APInt &V = Elt->getValue(); 
-        APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros(); 
-        ShadowMul = ConstantInt::get(Ty, V2); 
-      } else { 
-        ShadowMul = ConstantInt::get(Ty, 1); 
-      } 
-    } 
- 
-    IRBuilder<> IRB(&I); 
-    setShadow(&I, 
-              IRB.CreateMul(getShadow(OtherArg), ShadowMul, "msprop_mul_cst")); 
-    setOrigin(&I, getOrigin(OtherArg)); 
-  } 
- 
-  void visitMul(BinaryOperator &I) { 
-    Constant *constOp0 = dyn_cast<Constant>(I.getOperand(0)); 
-    Constant *constOp1 = dyn_cast<Constant>(I.getOperand(1)); 
-    if (constOp0 && !constOp1) 
-      handleMulByConstant(I, constOp0, I.getOperand(1)); 
-    else if (constOp1 && !constOp0) 
-      handleMulByConstant(I, constOp1, I.getOperand(0)); 
-    else 
-      handleShadowOr(I); 
-  } 
- 
-  void visitFAdd(BinaryOperator &I) { handleShadowOr(I); } 
-  void visitFSub(BinaryOperator &I) { handleShadowOr(I); } 
-  void visitFMul(BinaryOperator &I) { handleShadowOr(I); } 
-  void visitAdd(BinaryOperator &I) { handleShadowOr(I); } 
-  void visitSub(BinaryOperator &I) { handleShadowOr(I); } 
-  void visitXor(BinaryOperator &I) { handleShadowOr(I); } 
- 
-  void handleIntegerDiv(Instruction &I) { 
-    IRBuilder<> IRB(&I); 
-    // Strict on the second argument. 
-    insertShadowCheck(I.getOperand(1), &I); 
-    setShadow(&I, getShadow(&I, 0)); 
-    setOrigin(&I, getOrigin(&I, 0)); 
-  } 
- 
-  void visitUDiv(BinaryOperator &I) { handleIntegerDiv(I); } 
-  void visitSDiv(BinaryOperator &I) { handleIntegerDiv(I); } 
-  void visitURem(BinaryOperator &I) { handleIntegerDiv(I); } 
-  void visitSRem(BinaryOperator &I) { handleIntegerDiv(I); } 
- 
-  // Floating point division is side-effect free. We can not require that the 
-  // divisor is fully initialized and must propagate shadow. See PR37523. 
-  void visitFDiv(BinaryOperator &I) { handleShadowOr(I); } 
-  void visitFRem(BinaryOperator &I) { handleShadowOr(I); } 
- 
-  /// Instrument == and != comparisons. 
-  /// 
-  /// Sometimes the comparison result is known even if some of the bits of the 
-  /// arguments are not. 
-  void handleEqualityComparison(ICmpInst &I) { 
-    IRBuilder<> IRB(&I); 
-    Value *A = I.getOperand(0); 
-    Value *B = I.getOperand(1); 
-    Value *Sa = getShadow(A); 
-    Value *Sb = getShadow(B); 
- 
-    // Get rid of pointers and vectors of pointers. 
-    // For ints (and vectors of ints), types of A and Sa match, 
-    // and this is a no-op. 
-    A = IRB.CreatePointerCast(A, Sa->getType()); 
-    B = IRB.CreatePointerCast(B, Sb->getType()); 
- 
-    // A == B  <==>  (C = A^B) == 0 
-    // A != B  <==>  (C = A^B) != 0 
-    // Sc = Sa | Sb 
-    Value *C = IRB.CreateXor(A, B); 
-    Value *Sc = IRB.CreateOr(Sa, Sb); 
-    // Now dealing with i = (C == 0) comparison (or C != 0, does not matter now) 
-    // Result is defined if one of the following is true 
-    // * there is a defined 1 bit in C 
-    // * C is fully defined 
-    // Si = !(C & ~Sc) && Sc 
-    Value *Zero = Constant::getNullValue(Sc->getType()); 
-    Value *MinusOne = Constant::getAllOnesValue(Sc->getType()); 
-    Value *Si = 
-      IRB.CreateAnd(IRB.CreateICmpNE(Sc, Zero), 
-                    IRB.CreateICmpEQ( 
-                      IRB.CreateAnd(IRB.CreateXor(Sc, MinusOne), C), Zero)); 
-    Si->setName("_msprop_icmp"); 
-    setShadow(&I, Si); 
-    setOriginForNaryOp(I); 
-  } 
- 
-  /// Build the lowest possible value of V, taking into account V's 
-  ///        uninitialized bits. 
-  Value *getLowestPossibleValue(IRBuilder<> &IRB, Value *A, Value *Sa, 
-                                bool isSigned) { 
-    if (isSigned) { 
-      // Split shadow into sign bit and other bits. 
-      Value *SaOtherBits = IRB.CreateLShr(IRB.CreateShl(Sa, 1), 1); 
-      Value *SaSignBit = IRB.CreateXor(Sa, SaOtherBits); 
-      // Maximise the undefined shadow bit, minimize other undefined bits. 
-      return 
-        IRB.CreateOr(IRB.CreateAnd(A, IRB.CreateNot(SaOtherBits)), SaSignBit); 
-    } else { 
-      // Minimize undefined bits. 
-      return IRB.CreateAnd(A, IRB.CreateNot(Sa)); 
-    } 
-  } 
- 
-  /// Build the highest possible value of V, taking into account V's 
-  ///        uninitialized bits. 
-  Value *getHighestPossibleValue(IRBuilder<> &IRB, Value *A, Value *Sa, 
-                                bool isSigned) { 
-    if (isSigned) { 
-      // Split shadow into sign bit and other bits. 
-      Value *SaOtherBits = IRB.CreateLShr(IRB.CreateShl(Sa, 1), 1); 
-      Value *SaSignBit = IRB.CreateXor(Sa, SaOtherBits); 
-      // Minimise the undefined shadow bit, maximise other undefined bits. 
-      return 
-        IRB.CreateOr(IRB.CreateAnd(A, IRB.CreateNot(SaSignBit)), SaOtherBits); 
-    } else { 
-      // Maximize undefined bits. 
-      return IRB.CreateOr(A, Sa); 
-    } 
-  } 
- 
-  /// Instrument relational comparisons. 
-  /// 
-  /// This function does exact shadow propagation for all relational 
-  /// comparisons of integers, pointers and vectors of those. 
-  /// FIXME: output seems suboptimal when one of the operands is a constant 
-  void handleRelationalComparisonExact(ICmpInst &I) { 
-    IRBuilder<> IRB(&I); 
-    Value *A = I.getOperand(0); 
-    Value *B = I.getOperand(1); 
-    Value *Sa = getShadow(A); 
-    Value *Sb = getShadow(B); 
- 
-    // Get rid of pointers and vectors of pointers. 
-    // For ints (and vectors of ints), types of A and Sa match, 
-    // and this is a no-op. 
-    A = IRB.CreatePointerCast(A, Sa->getType()); 
-    B = IRB.CreatePointerCast(B, Sb->getType()); 
- 
-    // Let [a0, a1] be the interval of possible values of A, taking into account 
-    // its undefined bits. Let [b0, b1] be the interval of possible values of B. 
-    // Then (A cmp B) is defined iff (a0 cmp b1) == (a1 cmp b0). 
-    bool IsSigned = I.isSigned(); 
-    Value *S1 = IRB.CreateICmp(I.getPredicate(), 
-                               getLowestPossibleValue(IRB, A, Sa, IsSigned), 
-                               getHighestPossibleValue(IRB, B, Sb, IsSigned)); 
-    Value *S2 = IRB.CreateICmp(I.getPredicate(), 
-                               getHighestPossibleValue(IRB, A, Sa, IsSigned), 
-                               getLowestPossibleValue(IRB, B, Sb, IsSigned)); 
-    Value *Si = IRB.CreateXor(S1, S2); 
-    setShadow(&I, Si); 
-    setOriginForNaryOp(I); 
-  } 
- 
-  /// Instrument signed relational comparisons. 
-  /// 
-  /// Handle sign bit tests: x<0, x>=0, x<=-1, x>-1 by propagating the highest 
-  /// bit of the shadow. Everything else is delegated to handleShadowOr(). 
-  void handleSignedRelationalComparison(ICmpInst &I) { 
-    Constant *constOp; 
-    Value *op = nullptr; 
-    CmpInst::Predicate pre; 
-    if ((constOp = dyn_cast<Constant>(I.getOperand(1)))) { 
-      op = I.getOperand(0); 
-      pre = I.getPredicate(); 
-    } else if ((constOp = dyn_cast<Constant>(I.getOperand(0)))) { 
-      op = I.getOperand(1); 
-      pre = I.getSwappedPredicate(); 
-    } else { 
-      handleShadowOr(I); 
-      return; 
-    } 
- 
-    if ((constOp->isNullValue() && 
-         (pre == CmpInst::ICMP_SLT || pre == CmpInst::ICMP_SGE)) || 
-        (constOp->isAllOnesValue() && 
-         (pre == CmpInst::ICMP_SGT || pre == CmpInst::ICMP_SLE))) { 
-      IRBuilder<> IRB(&I); 
-      Value *Shadow = IRB.CreateICmpSLT(getShadow(op), getCleanShadow(op), 
-                                        "_msprop_icmp_s"); 
-      setShadow(&I, Shadow); 
-      setOrigin(&I, getOrigin(op)); 
-    } else { 
-      handleShadowOr(I); 
-    } 
-  } 
- 
-  void visitICmpInst(ICmpInst &I) { 
-    if (!ClHandleICmp) { 
-      handleShadowOr(I); 
-      return; 
-    } 
-    if (I.isEquality()) { 
-      handleEqualityComparison(I); 
-      return; 
-    } 
- 
-    assert(I.isRelational()); 
-    if (ClHandleICmpExact) { 
-      handleRelationalComparisonExact(I); 
-      return; 
-    } 
-    if (I.isSigned()) { 
-      handleSignedRelationalComparison(I); 
-      return; 
-    } 
- 
-    assert(I.isUnsigned()); 
-    if ((isa<Constant>(I.getOperand(0)) || isa<Constant>(I.getOperand(1)))) { 
-      handleRelationalComparisonExact(I); 
-      return; 
-    } 
- 
-    handleShadowOr(I); 
-  } 
- 
-  void visitFCmpInst(FCmpInst &I) { 
-    handleShadowOr(I); 
-  } 
- 
-  void handleShift(BinaryOperator &I) { 
-    IRBuilder<> IRB(&I); 
-    // If any of the S2 bits are poisoned, the whole thing is poisoned. 
-    // Otherwise perform the same shift on S1. 
-    Value *S1 = getShadow(&I, 0); 
-    Value *S2 = getShadow(&I, 1); 
-    Value *S2Conv = IRB.CreateSExt(IRB.CreateICmpNE(S2, getCleanShadow(S2)), 
-                                   S2->getType()); 
-    Value *V2 = I.getOperand(1); 
-    Value *Shift = IRB.CreateBinOp(I.getOpcode(), S1, V2); 
-    setShadow(&I, IRB.CreateOr(Shift, S2Conv)); 
-    setOriginForNaryOp(I); 
-  } 
- 
-  void visitShl(BinaryOperator &I) { handleShift(I); } 
-  void visitAShr(BinaryOperator &I) { handleShift(I); } 
-  void visitLShr(BinaryOperator &I) { handleShift(I); } 
- 
-  /// Instrument llvm.memmove 
-  /// 
-  /// At this point we don't know if llvm.memmove will be inlined or not. 
-  /// If we don't instrument it and it gets inlined, 
-  /// our interceptor will not kick in and we will lose the memmove. 
-  /// If we instrument the call here, but it does not get inlined, 
-  /// we will memove the shadow twice: which is bad in case 
-  /// of overlapping regions. So, we simply lower the intrinsic to a call. 
-  /// 
-  /// Similar situation exists for memcpy and memset. 
-  void visitMemMoveInst(MemMoveInst &I) { 
-    IRBuilder<> IRB(&I); 
-    IRB.CreateCall( 
-        MS.MemmoveFn, 
-        {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()), 
-         IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()), 
-         IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)}); 
-    I.eraseFromParent(); 
-  } 
- 
-  // Similar to memmove: avoid copying shadow twice. 
-  // This is somewhat unfortunate as it may slowdown small constant memcpys. 
-  // FIXME: consider doing manual inline for small constant sizes and proper 
-  // alignment. 
-  void visitMemCpyInst(MemCpyInst &I) { 
-    IRBuilder<> IRB(&I); 
-    IRB.CreateCall( 
-        MS.MemcpyFn, 
-        {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()), 
-         IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()), 
-         IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)}); 
-    I.eraseFromParent(); 
-  } 
- 
-  // Same as memcpy. 
-  void visitMemSetInst(MemSetInst &I) { 
-    IRBuilder<> IRB(&I); 
-    IRB.CreateCall( 
-        MS.MemsetFn, 
-        {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()), 
-         IRB.CreateIntCast(I.getArgOperand(1), IRB.getInt32Ty(), false), 
-         IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)}); 
-    I.eraseFromParent(); 
-  } 
- 
-  void visitVAStartInst(VAStartInst &I) { 
-    VAHelper->visitVAStartInst(I); 
-  } 
- 
-  void visitVACopyInst(VACopyInst &I) { 
-    VAHelper->visitVACopyInst(I); 
-  } 
- 
-  /// Handle vector store-like intrinsics. 
-  /// 
-  /// Instrument intrinsics that look like a simple SIMD store: writes memory, 
-  /// has 1 pointer argument and 1 vector argument, returns void. 
-  bool handleVectorStoreIntrinsic(IntrinsicInst &I) { 
-    IRBuilder<> IRB(&I); 
-    Value* Addr = I.getArgOperand(0); 
-    Value *Shadow = getShadow(&I, 1); 
-    Value *ShadowPtr, *OriginPtr; 
- 
-    // We don't know the pointer alignment (could be unaligned SSE store!). 
-    // Have to assume to worst case. 
-    std::tie(ShadowPtr, OriginPtr) = getShadowOriginPtr( 
-        Addr, IRB, Shadow->getType(), Align(1), /*isStore*/ true); 
-    IRB.CreateAlignedStore(Shadow, ShadowPtr, Align(1)); 
- 
-    if (ClCheckAccessAddress) 
-      insertShadowCheck(Addr, &I); 
- 
-    // FIXME: factor out common code from materializeStores 
-    if (MS.TrackOrigins) IRB.CreateStore(getOrigin(&I, 1), OriginPtr); 
-    return true; 
-  } 
- 
-  /// Handle vector load-like intrinsics. 
-  /// 
-  /// Instrument intrinsics that look like a simple SIMD load: reads memory, 
-  /// has 1 pointer argument, returns a vector. 
-  bool handleVectorLoadIntrinsic(IntrinsicInst &I) { 
-    IRBuilder<> IRB(&I); 
-    Value *Addr = I.getArgOperand(0); 
- 
-    Type *ShadowTy = getShadowTy(&I); 
-    Value *ShadowPtr = nullptr, *OriginPtr = nullptr; 
-    if (PropagateShadow) { 
-      // We don't know the pointer alignment (could be unaligned SSE load!). 
-      // Have to assume to worst case. 
-      const Align Alignment = Align(1); 
-      std::tie(ShadowPtr, OriginPtr) = 
-          getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false); 
-      setShadow(&I, 
-                IRB.CreateAlignedLoad(ShadowTy, ShadowPtr, Alignment, "_msld")); 
-    } else { 
-      setShadow(&I, getCleanShadow(&I)); 
-    } 
- 
-    if (ClCheckAccessAddress) 
-      insertShadowCheck(Addr, &I); 
- 
-    if (MS.TrackOrigins) { 
-      if (PropagateShadow) 
-        setOrigin(&I, IRB.CreateLoad(MS.OriginTy, OriginPtr)); 
-      else 
-        setOrigin(&I, getCleanOrigin()); 
-    } 
-    return true; 
-  } 
- 
-  /// Handle (SIMD arithmetic)-like intrinsics. 
-  /// 
-  /// Instrument intrinsics with any number of arguments of the same type, 
-  /// equal to the return type. The type should be simple (no aggregates or 
-  /// pointers; vectors are fine). 
-  /// Caller guarantees that this intrinsic does not access memory. 
-  bool maybeHandleSimpleNomemIntrinsic(IntrinsicInst &I) { 
-    Type *RetTy = I.getType(); 
-    if (!(RetTy->isIntOrIntVectorTy() || 
-          RetTy->isFPOrFPVectorTy() || 
-          RetTy->isX86_MMXTy())) 
-      return false; 
- 
-    unsigned NumArgOperands = I.getNumArgOperands(); 
-    for (unsigned i = 0; i < NumArgOperands; ++i) { 
-      Type *Ty = I.getArgOperand(i)->getType(); 
-      if (Ty != RetTy) 
-        return false; 
-    } 
- 
-    IRBuilder<> IRB(&I); 
-    ShadowAndOriginCombiner SC(this, IRB); 
-    for (unsigned i = 0; i < NumArgOperands; ++i) 
-      SC.Add(I.getArgOperand(i)); 
-    SC.Done(&I); 
- 
-    return true; 
-  } 
- 
-  /// Heuristically instrument unknown intrinsics. 
-  /// 
-  /// The main purpose of this code is to do something reasonable with all 
-  /// random intrinsics we might encounter, most importantly - SIMD intrinsics. 
-  /// We recognize several classes of intrinsics by their argument types and 
-  /// ModRefBehaviour and apply special instrumentation when we are reasonably 
-  /// sure that we know what the intrinsic does. 
-  /// 
-  /// We special-case intrinsics where this approach fails. See llvm.bswap 
-  /// handling as an example of that. 
-  bool handleUnknownIntrinsic(IntrinsicInst &I) { 
-    unsigned NumArgOperands = I.getNumArgOperands(); 
-    if (NumArgOperands == 0) 
-      return false; 
- 
-    if (NumArgOperands == 2 && 
-        I.getArgOperand(0)->getType()->isPointerTy() && 
-        I.getArgOperand(1)->getType()->isVectorTy() && 
-        I.getType()->isVoidTy() && 
-        !I.onlyReadsMemory()) { 
-      // This looks like a vector store. 
-      return handleVectorStoreIntrinsic(I); 
-    } 
- 
-    if (NumArgOperands == 1 && 
-        I.getArgOperand(0)->getType()->isPointerTy() && 
-        I.getType()->isVectorTy() && 
-        I.onlyReadsMemory()) { 
-      // This looks like a vector load. 
-      return handleVectorLoadIntrinsic(I); 
-    } 
- 
-    if (I.doesNotAccessMemory()) 
-      if (maybeHandleSimpleNomemIntrinsic(I)) 
-        return true; 
- 
-    // FIXME: detect and handle SSE maskstore/maskload 
-    return false; 
-  } 
- 
-  void handleInvariantGroup(IntrinsicInst &I) { 
-    setShadow(&I, getShadow(&I, 0)); 
-    setOrigin(&I, getOrigin(&I, 0)); 
-  } 
- 
-  void handleLifetimeStart(IntrinsicInst &I) { 
-    if (!PoisonStack) 
-      return; 
+            Value *Cond =
+                IRB.CreateICmpNE(FlatShadow, MSV->getCleanShadow(FlatShadow));
+            Origin = IRB.CreateSelect(Cond, OpOrigin, Origin);
+          }
+        }
+      }
+      return *this;
+    }
+
+    /// Add an application value to the mix.
+    Combiner &Add(Value *V) {
+      Value *OpShadow = MSV->getShadow(V);
+      Value *OpOrigin = MSV->MS.TrackOrigins ? MSV->getOrigin(V) : nullptr;
+      return Add(OpShadow, OpOrigin);
+    }
+
+    /// Set the current combined values as the given instruction's shadow
+    /// and origin.
+    void Done(Instruction *I) {
+      if (CombineShadow) {
+        assert(Shadow);
+        Shadow = MSV->CreateShadowCast(IRB, Shadow, MSV->getShadowTy(I));
+        MSV->setShadow(I, Shadow);
+      }
+      if (MSV->MS.TrackOrigins) {
+        assert(Origin);
+        MSV->setOrigin(I, Origin);
+      }
+    }
+  };
+
+  using ShadowAndOriginCombiner = Combiner<true>;
+  using OriginCombiner = Combiner<false>;
+
+  /// Propagate origin for arbitrary operation.
+  void setOriginForNaryOp(Instruction &I) {
+    if (!MS.TrackOrigins) return;
+    IRBuilder<> IRB(&I);
+    OriginCombiner OC(this, IRB);
+    for (Instruction::op_iterator OI = I.op_begin(); OI != I.op_end(); ++OI)
+      OC.Add(OI->get());
+    OC.Done(&I);
+  }
+
+  size_t VectorOrPrimitiveTypeSizeInBits(Type *Ty) {
+    assert(!(Ty->isVectorTy() && Ty->getScalarType()->isPointerTy()) &&
+           "Vector of pointers is not a valid shadow type");
+    return Ty->isVectorTy() ? cast<FixedVectorType>(Ty)->getNumElements() *
+                                  Ty->getScalarSizeInBits()
+                            : Ty->getPrimitiveSizeInBits();
+  }
+
+  /// Cast between two shadow types, extending or truncating as
+  /// necessary.
+  Value *CreateShadowCast(IRBuilder<> &IRB, Value *V, Type *dstTy,
+                          bool Signed = false) {
+    Type *srcTy = V->getType();
+    size_t srcSizeInBits = VectorOrPrimitiveTypeSizeInBits(srcTy);
+    size_t dstSizeInBits = VectorOrPrimitiveTypeSizeInBits(dstTy);
+    if (srcSizeInBits > 1 && dstSizeInBits == 1)
+      return IRB.CreateICmpNE(V, getCleanShadow(V));
+
+    if (dstTy->isIntegerTy() && srcTy->isIntegerTy())
+      return IRB.CreateIntCast(V, dstTy, Signed);
+    if (dstTy->isVectorTy() && srcTy->isVectorTy() &&
+        cast<FixedVectorType>(dstTy)->getNumElements() ==
+            cast<FixedVectorType>(srcTy)->getNumElements())
+      return IRB.CreateIntCast(V, dstTy, Signed);
+    Value *V1 = IRB.CreateBitCast(V, Type::getIntNTy(*MS.C, srcSizeInBits));
+    Value *V2 =
+      IRB.CreateIntCast(V1, Type::getIntNTy(*MS.C, dstSizeInBits), Signed);
+    return IRB.CreateBitCast(V2, dstTy);
+    // TODO: handle struct types.
+  }
+
+  /// Cast an application value to the type of its own shadow.
+  Value *CreateAppToShadowCast(IRBuilder<> &IRB, Value *V) {
+    Type *ShadowTy = getShadowTy(V);
+    if (V->getType() == ShadowTy)
+      return V;
+    if (V->getType()->isPtrOrPtrVectorTy())
+      return IRB.CreatePtrToInt(V, ShadowTy);
+    else
+      return IRB.CreateBitCast(V, ShadowTy);
+  }
+
+  /// Propagate shadow for arbitrary operation.
+  void handleShadowOr(Instruction &I) {
+    IRBuilder<> IRB(&I);
+    ShadowAndOriginCombiner SC(this, IRB);
+    for (Instruction::op_iterator OI = I.op_begin(); OI != I.op_end(); ++OI)
+      SC.Add(OI->get());
+    SC.Done(&I);
+  }
+
+  void visitFNeg(UnaryOperator &I) { handleShadowOr(I); }
+
+  // Handle multiplication by constant.
+  //
+  // Handle a special case of multiplication by constant that may have one or
+  // more zeros in the lower bits. This makes corresponding number of lower bits
+  // of the result zero as well. We model it by shifting the other operand
+  // shadow left by the required number of bits. Effectively, we transform
+  // (X * (A * 2**B)) to ((X << B) * A) and instrument (X << B) as (Sx << B).
+  // We use multiplication by 2**N instead of shift to cover the case of
+  // multiplication by 0, which may occur in some elements of a vector operand.
+  void handleMulByConstant(BinaryOperator &I, Constant *ConstArg,
+                           Value *OtherArg) {
+    Constant *ShadowMul;
+    Type *Ty = ConstArg->getType();
+    if (auto *VTy = dyn_cast<VectorType>(Ty)) {
+      unsigned NumElements = cast<FixedVectorType>(VTy)->getNumElements();
+      Type *EltTy = VTy->getElementType();
+      SmallVector<Constant *, 16> Elements;
+      for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
+        if (ConstantInt *Elt =
+                dyn_cast<ConstantInt>(ConstArg->getAggregateElement(Idx))) {
+          const APInt &V = Elt->getValue();
+          APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros();
+          Elements.push_back(ConstantInt::get(EltTy, V2));
+        } else {
+          Elements.push_back(ConstantInt::get(EltTy, 1));
+        }
+      }
+      ShadowMul = ConstantVector::get(Elements);
+    } else {
+      if (ConstantInt *Elt = dyn_cast<ConstantInt>(ConstArg)) {
+        const APInt &V = Elt->getValue();
+        APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros();
+        ShadowMul = ConstantInt::get(Ty, V2);
+      } else {
+        ShadowMul = ConstantInt::get(Ty, 1);
+      }
+    }
+
+    IRBuilder<> IRB(&I);
+    setShadow(&I,
+              IRB.CreateMul(getShadow(OtherArg), ShadowMul, "msprop_mul_cst"));
+    setOrigin(&I, getOrigin(OtherArg));
+  }
+
+  void visitMul(BinaryOperator &I) {
+    Constant *constOp0 = dyn_cast<Constant>(I.getOperand(0));
+    Constant *constOp1 = dyn_cast<Constant>(I.getOperand(1));
+    if (constOp0 && !constOp1)
+      handleMulByConstant(I, constOp0, I.getOperand(1));
+    else if (constOp1 && !constOp0)
+      handleMulByConstant(I, constOp1, I.getOperand(0));
+    else
+      handleShadowOr(I);
+  }
+
+  void visitFAdd(BinaryOperator &I) { handleShadowOr(I); }
+  void visitFSub(BinaryOperator &I) { handleShadowOr(I); }
+  void visitFMul(BinaryOperator &I) { handleShadowOr(I); }
+  void visitAdd(BinaryOperator &I) { handleShadowOr(I); }
+  void visitSub(BinaryOperator &I) { handleShadowOr(I); }
+  void visitXor(BinaryOperator &I) { handleShadowOr(I); }
+
+  void handleIntegerDiv(Instruction &I) {
+    IRBuilder<> IRB(&I);
+    // Strict on the second argument.
+    insertShadowCheck(I.getOperand(1), &I);
+    setShadow(&I, getShadow(&I, 0));
+    setOrigin(&I, getOrigin(&I, 0));
+  }
+
+  void visitUDiv(BinaryOperator &I) { handleIntegerDiv(I); }
+  void visitSDiv(BinaryOperator &I) { handleIntegerDiv(I); }
+  void visitURem(BinaryOperator &I) { handleIntegerDiv(I); }
+  void visitSRem(BinaryOperator &I) { handleIntegerDiv(I); }
+
+  // Floating point division is side-effect free. We can not require that the
+  // divisor is fully initialized and must propagate shadow. See PR37523.
+  void visitFDiv(BinaryOperator &I) { handleShadowOr(I); }
+  void visitFRem(BinaryOperator &I) { handleShadowOr(I); }
+
+  /// Instrument == and != comparisons.
+  ///
+  /// Sometimes the comparison result is known even if some of the bits of the
+  /// arguments are not.
+  void handleEqualityComparison(ICmpInst &I) {
+    IRBuilder<> IRB(&I);
+    Value *A = I.getOperand(0);
+    Value *B = I.getOperand(1);
+    Value *Sa = getShadow(A);
+    Value *Sb = getShadow(B);
+
+    // Get rid of pointers and vectors of pointers.
+    // For ints (and vectors of ints), types of A and Sa match,
+    // and this is a no-op.
+    A = IRB.CreatePointerCast(A, Sa->getType());
+    B = IRB.CreatePointerCast(B, Sb->getType());
+
+    // A == B  <==>  (C = A^B) == 0
+    // A != B  <==>  (C = A^B) != 0
+    // Sc = Sa | Sb
+    Value *C = IRB.CreateXor(A, B);
+    Value *Sc = IRB.CreateOr(Sa, Sb);
+    // Now dealing with i = (C == 0) comparison (or C != 0, does not matter now)
+    // Result is defined if one of the following is true
+    // * there is a defined 1 bit in C
+    // * C is fully defined
+    // Si = !(C & ~Sc) && Sc
+    Value *Zero = Constant::getNullValue(Sc->getType());
+    Value *MinusOne = Constant::getAllOnesValue(Sc->getType());
+    Value *Si =
+      IRB.CreateAnd(IRB.CreateICmpNE(Sc, Zero),
+                    IRB.CreateICmpEQ(
+                      IRB.CreateAnd(IRB.CreateXor(Sc, MinusOne), C), Zero));
+    Si->setName("_msprop_icmp");
+    setShadow(&I, Si);
+    setOriginForNaryOp(I);
+  }
+
+  /// Build the lowest possible value of V, taking into account V's
+  ///        uninitialized bits.
+  Value *getLowestPossibleValue(IRBuilder<> &IRB, Value *A, Value *Sa,
+                                bool isSigned) {
+    if (isSigned) {
+      // Split shadow into sign bit and other bits.
+      Value *SaOtherBits = IRB.CreateLShr(IRB.CreateShl(Sa, 1), 1);
+      Value *SaSignBit = IRB.CreateXor(Sa, SaOtherBits);
+      // Maximise the undefined shadow bit, minimize other undefined bits.
+      return
+        IRB.CreateOr(IRB.CreateAnd(A, IRB.CreateNot(SaOtherBits)), SaSignBit);
+    } else {
+      // Minimize undefined bits.
+      return IRB.CreateAnd(A, IRB.CreateNot(Sa));
+    }
+  }
+
+  /// Build the highest possible value of V, taking into account V's
+  ///        uninitialized bits.
+  Value *getHighestPossibleValue(IRBuilder<> &IRB, Value *A, Value *Sa,
+                                bool isSigned) {
+    if (isSigned) {
+      // Split shadow into sign bit and other bits.
+      Value *SaOtherBits = IRB.CreateLShr(IRB.CreateShl(Sa, 1), 1);
+      Value *SaSignBit = IRB.CreateXor(Sa, SaOtherBits);
+      // Minimise the undefined shadow bit, maximise other undefined bits.
+      return
+        IRB.CreateOr(IRB.CreateAnd(A, IRB.CreateNot(SaSignBit)), SaOtherBits);
+    } else {
+      // Maximize undefined bits.
+      return IRB.CreateOr(A, Sa);
+    }
+  }
+
+  /// Instrument relational comparisons.
+  ///
+  /// This function does exact shadow propagation for all relational
+  /// comparisons of integers, pointers and vectors of those.
+  /// FIXME: output seems suboptimal when one of the operands is a constant
+  void handleRelationalComparisonExact(ICmpInst &I) {
+    IRBuilder<> IRB(&I);
+    Value *A = I.getOperand(0);
+    Value *B = I.getOperand(1);
+    Value *Sa = getShadow(A);
+    Value *Sb = getShadow(B);
+
+    // Get rid of pointers and vectors of pointers.
+    // For ints (and vectors of ints), types of A and Sa match,
+    // and this is a no-op.
+    A = IRB.CreatePointerCast(A, Sa->getType());
+    B = IRB.CreatePointerCast(B, Sb->getType());
+
+    // Let [a0, a1] be the interval of possible values of A, taking into account
+    // its undefined bits. Let [b0, b1] be the interval of possible values of B.
+    // Then (A cmp B) is defined iff (a0 cmp b1) == (a1 cmp b0).
+    bool IsSigned = I.isSigned();
+    Value *S1 = IRB.CreateICmp(I.getPredicate(),
+                               getLowestPossibleValue(IRB, A, Sa, IsSigned),
+                               getHighestPossibleValue(IRB, B, Sb, IsSigned));
+    Value *S2 = IRB.CreateICmp(I.getPredicate(),
+                               getHighestPossibleValue(IRB, A, Sa, IsSigned),
+                               getLowestPossibleValue(IRB, B, Sb, IsSigned));
+    Value *Si = IRB.CreateXor(S1, S2);
+    setShadow(&I, Si);
+    setOriginForNaryOp(I);
+  }
+
+  /// Instrument signed relational comparisons.
+  ///
+  /// Handle sign bit tests: x<0, x>=0, x<=-1, x>-1 by propagating the highest
+  /// bit of the shadow. Everything else is delegated to handleShadowOr().
+  void handleSignedRelationalComparison(ICmpInst &I) {
+    Constant *constOp;
+    Value *op = nullptr;
+    CmpInst::Predicate pre;
+    if ((constOp = dyn_cast<Constant>(I.getOperand(1)))) {
+      op = I.getOperand(0);
+      pre = I.getPredicate();
+    } else if ((constOp = dyn_cast<Constant>(I.getOperand(0)))) {
+      op = I.getOperand(1);
+      pre = I.getSwappedPredicate();
+    } else {
+      handleShadowOr(I);
+      return;
+    }
+
+    if ((constOp->isNullValue() &&
+         (pre == CmpInst::ICMP_SLT || pre == CmpInst::ICMP_SGE)) ||
+        (constOp->isAllOnesValue() &&
+         (pre == CmpInst::ICMP_SGT || pre == CmpInst::ICMP_SLE))) {
+      IRBuilder<> IRB(&I);
+      Value *Shadow = IRB.CreateICmpSLT(getShadow(op), getCleanShadow(op),
+                                        "_msprop_icmp_s");
+      setShadow(&I, Shadow);
+      setOrigin(&I, getOrigin(op));
+    } else {
+      handleShadowOr(I);
+    }
+  }
+
+  void visitICmpInst(ICmpInst &I) {
+    if (!ClHandleICmp) {
+      handleShadowOr(I);
+      return;
+    }
+    if (I.isEquality()) {
+      handleEqualityComparison(I);
+      return;
+    }
+
+    assert(I.isRelational());
+    if (ClHandleICmpExact) {
+      handleRelationalComparisonExact(I);
+      return;
+    }
+    if (I.isSigned()) {
+      handleSignedRelationalComparison(I);
+      return;
+    }
+
+    assert(I.isUnsigned());
+    if ((isa<Constant>(I.getOperand(0)) || isa<Constant>(I.getOperand(1)))) {
+      handleRelationalComparisonExact(I);
+      return;
+    }
+
+    handleShadowOr(I);
+  }
+
+  void visitFCmpInst(FCmpInst &I) {
+    handleShadowOr(I);
+  }
+
+  void handleShift(BinaryOperator &I) {
+    IRBuilder<> IRB(&I);
+    // If any of the S2 bits are poisoned, the whole thing is poisoned.
+    // Otherwise perform the same shift on S1.
+    Value *S1 = getShadow(&I, 0);
+    Value *S2 = getShadow(&I, 1);
+    Value *S2Conv = IRB.CreateSExt(IRB.CreateICmpNE(S2, getCleanShadow(S2)),
+                                   S2->getType());
+    Value *V2 = I.getOperand(1);
+    Value *Shift = IRB.CreateBinOp(I.getOpcode(), S1, V2);
+    setShadow(&I, IRB.CreateOr(Shift, S2Conv));
+    setOriginForNaryOp(I);
+  }
+
+  void visitShl(BinaryOperator &I) { handleShift(I); }
+  void visitAShr(BinaryOperator &I) { handleShift(I); }
+  void visitLShr(BinaryOperator &I) { handleShift(I); }
+
+  /// Instrument llvm.memmove
+  ///
+  /// At this point we don't know if llvm.memmove will be inlined or not.
+  /// If we don't instrument it and it gets inlined,
+  /// our interceptor will not kick in and we will lose the memmove.
+  /// If we instrument the call here, but it does not get inlined,
+  /// we will memove the shadow twice: which is bad in case
+  /// of overlapping regions. So, we simply lower the intrinsic to a call.
+  ///
+  /// Similar situation exists for memcpy and memset.
+  void visitMemMoveInst(MemMoveInst &I) {
+    IRBuilder<> IRB(&I);
+    IRB.CreateCall(
+        MS.MemmoveFn,
+        {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
+         IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
+         IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
+    I.eraseFromParent();
+  }
+
+  // Similar to memmove: avoid copying shadow twice.
+  // This is somewhat unfortunate as it may slowdown small constant memcpys.
+  // FIXME: consider doing manual inline for small constant sizes and proper
+  // alignment.
+  void visitMemCpyInst(MemCpyInst &I) {
+    IRBuilder<> IRB(&I);
+    IRB.CreateCall(
+        MS.MemcpyFn,
+        {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
+         IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
+         IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
+    I.eraseFromParent();
+  }
+
+  // Same as memcpy.
+  void visitMemSetInst(MemSetInst &I) {
+    IRBuilder<> IRB(&I);
+    IRB.CreateCall(
+        MS.MemsetFn,
+        {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
+         IRB.CreateIntCast(I.getArgOperand(1), IRB.getInt32Ty(), false),
+         IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
+    I.eraseFromParent();
+  }
+
+  void visitVAStartInst(VAStartInst &I) {
+    VAHelper->visitVAStartInst(I);
+  }
+
+  void visitVACopyInst(VACopyInst &I) {
+    VAHelper->visitVACopyInst(I);
+  }
+
+  /// Handle vector store-like intrinsics.
+  ///
+  /// Instrument intrinsics that look like a simple SIMD store: writes memory,
+  /// has 1 pointer argument and 1 vector argument, returns void.
+  bool handleVectorStoreIntrinsic(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+    Value* Addr = I.getArgOperand(0);
+    Value *Shadow = getShadow(&I, 1);
+    Value *ShadowPtr, *OriginPtr;
+
+    // We don't know the pointer alignment (could be unaligned SSE store!).
+    // Have to assume to worst case.
+    std::tie(ShadowPtr, OriginPtr) = getShadowOriginPtr(
+        Addr, IRB, Shadow->getType(), Align(1), /*isStore*/ true);
+    IRB.CreateAlignedStore(Shadow, ShadowPtr, Align(1));
+
+    if (ClCheckAccessAddress)
+      insertShadowCheck(Addr, &I);
+
+    // FIXME: factor out common code from materializeStores
+    if (MS.TrackOrigins) IRB.CreateStore(getOrigin(&I, 1), OriginPtr);
+    return true;
+  }
+
+  /// Handle vector load-like intrinsics.
+  ///
+  /// Instrument intrinsics that look like a simple SIMD load: reads memory,
+  /// has 1 pointer argument, returns a vector.
+  bool handleVectorLoadIntrinsic(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+    Value *Addr = I.getArgOperand(0);
+
+    Type *ShadowTy = getShadowTy(&I);
+    Value *ShadowPtr = nullptr, *OriginPtr = nullptr;
+    if (PropagateShadow) {
+      // We don't know the pointer alignment (could be unaligned SSE load!).
+      // Have to assume to worst case.
+      const Align Alignment = Align(1);
+      std::tie(ShadowPtr, OriginPtr) =
+          getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
+      setShadow(&I,
+                IRB.CreateAlignedLoad(ShadowTy, ShadowPtr, Alignment, "_msld"));
+    } else {
+      setShadow(&I, getCleanShadow(&I));
+    }
+
+    if (ClCheckAccessAddress)
+      insertShadowCheck(Addr, &I);
+
+    if (MS.TrackOrigins) {
+      if (PropagateShadow)
+        setOrigin(&I, IRB.CreateLoad(MS.OriginTy, OriginPtr));
+      else
+        setOrigin(&I, getCleanOrigin());
+    }
+    return true;
+  }
+
+  /// Handle (SIMD arithmetic)-like intrinsics.
+  ///
+  /// Instrument intrinsics with any number of arguments of the same type,
+  /// equal to the return type. The type should be simple (no aggregates or
+  /// pointers; vectors are fine).
+  /// Caller guarantees that this intrinsic does not access memory.
+  bool maybeHandleSimpleNomemIntrinsic(IntrinsicInst &I) {
+    Type *RetTy = I.getType();
+    if (!(RetTy->isIntOrIntVectorTy() ||
+          RetTy->isFPOrFPVectorTy() ||
+          RetTy->isX86_MMXTy()))
+      return false;
+
+    unsigned NumArgOperands = I.getNumArgOperands();
+    for (unsigned i = 0; i < NumArgOperands; ++i) {
+      Type *Ty = I.getArgOperand(i)->getType();
+      if (Ty != RetTy)
+        return false;
+    }
+
+    IRBuilder<> IRB(&I);
+    ShadowAndOriginCombiner SC(this, IRB);
+    for (unsigned i = 0; i < NumArgOperands; ++i)
+      SC.Add(I.getArgOperand(i));
+    SC.Done(&I);
+
+    return true;
+  }
+
+  /// Heuristically instrument unknown intrinsics.
+  ///
+  /// The main purpose of this code is to do something reasonable with all
+  /// random intrinsics we might encounter, most importantly - SIMD intrinsics.
+  /// We recognize several classes of intrinsics by their argument types and
+  /// ModRefBehaviour and apply special instrumentation when we are reasonably
+  /// sure that we know what the intrinsic does.
+  ///
+  /// We special-case intrinsics where this approach fails. See llvm.bswap
+  /// handling as an example of that.
+  bool handleUnknownIntrinsic(IntrinsicInst &I) {
+    unsigned NumArgOperands = I.getNumArgOperands();
+    if (NumArgOperands == 0)
+      return false;
+
+    if (NumArgOperands == 2 &&
+        I.getArgOperand(0)->getType()->isPointerTy() &&
+        I.getArgOperand(1)->getType()->isVectorTy() &&
+        I.getType()->isVoidTy() &&
+        !I.onlyReadsMemory()) {
+      // This looks like a vector store.
+      return handleVectorStoreIntrinsic(I);
+    }
+
+    if (NumArgOperands == 1 &&
+        I.getArgOperand(0)->getType()->isPointerTy() &&
+        I.getType()->isVectorTy() &&
+        I.onlyReadsMemory()) {
+      // This looks like a vector load.
+      return handleVectorLoadIntrinsic(I);
+    }
+
+    if (I.doesNotAccessMemory())
+      if (maybeHandleSimpleNomemIntrinsic(I))
+        return true;
+
+    // FIXME: detect and handle SSE maskstore/maskload
+    return false;
+  }
+
+  void handleInvariantGroup(IntrinsicInst &I) {
+    setShadow(&I, getShadow(&I, 0));
+    setOrigin(&I, getOrigin(&I, 0));
+  }
+
+  void handleLifetimeStart(IntrinsicInst &I) {
+    if (!PoisonStack)
+      return;
     AllocaInst *AI = llvm::findAllocaForValue(I.getArgOperand(1));
-    if (!AI) 
-      InstrumentLifetimeStart = false; 
-    LifetimeStartList.push_back(std::make_pair(&I, AI)); 
-  } 
- 
-  void handleBswap(IntrinsicInst &I) { 
-    IRBuilder<> IRB(&I); 
-    Value *Op = I.getArgOperand(0); 
-    Type *OpType = Op->getType(); 
-    Function *BswapFunc = Intrinsic::getDeclaration( 
-      F.getParent(), Intrinsic::bswap, makeArrayRef(&OpType, 1)); 
-    setShadow(&I, IRB.CreateCall(BswapFunc, getShadow(Op))); 
-    setOrigin(&I, getOrigin(Op)); 
-  } 
- 
-  // Instrument vector convert intrinsic. 
-  // 
-  // This function instruments intrinsics like cvtsi2ss: 
-  // %Out = int_xxx_cvtyyy(%ConvertOp) 
-  // or 
-  // %Out = int_xxx_cvtyyy(%CopyOp, %ConvertOp) 
-  // Intrinsic converts \p NumUsedElements elements of \p ConvertOp to the same 
-  // number \p Out elements, and (if has 2 arguments) copies the rest of the 
-  // elements from \p CopyOp. 
-  // In most cases conversion involves floating-point value which may trigger a 
-  // hardware exception when not fully initialized. For this reason we require 
-  // \p ConvertOp[0:NumUsedElements] to be fully initialized and trap otherwise. 
-  // We copy the shadow of \p CopyOp[NumUsedElements:] to \p 
-  // Out[NumUsedElements:]. This means that intrinsics without \p CopyOp always 
-  // return a fully initialized value. 
+    if (!AI)
+      InstrumentLifetimeStart = false;
+    LifetimeStartList.push_back(std::make_pair(&I, AI));
+  }
+
+  void handleBswap(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+    Value *Op = I.getArgOperand(0);
+    Type *OpType = Op->getType();
+    Function *BswapFunc = Intrinsic::getDeclaration(
+      F.getParent(), Intrinsic::bswap, makeArrayRef(&OpType, 1));
+    setShadow(&I, IRB.CreateCall(BswapFunc, getShadow(Op)));
+    setOrigin(&I, getOrigin(Op));
+  }
+
+  // Instrument vector convert intrinsic.
+  //
+  // This function instruments intrinsics like cvtsi2ss:
+  // %Out = int_xxx_cvtyyy(%ConvertOp)
+  // or
+  // %Out = int_xxx_cvtyyy(%CopyOp, %ConvertOp)
+  // Intrinsic converts \p NumUsedElements elements of \p ConvertOp to the same
+  // number \p Out elements, and (if has 2 arguments) copies the rest of the
+  // elements from \p CopyOp.
+  // In most cases conversion involves floating-point value which may trigger a
+  // hardware exception when not fully initialized. For this reason we require
+  // \p ConvertOp[0:NumUsedElements] to be fully initialized and trap otherwise.
+  // We copy the shadow of \p CopyOp[NumUsedElements:] to \p
+  // Out[NumUsedElements:]. This means that intrinsics without \p CopyOp always
+  // return a fully initialized value.
   void handleVectorConvertIntrinsic(IntrinsicInst &I, int NumUsedElements,
                                     bool HasRoundingMode = false) {
-    IRBuilder<> IRB(&I); 
-    Value *CopyOp, *ConvertOp; 
- 
+    IRBuilder<> IRB(&I);
+    Value *CopyOp, *ConvertOp;
+
     assert((!HasRoundingMode ||
             isa<ConstantInt>(I.getArgOperand(I.getNumArgOperands() - 1))) &&
            "Invalid rounding mode");
 
     switch (I.getNumArgOperands() - HasRoundingMode) {
-    case 2: 
-      CopyOp = I.getArgOperand(0); 
-      ConvertOp = I.getArgOperand(1); 
-      break; 
-    case 1: 
-      ConvertOp = I.getArgOperand(0); 
-      CopyOp = nullptr; 
-      break; 
-    default: 
-      llvm_unreachable("Cvt intrinsic with unsupported number of arguments."); 
-    } 
- 
-    // The first *NumUsedElements* elements of ConvertOp are converted to the 
-    // same number of output elements. The rest of the output is copied from 
-    // CopyOp, or (if not available) filled with zeroes. 
-    // Combine shadow for elements of ConvertOp that are used in this operation, 
-    // and insert a check. 
-    // FIXME: consider propagating shadow of ConvertOp, at least in the case of 
-    // int->any conversion. 
-    Value *ConvertShadow = getShadow(ConvertOp); 
-    Value *AggShadow = nullptr; 
-    if (ConvertOp->getType()->isVectorTy()) { 
-      AggShadow = IRB.CreateExtractElement( 
-          ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), 0)); 
-      for (int i = 1; i < NumUsedElements; ++i) { 
-        Value *MoreShadow = IRB.CreateExtractElement( 
-            ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), i)); 
-        AggShadow = IRB.CreateOr(AggShadow, MoreShadow); 
-      } 
-    } else { 
-      AggShadow = ConvertShadow; 
-    } 
-    assert(AggShadow->getType()->isIntegerTy()); 
-    insertShadowCheck(AggShadow, getOrigin(ConvertOp), &I); 
- 
-    // Build result shadow by zero-filling parts of CopyOp shadow that come from 
-    // ConvertOp. 
-    if (CopyOp) { 
-      assert(CopyOp->getType() == I.getType()); 
-      assert(CopyOp->getType()->isVectorTy()); 
-      Value *ResultShadow = getShadow(CopyOp); 
-      Type *EltTy = cast<VectorType>(ResultShadow->getType())->getElementType(); 
-      for (int i = 0; i < NumUsedElements; ++i) { 
-        ResultShadow = IRB.CreateInsertElement( 
-            ResultShadow, ConstantInt::getNullValue(EltTy), 
-            ConstantInt::get(IRB.getInt32Ty(), i)); 
-      } 
-      setShadow(&I, ResultShadow); 
-      setOrigin(&I, getOrigin(CopyOp)); 
-    } else { 
-      setShadow(&I, getCleanShadow(&I)); 
-      setOrigin(&I, getCleanOrigin()); 
-    } 
-  } 
- 
-  // Given a scalar or vector, extract lower 64 bits (or less), and return all 
-  // zeroes if it is zero, and all ones otherwise. 
-  Value *Lower64ShadowExtend(IRBuilder<> &IRB, Value *S, Type *T) { 
-    if (S->getType()->isVectorTy()) 
-      S = CreateShadowCast(IRB, S, IRB.getInt64Ty(), /* Signed */ true); 
-    assert(S->getType()->getPrimitiveSizeInBits() <= 64); 
-    Value *S2 = IRB.CreateICmpNE(S, getCleanShadow(S)); 
-    return CreateShadowCast(IRB, S2, T, /* Signed */ true); 
-  } 
- 
-  // Given a vector, extract its first element, and return all 
-  // zeroes if it is zero, and all ones otherwise. 
-  Value *LowerElementShadowExtend(IRBuilder<> &IRB, Value *S, Type *T) { 
-    Value *S1 = IRB.CreateExtractElement(S, (uint64_t)0); 
-    Value *S2 = IRB.CreateICmpNE(S1, getCleanShadow(S1)); 
-    return CreateShadowCast(IRB, S2, T, /* Signed */ true); 
-  } 
- 
-  Value *VariableShadowExtend(IRBuilder<> &IRB, Value *S) { 
-    Type *T = S->getType(); 
-    assert(T->isVectorTy()); 
-    Value *S2 = IRB.CreateICmpNE(S, getCleanShadow(S)); 
-    return IRB.CreateSExt(S2, T); 
-  } 
- 
-  // Instrument vector shift intrinsic. 
-  // 
-  // This function instruments intrinsics like int_x86_avx2_psll_w. 
-  // Intrinsic shifts %In by %ShiftSize bits. 
-  // %ShiftSize may be a vector. In that case the lower 64 bits determine shift 
-  // size, and the rest is ignored. Behavior is defined even if shift size is 
-  // greater than register (or field) width. 
-  void handleVectorShiftIntrinsic(IntrinsicInst &I, bool Variable) { 
-    assert(I.getNumArgOperands() == 2); 
-    IRBuilder<> IRB(&I); 
-    // If any of the S2 bits are poisoned, the whole thing is poisoned. 
-    // Otherwise perform the same shift on S1. 
-    Value *S1 = getShadow(&I, 0); 
-    Value *S2 = getShadow(&I, 1); 
-    Value *S2Conv = Variable ? VariableShadowExtend(IRB, S2) 
-                             : Lower64ShadowExtend(IRB, S2, getShadowTy(&I)); 
-    Value *V1 = I.getOperand(0); 
-    Value *V2 = I.getOperand(1); 
-    Value *Shift = IRB.CreateCall(I.getFunctionType(), I.getCalledOperand(), 
-                                  {IRB.CreateBitCast(S1, V1->getType()), V2}); 
-    Shift = IRB.CreateBitCast(Shift, getShadowTy(&I)); 
-    setShadow(&I, IRB.CreateOr(Shift, S2Conv)); 
-    setOriginForNaryOp(I); 
-  } 
- 
-  // Get an X86_MMX-sized vector type. 
-  Type *getMMXVectorTy(unsigned EltSizeInBits) { 
-    const unsigned X86_MMXSizeInBits = 64; 
-    assert(EltSizeInBits != 0 && (X86_MMXSizeInBits % EltSizeInBits) == 0 && 
-           "Illegal MMX vector element size"); 
-    return FixedVectorType::get(IntegerType::get(*MS.C, EltSizeInBits), 
-                                X86_MMXSizeInBits / EltSizeInBits); 
-  } 
- 
-  // Returns a signed counterpart for an (un)signed-saturate-and-pack 
-  // intrinsic. 
-  Intrinsic::ID getSignedPackIntrinsic(Intrinsic::ID id) { 
-    switch (id) { 
-      case Intrinsic::x86_sse2_packsswb_128: 
-      case Intrinsic::x86_sse2_packuswb_128: 
-        return Intrinsic::x86_sse2_packsswb_128; 
- 
-      case Intrinsic::x86_sse2_packssdw_128: 
-      case Intrinsic::x86_sse41_packusdw: 
-        return Intrinsic::x86_sse2_packssdw_128; 
- 
-      case Intrinsic::x86_avx2_packsswb: 
-      case Intrinsic::x86_avx2_packuswb: 
-        return Intrinsic::x86_avx2_packsswb; 
- 
-      case Intrinsic::x86_avx2_packssdw: 
-      case Intrinsic::x86_avx2_packusdw: 
-        return Intrinsic::x86_avx2_packssdw; 
- 
-      case Intrinsic::x86_mmx_packsswb: 
-      case Intrinsic::x86_mmx_packuswb: 
-        return Intrinsic::x86_mmx_packsswb; 
- 
-      case Intrinsic::x86_mmx_packssdw: 
-        return Intrinsic::x86_mmx_packssdw; 
-      default: 
-        llvm_unreachable("unexpected intrinsic id"); 
-    } 
-  } 
- 
-  // Instrument vector pack intrinsic. 
-  // 
-  // This function instruments intrinsics like x86_mmx_packsswb, that 
-  // packs elements of 2 input vectors into half as many bits with saturation. 
-  // Shadow is propagated with the signed variant of the same intrinsic applied 
-  // to sext(Sa != zeroinitializer), sext(Sb != zeroinitializer). 
-  // EltSizeInBits is used only for x86mmx arguments. 
-  void handleVectorPackIntrinsic(IntrinsicInst &I, unsigned EltSizeInBits = 0) { 
-    assert(I.getNumArgOperands() == 2); 
-    bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy(); 
-    IRBuilder<> IRB(&I); 
-    Value *S1 = getShadow(&I, 0); 
-    Value *S2 = getShadow(&I, 1); 
-    assert(isX86_MMX || S1->getType()->isVectorTy()); 
- 
-    // SExt and ICmpNE below must apply to individual elements of input vectors. 
-    // In case of x86mmx arguments, cast them to appropriate vector types and 
-    // back. 
-    Type *T = isX86_MMX ? getMMXVectorTy(EltSizeInBits) : S1->getType(); 
-    if (isX86_MMX) { 
-      S1 = IRB.CreateBitCast(S1, T); 
-      S2 = IRB.CreateBitCast(S2, T); 
-    } 
-    Value *S1_ext = IRB.CreateSExt( 
-        IRB.CreateICmpNE(S1, Constant::getNullValue(T)), T); 
-    Value *S2_ext = IRB.CreateSExt( 
-        IRB.CreateICmpNE(S2, Constant::getNullValue(T)), T); 
-    if (isX86_MMX) { 
-      Type *X86_MMXTy = Type::getX86_MMXTy(*MS.C); 
-      S1_ext = IRB.CreateBitCast(S1_ext, X86_MMXTy); 
-      S2_ext = IRB.CreateBitCast(S2_ext, X86_MMXTy); 
-    } 
- 
-    Function *ShadowFn = Intrinsic::getDeclaration( 
-        F.getParent(), getSignedPackIntrinsic(I.getIntrinsicID())); 
- 
-    Value *S = 
-        IRB.CreateCall(ShadowFn, {S1_ext, S2_ext}, "_msprop_vector_pack"); 
-    if (isX86_MMX) S = IRB.CreateBitCast(S, getShadowTy(&I)); 
-    setShadow(&I, S); 
-    setOriginForNaryOp(I); 
-  } 
- 
-  // Instrument sum-of-absolute-differences intrinsic. 
-  void handleVectorSadIntrinsic(IntrinsicInst &I) { 
-    const unsigned SignificantBitsPerResultElement = 16; 
-    bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy(); 
-    Type *ResTy = isX86_MMX ? IntegerType::get(*MS.C, 64) : I.getType(); 
-    unsigned ZeroBitsPerResultElement = 
-        ResTy->getScalarSizeInBits() - SignificantBitsPerResultElement; 
- 
-    IRBuilder<> IRB(&I); 
-    Value *S = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1)); 
-    S = IRB.CreateBitCast(S, ResTy); 
-    S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)), 
-                       ResTy); 
-    S = IRB.CreateLShr(S, ZeroBitsPerResultElement); 
-    S = IRB.CreateBitCast(S, getShadowTy(&I)); 
-    setShadow(&I, S); 
-    setOriginForNaryOp(I); 
-  } 
- 
-  // Instrument multiply-add intrinsic. 
-  void handleVectorPmaddIntrinsic(IntrinsicInst &I, 
-                                  unsigned EltSizeInBits = 0) { 
-    bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy(); 
-    Type *ResTy = isX86_MMX ? getMMXVectorTy(EltSizeInBits * 2) : I.getType(); 
-    IRBuilder<> IRB(&I); 
-    Value *S = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1)); 
-    S = IRB.CreateBitCast(S, ResTy); 
-    S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)), 
-                       ResTy); 
-    S = IRB.CreateBitCast(S, getShadowTy(&I)); 
-    setShadow(&I, S); 
-    setOriginForNaryOp(I); 
-  } 
- 
-  // Instrument compare-packed intrinsic. 
-  // Basically, an or followed by sext(icmp ne 0) to end up with all-zeros or 
-  // all-ones shadow. 
-  void handleVectorComparePackedIntrinsic(IntrinsicInst &I) { 
-    IRBuilder<> IRB(&I); 
-    Type *ResTy = getShadowTy(&I); 
-    Value *S0 = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1)); 
-    Value *S = IRB.CreateSExt( 
-        IRB.CreateICmpNE(S0, Constant::getNullValue(ResTy)), ResTy); 
-    setShadow(&I, S); 
-    setOriginForNaryOp(I); 
-  } 
- 
-  // Instrument compare-scalar intrinsic. 
-  // This handles both cmp* intrinsics which return the result in the first 
-  // element of a vector, and comi* which return the result as i32. 
-  void handleVectorCompareScalarIntrinsic(IntrinsicInst &I) { 
-    IRBuilder<> IRB(&I); 
-    Value *S0 = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1)); 
-    Value *S = LowerElementShadowExtend(IRB, S0, getShadowTy(&I)); 
-    setShadow(&I, S); 
-    setOriginForNaryOp(I); 
-  } 
- 
-  // Instrument generic vector reduction intrinsics 
-  // by ORing together all their fields. 
-  void handleVectorReduceIntrinsic(IntrinsicInst &I) { 
-    IRBuilder<> IRB(&I); 
-    Value *S = IRB.CreateOrReduce(getShadow(&I, 0)); 
-    setShadow(&I, S); 
-    setOrigin(&I, getOrigin(&I, 0)); 
-  } 
- 
+    case 2:
+      CopyOp = I.getArgOperand(0);
+      ConvertOp = I.getArgOperand(1);
+      break;
+    case 1:
+      ConvertOp = I.getArgOperand(0);
+      CopyOp = nullptr;
+      break;
+    default:
+      llvm_unreachable("Cvt intrinsic with unsupported number of arguments.");
+    }
+
+    // The first *NumUsedElements* elements of ConvertOp are converted to the
+    // same number of output elements. The rest of the output is copied from
+    // CopyOp, or (if not available) filled with zeroes.
+    // Combine shadow for elements of ConvertOp that are used in this operation,
+    // and insert a check.
+    // FIXME: consider propagating shadow of ConvertOp, at least in the case of
+    // int->any conversion.
+    Value *ConvertShadow = getShadow(ConvertOp);
+    Value *AggShadow = nullptr;
+    if (ConvertOp->getType()->isVectorTy()) {
+      AggShadow = IRB.CreateExtractElement(
+          ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), 0));
+      for (int i = 1; i < NumUsedElements; ++i) {
+        Value *MoreShadow = IRB.CreateExtractElement(
+            ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), i));
+        AggShadow = IRB.CreateOr(AggShadow, MoreShadow);
+      }
+    } else {
+      AggShadow = ConvertShadow;
+    }
+    assert(AggShadow->getType()->isIntegerTy());
+    insertShadowCheck(AggShadow, getOrigin(ConvertOp), &I);
+
+    // Build result shadow by zero-filling parts of CopyOp shadow that come from
+    // ConvertOp.
+    if (CopyOp) {
+      assert(CopyOp->getType() == I.getType());
+      assert(CopyOp->getType()->isVectorTy());
+      Value *ResultShadow = getShadow(CopyOp);
+      Type *EltTy = cast<VectorType>(ResultShadow->getType())->getElementType();
+      for (int i = 0; i < NumUsedElements; ++i) {
+        ResultShadow = IRB.CreateInsertElement(
+            ResultShadow, ConstantInt::getNullValue(EltTy),
+            ConstantInt::get(IRB.getInt32Ty(), i));
+      }
+      setShadow(&I, ResultShadow);
+      setOrigin(&I, getOrigin(CopyOp));
+    } else {
+      setShadow(&I, getCleanShadow(&I));
+      setOrigin(&I, getCleanOrigin());
+    }
+  }
+
+  // Given a scalar or vector, extract lower 64 bits (or less), and return all
+  // zeroes if it is zero, and all ones otherwise.
+  Value *Lower64ShadowExtend(IRBuilder<> &IRB, Value *S, Type *T) {
+    if (S->getType()->isVectorTy())
+      S = CreateShadowCast(IRB, S, IRB.getInt64Ty(), /* Signed */ true);
+    assert(S->getType()->getPrimitiveSizeInBits() <= 64);
+    Value *S2 = IRB.CreateICmpNE(S, getCleanShadow(S));
+    return CreateShadowCast(IRB, S2, T, /* Signed */ true);
+  }
+
+  // Given a vector, extract its first element, and return all
+  // zeroes if it is zero, and all ones otherwise.
+  Value *LowerElementShadowExtend(IRBuilder<> &IRB, Value *S, Type *T) {
+    Value *S1 = IRB.CreateExtractElement(S, (uint64_t)0);
+    Value *S2 = IRB.CreateICmpNE(S1, getCleanShadow(S1));
+    return CreateShadowCast(IRB, S2, T, /* Signed */ true);
+  }
+
+  Value *VariableShadowExtend(IRBuilder<> &IRB, Value *S) {
+    Type *T = S->getType();
+    assert(T->isVectorTy());
+    Value *S2 = IRB.CreateICmpNE(S, getCleanShadow(S));
+    return IRB.CreateSExt(S2, T);
+  }
+
+  // Instrument vector shift intrinsic.
+  //
+  // This function instruments intrinsics like int_x86_avx2_psll_w.
+  // Intrinsic shifts %In by %ShiftSize bits.
+  // %ShiftSize may be a vector. In that case the lower 64 bits determine shift
+  // size, and the rest is ignored. Behavior is defined even if shift size is
+  // greater than register (or field) width.
+  void handleVectorShiftIntrinsic(IntrinsicInst &I, bool Variable) {
+    assert(I.getNumArgOperands() == 2);
+    IRBuilder<> IRB(&I);
+    // If any of the S2 bits are poisoned, the whole thing is poisoned.
+    // Otherwise perform the same shift on S1.
+    Value *S1 = getShadow(&I, 0);
+    Value *S2 = getShadow(&I, 1);
+    Value *S2Conv = Variable ? VariableShadowExtend(IRB, S2)
+                             : Lower64ShadowExtend(IRB, S2, getShadowTy(&I));
+    Value *V1 = I.getOperand(0);
+    Value *V2 = I.getOperand(1);
+    Value *Shift = IRB.CreateCall(I.getFunctionType(), I.getCalledOperand(),
+                                  {IRB.CreateBitCast(S1, V1->getType()), V2});
+    Shift = IRB.CreateBitCast(Shift, getShadowTy(&I));
+    setShadow(&I, IRB.CreateOr(Shift, S2Conv));
+    setOriginForNaryOp(I);
+  }
+
+  // Get an X86_MMX-sized vector type.
+  Type *getMMXVectorTy(unsigned EltSizeInBits) {
+    const unsigned X86_MMXSizeInBits = 64;
+    assert(EltSizeInBits != 0 && (X86_MMXSizeInBits % EltSizeInBits) == 0 &&
+           "Illegal MMX vector element size");
+    return FixedVectorType::get(IntegerType::get(*MS.C, EltSizeInBits),
+                                X86_MMXSizeInBits / EltSizeInBits);
+  }
+
+  // Returns a signed counterpart for an (un)signed-saturate-and-pack
+  // intrinsic.
+  Intrinsic::ID getSignedPackIntrinsic(Intrinsic::ID id) {
+    switch (id) {
+      case Intrinsic::x86_sse2_packsswb_128:
+      case Intrinsic::x86_sse2_packuswb_128:
+        return Intrinsic::x86_sse2_packsswb_128;
+
+      case Intrinsic::x86_sse2_packssdw_128:
+      case Intrinsic::x86_sse41_packusdw:
+        return Intrinsic::x86_sse2_packssdw_128;
+
+      case Intrinsic::x86_avx2_packsswb:
+      case Intrinsic::x86_avx2_packuswb:
+        return Intrinsic::x86_avx2_packsswb;
+
+      case Intrinsic::x86_avx2_packssdw:
+      case Intrinsic::x86_avx2_packusdw:
+        return Intrinsic::x86_avx2_packssdw;
+
+      case Intrinsic::x86_mmx_packsswb:
+      case Intrinsic::x86_mmx_packuswb:
+        return Intrinsic::x86_mmx_packsswb;
+
+      case Intrinsic::x86_mmx_packssdw:
+        return Intrinsic::x86_mmx_packssdw;
+      default:
+        llvm_unreachable("unexpected intrinsic id");
+    }
+  }
+
+  // Instrument vector pack intrinsic.
+  //
+  // This function instruments intrinsics like x86_mmx_packsswb, that
+  // packs elements of 2 input vectors into half as many bits with saturation.
+  // Shadow is propagated with the signed variant of the same intrinsic applied
+  // to sext(Sa != zeroinitializer), sext(Sb != zeroinitializer).
+  // EltSizeInBits is used only for x86mmx arguments.
+  void handleVectorPackIntrinsic(IntrinsicInst &I, unsigned EltSizeInBits = 0) {
+    assert(I.getNumArgOperands() == 2);
+    bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
+    IRBuilder<> IRB(&I);
+    Value *S1 = getShadow(&I, 0);
+    Value *S2 = getShadow(&I, 1);
+    assert(isX86_MMX || S1->getType()->isVectorTy());
+
+    // SExt and ICmpNE below must apply to individual elements of input vectors.
+    // In case of x86mmx arguments, cast them to appropriate vector types and
+    // back.
+    Type *T = isX86_MMX ? getMMXVectorTy(EltSizeInBits) : S1->getType();
+    if (isX86_MMX) {
+      S1 = IRB.CreateBitCast(S1, T);
+      S2 = IRB.CreateBitCast(S2, T);
+    }
+    Value *S1_ext = IRB.CreateSExt(
+        IRB.CreateICmpNE(S1, Constant::getNullValue(T)), T);
+    Value *S2_ext = IRB.CreateSExt(
+        IRB.CreateICmpNE(S2, Constant::getNullValue(T)), T);
+    if (isX86_MMX) {
+      Type *X86_MMXTy = Type::getX86_MMXTy(*MS.C);
+      S1_ext = IRB.CreateBitCast(S1_ext, X86_MMXTy);
+      S2_ext = IRB.CreateBitCast(S2_ext, X86_MMXTy);
+    }
+
+    Function *ShadowFn = Intrinsic::getDeclaration(
+        F.getParent(), getSignedPackIntrinsic(I.getIntrinsicID()));
+
+    Value *S =
+        IRB.CreateCall(ShadowFn, {S1_ext, S2_ext}, "_msprop_vector_pack");
+    if (isX86_MMX) S = IRB.CreateBitCast(S, getShadowTy(&I));
+    setShadow(&I, S);
+    setOriginForNaryOp(I);
+  }
+
+  // Instrument sum-of-absolute-differences intrinsic.
+  void handleVectorSadIntrinsic(IntrinsicInst &I) {
+    const unsigned SignificantBitsPerResultElement = 16;
+    bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
+    Type *ResTy = isX86_MMX ? IntegerType::get(*MS.C, 64) : I.getType();
+    unsigned ZeroBitsPerResultElement =
+        ResTy->getScalarSizeInBits() - SignificantBitsPerResultElement;
+
+    IRBuilder<> IRB(&I);
+    Value *S = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
+    S = IRB.CreateBitCast(S, ResTy);
+    S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)),
+                       ResTy);
+    S = IRB.CreateLShr(S, ZeroBitsPerResultElement);
+    S = IRB.CreateBitCast(S, getShadowTy(&I));
+    setShadow(&I, S);
+    setOriginForNaryOp(I);
+  }
+
+  // Instrument multiply-add intrinsic.
+  void handleVectorPmaddIntrinsic(IntrinsicInst &I,
+                                  unsigned EltSizeInBits = 0) {
+    bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
+    Type *ResTy = isX86_MMX ? getMMXVectorTy(EltSizeInBits * 2) : I.getType();
+    IRBuilder<> IRB(&I);
+    Value *S = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
+    S = IRB.CreateBitCast(S, ResTy);
+    S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)),
+                       ResTy);
+    S = IRB.CreateBitCast(S, getShadowTy(&I));
+    setShadow(&I, S);
+    setOriginForNaryOp(I);
+  }
+
+  // Instrument compare-packed intrinsic.
+  // Basically, an or followed by sext(icmp ne 0) to end up with all-zeros or
+  // all-ones shadow.
+  void handleVectorComparePackedIntrinsic(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+    Type *ResTy = getShadowTy(&I);
+    Value *S0 = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
+    Value *S = IRB.CreateSExt(
+        IRB.CreateICmpNE(S0, Constant::getNullValue(ResTy)), ResTy);
+    setShadow(&I, S);
+    setOriginForNaryOp(I);
+  }
+
+  // Instrument compare-scalar intrinsic.
+  // This handles both cmp* intrinsics which return the result in the first
+  // element of a vector, and comi* which return the result as i32.
+  void handleVectorCompareScalarIntrinsic(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+    Value *S0 = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
+    Value *S = LowerElementShadowExtend(IRB, S0, getShadowTy(&I));
+    setShadow(&I, S);
+    setOriginForNaryOp(I);
+  }
+
+  // Instrument generic vector reduction intrinsics
+  // by ORing together all their fields.
+  void handleVectorReduceIntrinsic(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+    Value *S = IRB.CreateOrReduce(getShadow(&I, 0));
+    setShadow(&I, S);
+    setOrigin(&I, getOrigin(&I, 0));
+  }
+
   // Instrument vector.reduce.or intrinsic.
-  // Valid (non-poisoned) set bits in the operand pull low the 
-  // corresponding shadow bits. 
-  void handleVectorReduceOrIntrinsic(IntrinsicInst &I) { 
-    IRBuilder<> IRB(&I); 
-    Value *OperandShadow = getShadow(&I, 0); 
-    Value *OperandUnsetBits = IRB.CreateNot(I.getOperand(0)); 
-    Value *OperandUnsetOrPoison = IRB.CreateOr(OperandUnsetBits, OperandShadow); 
-    // Bit N is clean if any field's bit N is 1 and unpoison 
-    Value *OutShadowMask = IRB.CreateAndReduce(OperandUnsetOrPoison); 
-    // Otherwise, it is clean if every field's bit N is unpoison 
-    Value *OrShadow = IRB.CreateOrReduce(OperandShadow); 
-    Value *S = IRB.CreateAnd(OutShadowMask, OrShadow); 
- 
-    setShadow(&I, S); 
-    setOrigin(&I, getOrigin(&I, 0)); 
-  } 
- 
+  // Valid (non-poisoned) set bits in the operand pull low the
+  // corresponding shadow bits.
+  void handleVectorReduceOrIntrinsic(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+    Value *OperandShadow = getShadow(&I, 0);
+    Value *OperandUnsetBits = IRB.CreateNot(I.getOperand(0));
+    Value *OperandUnsetOrPoison = IRB.CreateOr(OperandUnsetBits, OperandShadow);
+    // Bit N is clean if any field's bit N is 1 and unpoison
+    Value *OutShadowMask = IRB.CreateAndReduce(OperandUnsetOrPoison);
+    // Otherwise, it is clean if every field's bit N is unpoison
+    Value *OrShadow = IRB.CreateOrReduce(OperandShadow);
+    Value *S = IRB.CreateAnd(OutShadowMask, OrShadow);
+
+    setShadow(&I, S);
+    setOrigin(&I, getOrigin(&I, 0));
+  }
+
   // Instrument vector.reduce.and intrinsic.
-  // Valid (non-poisoned) unset bits in the operand pull down the 
-  // corresponding shadow bits. 
-  void handleVectorReduceAndIntrinsic(IntrinsicInst &I) { 
-    IRBuilder<> IRB(&I); 
-    Value *OperandShadow = getShadow(&I, 0); 
-    Value *OperandSetOrPoison = IRB.CreateOr(I.getOperand(0), OperandShadow); 
-    // Bit N is clean if any field's bit N is 0 and unpoison 
-    Value *OutShadowMask = IRB.CreateAndReduce(OperandSetOrPoison); 
-    // Otherwise, it is clean if every field's bit N is unpoison 
-    Value *OrShadow = IRB.CreateOrReduce(OperandShadow); 
-    Value *S = IRB.CreateAnd(OutShadowMask, OrShadow); 
- 
-    setShadow(&I, S); 
-    setOrigin(&I, getOrigin(&I, 0)); 
-  } 
- 
-  void handleStmxcsr(IntrinsicInst &I) { 
-    IRBuilder<> IRB(&I); 
-    Value* Addr = I.getArgOperand(0); 
-    Type *Ty = IRB.getInt32Ty(); 
-    Value *ShadowPtr = 
-        getShadowOriginPtr(Addr, IRB, Ty, Align(1), /*isStore*/ true).first; 
- 
-    IRB.CreateStore(getCleanShadow(Ty), 
-                    IRB.CreatePointerCast(ShadowPtr, Ty->getPointerTo())); 
- 
-    if (ClCheckAccessAddress) 
-      insertShadowCheck(Addr, &I); 
-  } 
- 
-  void handleLdmxcsr(IntrinsicInst &I) { 
-    if (!InsertChecks) return; 
- 
-    IRBuilder<> IRB(&I); 
-    Value *Addr = I.getArgOperand(0); 
-    Type *Ty = IRB.getInt32Ty(); 
-    const Align Alignment = Align(1); 
-    Value *ShadowPtr, *OriginPtr; 
-    std::tie(ShadowPtr, OriginPtr) = 
-        getShadowOriginPtr(Addr, IRB, Ty, Alignment, /*isStore*/ false); 
- 
-    if (ClCheckAccessAddress) 
-      insertShadowCheck(Addr, &I); 
- 
-    Value *Shadow = IRB.CreateAlignedLoad(Ty, ShadowPtr, Alignment, "_ldmxcsr"); 
-    Value *Origin = MS.TrackOrigins ? IRB.CreateLoad(MS.OriginTy, OriginPtr) 
-                                    : getCleanOrigin(); 
-    insertShadowCheck(Shadow, Origin, &I); 
-  } 
- 
-  void handleMaskedStore(IntrinsicInst &I) { 
-    IRBuilder<> IRB(&I); 
-    Value *V = I.getArgOperand(0); 
-    Value *Addr = I.getArgOperand(1); 
-    const Align Alignment( 
-        cast<ConstantInt>(I.getArgOperand(2))->getZExtValue()); 
-    Value *Mask = I.getArgOperand(3); 
-    Value *Shadow = getShadow(V); 
- 
-    Value *ShadowPtr; 
-    Value *OriginPtr; 
-    std::tie(ShadowPtr, OriginPtr) = getShadowOriginPtr( 
-        Addr, IRB, Shadow->getType(), Alignment, /*isStore*/ true); 
- 
-    if (ClCheckAccessAddress) { 
-      insertShadowCheck(Addr, &I); 
-      // Uninitialized mask is kind of like uninitialized address, but not as 
-      // scary. 
-      insertShadowCheck(Mask, &I); 
-    } 
- 
-    IRB.CreateMaskedStore(Shadow, ShadowPtr, Alignment, Mask); 
- 
-    if (MS.TrackOrigins) { 
-      auto &DL = F.getParent()->getDataLayout(); 
-      paintOrigin(IRB, getOrigin(V), OriginPtr, 
-                  DL.getTypeStoreSize(Shadow->getType()), 
-                  std::max(Alignment, kMinOriginAlignment)); 
-    } 
-  } 
- 
-  bool handleMaskedLoad(IntrinsicInst &I) { 
-    IRBuilder<> IRB(&I); 
-    Value *Addr = I.getArgOperand(0); 
-    const Align Alignment( 
-        cast<ConstantInt>(I.getArgOperand(1))->getZExtValue()); 
-    Value *Mask = I.getArgOperand(2); 
-    Value *PassThru = I.getArgOperand(3); 
- 
-    Type *ShadowTy = getShadowTy(&I); 
-    Value *ShadowPtr, *OriginPtr; 
-    if (PropagateShadow) { 
-      std::tie(ShadowPtr, OriginPtr) = 
-          getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false); 
-      setShadow(&I, IRB.CreateMaskedLoad(ShadowPtr, Alignment, Mask, 
-                                         getShadow(PassThru), "_msmaskedld")); 
-    } else { 
-      setShadow(&I, getCleanShadow(&I)); 
-    } 
- 
-    if (ClCheckAccessAddress) { 
-      insertShadowCheck(Addr, &I); 
-      insertShadowCheck(Mask, &I); 
-    } 
- 
-    if (MS.TrackOrigins) { 
-      if (PropagateShadow) { 
-        // Choose between PassThru's and the loaded value's origins. 
-        Value *MaskedPassThruShadow = IRB.CreateAnd( 
-            getShadow(PassThru), IRB.CreateSExt(IRB.CreateNeg(Mask), ShadowTy)); 
- 
-        Value *Acc = IRB.CreateExtractElement( 
-            MaskedPassThruShadow, ConstantInt::get(IRB.getInt32Ty(), 0)); 
-        for (int i = 1, N = cast<FixedVectorType>(PassThru->getType()) 
-                                ->getNumElements(); 
-             i < N; ++i) { 
-          Value *More = IRB.CreateExtractElement( 
-              MaskedPassThruShadow, ConstantInt::get(IRB.getInt32Ty(), i)); 
-          Acc = IRB.CreateOr(Acc, More); 
-        } 
- 
-        Value *Origin = IRB.CreateSelect( 
-            IRB.CreateICmpNE(Acc, Constant::getNullValue(Acc->getType())), 
-            getOrigin(PassThru), IRB.CreateLoad(MS.OriginTy, OriginPtr)); 
- 
-        setOrigin(&I, Origin); 
-      } else { 
-        setOrigin(&I, getCleanOrigin()); 
-      } 
-    } 
-    return true; 
-  } 
- 
-  // Instrument BMI / BMI2 intrinsics. 
-  // All of these intrinsics are Z = I(X, Y) 
-  // where the types of all operands and the result match, and are either i32 or i64. 
-  // The following instrumentation happens to work for all of them: 
-  //   Sz = I(Sx, Y) | (sext (Sy != 0)) 
-  void handleBmiIntrinsic(IntrinsicInst &I) { 
-    IRBuilder<> IRB(&I); 
-    Type *ShadowTy = getShadowTy(&I); 
- 
-    // If any bit of the mask operand is poisoned, then the whole thing is. 
-    Value *SMask = getShadow(&I, 1); 
-    SMask = IRB.CreateSExt(IRB.CreateICmpNE(SMask, getCleanShadow(ShadowTy)), 
-                           ShadowTy); 
-    // Apply the same intrinsic to the shadow of the first operand. 
-    Value *S = IRB.CreateCall(I.getCalledFunction(), 
-                              {getShadow(&I, 0), I.getOperand(1)}); 
-    S = IRB.CreateOr(SMask, S); 
-    setShadow(&I, S); 
-    setOriginForNaryOp(I); 
-  } 
- 
-  SmallVector<int, 8> getPclmulMask(unsigned Width, bool OddElements) { 
-    SmallVector<int, 8> Mask; 
-    for (unsigned X = OddElements ? 1 : 0; X < Width; X += 2) { 
-      Mask.append(2, X); 
-    } 
-    return Mask; 
-  } 
- 
-  // Instrument pclmul intrinsics. 
-  // These intrinsics operate either on odd or on even elements of the input 
-  // vectors, depending on the constant in the 3rd argument, ignoring the rest. 
-  // Replace the unused elements with copies of the used ones, ex: 
-  //   (0, 1, 2, 3) -> (0, 0, 2, 2) (even case) 
-  // or 
-  //   (0, 1, 2, 3) -> (1, 1, 3, 3) (odd case) 
-  // and then apply the usual shadow combining logic. 
-  void handlePclmulIntrinsic(IntrinsicInst &I) { 
-    IRBuilder<> IRB(&I); 
-    unsigned Width = 
-        cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements(); 
-    assert(isa<ConstantInt>(I.getArgOperand(2)) && 
-           "pclmul 3rd operand must be a constant"); 
-    unsigned Imm = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue(); 
+  // Valid (non-poisoned) unset bits in the operand pull down the
+  // corresponding shadow bits.
+  void handleVectorReduceAndIntrinsic(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+    Value *OperandShadow = getShadow(&I, 0);
+    Value *OperandSetOrPoison = IRB.CreateOr(I.getOperand(0), OperandShadow);
+    // Bit N is clean if any field's bit N is 0 and unpoison
+    Value *OutShadowMask = IRB.CreateAndReduce(OperandSetOrPoison);
+    // Otherwise, it is clean if every field's bit N is unpoison
+    Value *OrShadow = IRB.CreateOrReduce(OperandShadow);
+    Value *S = IRB.CreateAnd(OutShadowMask, OrShadow);
+
+    setShadow(&I, S);
+    setOrigin(&I, getOrigin(&I, 0));
+  }
+
+  void handleStmxcsr(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+    Value* Addr = I.getArgOperand(0);
+    Type *Ty = IRB.getInt32Ty();
+    Value *ShadowPtr =
+        getShadowOriginPtr(Addr, IRB, Ty, Align(1), /*isStore*/ true).first;
+
+    IRB.CreateStore(getCleanShadow(Ty),
+                    IRB.CreatePointerCast(ShadowPtr, Ty->getPointerTo()));
+
+    if (ClCheckAccessAddress)
+      insertShadowCheck(Addr, &I);
+  }
+
+  void handleLdmxcsr(IntrinsicInst &I) {
+    if (!InsertChecks) return;
+
+    IRBuilder<> IRB(&I);
+    Value *Addr = I.getArgOperand(0);
+    Type *Ty = IRB.getInt32Ty();
+    const Align Alignment = Align(1);
+    Value *ShadowPtr, *OriginPtr;
+    std::tie(ShadowPtr, OriginPtr) =
+        getShadowOriginPtr(Addr, IRB, Ty, Alignment, /*isStore*/ false);
+
+    if (ClCheckAccessAddress)
+      insertShadowCheck(Addr, &I);
+
+    Value *Shadow = IRB.CreateAlignedLoad(Ty, ShadowPtr, Alignment, "_ldmxcsr");
+    Value *Origin = MS.TrackOrigins ? IRB.CreateLoad(MS.OriginTy, OriginPtr)
+                                    : getCleanOrigin();
+    insertShadowCheck(Shadow, Origin, &I);
+  }
+
+  void handleMaskedStore(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+    Value *V = I.getArgOperand(0);
+    Value *Addr = I.getArgOperand(1);
+    const Align Alignment(
+        cast<ConstantInt>(I.getArgOperand(2))->getZExtValue());
+    Value *Mask = I.getArgOperand(3);
+    Value *Shadow = getShadow(V);
+
+    Value *ShadowPtr;
+    Value *OriginPtr;
+    std::tie(ShadowPtr, OriginPtr) = getShadowOriginPtr(
+        Addr, IRB, Shadow->getType(), Alignment, /*isStore*/ true);
+
+    if (ClCheckAccessAddress) {
+      insertShadowCheck(Addr, &I);
+      // Uninitialized mask is kind of like uninitialized address, but not as
+      // scary.
+      insertShadowCheck(Mask, &I);
+    }
+
+    IRB.CreateMaskedStore(Shadow, ShadowPtr, Alignment, Mask);
+
+    if (MS.TrackOrigins) {
+      auto &DL = F.getParent()->getDataLayout();
+      paintOrigin(IRB, getOrigin(V), OriginPtr,
+                  DL.getTypeStoreSize(Shadow->getType()),
+                  std::max(Alignment, kMinOriginAlignment));
+    }
+  }
+
+  bool handleMaskedLoad(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+    Value *Addr = I.getArgOperand(0);
+    const Align Alignment(
+        cast<ConstantInt>(I.getArgOperand(1))->getZExtValue());
+    Value *Mask = I.getArgOperand(2);
+    Value *PassThru = I.getArgOperand(3);
+
+    Type *ShadowTy = getShadowTy(&I);
+    Value *ShadowPtr, *OriginPtr;
+    if (PropagateShadow) {
+      std::tie(ShadowPtr, OriginPtr) =
+          getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
+      setShadow(&I, IRB.CreateMaskedLoad(ShadowPtr, Alignment, Mask,
+                                         getShadow(PassThru), "_msmaskedld"));
+    } else {
+      setShadow(&I, getCleanShadow(&I));
+    }
+
+    if (ClCheckAccessAddress) {
+      insertShadowCheck(Addr, &I);
+      insertShadowCheck(Mask, &I);
+    }
+
+    if (MS.TrackOrigins) {
+      if (PropagateShadow) {
+        // Choose between PassThru's and the loaded value's origins.
+        Value *MaskedPassThruShadow = IRB.CreateAnd(
+            getShadow(PassThru), IRB.CreateSExt(IRB.CreateNeg(Mask), ShadowTy));
+
+        Value *Acc = IRB.CreateExtractElement(
+            MaskedPassThruShadow, ConstantInt::get(IRB.getInt32Ty(), 0));
+        for (int i = 1, N = cast<FixedVectorType>(PassThru->getType())
+                                ->getNumElements();
+             i < N; ++i) {
+          Value *More = IRB.CreateExtractElement(
+              MaskedPassThruShadow, ConstantInt::get(IRB.getInt32Ty(), i));
+          Acc = IRB.CreateOr(Acc, More);
+        }
+
+        Value *Origin = IRB.CreateSelect(
+            IRB.CreateICmpNE(Acc, Constant::getNullValue(Acc->getType())),
+            getOrigin(PassThru), IRB.CreateLoad(MS.OriginTy, OriginPtr));
+
+        setOrigin(&I, Origin);
+      } else {
+        setOrigin(&I, getCleanOrigin());
+      }
+    }
+    return true;
+  }
+
+  // Instrument BMI / BMI2 intrinsics.
+  // All of these intrinsics are Z = I(X, Y)
+  // where the types of all operands and the result match, and are either i32 or i64.
+  // The following instrumentation happens to work for all of them:
+  //   Sz = I(Sx, Y) | (sext (Sy != 0))
+  void handleBmiIntrinsic(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+    Type *ShadowTy = getShadowTy(&I);
+
+    // If any bit of the mask operand is poisoned, then the whole thing is.
+    Value *SMask = getShadow(&I, 1);
+    SMask = IRB.CreateSExt(IRB.CreateICmpNE(SMask, getCleanShadow(ShadowTy)),
+                           ShadowTy);
+    // Apply the same intrinsic to the shadow of the first operand.
+    Value *S = IRB.CreateCall(I.getCalledFunction(),
+                              {getShadow(&I, 0), I.getOperand(1)});
+    S = IRB.CreateOr(SMask, S);
+    setShadow(&I, S);
+    setOriginForNaryOp(I);
+  }
+
+  SmallVector<int, 8> getPclmulMask(unsigned Width, bool OddElements) {
+    SmallVector<int, 8> Mask;
+    for (unsigned X = OddElements ? 1 : 0; X < Width; X += 2) {
+      Mask.append(2, X);
+    }
+    return Mask;
+  }
+
+  // Instrument pclmul intrinsics.
+  // These intrinsics operate either on odd or on even elements of the input
+  // vectors, depending on the constant in the 3rd argument, ignoring the rest.
+  // Replace the unused elements with copies of the used ones, ex:
+  //   (0, 1, 2, 3) -> (0, 0, 2, 2) (even case)
+  // or
+  //   (0, 1, 2, 3) -> (1, 1, 3, 3) (odd case)
+  // and then apply the usual shadow combining logic.
+  void handlePclmulIntrinsic(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+    unsigned Width =
+        cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements();
+    assert(isa<ConstantInt>(I.getArgOperand(2)) &&
+           "pclmul 3rd operand must be a constant");
+    unsigned Imm = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
     Value *Shuf0 = IRB.CreateShuffleVector(getShadow(&I, 0),
                                            getPclmulMask(Width, Imm & 0x01));
     Value *Shuf1 = IRB.CreateShuffleVector(getShadow(&I, 1),
                                            getPclmulMask(Width, Imm & 0x10));
-    ShadowAndOriginCombiner SOC(this, IRB); 
-    SOC.Add(Shuf0, getOrigin(&I, 0)); 
-    SOC.Add(Shuf1, getOrigin(&I, 1)); 
-    SOC.Done(&I); 
-  } 
- 
-  // Instrument _mm_*_sd intrinsics 
-  void handleUnarySdIntrinsic(IntrinsicInst &I) { 
-    IRBuilder<> IRB(&I); 
-    Value *First = getShadow(&I, 0); 
-    Value *Second = getShadow(&I, 1); 
-    // High word of first operand, low word of second 
-    Value *Shadow = 
-        IRB.CreateShuffleVector(First, Second, llvm::makeArrayRef<int>({2, 1})); 
- 
-    setShadow(&I, Shadow); 
-    setOriginForNaryOp(I); 
-  } 
- 
-  void handleBinarySdIntrinsic(IntrinsicInst &I) { 
-    IRBuilder<> IRB(&I); 
-    Value *First = getShadow(&I, 0); 
-    Value *Second = getShadow(&I, 1); 
-    Value *OrShadow = IRB.CreateOr(First, Second); 
-    // High word of first operand, low word of both OR'd together 
-    Value *Shadow = IRB.CreateShuffleVector(First, OrShadow, 
-                                            llvm::makeArrayRef<int>({2, 1})); 
- 
-    setShadow(&I, Shadow); 
-    setOriginForNaryOp(I); 
-  } 
- 
+    ShadowAndOriginCombiner SOC(this, IRB);
+    SOC.Add(Shuf0, getOrigin(&I, 0));
+    SOC.Add(Shuf1, getOrigin(&I, 1));
+    SOC.Done(&I);
+  }
+
+  // Instrument _mm_*_sd intrinsics
+  void handleUnarySdIntrinsic(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+    Value *First = getShadow(&I, 0);
+    Value *Second = getShadow(&I, 1);
+    // High word of first operand, low word of second
+    Value *Shadow =
+        IRB.CreateShuffleVector(First, Second, llvm::makeArrayRef<int>({2, 1}));
+
+    setShadow(&I, Shadow);
+    setOriginForNaryOp(I);
+  }
+
+  void handleBinarySdIntrinsic(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+    Value *First = getShadow(&I, 0);
+    Value *Second = getShadow(&I, 1);
+    Value *OrShadow = IRB.CreateOr(First, Second);
+    // High word of first operand, low word of both OR'd together
+    Value *Shadow = IRB.CreateShuffleVector(First, OrShadow,
+                                            llvm::makeArrayRef<int>({2, 1}));
+
+    setShadow(&I, Shadow);
+    setOriginForNaryOp(I);
+  }
+
   // Instrument abs intrinsic.
   // handleUnknownIntrinsic can't handle it because of the last
   // is_int_min_poison argument which does not match the result type.
@@ -3244,282 +3244,282 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     setOrigin(&I, getOrigin(&I, 0));
   }
 
-  void visitIntrinsicInst(IntrinsicInst &I) { 
-    switch (I.getIntrinsicID()) { 
+  void visitIntrinsicInst(IntrinsicInst &I) {
+    switch (I.getIntrinsicID()) {
     case Intrinsic::abs:
       handleAbsIntrinsic(I);
       break;
-    case Intrinsic::lifetime_start: 
-      handleLifetimeStart(I); 
-      break; 
-    case Intrinsic::launder_invariant_group: 
-    case Intrinsic::strip_invariant_group: 
-      handleInvariantGroup(I); 
-      break; 
-    case Intrinsic::bswap: 
-      handleBswap(I); 
-      break; 
-    case Intrinsic::masked_store: 
-      handleMaskedStore(I); 
-      break; 
-    case Intrinsic::masked_load: 
-      handleMaskedLoad(I); 
-      break; 
+    case Intrinsic::lifetime_start:
+      handleLifetimeStart(I);
+      break;
+    case Intrinsic::launder_invariant_group:
+    case Intrinsic::strip_invariant_group:
+      handleInvariantGroup(I);
+      break;
+    case Intrinsic::bswap:
+      handleBswap(I);
+      break;
+    case Intrinsic::masked_store:
+      handleMaskedStore(I);
+      break;
+    case Intrinsic::masked_load:
+      handleMaskedLoad(I);
+      break;
     case Intrinsic::vector_reduce_and:
-      handleVectorReduceAndIntrinsic(I); 
-      break; 
+      handleVectorReduceAndIntrinsic(I);
+      break;
     case Intrinsic::vector_reduce_or:
-      handleVectorReduceOrIntrinsic(I); 
-      break; 
+      handleVectorReduceOrIntrinsic(I);
+      break;
     case Intrinsic::vector_reduce_add:
     case Intrinsic::vector_reduce_xor:
     case Intrinsic::vector_reduce_mul:
-      handleVectorReduceIntrinsic(I); 
-      break; 
-    case Intrinsic::x86_sse_stmxcsr: 
-      handleStmxcsr(I); 
-      break; 
-    case Intrinsic::x86_sse_ldmxcsr: 
-      handleLdmxcsr(I); 
-      break; 
-    case Intrinsic::x86_avx512_vcvtsd2usi64: 
-    case Intrinsic::x86_avx512_vcvtsd2usi32: 
-    case Intrinsic::x86_avx512_vcvtss2usi64: 
-    case Intrinsic::x86_avx512_vcvtss2usi32: 
-    case Intrinsic::x86_avx512_cvttss2usi64: 
-    case Intrinsic::x86_avx512_cvttss2usi: 
-    case Intrinsic::x86_avx512_cvttsd2usi64: 
-    case Intrinsic::x86_avx512_cvttsd2usi: 
-    case Intrinsic::x86_avx512_cvtusi2ss: 
-    case Intrinsic::x86_avx512_cvtusi642sd: 
-    case Intrinsic::x86_avx512_cvtusi642ss: 
+      handleVectorReduceIntrinsic(I);
+      break;
+    case Intrinsic::x86_sse_stmxcsr:
+      handleStmxcsr(I);
+      break;
+    case Intrinsic::x86_sse_ldmxcsr:
+      handleLdmxcsr(I);
+      break;
+    case Intrinsic::x86_avx512_vcvtsd2usi64:
+    case Intrinsic::x86_avx512_vcvtsd2usi32:
+    case Intrinsic::x86_avx512_vcvtss2usi64:
+    case Intrinsic::x86_avx512_vcvtss2usi32:
+    case Intrinsic::x86_avx512_cvttss2usi64:
+    case Intrinsic::x86_avx512_cvttss2usi:
+    case Intrinsic::x86_avx512_cvttsd2usi64:
+    case Intrinsic::x86_avx512_cvttsd2usi:
+    case Intrinsic::x86_avx512_cvtusi2ss:
+    case Intrinsic::x86_avx512_cvtusi642sd:
+    case Intrinsic::x86_avx512_cvtusi642ss:
       handleVectorConvertIntrinsic(I, 1, true);
       break;
-    case Intrinsic::x86_sse2_cvtsd2si64: 
-    case Intrinsic::x86_sse2_cvtsd2si: 
-    case Intrinsic::x86_sse2_cvtsd2ss: 
-    case Intrinsic::x86_sse2_cvttsd2si64: 
-    case Intrinsic::x86_sse2_cvttsd2si: 
-    case Intrinsic::x86_sse_cvtss2si64: 
-    case Intrinsic::x86_sse_cvtss2si: 
-    case Intrinsic::x86_sse_cvttss2si64: 
-    case Intrinsic::x86_sse_cvttss2si: 
-      handleVectorConvertIntrinsic(I, 1); 
-      break; 
-    case Intrinsic::x86_sse_cvtps2pi: 
-    case Intrinsic::x86_sse_cvttps2pi: 
-      handleVectorConvertIntrinsic(I, 2); 
-      break; 
- 
-    case Intrinsic::x86_avx512_psll_w_512: 
-    case Intrinsic::x86_avx512_psll_d_512: 
-    case Intrinsic::x86_avx512_psll_q_512: 
-    case Intrinsic::x86_avx512_pslli_w_512: 
-    case Intrinsic::x86_avx512_pslli_d_512: 
-    case Intrinsic::x86_avx512_pslli_q_512: 
-    case Intrinsic::x86_avx512_psrl_w_512: 
-    case Intrinsic::x86_avx512_psrl_d_512: 
-    case Intrinsic::x86_avx512_psrl_q_512: 
-    case Intrinsic::x86_avx512_psra_w_512: 
-    case Intrinsic::x86_avx512_psra_d_512: 
-    case Intrinsic::x86_avx512_psra_q_512: 
-    case Intrinsic::x86_avx512_psrli_w_512: 
-    case Intrinsic::x86_avx512_psrli_d_512: 
-    case Intrinsic::x86_avx512_psrli_q_512: 
-    case Intrinsic::x86_avx512_psrai_w_512: 
-    case Intrinsic::x86_avx512_psrai_d_512: 
-    case Intrinsic::x86_avx512_psrai_q_512: 
-    case Intrinsic::x86_avx512_psra_q_256: 
-    case Intrinsic::x86_avx512_psra_q_128: 
-    case Intrinsic::x86_avx512_psrai_q_256: 
-    case Intrinsic::x86_avx512_psrai_q_128: 
-    case Intrinsic::x86_avx2_psll_w: 
-    case Intrinsic::x86_avx2_psll_d: 
-    case Intrinsic::x86_avx2_psll_q: 
-    case Intrinsic::x86_avx2_pslli_w: 
-    case Intrinsic::x86_avx2_pslli_d: 
-    case Intrinsic::x86_avx2_pslli_q: 
-    case Intrinsic::x86_avx2_psrl_w: 
-    case Intrinsic::x86_avx2_psrl_d: 
-    case Intrinsic::x86_avx2_psrl_q: 
-    case Intrinsic::x86_avx2_psra_w: 
-    case Intrinsic::x86_avx2_psra_d: 
-    case Intrinsic::x86_avx2_psrli_w: 
-    case Intrinsic::x86_avx2_psrli_d: 
-    case Intrinsic::x86_avx2_psrli_q: 
-    case Intrinsic::x86_avx2_psrai_w: 
-    case Intrinsic::x86_avx2_psrai_d: 
-    case Intrinsic::x86_sse2_psll_w: 
-    case Intrinsic::x86_sse2_psll_d: 
-    case Intrinsic::x86_sse2_psll_q: 
-    case Intrinsic::x86_sse2_pslli_w: 
-    case Intrinsic::x86_sse2_pslli_d: 
-    case Intrinsic::x86_sse2_pslli_q: 
-    case Intrinsic::x86_sse2_psrl_w: 
-    case Intrinsic::x86_sse2_psrl_d: 
-    case Intrinsic::x86_sse2_psrl_q: 
-    case Intrinsic::x86_sse2_psra_w: 
-    case Intrinsic::x86_sse2_psra_d: 
-    case Intrinsic::x86_sse2_psrli_w: 
-    case Intrinsic::x86_sse2_psrli_d: 
-    case Intrinsic::x86_sse2_psrli_q: 
-    case Intrinsic::x86_sse2_psrai_w: 
-    case Intrinsic::x86_sse2_psrai_d: 
-    case Intrinsic::x86_mmx_psll_w: 
-    case Intrinsic::x86_mmx_psll_d: 
-    case Intrinsic::x86_mmx_psll_q: 
-    case Intrinsic::x86_mmx_pslli_w: 
-    case Intrinsic::x86_mmx_pslli_d: 
-    case Intrinsic::x86_mmx_pslli_q: 
-    case Intrinsic::x86_mmx_psrl_w: 
-    case Intrinsic::x86_mmx_psrl_d: 
-    case Intrinsic::x86_mmx_psrl_q: 
-    case Intrinsic::x86_mmx_psra_w: 
-    case Intrinsic::x86_mmx_psra_d: 
-    case Intrinsic::x86_mmx_psrli_w: 
-    case Intrinsic::x86_mmx_psrli_d: 
-    case Intrinsic::x86_mmx_psrli_q: 
-    case Intrinsic::x86_mmx_psrai_w: 
-    case Intrinsic::x86_mmx_psrai_d: 
-      handleVectorShiftIntrinsic(I, /* Variable */ false); 
-      break; 
-    case Intrinsic::x86_avx2_psllv_d: 
-    case Intrinsic::x86_avx2_psllv_d_256: 
-    case Intrinsic::x86_avx512_psllv_d_512: 
-    case Intrinsic::x86_avx2_psllv_q: 
-    case Intrinsic::x86_avx2_psllv_q_256: 
-    case Intrinsic::x86_avx512_psllv_q_512: 
-    case Intrinsic::x86_avx2_psrlv_d: 
-    case Intrinsic::x86_avx2_psrlv_d_256: 
-    case Intrinsic::x86_avx512_psrlv_d_512: 
-    case Intrinsic::x86_avx2_psrlv_q: 
-    case Intrinsic::x86_avx2_psrlv_q_256: 
-    case Intrinsic::x86_avx512_psrlv_q_512: 
-    case Intrinsic::x86_avx2_psrav_d: 
-    case Intrinsic::x86_avx2_psrav_d_256: 
-    case Intrinsic::x86_avx512_psrav_d_512: 
-    case Intrinsic::x86_avx512_psrav_q_128: 
-    case Intrinsic::x86_avx512_psrav_q_256: 
-    case Intrinsic::x86_avx512_psrav_q_512: 
-      handleVectorShiftIntrinsic(I, /* Variable */ true); 
-      break; 
- 
-    case Intrinsic::x86_sse2_packsswb_128: 
-    case Intrinsic::x86_sse2_packssdw_128: 
-    case Intrinsic::x86_sse2_packuswb_128: 
-    case Intrinsic::x86_sse41_packusdw: 
-    case Intrinsic::x86_avx2_packsswb: 
-    case Intrinsic::x86_avx2_packssdw: 
-    case Intrinsic::x86_avx2_packuswb: 
-    case Intrinsic::x86_avx2_packusdw: 
-      handleVectorPackIntrinsic(I); 
-      break; 
- 
-    case Intrinsic::x86_mmx_packsswb: 
-    case Intrinsic::x86_mmx_packuswb: 
-      handleVectorPackIntrinsic(I, 16); 
-      break; 
- 
-    case Intrinsic::x86_mmx_packssdw: 
-      handleVectorPackIntrinsic(I, 32); 
-      break; 
- 
-    case Intrinsic::x86_mmx_psad_bw: 
-    case Intrinsic::x86_sse2_psad_bw: 
-    case Intrinsic::x86_avx2_psad_bw: 
-      handleVectorSadIntrinsic(I); 
-      break; 
- 
-    case Intrinsic::x86_sse2_pmadd_wd: 
-    case Intrinsic::x86_avx2_pmadd_wd: 
-    case Intrinsic::x86_ssse3_pmadd_ub_sw_128: 
-    case Intrinsic::x86_avx2_pmadd_ub_sw: 
-      handleVectorPmaddIntrinsic(I); 
-      break; 
- 
-    case Intrinsic::x86_ssse3_pmadd_ub_sw: 
-      handleVectorPmaddIntrinsic(I, 8); 
-      break; 
- 
-    case Intrinsic::x86_mmx_pmadd_wd: 
-      handleVectorPmaddIntrinsic(I, 16); 
-      break; 
- 
-    case Intrinsic::x86_sse_cmp_ss: 
-    case Intrinsic::x86_sse2_cmp_sd: 
-    case Intrinsic::x86_sse_comieq_ss: 
-    case Intrinsic::x86_sse_comilt_ss: 
-    case Intrinsic::x86_sse_comile_ss: 
-    case Intrinsic::x86_sse_comigt_ss: 
-    case Intrinsic::x86_sse_comige_ss: 
-    case Intrinsic::x86_sse_comineq_ss: 
-    case Intrinsic::x86_sse_ucomieq_ss: 
-    case Intrinsic::x86_sse_ucomilt_ss: 
-    case Intrinsic::x86_sse_ucomile_ss: 
-    case Intrinsic::x86_sse_ucomigt_ss: 
-    case Intrinsic::x86_sse_ucomige_ss: 
-    case Intrinsic::x86_sse_ucomineq_ss: 
-    case Intrinsic::x86_sse2_comieq_sd: 
-    case Intrinsic::x86_sse2_comilt_sd: 
-    case Intrinsic::x86_sse2_comile_sd: 
-    case Intrinsic::x86_sse2_comigt_sd: 
-    case Intrinsic::x86_sse2_comige_sd: 
-    case Intrinsic::x86_sse2_comineq_sd: 
-    case Intrinsic::x86_sse2_ucomieq_sd: 
-    case Intrinsic::x86_sse2_ucomilt_sd: 
-    case Intrinsic::x86_sse2_ucomile_sd: 
-    case Intrinsic::x86_sse2_ucomigt_sd: 
-    case Intrinsic::x86_sse2_ucomige_sd: 
-    case Intrinsic::x86_sse2_ucomineq_sd: 
-      handleVectorCompareScalarIntrinsic(I); 
-      break; 
- 
-    case Intrinsic::x86_sse_cmp_ps: 
-    case Intrinsic::x86_sse2_cmp_pd: 
-      // FIXME: For x86_avx_cmp_pd_256 and x86_avx_cmp_ps_256 this function 
-      // generates reasonably looking IR that fails in the backend with "Do not 
-      // know how to split the result of this operator!". 
-      handleVectorComparePackedIntrinsic(I); 
-      break; 
- 
-    case Intrinsic::x86_bmi_bextr_32: 
-    case Intrinsic::x86_bmi_bextr_64: 
-    case Intrinsic::x86_bmi_bzhi_32: 
-    case Intrinsic::x86_bmi_bzhi_64: 
-    case Intrinsic::x86_bmi_pdep_32: 
-    case Intrinsic::x86_bmi_pdep_64: 
-    case Intrinsic::x86_bmi_pext_32: 
-    case Intrinsic::x86_bmi_pext_64: 
-      handleBmiIntrinsic(I); 
-      break; 
- 
-    case Intrinsic::x86_pclmulqdq: 
-    case Intrinsic::x86_pclmulqdq_256: 
-    case Intrinsic::x86_pclmulqdq_512: 
-      handlePclmulIntrinsic(I); 
-      break; 
- 
-    case Intrinsic::x86_sse41_round_sd: 
-      handleUnarySdIntrinsic(I); 
-      break; 
-    case Intrinsic::x86_sse2_max_sd: 
-    case Intrinsic::x86_sse2_min_sd: 
-      handleBinarySdIntrinsic(I); 
-      break; 
- 
-    case Intrinsic::is_constant: 
-      // The result of llvm.is.constant() is always defined. 
-      setShadow(&I, getCleanShadow(&I)); 
-      setOrigin(&I, getCleanOrigin()); 
-      break; 
- 
-    default: 
-      if (!handleUnknownIntrinsic(I)) 
-        visitInstruction(I); 
-      break; 
-    } 
-  } 
- 
+    case Intrinsic::x86_sse2_cvtsd2si64:
+    case Intrinsic::x86_sse2_cvtsd2si:
+    case Intrinsic::x86_sse2_cvtsd2ss:
+    case Intrinsic::x86_sse2_cvttsd2si64:
+    case Intrinsic::x86_sse2_cvttsd2si:
+    case Intrinsic::x86_sse_cvtss2si64:
+    case Intrinsic::x86_sse_cvtss2si:
+    case Intrinsic::x86_sse_cvttss2si64:
+    case Intrinsic::x86_sse_cvttss2si:
+      handleVectorConvertIntrinsic(I, 1);
+      break;
+    case Intrinsic::x86_sse_cvtps2pi:
+    case Intrinsic::x86_sse_cvttps2pi:
+      handleVectorConvertIntrinsic(I, 2);
+      break;
+
+    case Intrinsic::x86_avx512_psll_w_512:
+    case Intrinsic::x86_avx512_psll_d_512:
+    case Intrinsic::x86_avx512_psll_q_512:
+    case Intrinsic::x86_avx512_pslli_w_512:
+    case Intrinsic::x86_avx512_pslli_d_512:
+    case Intrinsic::x86_avx512_pslli_q_512:
+    case Intrinsic::x86_avx512_psrl_w_512:
+    case Intrinsic::x86_avx512_psrl_d_512:
+    case Intrinsic::x86_avx512_psrl_q_512:
+    case Intrinsic::x86_avx512_psra_w_512:
+    case Intrinsic::x86_avx512_psra_d_512:
+    case Intrinsic::x86_avx512_psra_q_512:
+    case Intrinsic::x86_avx512_psrli_w_512:
+    case Intrinsic::x86_avx512_psrli_d_512:
+    case Intrinsic::x86_avx512_psrli_q_512:
+    case Intrinsic::x86_avx512_psrai_w_512:
+    case Intrinsic::x86_avx512_psrai_d_512:
+    case Intrinsic::x86_avx512_psrai_q_512:
+    case Intrinsic::x86_avx512_psra_q_256:
+    case Intrinsic::x86_avx512_psra_q_128:
+    case Intrinsic::x86_avx512_psrai_q_256:
+    case Intrinsic::x86_avx512_psrai_q_128:
+    case Intrinsic::x86_avx2_psll_w:
+    case Intrinsic::x86_avx2_psll_d:
+    case Intrinsic::x86_avx2_psll_q:
+    case Intrinsic::x86_avx2_pslli_w:
+    case Intrinsic::x86_avx2_pslli_d:
+    case Intrinsic::x86_avx2_pslli_q:
+    case Intrinsic::x86_avx2_psrl_w:
+    case Intrinsic::x86_avx2_psrl_d:
+    case Intrinsic::x86_avx2_psrl_q:
+    case Intrinsic::x86_avx2_psra_w:
+    case Intrinsic::x86_avx2_psra_d:
+    case Intrinsic::x86_avx2_psrli_w:
+    case Intrinsic::x86_avx2_psrli_d:
+    case Intrinsic::x86_avx2_psrli_q:
+    case Intrinsic::x86_avx2_psrai_w:
+    case Intrinsic::x86_avx2_psrai_d:
+    case Intrinsic::x86_sse2_psll_w:
+    case Intrinsic::x86_sse2_psll_d:
+    case Intrinsic::x86_sse2_psll_q:
+    case Intrinsic::x86_sse2_pslli_w:
+    case Intrinsic::x86_sse2_pslli_d:
+    case Intrinsic::x86_sse2_pslli_q:
+    case Intrinsic::x86_sse2_psrl_w:
+    case Intrinsic::x86_sse2_psrl_d:
+    case Intrinsic::x86_sse2_psrl_q:
+    case Intrinsic::x86_sse2_psra_w:
+    case Intrinsic::x86_sse2_psra_d:
+    case Intrinsic::x86_sse2_psrli_w:
+    case Intrinsic::x86_sse2_psrli_d:
+    case Intrinsic::x86_sse2_psrli_q:
+    case Intrinsic::x86_sse2_psrai_w:
+    case Intrinsic::x86_sse2_psrai_d:
+    case Intrinsic::x86_mmx_psll_w:
+    case Intrinsic::x86_mmx_psll_d:
+    case Intrinsic::x86_mmx_psll_q:
+    case Intrinsic::x86_mmx_pslli_w:
+    case Intrinsic::x86_mmx_pslli_d:
+    case Intrinsic::x86_mmx_pslli_q:
+    case Intrinsic::x86_mmx_psrl_w:
+    case Intrinsic::x86_mmx_psrl_d:
+    case Intrinsic::x86_mmx_psrl_q:
+    case Intrinsic::x86_mmx_psra_w:
+    case Intrinsic::x86_mmx_psra_d:
+    case Intrinsic::x86_mmx_psrli_w:
+    case Intrinsic::x86_mmx_psrli_d:
+    case Intrinsic::x86_mmx_psrli_q:
+    case Intrinsic::x86_mmx_psrai_w:
+    case Intrinsic::x86_mmx_psrai_d:
+      handleVectorShiftIntrinsic(I, /* Variable */ false);
+      break;
+    case Intrinsic::x86_avx2_psllv_d:
+    case Intrinsic::x86_avx2_psllv_d_256:
+    case Intrinsic::x86_avx512_psllv_d_512:
+    case Intrinsic::x86_avx2_psllv_q:
+    case Intrinsic::x86_avx2_psllv_q_256:
+    case Intrinsic::x86_avx512_psllv_q_512:
+    case Intrinsic::x86_avx2_psrlv_d:
+    case Intrinsic::x86_avx2_psrlv_d_256:
+    case Intrinsic::x86_avx512_psrlv_d_512:
+    case Intrinsic::x86_avx2_psrlv_q:
+    case Intrinsic::x86_avx2_psrlv_q_256:
+    case Intrinsic::x86_avx512_psrlv_q_512:
+    case Intrinsic::x86_avx2_psrav_d:
+    case Intrinsic::x86_avx2_psrav_d_256:
+    case Intrinsic::x86_avx512_psrav_d_512:
+    case Intrinsic::x86_avx512_psrav_q_128:
+    case Intrinsic::x86_avx512_psrav_q_256:
+    case Intrinsic::x86_avx512_psrav_q_512:
+      handleVectorShiftIntrinsic(I, /* Variable */ true);
+      break;
+
+    case Intrinsic::x86_sse2_packsswb_128:
+    case Intrinsic::x86_sse2_packssdw_128:
+    case Intrinsic::x86_sse2_packuswb_128:
+    case Intrinsic::x86_sse41_packusdw:
+    case Intrinsic::x86_avx2_packsswb:
+    case Intrinsic::x86_avx2_packssdw:
+    case Intrinsic::x86_avx2_packuswb:
+    case Intrinsic::x86_avx2_packusdw:
+      handleVectorPackIntrinsic(I);
+      break;
+
+    case Intrinsic::x86_mmx_packsswb:
+    case Intrinsic::x86_mmx_packuswb:
+      handleVectorPackIntrinsic(I, 16);
+      break;
+
+    case Intrinsic::x86_mmx_packssdw:
+      handleVectorPackIntrinsic(I, 32);
+      break;
+
+    case Intrinsic::x86_mmx_psad_bw:
+    case Intrinsic::x86_sse2_psad_bw:
+    case Intrinsic::x86_avx2_psad_bw:
+      handleVectorSadIntrinsic(I);
+      break;
+
+    case Intrinsic::x86_sse2_pmadd_wd:
+    case Intrinsic::x86_avx2_pmadd_wd:
+    case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
+    case Intrinsic::x86_avx2_pmadd_ub_sw:
+      handleVectorPmaddIntrinsic(I);
+      break;
+
+    case Intrinsic::x86_ssse3_pmadd_ub_sw:
+      handleVectorPmaddIntrinsic(I, 8);
+      break;
+
+    case Intrinsic::x86_mmx_pmadd_wd:
+      handleVectorPmaddIntrinsic(I, 16);
+      break;
+
+    case Intrinsic::x86_sse_cmp_ss:
+    case Intrinsic::x86_sse2_cmp_sd:
+    case Intrinsic::x86_sse_comieq_ss:
+    case Intrinsic::x86_sse_comilt_ss:
+    case Intrinsic::x86_sse_comile_ss:
+    case Intrinsic::x86_sse_comigt_ss:
+    case Intrinsic::x86_sse_comige_ss:
+    case Intrinsic::x86_sse_comineq_ss:
+    case Intrinsic::x86_sse_ucomieq_ss:
+    case Intrinsic::x86_sse_ucomilt_ss:
+    case Intrinsic::x86_sse_ucomile_ss:
+    case Intrinsic::x86_sse_ucomigt_ss:
+    case Intrinsic::x86_sse_ucomige_ss:
+    case Intrinsic::x86_sse_ucomineq_ss:
+    case Intrinsic::x86_sse2_comieq_sd:
+    case Intrinsic::x86_sse2_comilt_sd:
+    case Intrinsic::x86_sse2_comile_sd:
+    case Intrinsic::x86_sse2_comigt_sd:
+    case Intrinsic::x86_sse2_comige_sd:
+    case Intrinsic::x86_sse2_comineq_sd:
+    case Intrinsic::x86_sse2_ucomieq_sd:
+    case Intrinsic::x86_sse2_ucomilt_sd:
+    case Intrinsic::x86_sse2_ucomile_sd:
+    case Intrinsic::x86_sse2_ucomigt_sd:
+    case Intrinsic::x86_sse2_ucomige_sd:
+    case Intrinsic::x86_sse2_ucomineq_sd:
+      handleVectorCompareScalarIntrinsic(I);
+      break;
+
+    case Intrinsic::x86_sse_cmp_ps:
+    case Intrinsic::x86_sse2_cmp_pd:
+      // FIXME: For x86_avx_cmp_pd_256 and x86_avx_cmp_ps_256 this function
+      // generates reasonably looking IR that fails in the backend with "Do not
+      // know how to split the result of this operator!".
+      handleVectorComparePackedIntrinsic(I);
+      break;
+
+    case Intrinsic::x86_bmi_bextr_32:
+    case Intrinsic::x86_bmi_bextr_64:
+    case Intrinsic::x86_bmi_bzhi_32:
+    case Intrinsic::x86_bmi_bzhi_64:
+    case Intrinsic::x86_bmi_pdep_32:
+    case Intrinsic::x86_bmi_pdep_64:
+    case Intrinsic::x86_bmi_pext_32:
+    case Intrinsic::x86_bmi_pext_64:
+      handleBmiIntrinsic(I);
+      break;
+
+    case Intrinsic::x86_pclmulqdq:
+    case Intrinsic::x86_pclmulqdq_256:
+    case Intrinsic::x86_pclmulqdq_512:
+      handlePclmulIntrinsic(I);
+      break;
+
+    case Intrinsic::x86_sse41_round_sd:
+      handleUnarySdIntrinsic(I);
+      break;
+    case Intrinsic::x86_sse2_max_sd:
+    case Intrinsic::x86_sse2_min_sd:
+      handleBinarySdIntrinsic(I);
+      break;
+
+    case Intrinsic::is_constant:
+      // The result of llvm.is.constant() is always defined.
+      setShadow(&I, getCleanShadow(&I));
+      setOrigin(&I, getCleanOrigin());
+      break;
+
+    default:
+      if (!handleUnknownIntrinsic(I))
+        visitInstruction(I);
+      break;
+    }
+  }
+
   void visitLibAtomicLoad(CallBase &CB) {
     // Since we use getNextNode here, we can't have CB terminate the BB.
     assert(isa<CallInst>(CB));
@@ -3577,19 +3577,19 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
                      Align(1));
   }
 
-  void visitCallBase(CallBase &CB) { 
-    assert(!CB.getMetadata("nosanitize")); 
-    if (CB.isInlineAsm()) { 
-      // For inline asm (either a call to asm function, or callbr instruction), 
-      // do the usual thing: check argument shadow and mark all outputs as 
-      // clean. Note that any side effects of the inline asm that are not 
-      // immediately visible in its constraints are not handled. 
-      if (ClHandleAsmConservative && MS.CompileKernel) 
-        visitAsmInstruction(CB); 
-      else 
-        visitInstruction(CB); 
-      return; 
-    } 
+  void visitCallBase(CallBase &CB) {
+    assert(!CB.getMetadata("nosanitize"));
+    if (CB.isInlineAsm()) {
+      // For inline asm (either a call to asm function, or callbr instruction),
+      // do the usual thing: check argument shadow and mark all outputs as
+      // clean. Note that any side effects of the inline asm that are not
+      // immediately visible in its constraints are not handled.
+      if (ClHandleAsmConservative && MS.CompileKernel)
+        visitAsmInstruction(CB);
+      else
+        visitInstruction(CB);
+      return;
+    }
     LibFunc LF;
     if (TLI->getLibFunc(CB, LF)) {
       // libatomic.a functions need to have special handling because there isn't
@@ -3612,13 +3612,13 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       }
     }
 
-    if (auto *Call = dyn_cast<CallInst>(&CB)) { 
-      assert(!isa<IntrinsicInst>(Call) && "intrinsics are handled elsewhere"); 
- 
-      // We are going to insert code that relies on the fact that the callee 
-      // will become a non-readonly function after it is instrumented by us. To 
-      // prevent this code from being optimized out, mark that function 
-      // non-readonly in advance. 
+    if (auto *Call = dyn_cast<CallInst>(&CB)) {
+      assert(!isa<IntrinsicInst>(Call) && "intrinsics are handled elsewhere");
+
+      // We are going to insert code that relies on the fact that the callee
+      // will become a non-readonly function after it is instrumented by us. To
+      // prevent this code from being optimized out, mark that function
+      // non-readonly in advance.
       AttrBuilder B;
       B.addAttribute(Attribute::ReadOnly)
           .addAttribute(Attribute::ReadNone)
@@ -3627,1693 +3627,1693 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
           .addAttribute(Attribute::Speculatable);
 
       Call->removeAttributes(AttributeList::FunctionIndex, B);
-      if (Function *Func = Call->getCalledFunction()) { 
-        Func->removeAttributes(AttributeList::FunctionIndex, B); 
-      } 
- 
-      maybeMarkSanitizerLibraryCallNoBuiltin(Call, TLI); 
-    } 
-    IRBuilder<> IRB(&CB); 
+      if (Function *Func = Call->getCalledFunction()) {
+        Func->removeAttributes(AttributeList::FunctionIndex, B);
+      }
+
+      maybeMarkSanitizerLibraryCallNoBuiltin(Call, TLI);
+    }
+    IRBuilder<> IRB(&CB);
     bool MayCheckCall = ClEagerChecks;
     if (Function *Func = CB.getCalledFunction()) {
       // __sanitizer_unaligned_{load,store} functions may be called by users
       // and always expects shadows in the TLS. So don't check them.
       MayCheckCall &= !Func->getName().startswith("__sanitizer_unaligned_");
     }
- 
-    unsigned ArgOffset = 0; 
-    LLVM_DEBUG(dbgs() << "  CallSite: " << CB << "\n"); 
-    for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End; 
-         ++ArgIt) { 
-      Value *A = *ArgIt; 
-      unsigned i = ArgIt - CB.arg_begin(); 
-      if (!A->getType()->isSized()) { 
-        LLVM_DEBUG(dbgs() << "Arg " << i << " is not sized: " << CB << "\n"); 
-        continue; 
-      } 
-      unsigned Size = 0; 
-      Value *Store = nullptr; 
-      // Compute the Shadow for arg even if it is ByVal, because 
-      // in that case getShadow() will copy the actual arg shadow to 
-      // __msan_param_tls. 
-      Value *ArgShadow = getShadow(A); 
-      Value *ArgShadowBase = getShadowPtrForArgument(A, IRB, ArgOffset); 
-      LLVM_DEBUG(dbgs() << "  Arg#" << i << ": " << *A 
-                        << " Shadow: " << *ArgShadow << "\n"); 
-      bool ArgIsInitialized = false; 
-      const DataLayout &DL = F.getParent()->getDataLayout(); 
- 
-      bool ByVal = CB.paramHasAttr(i, Attribute::ByVal); 
-      bool NoUndef = CB.paramHasAttr(i, Attribute::NoUndef); 
+
+    unsigned ArgOffset = 0;
+    LLVM_DEBUG(dbgs() << "  CallSite: " << CB << "\n");
+    for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End;
+         ++ArgIt) {
+      Value *A = *ArgIt;
+      unsigned i = ArgIt - CB.arg_begin();
+      if (!A->getType()->isSized()) {
+        LLVM_DEBUG(dbgs() << "Arg " << i << " is not sized: " << CB << "\n");
+        continue;
+      }
+      unsigned Size = 0;
+      Value *Store = nullptr;
+      // Compute the Shadow for arg even if it is ByVal, because
+      // in that case getShadow() will copy the actual arg shadow to
+      // __msan_param_tls.
+      Value *ArgShadow = getShadow(A);
+      Value *ArgShadowBase = getShadowPtrForArgument(A, IRB, ArgOffset);
+      LLVM_DEBUG(dbgs() << "  Arg#" << i << ": " << *A
+                        << " Shadow: " << *ArgShadow << "\n");
+      bool ArgIsInitialized = false;
+      const DataLayout &DL = F.getParent()->getDataLayout();
+
+      bool ByVal = CB.paramHasAttr(i, Attribute::ByVal);
+      bool NoUndef = CB.paramHasAttr(i, Attribute::NoUndef);
       bool EagerCheck = MayCheckCall && !ByVal && NoUndef;
- 
-      if (EagerCheck) { 
-        insertShadowCheck(A, &CB); 
-        continue; 
-      } 
-      if (ByVal) { 
-        // ByVal requires some special handling as it's too big for a single 
-        // load 
-        assert(A->getType()->isPointerTy() && 
-               "ByVal argument is not a pointer!"); 
-        Size = DL.getTypeAllocSize(CB.getParamByValType(i)); 
-        if (ArgOffset + Size > kParamTLSSize) break; 
-        const MaybeAlign ParamAlignment(CB.getParamAlign(i)); 
-        MaybeAlign Alignment = llvm::None; 
-        if (ParamAlignment) 
-          Alignment = std::min(*ParamAlignment, kShadowTLSAlignment); 
-        Value *AShadowPtr = 
-            getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), Alignment, 
-                               /*isStore*/ false) 
-                .first; 
- 
-        Store = IRB.CreateMemCpy(ArgShadowBase, Alignment, AShadowPtr, 
-                                 Alignment, Size); 
-        // TODO(glider): need to copy origins. 
-      } else { 
-        // Any other parameters mean we need bit-grained tracking of uninit data 
-        Size = DL.getTypeAllocSize(A->getType()); 
-        if (ArgOffset + Size > kParamTLSSize) break; 
-        Store = IRB.CreateAlignedStore(ArgShadow, ArgShadowBase, 
-                                       kShadowTLSAlignment); 
-        Constant *Cst = dyn_cast<Constant>(ArgShadow); 
-        if (Cst && Cst->isNullValue()) ArgIsInitialized = true; 
-      } 
-      if (MS.TrackOrigins && !ArgIsInitialized) 
-        IRB.CreateStore(getOrigin(A), 
-                        getOriginPtrForArgument(A, IRB, ArgOffset)); 
-      (void)Store; 
-      assert(Size != 0 && Store != nullptr); 
-      LLVM_DEBUG(dbgs() << "  Param:" << *Store << "\n"); 
+
+      if (EagerCheck) {
+        insertShadowCheck(A, &CB);
+        continue;
+      }
+      if (ByVal) {
+        // ByVal requires some special handling as it's too big for a single
+        // load
+        assert(A->getType()->isPointerTy() &&
+               "ByVal argument is not a pointer!");
+        Size = DL.getTypeAllocSize(CB.getParamByValType(i));
+        if (ArgOffset + Size > kParamTLSSize) break;
+        const MaybeAlign ParamAlignment(CB.getParamAlign(i));
+        MaybeAlign Alignment = llvm::None;
+        if (ParamAlignment)
+          Alignment = std::min(*ParamAlignment, kShadowTLSAlignment);
+        Value *AShadowPtr =
+            getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), Alignment,
+                               /*isStore*/ false)
+                .first;
+
+        Store = IRB.CreateMemCpy(ArgShadowBase, Alignment, AShadowPtr,
+                                 Alignment, Size);
+        // TODO(glider): need to copy origins.
+      } else {
+        // Any other parameters mean we need bit-grained tracking of uninit data
+        Size = DL.getTypeAllocSize(A->getType());
+        if (ArgOffset + Size > kParamTLSSize) break;
+        Store = IRB.CreateAlignedStore(ArgShadow, ArgShadowBase,
+                                       kShadowTLSAlignment);
+        Constant *Cst = dyn_cast<Constant>(ArgShadow);
+        if (Cst && Cst->isNullValue()) ArgIsInitialized = true;
+      }
+      if (MS.TrackOrigins && !ArgIsInitialized)
+        IRB.CreateStore(getOrigin(A),
+                        getOriginPtrForArgument(A, IRB, ArgOffset));
+      (void)Store;
+      assert(Size != 0 && Store != nullptr);
+      LLVM_DEBUG(dbgs() << "  Param:" << *Store << "\n");
       ArgOffset += alignTo(Size, kShadowTLSAlignment);
-    } 
-    LLVM_DEBUG(dbgs() << "  done with call args\n"); 
- 
-    FunctionType *FT = CB.getFunctionType(); 
-    if (FT->isVarArg()) { 
-      VAHelper->visitCallBase(CB, IRB); 
-    } 
- 
-    // Now, get the shadow for the RetVal. 
-    if (!CB.getType()->isSized()) 
-      return; 
-    // Don't emit the epilogue for musttail call returns. 
-    if (isa<CallInst>(CB) && cast<CallInst>(CB).isMustTailCall()) 
-      return; 
- 
+    }
+    LLVM_DEBUG(dbgs() << "  done with call args\n");
+
+    FunctionType *FT = CB.getFunctionType();
+    if (FT->isVarArg()) {
+      VAHelper->visitCallBase(CB, IRB);
+    }
+
+    // Now, get the shadow for the RetVal.
+    if (!CB.getType()->isSized())
+      return;
+    // Don't emit the epilogue for musttail call returns.
+    if (isa<CallInst>(CB) && cast<CallInst>(CB).isMustTailCall())
+      return;
+
     if (MayCheckCall && CB.hasRetAttr(Attribute::NoUndef)) {
-      setShadow(&CB, getCleanShadow(&CB)); 
-      setOrigin(&CB, getCleanOrigin()); 
-      return; 
-    } 
- 
-    IRBuilder<> IRBBefore(&CB); 
-    // Until we have full dynamic coverage, make sure the retval shadow is 0. 
-    Value *Base = getShadowPtrForRetval(&CB, IRBBefore); 
-    IRBBefore.CreateAlignedStore(getCleanShadow(&CB), Base, 
-                                 kShadowTLSAlignment); 
-    BasicBlock::iterator NextInsn; 
-    if (isa<CallInst>(CB)) { 
-      NextInsn = ++CB.getIterator(); 
-      assert(NextInsn != CB.getParent()->end()); 
-    } else { 
-      BasicBlock *NormalDest = cast<InvokeInst>(CB).getNormalDest(); 
-      if (!NormalDest->getSinglePredecessor()) { 
-        // FIXME: this case is tricky, so we are just conservative here. 
-        // Perhaps we need to split the edge between this BB and NormalDest, 
-        // but a naive attempt to use SplitEdge leads to a crash. 
-        setShadow(&CB, getCleanShadow(&CB)); 
-        setOrigin(&CB, getCleanOrigin()); 
-        return; 
-      } 
-      // FIXME: NextInsn is likely in a basic block that has not been visited yet. 
-      // Anything inserted there will be instrumented by MSan later! 
-      NextInsn = NormalDest->getFirstInsertionPt(); 
-      assert(NextInsn != NormalDest->end() && 
-             "Could not find insertion point for retval shadow load"); 
-    } 
-    IRBuilder<> IRBAfter(&*NextInsn); 
-    Value *RetvalShadow = IRBAfter.CreateAlignedLoad( 
-        getShadowTy(&CB), getShadowPtrForRetval(&CB, IRBAfter), 
-        kShadowTLSAlignment, "_msret"); 
-    setShadow(&CB, RetvalShadow); 
-    if (MS.TrackOrigins) 
-      setOrigin(&CB, IRBAfter.CreateLoad(MS.OriginTy, 
-                                         getOriginPtrForRetval(IRBAfter))); 
-  } 
- 
-  bool isAMustTailRetVal(Value *RetVal) { 
-    if (auto *I = dyn_cast<BitCastInst>(RetVal)) { 
-      RetVal = I->getOperand(0); 
-    } 
-    if (auto *I = dyn_cast<CallInst>(RetVal)) { 
-      return I->isMustTailCall(); 
-    } 
-    return false; 
-  } 
- 
-  void visitReturnInst(ReturnInst &I) { 
-    IRBuilder<> IRB(&I); 
-    Value *RetVal = I.getReturnValue(); 
-    if (!RetVal) return; 
-    // Don't emit the epilogue for musttail call returns. 
-    if (isAMustTailRetVal(RetVal)) return; 
-    Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB); 
-    bool HasNoUndef = 
-        F.hasAttribute(AttributeList::ReturnIndex, Attribute::NoUndef); 
-    bool StoreShadow = !(ClEagerChecks && HasNoUndef); 
-    // FIXME: Consider using SpecialCaseList to specify a list of functions that 
-    // must always return fully initialized values. For now, we hardcode "main". 
-    bool EagerCheck = (ClEagerChecks && HasNoUndef) || (F.getName() == "main"); 
- 
-    Value *Shadow = getShadow(RetVal); 
-    bool StoreOrigin = true; 
-    if (EagerCheck) { 
-      insertShadowCheck(RetVal, &I); 
-      Shadow = getCleanShadow(RetVal); 
-      StoreOrigin = false; 
-    } 
- 
-    // The caller may still expect information passed over TLS if we pass our 
-    // check 
-    if (StoreShadow) { 
-      IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment); 
-      if (MS.TrackOrigins && StoreOrigin) 
-        IRB.CreateStore(getOrigin(RetVal), getOriginPtrForRetval(IRB)); 
-    } 
-  } 
- 
-  void visitPHINode(PHINode &I) { 
-    IRBuilder<> IRB(&I); 
-    if (!PropagateShadow) { 
-      setShadow(&I, getCleanShadow(&I)); 
-      setOrigin(&I, getCleanOrigin()); 
-      return; 
-    } 
- 
-    ShadowPHINodes.push_back(&I); 
-    setShadow(&I, IRB.CreatePHI(getShadowTy(&I), I.getNumIncomingValues(), 
-                                "_msphi_s")); 
-    if (MS.TrackOrigins) 
-      setOrigin(&I, IRB.CreatePHI(MS.OriginTy, I.getNumIncomingValues(), 
-                                  "_msphi_o")); 
-  } 
- 
-  Value *getLocalVarDescription(AllocaInst &I) { 
-    SmallString<2048> StackDescriptionStorage; 
-    raw_svector_ostream StackDescription(StackDescriptionStorage); 
-    // We create a string with a description of the stack allocation and 
-    // pass it into __msan_set_alloca_origin. 
-    // It will be printed by the run-time if stack-originated UMR is found. 
-    // The first 4 bytes of the string are set to '----' and will be replaced 
-    // by __msan_va_arg_overflow_size_tls at the first call. 
-    StackDescription << "----" << I.getName() << "@" << F.getName(); 
-    return createPrivateNonConstGlobalForString(*F.getParent(), 
-                                                StackDescription.str()); 
-  } 
- 
-  void poisonAllocaUserspace(AllocaInst &I, IRBuilder<> &IRB, Value *Len) { 
-    if (PoisonStack && ClPoisonStackWithCall) { 
-      IRB.CreateCall(MS.MsanPoisonStackFn, 
-                     {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len}); 
-    } else { 
-      Value *ShadowBase, *OriginBase; 
-      std::tie(ShadowBase, OriginBase) = getShadowOriginPtr( 
-          &I, IRB, IRB.getInt8Ty(), Align(1), /*isStore*/ true); 
- 
-      Value *PoisonValue = IRB.getInt8(PoisonStack ? ClPoisonStackPattern : 0); 
-      IRB.CreateMemSet(ShadowBase, PoisonValue, Len, 
-                       MaybeAlign(I.getAlignment())); 
-    } 
- 
-    if (PoisonStack && MS.TrackOrigins) { 
-      Value *Descr = getLocalVarDescription(I); 
-      IRB.CreateCall(MS.MsanSetAllocaOrigin4Fn, 
-                     {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len, 
-                      IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy()), 
-                      IRB.CreatePointerCast(&F, MS.IntptrTy)}); 
-    } 
-  } 
- 
-  void poisonAllocaKmsan(AllocaInst &I, IRBuilder<> &IRB, Value *Len) { 
-    Value *Descr = getLocalVarDescription(I); 
-    if (PoisonStack) { 
-      IRB.CreateCall(MS.MsanPoisonAllocaFn, 
-                     {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len, 
-                      IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy())}); 
-    } else { 
-      IRB.CreateCall(MS.MsanUnpoisonAllocaFn, 
-                     {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len}); 
-    } 
-  } 
- 
-  void instrumentAlloca(AllocaInst &I, Instruction *InsPoint = nullptr) { 
-    if (!InsPoint) 
-      InsPoint = &I; 
-    IRBuilder<> IRB(InsPoint->getNextNode()); 
-    const DataLayout &DL = F.getParent()->getDataLayout(); 
-    uint64_t TypeSize = DL.getTypeAllocSize(I.getAllocatedType()); 
-    Value *Len = ConstantInt::get(MS.IntptrTy, TypeSize); 
-    if (I.isArrayAllocation()) 
-      Len = IRB.CreateMul(Len, I.getArraySize()); 
- 
-    if (MS.CompileKernel) 
-      poisonAllocaKmsan(I, IRB, Len); 
-    else 
-      poisonAllocaUserspace(I, IRB, Len); 
-  } 
- 
-  void visitAllocaInst(AllocaInst &I) { 
-    setShadow(&I, getCleanShadow(&I)); 
-    setOrigin(&I, getCleanOrigin()); 
-    // We'll get to this alloca later unless it's poisoned at the corresponding 
-    // llvm.lifetime.start. 
-    AllocaSet.insert(&I); 
-  } 
- 
-  void visitSelectInst(SelectInst& I) { 
-    IRBuilder<> IRB(&I); 
-    // a = select b, c, d 
-    Value *B = I.getCondition(); 
-    Value *C = I.getTrueValue(); 
-    Value *D = I.getFalseValue(); 
-    Value *Sb = getShadow(B); 
-    Value *Sc = getShadow(C); 
-    Value *Sd = getShadow(D); 
- 
-    // Result shadow if condition shadow is 0. 
-    Value *Sa0 = IRB.CreateSelect(B, Sc, Sd); 
-    Value *Sa1; 
-    if (I.getType()->isAggregateType()) { 
-      // To avoid "sign extending" i1 to an arbitrary aggregate type, we just do 
-      // an extra "select". This results in much more compact IR. 
-      // Sa = select Sb, poisoned, (select b, Sc, Sd) 
-      Sa1 = getPoisonedShadow(getShadowTy(I.getType())); 
-    } else { 
-      // Sa = select Sb, [ (c^d) | Sc | Sd ], [ b ? Sc : Sd ] 
-      // If Sb (condition is poisoned), look for bits in c and d that are equal 
-      // and both unpoisoned. 
-      // If !Sb (condition is unpoisoned), simply pick one of Sc and Sd. 
- 
-      // Cast arguments to shadow-compatible type. 
-      C = CreateAppToShadowCast(IRB, C); 
-      D = CreateAppToShadowCast(IRB, D); 
- 
-      // Result shadow if condition shadow is 1. 
-      Sa1 = IRB.CreateOr({IRB.CreateXor(C, D), Sc, Sd}); 
-    } 
-    Value *Sa = IRB.CreateSelect(Sb, Sa1, Sa0, "_msprop_select"); 
-    setShadow(&I, Sa); 
-    if (MS.TrackOrigins) { 
-      // Origins are always i32, so any vector conditions must be flattened. 
-      // FIXME: consider tracking vector origins for app vectors? 
-      if (B->getType()->isVectorTy()) { 
-        Type *FlatTy = getShadowTyNoVec(B->getType()); 
-        B = IRB.CreateICmpNE(IRB.CreateBitCast(B, FlatTy), 
-                                ConstantInt::getNullValue(FlatTy)); 
-        Sb = IRB.CreateICmpNE(IRB.CreateBitCast(Sb, FlatTy), 
-                                      ConstantInt::getNullValue(FlatTy)); 
-      } 
-      // a = select b, c, d 
-      // Oa = Sb ? Ob : (b ? Oc : Od) 
-      setOrigin( 
-          &I, IRB.CreateSelect(Sb, getOrigin(I.getCondition()), 
-                               IRB.CreateSelect(B, getOrigin(I.getTrueValue()), 
-                                                getOrigin(I.getFalseValue())))); 
-    } 
-  } 
- 
-  void visitLandingPadInst(LandingPadInst &I) { 
-    // Do nothing. 
-    // See https://github.com/google/sanitizers/issues/504 
-    setShadow(&I, getCleanShadow(&I)); 
-    setOrigin(&I, getCleanOrigin()); 
-  } 
- 
-  void visitCatchSwitchInst(CatchSwitchInst &I) { 
-    setShadow(&I, getCleanShadow(&I)); 
-    setOrigin(&I, getCleanOrigin()); 
-  } 
- 
-  void visitFuncletPadInst(FuncletPadInst &I) { 
-    setShadow(&I, getCleanShadow(&I)); 
-    setOrigin(&I, getCleanOrigin()); 
-  } 
- 
-  void visitGetElementPtrInst(GetElementPtrInst &I) { 
-    handleShadowOr(I); 
-  } 
- 
-  void visitExtractValueInst(ExtractValueInst &I) { 
-    IRBuilder<> IRB(&I); 
-    Value *Agg = I.getAggregateOperand(); 
-    LLVM_DEBUG(dbgs() << "ExtractValue:  " << I << "\n"); 
-    Value *AggShadow = getShadow(Agg); 
-    LLVM_DEBUG(dbgs() << "   AggShadow:  " << *AggShadow << "\n"); 
-    Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices()); 
-    LLVM_DEBUG(dbgs() << "   ResShadow:  " << *ResShadow << "\n"); 
-    setShadow(&I, ResShadow); 
-    setOriginForNaryOp(I); 
-  } 
- 
-  void visitInsertValueInst(InsertValueInst &I) { 
-    IRBuilder<> IRB(&I); 
-    LLVM_DEBUG(dbgs() << "InsertValue:  " << I << "\n"); 
-    Value *AggShadow = getShadow(I.getAggregateOperand()); 
-    Value *InsShadow = getShadow(I.getInsertedValueOperand()); 
-    LLVM_DEBUG(dbgs() << "   AggShadow:  " << *AggShadow << "\n"); 
-    LLVM_DEBUG(dbgs() << "   InsShadow:  " << *InsShadow << "\n"); 
-    Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices()); 
-    LLVM_DEBUG(dbgs() << "   Res:        " << *Res << "\n"); 
-    setShadow(&I, Res); 
-    setOriginForNaryOp(I); 
-  } 
- 
-  void dumpInst(Instruction &I) { 
-    if (CallInst *CI = dyn_cast<CallInst>(&I)) { 
-      errs() << "ZZZ call " << CI->getCalledFunction()->getName() << "\n"; 
-    } else { 
-      errs() << "ZZZ " << I.getOpcodeName() << "\n"; 
-    } 
-    errs() << "QQQ " << I << "\n"; 
-  } 
- 
-  void visitResumeInst(ResumeInst &I) { 
-    LLVM_DEBUG(dbgs() << "Resume: " << I << "\n"); 
-    // Nothing to do here. 
-  } 
- 
-  void visitCleanupReturnInst(CleanupReturnInst &CRI) { 
-    LLVM_DEBUG(dbgs() << "CleanupReturn: " << CRI << "\n"); 
-    // Nothing to do here. 
-  } 
- 
-  void visitCatchReturnInst(CatchReturnInst &CRI) { 
-    LLVM_DEBUG(dbgs() << "CatchReturn: " << CRI << "\n"); 
-    // Nothing to do here. 
-  } 
- 
-  void instrumentAsmArgument(Value *Operand, Instruction &I, IRBuilder<> &IRB, 
-                             const DataLayout &DL, bool isOutput) { 
-    // For each assembly argument, we check its value for being initialized. 
-    // If the argument is a pointer, we assume it points to a single element 
-    // of the corresponding type (or to a 8-byte word, if the type is unsized). 
-    // Each such pointer is instrumented with a call to the runtime library. 
-    Type *OpType = Operand->getType(); 
-    // Check the operand value itself. 
-    insertShadowCheck(Operand, &I); 
-    if (!OpType->isPointerTy() || !isOutput) { 
-      assert(!isOutput); 
-      return; 
-    } 
-    Type *ElType = OpType->getPointerElementType(); 
-    if (!ElType->isSized()) 
-      return; 
-    int Size = DL.getTypeStoreSize(ElType); 
-    Value *Ptr = IRB.CreatePointerCast(Operand, IRB.getInt8PtrTy()); 
-    Value *SizeVal = ConstantInt::get(MS.IntptrTy, Size); 
-    IRB.CreateCall(MS.MsanInstrumentAsmStoreFn, {Ptr, SizeVal}); 
-  } 
- 
-  /// Get the number of output arguments returned by pointers. 
-  int getNumOutputArgs(InlineAsm *IA, CallBase *CB) { 
-    int NumRetOutputs = 0; 
-    int NumOutputs = 0; 
-    Type *RetTy = cast<Value>(CB)->getType(); 
-    if (!RetTy->isVoidTy()) { 
-      // Register outputs are returned via the CallInst return value. 
-      auto *ST = dyn_cast<StructType>(RetTy); 
-      if (ST) 
-        NumRetOutputs = ST->getNumElements(); 
-      else 
-        NumRetOutputs = 1; 
-    } 
-    InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints(); 
-    for (size_t i = 0, n = Constraints.size(); i < n; i++) { 
-      InlineAsm::ConstraintInfo Info = Constraints[i]; 
-      switch (Info.Type) { 
-      case InlineAsm::isOutput: 
-        NumOutputs++; 
-        break; 
-      default: 
-        break; 
-      } 
-    } 
-    return NumOutputs - NumRetOutputs; 
-  } 
- 
-  void visitAsmInstruction(Instruction &I) { 
-    // Conservative inline assembly handling: check for poisoned shadow of 
-    // asm() arguments, then unpoison the result and all the memory locations 
-    // pointed to by those arguments. 
-    // An inline asm() statement in C++ contains lists of input and output 
-    // arguments used by the assembly code. These are mapped to operands of the 
-    // CallInst as follows: 
-    //  - nR register outputs ("=r) are returned by value in a single structure 
-    //  (SSA value of the CallInst); 
-    //  - nO other outputs ("=m" and others) are returned by pointer as first 
-    // nO operands of the CallInst; 
-    //  - nI inputs ("r", "m" and others) are passed to CallInst as the 
-    // remaining nI operands. 
-    // The total number of asm() arguments in the source is nR+nO+nI, and the 
-    // corresponding CallInst has nO+nI+1 operands (the last operand is the 
-    // function to be called). 
-    const DataLayout &DL = F.getParent()->getDataLayout(); 
-    CallBase *CB = cast<CallBase>(&I); 
-    IRBuilder<> IRB(&I); 
-    InlineAsm *IA = cast<InlineAsm>(CB->getCalledOperand()); 
-    int OutputArgs = getNumOutputArgs(IA, CB); 
-    // The last operand of a CallInst is the function itself. 
-    int NumOperands = CB->getNumOperands() - 1; 
- 
-    // Check input arguments. Doing so before unpoisoning output arguments, so 
-    // that we won't overwrite uninit values before checking them. 
-    for (int i = OutputArgs; i < NumOperands; i++) { 
-      Value *Operand = CB->getOperand(i); 
-      instrumentAsmArgument(Operand, I, IRB, DL, /*isOutput*/ false); 
-    } 
-    // Unpoison output arguments. This must happen before the actual InlineAsm 
-    // call, so that the shadow for memory published in the asm() statement 
-    // remains valid. 
-    for (int i = 0; i < OutputArgs; i++) { 
-      Value *Operand = CB->getOperand(i); 
-      instrumentAsmArgument(Operand, I, IRB, DL, /*isOutput*/ true); 
-    } 
- 
-    setShadow(&I, getCleanShadow(&I)); 
-    setOrigin(&I, getCleanOrigin()); 
-  } 
- 
+      setShadow(&CB, getCleanShadow(&CB));
+      setOrigin(&CB, getCleanOrigin());
+      return;
+    }
+
+    IRBuilder<> IRBBefore(&CB);
+    // Until we have full dynamic coverage, make sure the retval shadow is 0.
+    Value *Base = getShadowPtrForRetval(&CB, IRBBefore);
+    IRBBefore.CreateAlignedStore(getCleanShadow(&CB), Base,
+                                 kShadowTLSAlignment);
+    BasicBlock::iterator NextInsn;
+    if (isa<CallInst>(CB)) {
+      NextInsn = ++CB.getIterator();
+      assert(NextInsn != CB.getParent()->end());
+    } else {
+      BasicBlock *NormalDest = cast<InvokeInst>(CB).getNormalDest();
+      if (!NormalDest->getSinglePredecessor()) {
+        // FIXME: this case is tricky, so we are just conservative here.
+        // Perhaps we need to split the edge between this BB and NormalDest,
+        // but a naive attempt to use SplitEdge leads to a crash.
+        setShadow(&CB, getCleanShadow(&CB));
+        setOrigin(&CB, getCleanOrigin());
+        return;
+      }
+      // FIXME: NextInsn is likely in a basic block that has not been visited yet.
+      // Anything inserted there will be instrumented by MSan later!
+      NextInsn = NormalDest->getFirstInsertionPt();
+      assert(NextInsn != NormalDest->end() &&
+             "Could not find insertion point for retval shadow load");
+    }
+    IRBuilder<> IRBAfter(&*NextInsn);
+    Value *RetvalShadow = IRBAfter.CreateAlignedLoad(
+        getShadowTy(&CB), getShadowPtrForRetval(&CB, IRBAfter),
+        kShadowTLSAlignment, "_msret");
+    setShadow(&CB, RetvalShadow);
+    if (MS.TrackOrigins)
+      setOrigin(&CB, IRBAfter.CreateLoad(MS.OriginTy,
+                                         getOriginPtrForRetval(IRBAfter)));
+  }
+
+  bool isAMustTailRetVal(Value *RetVal) {
+    if (auto *I = dyn_cast<BitCastInst>(RetVal)) {
+      RetVal = I->getOperand(0);
+    }
+    if (auto *I = dyn_cast<CallInst>(RetVal)) {
+      return I->isMustTailCall();
+    }
+    return false;
+  }
+
+  void visitReturnInst(ReturnInst &I) {
+    IRBuilder<> IRB(&I);
+    Value *RetVal = I.getReturnValue();
+    if (!RetVal) return;
+    // Don't emit the epilogue for musttail call returns.
+    if (isAMustTailRetVal(RetVal)) return;
+    Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB);
+    bool HasNoUndef =
+        F.hasAttribute(AttributeList::ReturnIndex, Attribute::NoUndef);
+    bool StoreShadow = !(ClEagerChecks && HasNoUndef);
+    // FIXME: Consider using SpecialCaseList to specify a list of functions that
+    // must always return fully initialized values. For now, we hardcode "main".
+    bool EagerCheck = (ClEagerChecks && HasNoUndef) || (F.getName() == "main");
+
+    Value *Shadow = getShadow(RetVal);
+    bool StoreOrigin = true;
+    if (EagerCheck) {
+      insertShadowCheck(RetVal, &I);
+      Shadow = getCleanShadow(RetVal);
+      StoreOrigin = false;
+    }
+
+    // The caller may still expect information passed over TLS if we pass our
+    // check
+    if (StoreShadow) {
+      IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment);
+      if (MS.TrackOrigins && StoreOrigin)
+        IRB.CreateStore(getOrigin(RetVal), getOriginPtrForRetval(IRB));
+    }
+  }
+
+  void visitPHINode(PHINode &I) {
+    IRBuilder<> IRB(&I);
+    if (!PropagateShadow) {
+      setShadow(&I, getCleanShadow(&I));
+      setOrigin(&I, getCleanOrigin());
+      return;
+    }
+
+    ShadowPHINodes.push_back(&I);
+    setShadow(&I, IRB.CreatePHI(getShadowTy(&I), I.getNumIncomingValues(),
+                                "_msphi_s"));
+    if (MS.TrackOrigins)
+      setOrigin(&I, IRB.CreatePHI(MS.OriginTy, I.getNumIncomingValues(),
+                                  "_msphi_o"));
+  }
+
+  Value *getLocalVarDescription(AllocaInst &I) {
+    SmallString<2048> StackDescriptionStorage;
+    raw_svector_ostream StackDescription(StackDescriptionStorage);
+    // We create a string with a description of the stack allocation and
+    // pass it into __msan_set_alloca_origin.
+    // It will be printed by the run-time if stack-originated UMR is found.
+    // The first 4 bytes of the string are set to '----' and will be replaced
+    // by __msan_va_arg_overflow_size_tls at the first call.
+    StackDescription << "----" << I.getName() << "@" << F.getName();
+    return createPrivateNonConstGlobalForString(*F.getParent(),
+                                                StackDescription.str());
+  }
+
+  void poisonAllocaUserspace(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
+    if (PoisonStack && ClPoisonStackWithCall) {
+      IRB.CreateCall(MS.MsanPoisonStackFn,
+                     {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len});
+    } else {
+      Value *ShadowBase, *OriginBase;
+      std::tie(ShadowBase, OriginBase) = getShadowOriginPtr(
+          &I, IRB, IRB.getInt8Ty(), Align(1), /*isStore*/ true);
+
+      Value *PoisonValue = IRB.getInt8(PoisonStack ? ClPoisonStackPattern : 0);
+      IRB.CreateMemSet(ShadowBase, PoisonValue, Len,
+                       MaybeAlign(I.getAlignment()));
+    }
+
+    if (PoisonStack && MS.TrackOrigins) {
+      Value *Descr = getLocalVarDescription(I);
+      IRB.CreateCall(MS.MsanSetAllocaOrigin4Fn,
+                     {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len,
+                      IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy()),
+                      IRB.CreatePointerCast(&F, MS.IntptrTy)});
+    }
+  }
+
+  void poisonAllocaKmsan(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
+    Value *Descr = getLocalVarDescription(I);
+    if (PoisonStack) {
+      IRB.CreateCall(MS.MsanPoisonAllocaFn,
+                     {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len,
+                      IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy())});
+    } else {
+      IRB.CreateCall(MS.MsanUnpoisonAllocaFn,
+                     {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len});
+    }
+  }
+
+  void instrumentAlloca(AllocaInst &I, Instruction *InsPoint = nullptr) {
+    if (!InsPoint)
+      InsPoint = &I;
+    IRBuilder<> IRB(InsPoint->getNextNode());
+    const DataLayout &DL = F.getParent()->getDataLayout();
+    uint64_t TypeSize = DL.getTypeAllocSize(I.getAllocatedType());
+    Value *Len = ConstantInt::get(MS.IntptrTy, TypeSize);
+    if (I.isArrayAllocation())
+      Len = IRB.CreateMul(Len, I.getArraySize());
+
+    if (MS.CompileKernel)
+      poisonAllocaKmsan(I, IRB, Len);
+    else
+      poisonAllocaUserspace(I, IRB, Len);
+  }
+
+  void visitAllocaInst(AllocaInst &I) {
+    setShadow(&I, getCleanShadow(&I));
+    setOrigin(&I, getCleanOrigin());
+    // We'll get to this alloca later unless it's poisoned at the corresponding
+    // llvm.lifetime.start.
+    AllocaSet.insert(&I);
+  }
+
+  void visitSelectInst(SelectInst& I) {
+    IRBuilder<> IRB(&I);
+    // a = select b, c, d
+    Value *B = I.getCondition();
+    Value *C = I.getTrueValue();
+    Value *D = I.getFalseValue();
+    Value *Sb = getShadow(B);
+    Value *Sc = getShadow(C);
+    Value *Sd = getShadow(D);
+
+    // Result shadow if condition shadow is 0.
+    Value *Sa0 = IRB.CreateSelect(B, Sc, Sd);
+    Value *Sa1;
+    if (I.getType()->isAggregateType()) {
+      // To avoid "sign extending" i1 to an arbitrary aggregate type, we just do
+      // an extra "select". This results in much more compact IR.
+      // Sa = select Sb, poisoned, (select b, Sc, Sd)
+      Sa1 = getPoisonedShadow(getShadowTy(I.getType()));
+    } else {
+      // Sa = select Sb, [ (c^d) | Sc | Sd ], [ b ? Sc : Sd ]
+      // If Sb (condition is poisoned), look for bits in c and d that are equal
+      // and both unpoisoned.
+      // If !Sb (condition is unpoisoned), simply pick one of Sc and Sd.
+
+      // Cast arguments to shadow-compatible type.
+      C = CreateAppToShadowCast(IRB, C);
+      D = CreateAppToShadowCast(IRB, D);
+
+      // Result shadow if condition shadow is 1.
+      Sa1 = IRB.CreateOr({IRB.CreateXor(C, D), Sc, Sd});
+    }
+    Value *Sa = IRB.CreateSelect(Sb, Sa1, Sa0, "_msprop_select");
+    setShadow(&I, Sa);
+    if (MS.TrackOrigins) {
+      // Origins are always i32, so any vector conditions must be flattened.
+      // FIXME: consider tracking vector origins for app vectors?
+      if (B->getType()->isVectorTy()) {
+        Type *FlatTy = getShadowTyNoVec(B->getType());
+        B = IRB.CreateICmpNE(IRB.CreateBitCast(B, FlatTy),
+                                ConstantInt::getNullValue(FlatTy));
+        Sb = IRB.CreateICmpNE(IRB.CreateBitCast(Sb, FlatTy),
+                                      ConstantInt::getNullValue(FlatTy));
+      }
+      // a = select b, c, d
+      // Oa = Sb ? Ob : (b ? Oc : Od)
+      setOrigin(
+          &I, IRB.CreateSelect(Sb, getOrigin(I.getCondition()),
+                               IRB.CreateSelect(B, getOrigin(I.getTrueValue()),
+                                                getOrigin(I.getFalseValue()))));
+    }
+  }
+
+  void visitLandingPadInst(LandingPadInst &I) {
+    // Do nothing.
+    // See https://github.com/google/sanitizers/issues/504
+    setShadow(&I, getCleanShadow(&I));
+    setOrigin(&I, getCleanOrigin());
+  }
+
+  void visitCatchSwitchInst(CatchSwitchInst &I) {
+    setShadow(&I, getCleanShadow(&I));
+    setOrigin(&I, getCleanOrigin());
+  }
+
+  void visitFuncletPadInst(FuncletPadInst &I) {
+    setShadow(&I, getCleanShadow(&I));
+    setOrigin(&I, getCleanOrigin());
+  }
+
+  void visitGetElementPtrInst(GetElementPtrInst &I) {
+    handleShadowOr(I);
+  }
+
+  void visitExtractValueInst(ExtractValueInst &I) {
+    IRBuilder<> IRB(&I);
+    Value *Agg = I.getAggregateOperand();
+    LLVM_DEBUG(dbgs() << "ExtractValue:  " << I << "\n");
+    Value *AggShadow = getShadow(Agg);
+    LLVM_DEBUG(dbgs() << "   AggShadow:  " << *AggShadow << "\n");
+    Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices());
+    LLVM_DEBUG(dbgs() << "   ResShadow:  " << *ResShadow << "\n");
+    setShadow(&I, ResShadow);
+    setOriginForNaryOp(I);
+  }
+
+  void visitInsertValueInst(InsertValueInst &I) {
+    IRBuilder<> IRB(&I);
+    LLVM_DEBUG(dbgs() << "InsertValue:  " << I << "\n");
+    Value *AggShadow = getShadow(I.getAggregateOperand());
+    Value *InsShadow = getShadow(I.getInsertedValueOperand());
+    LLVM_DEBUG(dbgs() << "   AggShadow:  " << *AggShadow << "\n");
+    LLVM_DEBUG(dbgs() << "   InsShadow:  " << *InsShadow << "\n");
+    Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices());
+    LLVM_DEBUG(dbgs() << "   Res:        " << *Res << "\n");
+    setShadow(&I, Res);
+    setOriginForNaryOp(I);
+  }
+
+  void dumpInst(Instruction &I) {
+    if (CallInst *CI = dyn_cast<CallInst>(&I)) {
+      errs() << "ZZZ call " << CI->getCalledFunction()->getName() << "\n";
+    } else {
+      errs() << "ZZZ " << I.getOpcodeName() << "\n";
+    }
+    errs() << "QQQ " << I << "\n";
+  }
+
+  void visitResumeInst(ResumeInst &I) {
+    LLVM_DEBUG(dbgs() << "Resume: " << I << "\n");
+    // Nothing to do here.
+  }
+
+  void visitCleanupReturnInst(CleanupReturnInst &CRI) {
+    LLVM_DEBUG(dbgs() << "CleanupReturn: " << CRI << "\n");
+    // Nothing to do here.
+  }
+
+  void visitCatchReturnInst(CatchReturnInst &CRI) {
+    LLVM_DEBUG(dbgs() << "CatchReturn: " << CRI << "\n");
+    // Nothing to do here.
+  }
+
+  void instrumentAsmArgument(Value *Operand, Instruction &I, IRBuilder<> &IRB,
+                             const DataLayout &DL, bool isOutput) {
+    // For each assembly argument, we check its value for being initialized.
+    // If the argument is a pointer, we assume it points to a single element
+    // of the corresponding type (or to a 8-byte word, if the type is unsized).
+    // Each such pointer is instrumented with a call to the runtime library.
+    Type *OpType = Operand->getType();
+    // Check the operand value itself.
+    insertShadowCheck(Operand, &I);
+    if (!OpType->isPointerTy() || !isOutput) {
+      assert(!isOutput);
+      return;
+    }
+    Type *ElType = OpType->getPointerElementType();
+    if (!ElType->isSized())
+      return;
+    int Size = DL.getTypeStoreSize(ElType);
+    Value *Ptr = IRB.CreatePointerCast(Operand, IRB.getInt8PtrTy());
+    Value *SizeVal = ConstantInt::get(MS.IntptrTy, Size);
+    IRB.CreateCall(MS.MsanInstrumentAsmStoreFn, {Ptr, SizeVal});
+  }
+
+  /// Get the number of output arguments returned by pointers.
+  int getNumOutputArgs(InlineAsm *IA, CallBase *CB) {
+    int NumRetOutputs = 0;
+    int NumOutputs = 0;
+    Type *RetTy = cast<Value>(CB)->getType();
+    if (!RetTy->isVoidTy()) {
+      // Register outputs are returned via the CallInst return value.
+      auto *ST = dyn_cast<StructType>(RetTy);
+      if (ST)
+        NumRetOutputs = ST->getNumElements();
+      else
+        NumRetOutputs = 1;
+    }
+    InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints();
+    for (size_t i = 0, n = Constraints.size(); i < n; i++) {
+      InlineAsm::ConstraintInfo Info = Constraints[i];
+      switch (Info.Type) {
+      case InlineAsm::isOutput:
+        NumOutputs++;
+        break;
+      default:
+        break;
+      }
+    }
+    return NumOutputs - NumRetOutputs;
+  }
+
+  void visitAsmInstruction(Instruction &I) {
+    // Conservative inline assembly handling: check for poisoned shadow of
+    // asm() arguments, then unpoison the result and all the memory locations
+    // pointed to by those arguments.
+    // An inline asm() statement in C++ contains lists of input and output
+    // arguments used by the assembly code. These are mapped to operands of the
+    // CallInst as follows:
+    //  - nR register outputs ("=r) are returned by value in a single structure
+    //  (SSA value of the CallInst);
+    //  - nO other outputs ("=m" and others) are returned by pointer as first
+    // nO operands of the CallInst;
+    //  - nI inputs ("r", "m" and others) are passed to CallInst as the
+    // remaining nI operands.
+    // The total number of asm() arguments in the source is nR+nO+nI, and the
+    // corresponding CallInst has nO+nI+1 operands (the last operand is the
+    // function to be called).
+    const DataLayout &DL = F.getParent()->getDataLayout();
+    CallBase *CB = cast<CallBase>(&I);
+    IRBuilder<> IRB(&I);
+    InlineAsm *IA = cast<InlineAsm>(CB->getCalledOperand());
+    int OutputArgs = getNumOutputArgs(IA, CB);
+    // The last operand of a CallInst is the function itself.
+    int NumOperands = CB->getNumOperands() - 1;
+
+    // Check input arguments. Doing so before unpoisoning output arguments, so
+    // that we won't overwrite uninit values before checking them.
+    for (int i = OutputArgs; i < NumOperands; i++) {
+      Value *Operand = CB->getOperand(i);
+      instrumentAsmArgument(Operand, I, IRB, DL, /*isOutput*/ false);
+    }
+    // Unpoison output arguments. This must happen before the actual InlineAsm
+    // call, so that the shadow for memory published in the asm() statement
+    // remains valid.
+    for (int i = 0; i < OutputArgs; i++) {
+      Value *Operand = CB->getOperand(i);
+      instrumentAsmArgument(Operand, I, IRB, DL, /*isOutput*/ true);
+    }
+
+    setShadow(&I, getCleanShadow(&I));
+    setOrigin(&I, getCleanOrigin());
+  }
+
   void visitFreezeInst(FreezeInst &I) {
     // Freeze always returns a fully defined value.
     setShadow(&I, getCleanShadow(&I));
     setOrigin(&I, getCleanOrigin());
   }
 
-  void visitInstruction(Instruction &I) { 
-    // Everything else: stop propagating and check for poisoned shadow. 
-    if (ClDumpStrictInstructions) 
-      dumpInst(I); 
-    LLVM_DEBUG(dbgs() << "DEFAULT: " << I << "\n"); 
-    for (size_t i = 0, n = I.getNumOperands(); i < n; i++) { 
-      Value *Operand = I.getOperand(i); 
-      if (Operand->getType()->isSized()) 
-        insertShadowCheck(Operand, &I); 
-    } 
-    setShadow(&I, getCleanShadow(&I)); 
-    setOrigin(&I, getCleanOrigin()); 
-  } 
-}; 
- 
-/// AMD64-specific implementation of VarArgHelper. 
-struct VarArgAMD64Helper : public VarArgHelper { 
-  // An unfortunate workaround for asymmetric lowering of va_arg stuff. 
-  // See a comment in visitCallBase for more details. 
-  static const unsigned AMD64GpEndOffset = 48;  // AMD64 ABI Draft 0.99.6 p3.5.7 
-  static const unsigned AMD64FpEndOffsetSSE = 176; 
-  // If SSE is disabled, fp_offset in va_list is zero. 
-  static const unsigned AMD64FpEndOffsetNoSSE = AMD64GpEndOffset; 
- 
-  unsigned AMD64FpEndOffset; 
-  Function &F; 
-  MemorySanitizer &MS; 
-  MemorySanitizerVisitor &MSV; 
-  Value *VAArgTLSCopy = nullptr; 
-  Value *VAArgTLSOriginCopy = nullptr; 
-  Value *VAArgOverflowSize = nullptr; 
- 
-  SmallVector<CallInst*, 16> VAStartInstrumentationList; 
- 
-  enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory }; 
- 
-  VarArgAMD64Helper(Function &F, MemorySanitizer &MS, 
-                    MemorySanitizerVisitor &MSV) 
-      : F(F), MS(MS), MSV(MSV) { 
-    AMD64FpEndOffset = AMD64FpEndOffsetSSE; 
-    for (const auto &Attr : F.getAttributes().getFnAttributes()) { 
-      if (Attr.isStringAttribute() && 
-          (Attr.getKindAsString() == "target-features")) { 
-        if (Attr.getValueAsString().contains("-sse")) 
-          AMD64FpEndOffset = AMD64FpEndOffsetNoSSE; 
-        break; 
-      } 
-    } 
-  } 
- 
-  ArgKind classifyArgument(Value* arg) { 
-    // A very rough approximation of X86_64 argument classification rules. 
-    Type *T = arg->getType(); 
-    if (T->isFPOrFPVectorTy() || T->isX86_MMXTy()) 
-      return AK_FloatingPoint; 
-    if (T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64) 
-      return AK_GeneralPurpose; 
-    if (T->isPointerTy()) 
-      return AK_GeneralPurpose; 
-    return AK_Memory; 
-  } 
- 
-  // For VarArg functions, store the argument shadow in an ABI-specific format 
-  // that corresponds to va_list layout. 
-  // We do this because Clang lowers va_arg in the frontend, and this pass 
-  // only sees the low level code that deals with va_list internals. 
-  // A much easier alternative (provided that Clang emits va_arg instructions) 
-  // would have been to associate each live instance of va_list with a copy of 
-  // MSanParamTLS, and extract shadow on va_arg() call in the argument list 
-  // order. 
-  void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override { 
-    unsigned GpOffset = 0; 
-    unsigned FpOffset = AMD64GpEndOffset; 
-    unsigned OverflowOffset = AMD64FpEndOffset; 
-    const DataLayout &DL = F.getParent()->getDataLayout(); 
-    for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End; 
-         ++ArgIt) { 
-      Value *A = *ArgIt; 
-      unsigned ArgNo = CB.getArgOperandNo(ArgIt); 
-      bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams(); 
-      bool IsByVal = CB.paramHasAttr(ArgNo, Attribute::ByVal); 
-      if (IsByVal) { 
-        // ByVal arguments always go to the overflow area. 
-        // Fixed arguments passed through the overflow area will be stepped 
-        // over by va_start, so don't count them towards the offset. 
-        if (IsFixed) 
-          continue; 
-        assert(A->getType()->isPointerTy()); 
-        Type *RealTy = CB.getParamByValType(ArgNo); 
-        uint64_t ArgSize = DL.getTypeAllocSize(RealTy); 
-        Value *ShadowBase = getShadowPtrForVAArgument( 
-            RealTy, IRB, OverflowOffset, alignTo(ArgSize, 8)); 
-        Value *OriginBase = nullptr; 
-        if (MS.TrackOrigins) 
-          OriginBase = getOriginPtrForVAArgument(RealTy, IRB, OverflowOffset); 
-        OverflowOffset += alignTo(ArgSize, 8); 
-        if (!ShadowBase) 
-          continue; 
-        Value *ShadowPtr, *OriginPtr; 
-        std::tie(ShadowPtr, OriginPtr) = 
-            MSV.getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), kShadowTLSAlignment, 
-                                   /*isStore*/ false); 
- 
-        IRB.CreateMemCpy(ShadowBase, kShadowTLSAlignment, ShadowPtr, 
-                         kShadowTLSAlignment, ArgSize); 
-        if (MS.TrackOrigins) 
-          IRB.CreateMemCpy(OriginBase, kShadowTLSAlignment, OriginPtr, 
-                           kShadowTLSAlignment, ArgSize); 
-      } else { 
-        ArgKind AK = classifyArgument(A); 
-        if (AK == AK_GeneralPurpose && GpOffset >= AMD64GpEndOffset) 
-          AK = AK_Memory; 
-        if (AK == AK_FloatingPoint && FpOffset >= AMD64FpEndOffset) 
-          AK = AK_Memory; 
-        Value *ShadowBase, *OriginBase = nullptr; 
-        switch (AK) { 
-          case AK_GeneralPurpose: 
-            ShadowBase = 
-                getShadowPtrForVAArgument(A->getType(), IRB, GpOffset, 8); 
-            if (MS.TrackOrigins) 
-              OriginBase = 
-                  getOriginPtrForVAArgument(A->getType(), IRB, GpOffset); 
-            GpOffset += 8; 
-            break; 
-          case AK_FloatingPoint: 
-            ShadowBase = 
-                getShadowPtrForVAArgument(A->getType(), IRB, FpOffset, 16); 
-            if (MS.TrackOrigins) 
-              OriginBase = 
-                  getOriginPtrForVAArgument(A->getType(), IRB, FpOffset); 
-            FpOffset += 16; 
-            break; 
-          case AK_Memory: 
-            if (IsFixed) 
-              continue; 
-            uint64_t ArgSize = DL.getTypeAllocSize(A->getType()); 
-            ShadowBase = 
-                getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset, 8); 
-            if (MS.TrackOrigins) 
-              OriginBase = 
-                  getOriginPtrForVAArgument(A->getType(), IRB, OverflowOffset); 
-            OverflowOffset += alignTo(ArgSize, 8); 
-        } 
-        // Take fixed arguments into account for GpOffset and FpOffset, 
-        // but don't actually store shadows for them. 
-        // TODO(glider): don't call get*PtrForVAArgument() for them. 
-        if (IsFixed) 
-          continue; 
-        if (!ShadowBase) 
-          continue; 
-        Value *Shadow = MSV.getShadow(A); 
-        IRB.CreateAlignedStore(Shadow, ShadowBase, kShadowTLSAlignment); 
-        if (MS.TrackOrigins) { 
-          Value *Origin = MSV.getOrigin(A); 
-          unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType()); 
-          MSV.paintOrigin(IRB, Origin, OriginBase, StoreSize, 
-                          std::max(kShadowTLSAlignment, kMinOriginAlignment)); 
-        } 
-      } 
-    } 
-    Constant *OverflowSize = 
-      ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AMD64FpEndOffset); 
-    IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS); 
-  } 
- 
-  /// Compute the shadow address for a given va_arg. 
-  Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB, 
-                                   unsigned ArgOffset, unsigned ArgSize) { 
-    // Make sure we don't overflow __msan_va_arg_tls. 
-    if (ArgOffset + ArgSize > kParamTLSSize) 
-      return nullptr; 
-    Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy); 
-    Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset)); 
-    return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0), 
-                              "_msarg_va_s"); 
-  } 
- 
-  /// Compute the origin address for a given va_arg. 
-  Value *getOriginPtrForVAArgument(Type *Ty, IRBuilder<> &IRB, int ArgOffset) { 
-    Value *Base = IRB.CreatePointerCast(MS.VAArgOriginTLS, MS.IntptrTy); 
-    // getOriginPtrForVAArgument() is always called after 
-    // getShadowPtrForVAArgument(), so __msan_va_arg_origin_tls can never 
-    // overflow. 
-    Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset)); 
-    return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0), 
-                              "_msarg_va_o"); 
-  } 
- 
-  void unpoisonVAListTagForInst(IntrinsicInst &I) { 
-    IRBuilder<> IRB(&I); 
-    Value *VAListTag = I.getArgOperand(0); 
-    Value *ShadowPtr, *OriginPtr; 
-    const Align Alignment = Align(8); 
-    std::tie(ShadowPtr, OriginPtr) = 
-        MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment, 
-                               /*isStore*/ true); 
- 
-    // Unpoison the whole __va_list_tag. 
-    // FIXME: magic ABI constants. 
-    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), 
-                     /* size */ 24, Alignment, false); 
-    // We shouldn't need to zero out the origins, as they're only checked for 
-    // nonzero shadow. 
-  } 
- 
-  void visitVAStartInst(VAStartInst &I) override { 
-    if (F.getCallingConv() == CallingConv::Win64) 
-      return; 
-    VAStartInstrumentationList.push_back(&I); 
-    unpoisonVAListTagForInst(I); 
-  } 
- 
-  void visitVACopyInst(VACopyInst &I) override { 
-    if (F.getCallingConv() == CallingConv::Win64) return; 
-    unpoisonVAListTagForInst(I); 
-  } 
- 
-  void finalizeInstrumentation() override { 
-    assert(!VAArgOverflowSize && !VAArgTLSCopy && 
-           "finalizeInstrumentation called twice"); 
-    if (!VAStartInstrumentationList.empty()) { 
-      // If there is a va_start in this function, make a backup copy of 
-      // va_arg_tls somewhere in the function entry block. 
+  void visitInstruction(Instruction &I) {
+    // Everything else: stop propagating and check for poisoned shadow.
+    if (ClDumpStrictInstructions)
+      dumpInst(I);
+    LLVM_DEBUG(dbgs() << "DEFAULT: " << I << "\n");
+    for (size_t i = 0, n = I.getNumOperands(); i < n; i++) {
+      Value *Operand = I.getOperand(i);
+      if (Operand->getType()->isSized())
+        insertShadowCheck(Operand, &I);
+    }
+    setShadow(&I, getCleanShadow(&I));
+    setOrigin(&I, getCleanOrigin());
+  }
+};
+
+/// AMD64-specific implementation of VarArgHelper.
+struct VarArgAMD64Helper : public VarArgHelper {
+  // An unfortunate workaround for asymmetric lowering of va_arg stuff.
+  // See a comment in visitCallBase for more details.
+  static const unsigned AMD64GpEndOffset = 48;  // AMD64 ABI Draft 0.99.6 p3.5.7
+  static const unsigned AMD64FpEndOffsetSSE = 176;
+  // If SSE is disabled, fp_offset in va_list is zero.
+  static const unsigned AMD64FpEndOffsetNoSSE = AMD64GpEndOffset;
+
+  unsigned AMD64FpEndOffset;
+  Function &F;
+  MemorySanitizer &MS;
+  MemorySanitizerVisitor &MSV;
+  Value *VAArgTLSCopy = nullptr;
+  Value *VAArgTLSOriginCopy = nullptr;
+  Value *VAArgOverflowSize = nullptr;
+
+  SmallVector<CallInst*, 16> VAStartInstrumentationList;
+
+  enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
+
+  VarArgAMD64Helper(Function &F, MemorySanitizer &MS,
+                    MemorySanitizerVisitor &MSV)
+      : F(F), MS(MS), MSV(MSV) {
+    AMD64FpEndOffset = AMD64FpEndOffsetSSE;
+    for (const auto &Attr : F.getAttributes().getFnAttributes()) {
+      if (Attr.isStringAttribute() &&
+          (Attr.getKindAsString() == "target-features")) {
+        if (Attr.getValueAsString().contains("-sse"))
+          AMD64FpEndOffset = AMD64FpEndOffsetNoSSE;
+        break;
+      }
+    }
+  }
+
+  ArgKind classifyArgument(Value* arg) {
+    // A very rough approximation of X86_64 argument classification rules.
+    Type *T = arg->getType();
+    if (T->isFPOrFPVectorTy() || T->isX86_MMXTy())
+      return AK_FloatingPoint;
+    if (T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64)
+      return AK_GeneralPurpose;
+    if (T->isPointerTy())
+      return AK_GeneralPurpose;
+    return AK_Memory;
+  }
+
+  // For VarArg functions, store the argument shadow in an ABI-specific format
+  // that corresponds to va_list layout.
+  // We do this because Clang lowers va_arg in the frontend, and this pass
+  // only sees the low level code that deals with va_list internals.
+  // A much easier alternative (provided that Clang emits va_arg instructions)
+  // would have been to associate each live instance of va_list with a copy of
+  // MSanParamTLS, and extract shadow on va_arg() call in the argument list
+  // order.
+  void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
+    unsigned GpOffset = 0;
+    unsigned FpOffset = AMD64GpEndOffset;
+    unsigned OverflowOffset = AMD64FpEndOffset;
+    const DataLayout &DL = F.getParent()->getDataLayout();
+    for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End;
+         ++ArgIt) {
+      Value *A = *ArgIt;
+      unsigned ArgNo = CB.getArgOperandNo(ArgIt);
+      bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
+      bool IsByVal = CB.paramHasAttr(ArgNo, Attribute::ByVal);
+      if (IsByVal) {
+        // ByVal arguments always go to the overflow area.
+        // Fixed arguments passed through the overflow area will be stepped
+        // over by va_start, so don't count them towards the offset.
+        if (IsFixed)
+          continue;
+        assert(A->getType()->isPointerTy());
+        Type *RealTy = CB.getParamByValType(ArgNo);
+        uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
+        Value *ShadowBase = getShadowPtrForVAArgument(
+            RealTy, IRB, OverflowOffset, alignTo(ArgSize, 8));
+        Value *OriginBase = nullptr;
+        if (MS.TrackOrigins)
+          OriginBase = getOriginPtrForVAArgument(RealTy, IRB, OverflowOffset);
+        OverflowOffset += alignTo(ArgSize, 8);
+        if (!ShadowBase)
+          continue;
+        Value *ShadowPtr, *OriginPtr;
+        std::tie(ShadowPtr, OriginPtr) =
+            MSV.getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), kShadowTLSAlignment,
+                                   /*isStore*/ false);
+
+        IRB.CreateMemCpy(ShadowBase, kShadowTLSAlignment, ShadowPtr,
+                         kShadowTLSAlignment, ArgSize);
+        if (MS.TrackOrigins)
+          IRB.CreateMemCpy(OriginBase, kShadowTLSAlignment, OriginPtr,
+                           kShadowTLSAlignment, ArgSize);
+      } else {
+        ArgKind AK = classifyArgument(A);
+        if (AK == AK_GeneralPurpose && GpOffset >= AMD64GpEndOffset)
+          AK = AK_Memory;
+        if (AK == AK_FloatingPoint && FpOffset >= AMD64FpEndOffset)
+          AK = AK_Memory;
+        Value *ShadowBase, *OriginBase = nullptr;
+        switch (AK) {
+          case AK_GeneralPurpose:
+            ShadowBase =
+                getShadowPtrForVAArgument(A->getType(), IRB, GpOffset, 8);
+            if (MS.TrackOrigins)
+              OriginBase =
+                  getOriginPtrForVAArgument(A->getType(), IRB, GpOffset);
+            GpOffset += 8;
+            break;
+          case AK_FloatingPoint:
+            ShadowBase =
+                getShadowPtrForVAArgument(A->getType(), IRB, FpOffset, 16);
+            if (MS.TrackOrigins)
+              OriginBase =
+                  getOriginPtrForVAArgument(A->getType(), IRB, FpOffset);
+            FpOffset += 16;
+            break;
+          case AK_Memory:
+            if (IsFixed)
+              continue;
+            uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
+            ShadowBase =
+                getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset, 8);
+            if (MS.TrackOrigins)
+              OriginBase =
+                  getOriginPtrForVAArgument(A->getType(), IRB, OverflowOffset);
+            OverflowOffset += alignTo(ArgSize, 8);
+        }
+        // Take fixed arguments into account for GpOffset and FpOffset,
+        // but don't actually store shadows for them.
+        // TODO(glider): don't call get*PtrForVAArgument() for them.
+        if (IsFixed)
+          continue;
+        if (!ShadowBase)
+          continue;
+        Value *Shadow = MSV.getShadow(A);
+        IRB.CreateAlignedStore(Shadow, ShadowBase, kShadowTLSAlignment);
+        if (MS.TrackOrigins) {
+          Value *Origin = MSV.getOrigin(A);
+          unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
+          MSV.paintOrigin(IRB, Origin, OriginBase, StoreSize,
+                          std::max(kShadowTLSAlignment, kMinOriginAlignment));
+        }
+      }
+    }
+    Constant *OverflowSize =
+      ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AMD64FpEndOffset);
+    IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
+  }
+
+  /// Compute the shadow address for a given va_arg.
+  Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
+                                   unsigned ArgOffset, unsigned ArgSize) {
+    // Make sure we don't overflow __msan_va_arg_tls.
+    if (ArgOffset + ArgSize > kParamTLSSize)
+      return nullptr;
+    Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
+    Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+    return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
+                              "_msarg_va_s");
+  }
+
+  /// Compute the origin address for a given va_arg.
+  Value *getOriginPtrForVAArgument(Type *Ty, IRBuilder<> &IRB, int ArgOffset) {
+    Value *Base = IRB.CreatePointerCast(MS.VAArgOriginTLS, MS.IntptrTy);
+    // getOriginPtrForVAArgument() is always called after
+    // getShadowPtrForVAArgument(), so __msan_va_arg_origin_tls can never
+    // overflow.
+    Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+    return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
+                              "_msarg_va_o");
+  }
+
+  void unpoisonVAListTagForInst(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+    Value *VAListTag = I.getArgOperand(0);
+    Value *ShadowPtr, *OriginPtr;
+    const Align Alignment = Align(8);
+    std::tie(ShadowPtr, OriginPtr) =
+        MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment,
+                               /*isStore*/ true);
+
+    // Unpoison the whole __va_list_tag.
+    // FIXME: magic ABI constants.
+    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+                     /* size */ 24, Alignment, false);
+    // We shouldn't need to zero out the origins, as they're only checked for
+    // nonzero shadow.
+  }
+
+  void visitVAStartInst(VAStartInst &I) override {
+    if (F.getCallingConv() == CallingConv::Win64)
+      return;
+    VAStartInstrumentationList.push_back(&I);
+    unpoisonVAListTagForInst(I);
+  }
+
+  void visitVACopyInst(VACopyInst &I) override {
+    if (F.getCallingConv() == CallingConv::Win64) return;
+    unpoisonVAListTagForInst(I);
+  }
+
+  void finalizeInstrumentation() override {
+    assert(!VAArgOverflowSize && !VAArgTLSCopy &&
+           "finalizeInstrumentation called twice");
+    if (!VAStartInstrumentationList.empty()) {
+      // If there is a va_start in this function, make a backup copy of
+      // va_arg_tls somewhere in the function entry block.
       IRBuilder<> IRB(MSV.FnPrologueEnd);
-      VAArgOverflowSize = 
-          IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS); 
-      Value *CopySize = 
-        IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AMD64FpEndOffset), 
-                      VAArgOverflowSize); 
-      VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize); 
-      IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize); 
-      if (MS.TrackOrigins) { 
-        VAArgTLSOriginCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize); 
-        IRB.CreateMemCpy(VAArgTLSOriginCopy, Align(8), MS.VAArgOriginTLS, 
-                         Align(8), CopySize); 
-      } 
-    } 
- 
-    // Instrument va_start. 
-    // Copy va_list shadow from the backup copy of the TLS contents. 
-    for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) { 
-      CallInst *OrigInst = VAStartInstrumentationList[i]; 
-      IRBuilder<> IRB(OrigInst->getNextNode()); 
-      Value *VAListTag = OrigInst->getArgOperand(0); 
- 
-      Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C); 
-      Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr( 
-          IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy), 
-                        ConstantInt::get(MS.IntptrTy, 16)), 
-          PointerType::get(RegSaveAreaPtrTy, 0)); 
-      Value *RegSaveAreaPtr = 
-          IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr); 
-      Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr; 
-      const Align Alignment = Align(16); 
-      std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) = 
-          MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(), 
-                                 Alignment, /*isStore*/ true); 
-      IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment, 
-                       AMD64FpEndOffset); 
-      if (MS.TrackOrigins) 
-        IRB.CreateMemCpy(RegSaveAreaOriginPtr, Alignment, VAArgTLSOriginCopy, 
-                         Alignment, AMD64FpEndOffset); 
-      Type *OverflowArgAreaPtrTy = Type::getInt64PtrTy(*MS.C); 
-      Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr( 
-          IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy), 
-                        ConstantInt::get(MS.IntptrTy, 8)), 
-          PointerType::get(OverflowArgAreaPtrTy, 0)); 
-      Value *OverflowArgAreaPtr = 
-          IRB.CreateLoad(OverflowArgAreaPtrTy, OverflowArgAreaPtrPtr); 
-      Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr; 
-      std::tie(OverflowArgAreaShadowPtr, OverflowArgAreaOriginPtr) = 
-          MSV.getShadowOriginPtr(OverflowArgAreaPtr, IRB, IRB.getInt8Ty(), 
-                                 Alignment, /*isStore*/ true); 
-      Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy, 
-                                             AMD64FpEndOffset); 
-      IRB.CreateMemCpy(OverflowArgAreaShadowPtr, Alignment, SrcPtr, Alignment, 
-                       VAArgOverflowSize); 
-      if (MS.TrackOrigins) { 
-        SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSOriginCopy, 
-                                        AMD64FpEndOffset); 
-        IRB.CreateMemCpy(OverflowArgAreaOriginPtr, Alignment, SrcPtr, Alignment, 
-                         VAArgOverflowSize); 
-      } 
-    } 
-  } 
-}; 
- 
-/// MIPS64-specific implementation of VarArgHelper. 
-struct VarArgMIPS64Helper : public VarArgHelper { 
-  Function &F; 
-  MemorySanitizer &MS; 
-  MemorySanitizerVisitor &MSV; 
-  Value *VAArgTLSCopy = nullptr; 
-  Value *VAArgSize = nullptr; 
- 
-  SmallVector<CallInst*, 16> VAStartInstrumentationList; 
- 
-  VarArgMIPS64Helper(Function &F, MemorySanitizer &MS, 
-                    MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {} 
- 
-  void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override { 
-    unsigned VAArgOffset = 0; 
-    const DataLayout &DL = F.getParent()->getDataLayout(); 
-    for (auto ArgIt = CB.arg_begin() + CB.getFunctionType()->getNumParams(), 
-              End = CB.arg_end(); 
-         ArgIt != End; ++ArgIt) { 
-      Triple TargetTriple(F.getParent()->getTargetTriple()); 
-      Value *A = *ArgIt; 
-      Value *Base; 
-      uint64_t ArgSize = DL.getTypeAllocSize(A->getType()); 
-      if (TargetTriple.getArch() == Triple::mips64) { 
-        // Adjusting the shadow for argument with size < 8 to match the placement 
-        // of bits in big endian system 
-        if (ArgSize < 8) 
-          VAArgOffset += (8 - ArgSize); 
-      } 
-      Base = getShadowPtrForVAArgument(A->getType(), IRB, VAArgOffset, ArgSize); 
-      VAArgOffset += ArgSize; 
-      VAArgOffset = alignTo(VAArgOffset, 8); 
-      if (!Base) 
-        continue; 
-      IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment); 
-    } 
- 
-    Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(), VAArgOffset); 
-    // Here using VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creation of 
-    // a new class member i.e. it is the total size of all VarArgs. 
-    IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS); 
-  } 
- 
-  /// Compute the shadow address for a given va_arg. 
-  Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB, 
-                                   unsigned ArgOffset, unsigned ArgSize) { 
-    // Make sure we don't overflow __msan_va_arg_tls. 
-    if (ArgOffset + ArgSize > kParamTLSSize) 
-      return nullptr; 
-    Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy); 
-    Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset)); 
-    return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0), 
-                              "_msarg"); 
-  } 
- 
-  void visitVAStartInst(VAStartInst &I) override { 
-    IRBuilder<> IRB(&I); 
-    VAStartInstrumentationList.push_back(&I); 
-    Value *VAListTag = I.getArgOperand(0); 
-    Value *ShadowPtr, *OriginPtr; 
-    const Align Alignment = Align(8); 
-    std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr( 
-        VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true); 
-    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), 
-                     /* size */ 8, Alignment, false); 
-  } 
- 
-  void visitVACopyInst(VACopyInst &I) override { 
-    IRBuilder<> IRB(&I); 
-    VAStartInstrumentationList.push_back(&I); 
-    Value *VAListTag = I.getArgOperand(0); 
-    Value *ShadowPtr, *OriginPtr; 
-    const Align Alignment = Align(8); 
-    std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr( 
-        VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true); 
-    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), 
-                     /* size */ 8, Alignment, false); 
-  } 
- 
-  void finalizeInstrumentation() override { 
-    assert(!VAArgSize && !VAArgTLSCopy && 
-           "finalizeInstrumentation called twice"); 
+      VAArgOverflowSize =
+          IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
+      Value *CopySize =
+        IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AMD64FpEndOffset),
+                      VAArgOverflowSize);
+      VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
+      IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
+      if (MS.TrackOrigins) {
+        VAArgTLSOriginCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
+        IRB.CreateMemCpy(VAArgTLSOriginCopy, Align(8), MS.VAArgOriginTLS,
+                         Align(8), CopySize);
+      }
+    }
+
+    // Instrument va_start.
+    // Copy va_list shadow from the backup copy of the TLS contents.
+    for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
+      CallInst *OrigInst = VAStartInstrumentationList[i];
+      IRBuilder<> IRB(OrigInst->getNextNode());
+      Value *VAListTag = OrigInst->getArgOperand(0);
+
+      Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
+      Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr(
+          IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
+                        ConstantInt::get(MS.IntptrTy, 16)),
+          PointerType::get(RegSaveAreaPtrTy, 0));
+      Value *RegSaveAreaPtr =
+          IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
+      Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
+      const Align Alignment = Align(16);
+      std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
+          MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
+                                 Alignment, /*isStore*/ true);
+      IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
+                       AMD64FpEndOffset);
+      if (MS.TrackOrigins)
+        IRB.CreateMemCpy(RegSaveAreaOriginPtr, Alignment, VAArgTLSOriginCopy,
+                         Alignment, AMD64FpEndOffset);
+      Type *OverflowArgAreaPtrTy = Type::getInt64PtrTy(*MS.C);
+      Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr(
+          IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
+                        ConstantInt::get(MS.IntptrTy, 8)),
+          PointerType::get(OverflowArgAreaPtrTy, 0));
+      Value *OverflowArgAreaPtr =
+          IRB.CreateLoad(OverflowArgAreaPtrTy, OverflowArgAreaPtrPtr);
+      Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr;
+      std::tie(OverflowArgAreaShadowPtr, OverflowArgAreaOriginPtr) =
+          MSV.getShadowOriginPtr(OverflowArgAreaPtr, IRB, IRB.getInt8Ty(),
+                                 Alignment, /*isStore*/ true);
+      Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy,
+                                             AMD64FpEndOffset);
+      IRB.CreateMemCpy(OverflowArgAreaShadowPtr, Alignment, SrcPtr, Alignment,
+                       VAArgOverflowSize);
+      if (MS.TrackOrigins) {
+        SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSOriginCopy,
+                                        AMD64FpEndOffset);
+        IRB.CreateMemCpy(OverflowArgAreaOriginPtr, Alignment, SrcPtr, Alignment,
+                         VAArgOverflowSize);
+      }
+    }
+  }
+};
+
+/// MIPS64-specific implementation of VarArgHelper.
+struct VarArgMIPS64Helper : public VarArgHelper {
+  Function &F;
+  MemorySanitizer &MS;
+  MemorySanitizerVisitor &MSV;
+  Value *VAArgTLSCopy = nullptr;
+  Value *VAArgSize = nullptr;
+
+  SmallVector<CallInst*, 16> VAStartInstrumentationList;
+
+  VarArgMIPS64Helper(Function &F, MemorySanitizer &MS,
+                    MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {}
+
+  void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
+    unsigned VAArgOffset = 0;
+    const DataLayout &DL = F.getParent()->getDataLayout();
+    for (auto ArgIt = CB.arg_begin() + CB.getFunctionType()->getNumParams(),
+              End = CB.arg_end();
+         ArgIt != End; ++ArgIt) {
+      Triple TargetTriple(F.getParent()->getTargetTriple());
+      Value *A = *ArgIt;
+      Value *Base;
+      uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
+      if (TargetTriple.getArch() == Triple::mips64) {
+        // Adjusting the shadow for argument with size < 8 to match the placement
+        // of bits in big endian system
+        if (ArgSize < 8)
+          VAArgOffset += (8 - ArgSize);
+      }
+      Base = getShadowPtrForVAArgument(A->getType(), IRB, VAArgOffset, ArgSize);
+      VAArgOffset += ArgSize;
+      VAArgOffset = alignTo(VAArgOffset, 8);
+      if (!Base)
+        continue;
+      IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
+    }
+
+    Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(), VAArgOffset);
+    // Here using VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creation of
+    // a new class member i.e. it is the total size of all VarArgs.
+    IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS);
+  }
+
+  /// Compute the shadow address for a given va_arg.
+  Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
+                                   unsigned ArgOffset, unsigned ArgSize) {
+    // Make sure we don't overflow __msan_va_arg_tls.
+    if (ArgOffset + ArgSize > kParamTLSSize)
+      return nullptr;
+    Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
+    Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+    return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
+                              "_msarg");
+  }
+
+  void visitVAStartInst(VAStartInst &I) override {
+    IRBuilder<> IRB(&I);
+    VAStartInstrumentationList.push_back(&I);
+    Value *VAListTag = I.getArgOperand(0);
+    Value *ShadowPtr, *OriginPtr;
+    const Align Alignment = Align(8);
+    std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
+        VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
+    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+                     /* size */ 8, Alignment, false);
+  }
+
+  void visitVACopyInst(VACopyInst &I) override {
+    IRBuilder<> IRB(&I);
+    VAStartInstrumentationList.push_back(&I);
+    Value *VAListTag = I.getArgOperand(0);
+    Value *ShadowPtr, *OriginPtr;
+    const Align Alignment = Align(8);
+    std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
+        VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
+    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+                     /* size */ 8, Alignment, false);
+  }
+
+  void finalizeInstrumentation() override {
+    assert(!VAArgSize && !VAArgTLSCopy &&
+           "finalizeInstrumentation called twice");
     IRBuilder<> IRB(MSV.FnPrologueEnd);
-    VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS); 
-    Value *CopySize = IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0), 
-                                    VAArgSize); 
- 
-    if (!VAStartInstrumentationList.empty()) { 
-      // If there is a va_start in this function, make a backup copy of 
-      // va_arg_tls somewhere in the function entry block. 
-      VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize); 
-      IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize); 
-    } 
- 
-    // Instrument va_start. 
-    // Copy va_list shadow from the backup copy of the TLS contents. 
-    for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) { 
-      CallInst *OrigInst = VAStartInstrumentationList[i]; 
-      IRBuilder<> IRB(OrigInst->getNextNode()); 
-      Value *VAListTag = OrigInst->getArgOperand(0); 
-      Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C); 
-      Value *RegSaveAreaPtrPtr = 
-          IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy), 
-                             PointerType::get(RegSaveAreaPtrTy, 0)); 
-      Value *RegSaveAreaPtr = 
-          IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr); 
-      Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr; 
-      const Align Alignment = Align(8); 
-      std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) = 
-          MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(), 
-                                 Alignment, /*isStore*/ true); 
-      IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment, 
-                       CopySize); 
-    } 
-  } 
-}; 
- 
-/// AArch64-specific implementation of VarArgHelper. 
-struct VarArgAArch64Helper : public VarArgHelper { 
-  static const unsigned kAArch64GrArgSize = 64; 
-  static const unsigned kAArch64VrArgSize = 128; 
- 
-  static const unsigned AArch64GrBegOffset = 0; 
-  static const unsigned AArch64GrEndOffset = kAArch64GrArgSize; 
-  // Make VR space aligned to 16 bytes. 
-  static const unsigned AArch64VrBegOffset = AArch64GrEndOffset; 
-  static const unsigned AArch64VrEndOffset = AArch64VrBegOffset 
-                                             + kAArch64VrArgSize; 
-  static const unsigned AArch64VAEndOffset = AArch64VrEndOffset; 
- 
-  Function &F; 
-  MemorySanitizer &MS; 
-  MemorySanitizerVisitor &MSV; 
-  Value *VAArgTLSCopy = nullptr; 
-  Value *VAArgOverflowSize = nullptr; 
- 
-  SmallVector<CallInst*, 16> VAStartInstrumentationList; 
- 
-  enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory }; 
- 
-  VarArgAArch64Helper(Function &F, MemorySanitizer &MS, 
-                    MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {} 
- 
-  ArgKind classifyArgument(Value* arg) { 
-    Type *T = arg->getType(); 
-    if (T->isFPOrFPVectorTy()) 
-      return AK_FloatingPoint; 
-    if ((T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64) 
-        || (T->isPointerTy())) 
-      return AK_GeneralPurpose; 
-    return AK_Memory; 
-  } 
- 
-  // The instrumentation stores the argument shadow in a non ABI-specific 
-  // format because it does not know which argument is named (since Clang, 
-  // like x86_64 case, lowers the va_args in the frontend and this pass only 
-  // sees the low level code that deals with va_list internals). 
-  // The first seven GR registers are saved in the first 56 bytes of the 
-  // va_arg tls arra, followers by the first 8 FP/SIMD registers, and then 
-  // the remaining arguments. 
-  // Using constant offset within the va_arg TLS array allows fast copy 
-  // in the finalize instrumentation. 
-  void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override { 
-    unsigned GrOffset = AArch64GrBegOffset; 
-    unsigned VrOffset = AArch64VrBegOffset; 
-    unsigned OverflowOffset = AArch64VAEndOffset; 
- 
-    const DataLayout &DL = F.getParent()->getDataLayout(); 
-    for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End; 
-         ++ArgIt) { 
-      Value *A = *ArgIt; 
-      unsigned ArgNo = CB.getArgOperandNo(ArgIt); 
-      bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams(); 
-      ArgKind AK = classifyArgument(A); 
-      if (AK == AK_GeneralPurpose && GrOffset >= AArch64GrEndOffset) 
-        AK = AK_Memory; 
-      if (AK == AK_FloatingPoint && VrOffset >= AArch64VrEndOffset) 
-        AK = AK_Memory; 
-      Value *Base; 
-      switch (AK) { 
-        case AK_GeneralPurpose: 
-          Base = getShadowPtrForVAArgument(A->getType(), IRB, GrOffset, 8); 
-          GrOffset += 8; 
-          break; 
-        case AK_FloatingPoint: 
-          Base = getShadowPtrForVAArgument(A->getType(), IRB, VrOffset, 8); 
-          VrOffset += 16; 
-          break; 
-        case AK_Memory: 
-          // Don't count fixed arguments in the overflow area - va_start will 
-          // skip right over them. 
-          if (IsFixed) 
-            continue; 
-          uint64_t ArgSize = DL.getTypeAllocSize(A->getType()); 
-          Base = getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset, 
-                                           alignTo(ArgSize, 8)); 
-          OverflowOffset += alignTo(ArgSize, 8); 
-          break; 
-      } 
-      // Count Gp/Vr fixed arguments to their respective offsets, but don't 
-      // bother to actually store a shadow. 
-      if (IsFixed) 
-        continue; 
-      if (!Base) 
-        continue; 
-      IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment); 
-    } 
-    Constant *OverflowSize = 
-      ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AArch64VAEndOffset); 
-    IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS); 
-  } 
- 
-  /// Compute the shadow address for a given va_arg. 
-  Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB, 
-                                   unsigned ArgOffset, unsigned ArgSize) { 
-    // Make sure we don't overflow __msan_va_arg_tls. 
-    if (ArgOffset + ArgSize > kParamTLSSize) 
-      return nullptr; 
-    Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy); 
-    Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset)); 
-    return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0), 
-                              "_msarg"); 
-  } 
- 
-  void visitVAStartInst(VAStartInst &I) override { 
-    IRBuilder<> IRB(&I); 
-    VAStartInstrumentationList.push_back(&I); 
-    Value *VAListTag = I.getArgOperand(0); 
-    Value *ShadowPtr, *OriginPtr; 
-    const Align Alignment = Align(8); 
-    std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr( 
-        VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true); 
-    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), 
-                     /* size */ 32, Alignment, false); 
-  } 
- 
-  void visitVACopyInst(VACopyInst &I) override { 
-    IRBuilder<> IRB(&I); 
-    VAStartInstrumentationList.push_back(&I); 
-    Value *VAListTag = I.getArgOperand(0); 
-    Value *ShadowPtr, *OriginPtr; 
-    const Align Alignment = Align(8); 
-    std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr( 
-        VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true); 
-    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), 
-                     /* size */ 32, Alignment, false); 
-  } 
- 
-  // Retrieve a va_list field of 'void*' size. 
-  Value* getVAField64(IRBuilder<> &IRB, Value *VAListTag, int offset) { 
-    Value *SaveAreaPtrPtr = 
-      IRB.CreateIntToPtr( 
-        IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy), 
-                      ConstantInt::get(MS.IntptrTy, offset)), 
-        Type::getInt64PtrTy(*MS.C)); 
-    return IRB.CreateLoad(Type::getInt64Ty(*MS.C), SaveAreaPtrPtr); 
-  } 
- 
-  // Retrieve a va_list field of 'int' size. 
-  Value* getVAField32(IRBuilder<> &IRB, Value *VAListTag, int offset) { 
-    Value *SaveAreaPtr = 
-      IRB.CreateIntToPtr( 
-        IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy), 
-                      ConstantInt::get(MS.IntptrTy, offset)), 
-        Type::getInt32PtrTy(*MS.C)); 
-    Value *SaveArea32 = IRB.CreateLoad(IRB.getInt32Ty(), SaveAreaPtr); 
-    return IRB.CreateSExt(SaveArea32, MS.IntptrTy); 
-  } 
- 
-  void finalizeInstrumentation() override { 
-    assert(!VAArgOverflowSize && !VAArgTLSCopy && 
-           "finalizeInstrumentation called twice"); 
-    if (!VAStartInstrumentationList.empty()) { 
-      // If there is a va_start in this function, make a backup copy of 
-      // va_arg_tls somewhere in the function entry block. 
+    VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
+    Value *CopySize = IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0),
+                                    VAArgSize);
+
+    if (!VAStartInstrumentationList.empty()) {
+      // If there is a va_start in this function, make a backup copy of
+      // va_arg_tls somewhere in the function entry block.
+      VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
+      IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
+    }
+
+    // Instrument va_start.
+    // Copy va_list shadow from the backup copy of the TLS contents.
+    for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
+      CallInst *OrigInst = VAStartInstrumentationList[i];
+      IRBuilder<> IRB(OrigInst->getNextNode());
+      Value *VAListTag = OrigInst->getArgOperand(0);
+      Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
+      Value *RegSaveAreaPtrPtr =
+          IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
+                             PointerType::get(RegSaveAreaPtrTy, 0));
+      Value *RegSaveAreaPtr =
+          IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
+      Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
+      const Align Alignment = Align(8);
+      std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
+          MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
+                                 Alignment, /*isStore*/ true);
+      IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
+                       CopySize);
+    }
+  }
+};
+
+/// AArch64-specific implementation of VarArgHelper.
+struct VarArgAArch64Helper : public VarArgHelper {
+  static const unsigned kAArch64GrArgSize = 64;
+  static const unsigned kAArch64VrArgSize = 128;
+
+  static const unsigned AArch64GrBegOffset = 0;
+  static const unsigned AArch64GrEndOffset = kAArch64GrArgSize;
+  // Make VR space aligned to 16 bytes.
+  static const unsigned AArch64VrBegOffset = AArch64GrEndOffset;
+  static const unsigned AArch64VrEndOffset = AArch64VrBegOffset
+                                             + kAArch64VrArgSize;
+  static const unsigned AArch64VAEndOffset = AArch64VrEndOffset;
+
+  Function &F;
+  MemorySanitizer &MS;
+  MemorySanitizerVisitor &MSV;
+  Value *VAArgTLSCopy = nullptr;
+  Value *VAArgOverflowSize = nullptr;
+
+  SmallVector<CallInst*, 16> VAStartInstrumentationList;
+
+  enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
+
+  VarArgAArch64Helper(Function &F, MemorySanitizer &MS,
+                    MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {}
+
+  ArgKind classifyArgument(Value* arg) {
+    Type *T = arg->getType();
+    if (T->isFPOrFPVectorTy())
+      return AK_FloatingPoint;
+    if ((T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64)
+        || (T->isPointerTy()))
+      return AK_GeneralPurpose;
+    return AK_Memory;
+  }
+
+  // The instrumentation stores the argument shadow in a non ABI-specific
+  // format because it does not know which argument is named (since Clang,
+  // like x86_64 case, lowers the va_args in the frontend and this pass only
+  // sees the low level code that deals with va_list internals).
+  // The first seven GR registers are saved in the first 56 bytes of the
+  // va_arg tls arra, followers by the first 8 FP/SIMD registers, and then
+  // the remaining arguments.
+  // Using constant offset within the va_arg TLS array allows fast copy
+  // in the finalize instrumentation.
+  void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
+    unsigned GrOffset = AArch64GrBegOffset;
+    unsigned VrOffset = AArch64VrBegOffset;
+    unsigned OverflowOffset = AArch64VAEndOffset;
+
+    const DataLayout &DL = F.getParent()->getDataLayout();
+    for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End;
+         ++ArgIt) {
+      Value *A = *ArgIt;
+      unsigned ArgNo = CB.getArgOperandNo(ArgIt);
+      bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
+      ArgKind AK = classifyArgument(A);
+      if (AK == AK_GeneralPurpose && GrOffset >= AArch64GrEndOffset)
+        AK = AK_Memory;
+      if (AK == AK_FloatingPoint && VrOffset >= AArch64VrEndOffset)
+        AK = AK_Memory;
+      Value *Base;
+      switch (AK) {
+        case AK_GeneralPurpose:
+          Base = getShadowPtrForVAArgument(A->getType(), IRB, GrOffset, 8);
+          GrOffset += 8;
+          break;
+        case AK_FloatingPoint:
+          Base = getShadowPtrForVAArgument(A->getType(), IRB, VrOffset, 8);
+          VrOffset += 16;
+          break;
+        case AK_Memory:
+          // Don't count fixed arguments in the overflow area - va_start will
+          // skip right over them.
+          if (IsFixed)
+            continue;
+          uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
+          Base = getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset,
+                                           alignTo(ArgSize, 8));
+          OverflowOffset += alignTo(ArgSize, 8);
+          break;
+      }
+      // Count Gp/Vr fixed arguments to their respective offsets, but don't
+      // bother to actually store a shadow.
+      if (IsFixed)
+        continue;
+      if (!Base)
+        continue;
+      IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
+    }
+    Constant *OverflowSize =
+      ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AArch64VAEndOffset);
+    IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
+  }
+
+  /// Compute the shadow address for a given va_arg.
+  Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
+                                   unsigned ArgOffset, unsigned ArgSize) {
+    // Make sure we don't overflow __msan_va_arg_tls.
+    if (ArgOffset + ArgSize > kParamTLSSize)
+      return nullptr;
+    Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
+    Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+    return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
+                              "_msarg");
+  }
+
+  void visitVAStartInst(VAStartInst &I) override {
+    IRBuilder<> IRB(&I);
+    VAStartInstrumentationList.push_back(&I);
+    Value *VAListTag = I.getArgOperand(0);
+    Value *ShadowPtr, *OriginPtr;
+    const Align Alignment = Align(8);
+    std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
+        VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
+    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+                     /* size */ 32, Alignment, false);
+  }
+
+  void visitVACopyInst(VACopyInst &I) override {
+    IRBuilder<> IRB(&I);
+    VAStartInstrumentationList.push_back(&I);
+    Value *VAListTag = I.getArgOperand(0);
+    Value *ShadowPtr, *OriginPtr;
+    const Align Alignment = Align(8);
+    std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
+        VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
+    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+                     /* size */ 32, Alignment, false);
+  }
+
+  // Retrieve a va_list field of 'void*' size.
+  Value* getVAField64(IRBuilder<> &IRB, Value *VAListTag, int offset) {
+    Value *SaveAreaPtrPtr =
+      IRB.CreateIntToPtr(
+        IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
+                      ConstantInt::get(MS.IntptrTy, offset)),
+        Type::getInt64PtrTy(*MS.C));
+    return IRB.CreateLoad(Type::getInt64Ty(*MS.C), SaveAreaPtrPtr);
+  }
+
+  // Retrieve a va_list field of 'int' size.
+  Value* getVAField32(IRBuilder<> &IRB, Value *VAListTag, int offset) {
+    Value *SaveAreaPtr =
+      IRB.CreateIntToPtr(
+        IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
+                      ConstantInt::get(MS.IntptrTy, offset)),
+        Type::getInt32PtrTy(*MS.C));
+    Value *SaveArea32 = IRB.CreateLoad(IRB.getInt32Ty(), SaveAreaPtr);
+    return IRB.CreateSExt(SaveArea32, MS.IntptrTy);
+  }
+
+  void finalizeInstrumentation() override {
+    assert(!VAArgOverflowSize && !VAArgTLSCopy &&
+           "finalizeInstrumentation called twice");
+    if (!VAStartInstrumentationList.empty()) {
+      // If there is a va_start in this function, make a backup copy of
+      // va_arg_tls somewhere in the function entry block.
       IRBuilder<> IRB(MSV.FnPrologueEnd);
-      VAArgOverflowSize = 
-          IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS); 
-      Value *CopySize = 
-        IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AArch64VAEndOffset), 
-                      VAArgOverflowSize); 
-      VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize); 
-      IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize); 
-    } 
- 
-    Value *GrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64GrArgSize); 
-    Value *VrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64VrArgSize); 
- 
-    // Instrument va_start, copy va_list shadow from the backup copy of 
-    // the TLS contents. 
-    for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) { 
-      CallInst *OrigInst = VAStartInstrumentationList[i]; 
-      IRBuilder<> IRB(OrigInst->getNextNode()); 
- 
-      Value *VAListTag = OrigInst->getArgOperand(0); 
- 
-      // The variadic ABI for AArch64 creates two areas to save the incoming 
-      // argument registers (one for 64-bit general register xn-x7 and another 
-      // for 128-bit FP/SIMD vn-v7). 
-      // We need then to propagate the shadow arguments on both regions 
-      // 'va::__gr_top + va::__gr_offs' and 'va::__vr_top + va::__vr_offs'. 
-      // The remaining arguments are saved on shadow for 'va::stack'. 
-      // One caveat is it requires only to propagate the non-named arguments, 
-      // however on the call site instrumentation 'all' the arguments are 
-      // saved. So to copy the shadow values from the va_arg TLS array 
-      // we need to adjust the offset for both GR and VR fields based on 
-      // the __{gr,vr}_offs value (since they are stores based on incoming 
-      // named arguments). 
- 
-      // Read the stack pointer from the va_list. 
-      Value *StackSaveAreaPtr = getVAField64(IRB, VAListTag, 0); 
- 
-      // Read both the __gr_top and __gr_off and add them up. 
-      Value *GrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 8); 
-      Value *GrOffSaveArea = getVAField32(IRB, VAListTag, 24); 
- 
-      Value *GrRegSaveAreaPtr = IRB.CreateAdd(GrTopSaveAreaPtr, GrOffSaveArea); 
- 
-      // Read both the __vr_top and __vr_off and add them up. 
-      Value *VrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 16); 
-      Value *VrOffSaveArea = getVAField32(IRB, VAListTag, 28); 
- 
-      Value *VrRegSaveAreaPtr = IRB.CreateAdd(VrTopSaveAreaPtr, VrOffSaveArea); 
- 
-      // It does not know how many named arguments is being used and, on the 
-      // callsite all the arguments were saved.  Since __gr_off is defined as 
-      // '0 - ((8 - named_gr) * 8)', the idea is to just propagate the variadic 
-      // argument by ignoring the bytes of shadow from named arguments. 
-      Value *GrRegSaveAreaShadowPtrOff = 
-        IRB.CreateAdd(GrArgSize, GrOffSaveArea); 
- 
-      Value *GrRegSaveAreaShadowPtr = 
-          MSV.getShadowOriginPtr(GrRegSaveAreaPtr, IRB, IRB.getInt8Ty(), 
-                                 Align(8), /*isStore*/ true) 
-              .first; 
- 
-      Value *GrSrcPtr = IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy, 
-                                              GrRegSaveAreaShadowPtrOff); 
-      Value *GrCopySize = IRB.CreateSub(GrArgSize, GrRegSaveAreaShadowPtrOff); 
- 
-      IRB.CreateMemCpy(GrRegSaveAreaShadowPtr, Align(8), GrSrcPtr, Align(8), 
-                       GrCopySize); 
- 
-      // Again, but for FP/SIMD values. 
-      Value *VrRegSaveAreaShadowPtrOff = 
-          IRB.CreateAdd(VrArgSize, VrOffSaveArea); 
- 
-      Value *VrRegSaveAreaShadowPtr = 
-          MSV.getShadowOriginPtr(VrRegSaveAreaPtr, IRB, IRB.getInt8Ty(), 
-                                 Align(8), /*isStore*/ true) 
-              .first; 
- 
-      Value *VrSrcPtr = IRB.CreateInBoundsGEP( 
-        IRB.getInt8Ty(), 
-        IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy, 
-                              IRB.getInt32(AArch64VrBegOffset)), 
-        VrRegSaveAreaShadowPtrOff); 
-      Value *VrCopySize = IRB.CreateSub(VrArgSize, VrRegSaveAreaShadowPtrOff); 
- 
-      IRB.CreateMemCpy(VrRegSaveAreaShadowPtr, Align(8), VrSrcPtr, Align(8), 
-                       VrCopySize); 
- 
-      // And finally for remaining arguments. 
-      Value *StackSaveAreaShadowPtr = 
-          MSV.getShadowOriginPtr(StackSaveAreaPtr, IRB, IRB.getInt8Ty(), 
-                                 Align(16), /*isStore*/ true) 
-              .first; 
- 
-      Value *StackSrcPtr = 
-        IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy, 
-                              IRB.getInt32(AArch64VAEndOffset)); 
- 
-      IRB.CreateMemCpy(StackSaveAreaShadowPtr, Align(16), StackSrcPtr, 
-                       Align(16), VAArgOverflowSize); 
-    } 
-  } 
-}; 
- 
-/// PowerPC64-specific implementation of VarArgHelper. 
-struct VarArgPowerPC64Helper : public VarArgHelper { 
-  Function &F; 
-  MemorySanitizer &MS; 
-  MemorySanitizerVisitor &MSV; 
-  Value *VAArgTLSCopy = nullptr; 
-  Value *VAArgSize = nullptr; 
- 
-  SmallVector<CallInst*, 16> VAStartInstrumentationList; 
- 
-  VarArgPowerPC64Helper(Function &F, MemorySanitizer &MS, 
-                    MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {} 
- 
-  void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override { 
-    // For PowerPC, we need to deal with alignment of stack arguments - 
-    // they are mostly aligned to 8 bytes, but vectors and i128 arrays 
-    // are aligned to 16 bytes, byvals can be aligned to 8 or 16 bytes, 
+      VAArgOverflowSize =
+          IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
+      Value *CopySize =
+        IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AArch64VAEndOffset),
+                      VAArgOverflowSize);
+      VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
+      IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
+    }
+
+    Value *GrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64GrArgSize);
+    Value *VrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64VrArgSize);
+
+    // Instrument va_start, copy va_list shadow from the backup copy of
+    // the TLS contents.
+    for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
+      CallInst *OrigInst = VAStartInstrumentationList[i];
+      IRBuilder<> IRB(OrigInst->getNextNode());
+
+      Value *VAListTag = OrigInst->getArgOperand(0);
+
+      // The variadic ABI for AArch64 creates two areas to save the incoming
+      // argument registers (one for 64-bit general register xn-x7 and another
+      // for 128-bit FP/SIMD vn-v7).
+      // We need then to propagate the shadow arguments on both regions
+      // 'va::__gr_top + va::__gr_offs' and 'va::__vr_top + va::__vr_offs'.
+      // The remaining arguments are saved on shadow for 'va::stack'.
+      // One caveat is it requires only to propagate the non-named arguments,
+      // however on the call site instrumentation 'all' the arguments are
+      // saved. So to copy the shadow values from the va_arg TLS array
+      // we need to adjust the offset for both GR and VR fields based on
+      // the __{gr,vr}_offs value (since they are stores based on incoming
+      // named arguments).
+
+      // Read the stack pointer from the va_list.
+      Value *StackSaveAreaPtr = getVAField64(IRB, VAListTag, 0);
+
+      // Read both the __gr_top and __gr_off and add them up.
+      Value *GrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 8);
+      Value *GrOffSaveArea = getVAField32(IRB, VAListTag, 24);
+
+      Value *GrRegSaveAreaPtr = IRB.CreateAdd(GrTopSaveAreaPtr, GrOffSaveArea);
+
+      // Read both the __vr_top and __vr_off and add them up.
+      Value *VrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 16);
+      Value *VrOffSaveArea = getVAField32(IRB, VAListTag, 28);
+
+      Value *VrRegSaveAreaPtr = IRB.CreateAdd(VrTopSaveAreaPtr, VrOffSaveArea);
+
+      // It does not know how many named arguments is being used and, on the
+      // callsite all the arguments were saved.  Since __gr_off is defined as
+      // '0 - ((8 - named_gr) * 8)', the idea is to just propagate the variadic
+      // argument by ignoring the bytes of shadow from named arguments.
+      Value *GrRegSaveAreaShadowPtrOff =
+        IRB.CreateAdd(GrArgSize, GrOffSaveArea);
+
+      Value *GrRegSaveAreaShadowPtr =
+          MSV.getShadowOriginPtr(GrRegSaveAreaPtr, IRB, IRB.getInt8Ty(),
+                                 Align(8), /*isStore*/ true)
+              .first;
+
+      Value *GrSrcPtr = IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
+                                              GrRegSaveAreaShadowPtrOff);
+      Value *GrCopySize = IRB.CreateSub(GrArgSize, GrRegSaveAreaShadowPtrOff);
+
+      IRB.CreateMemCpy(GrRegSaveAreaShadowPtr, Align(8), GrSrcPtr, Align(8),
+                       GrCopySize);
+
+      // Again, but for FP/SIMD values.
+      Value *VrRegSaveAreaShadowPtrOff =
+          IRB.CreateAdd(VrArgSize, VrOffSaveArea);
+
+      Value *VrRegSaveAreaShadowPtr =
+          MSV.getShadowOriginPtr(VrRegSaveAreaPtr, IRB, IRB.getInt8Ty(),
+                                 Align(8), /*isStore*/ true)
+              .first;
+
+      Value *VrSrcPtr = IRB.CreateInBoundsGEP(
+        IRB.getInt8Ty(),
+        IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
+                              IRB.getInt32(AArch64VrBegOffset)),
+        VrRegSaveAreaShadowPtrOff);
+      Value *VrCopySize = IRB.CreateSub(VrArgSize, VrRegSaveAreaShadowPtrOff);
+
+      IRB.CreateMemCpy(VrRegSaveAreaShadowPtr, Align(8), VrSrcPtr, Align(8),
+                       VrCopySize);
+
+      // And finally for remaining arguments.
+      Value *StackSaveAreaShadowPtr =
+          MSV.getShadowOriginPtr(StackSaveAreaPtr, IRB, IRB.getInt8Ty(),
+                                 Align(16), /*isStore*/ true)
+              .first;
+
+      Value *StackSrcPtr =
+        IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
+                              IRB.getInt32(AArch64VAEndOffset));
+
+      IRB.CreateMemCpy(StackSaveAreaShadowPtr, Align(16), StackSrcPtr,
+                       Align(16), VAArgOverflowSize);
+    }
+  }
+};
+
+/// PowerPC64-specific implementation of VarArgHelper.
+struct VarArgPowerPC64Helper : public VarArgHelper {
+  Function &F;
+  MemorySanitizer &MS;
+  MemorySanitizerVisitor &MSV;
+  Value *VAArgTLSCopy = nullptr;
+  Value *VAArgSize = nullptr;
+
+  SmallVector<CallInst*, 16> VAStartInstrumentationList;
+
+  VarArgPowerPC64Helper(Function &F, MemorySanitizer &MS,
+                    MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {}
+
+  void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
+    // For PowerPC, we need to deal with alignment of stack arguments -
+    // they are mostly aligned to 8 bytes, but vectors and i128 arrays
+    // are aligned to 16 bytes, byvals can be aligned to 8 or 16 bytes,
     // For that reason, we compute current offset from stack pointer (which is
     // always properly aligned), and offset for the first vararg, then subtract
     // them.
-    unsigned VAArgBase; 
-    Triple TargetTriple(F.getParent()->getTargetTriple()); 
-    // Parameter save area starts at 48 bytes from frame pointer for ABIv1, 
-    // and 32 bytes for ABIv2.  This is usually determined by target 
-    // endianness, but in theory could be overridden by function attribute. 
-    if (TargetTriple.getArch() == Triple::ppc64) 
-      VAArgBase = 48; 
-    else 
-      VAArgBase = 32; 
-    unsigned VAArgOffset = VAArgBase; 
-    const DataLayout &DL = F.getParent()->getDataLayout(); 
-    for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End; 
-         ++ArgIt) { 
-      Value *A = *ArgIt; 
-      unsigned ArgNo = CB.getArgOperandNo(ArgIt); 
-      bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams(); 
-      bool IsByVal = CB.paramHasAttr(ArgNo, Attribute::ByVal); 
-      if (IsByVal) { 
-        assert(A->getType()->isPointerTy()); 
-        Type *RealTy = CB.getParamByValType(ArgNo); 
-        uint64_t ArgSize = DL.getTypeAllocSize(RealTy); 
-        MaybeAlign ArgAlign = CB.getParamAlign(ArgNo); 
-        if (!ArgAlign || *ArgAlign < Align(8)) 
-          ArgAlign = Align(8); 
-        VAArgOffset = alignTo(VAArgOffset, ArgAlign); 
-        if (!IsFixed) { 
-          Value *Base = getShadowPtrForVAArgument( 
-              RealTy, IRB, VAArgOffset - VAArgBase, ArgSize); 
-          if (Base) { 
-            Value *AShadowPtr, *AOriginPtr; 
-            std::tie(AShadowPtr, AOriginPtr) = 
-                MSV.getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), 
-                                       kShadowTLSAlignment, /*isStore*/ false); 
- 
-            IRB.CreateMemCpy(Base, kShadowTLSAlignment, AShadowPtr, 
-                             kShadowTLSAlignment, ArgSize); 
-          } 
-        } 
-        VAArgOffset += alignTo(ArgSize, 8); 
-      } else { 
-        Value *Base; 
-        uint64_t ArgSize = DL.getTypeAllocSize(A->getType()); 
-        uint64_t ArgAlign = 8; 
-        if (A->getType()->isArrayTy()) { 
-          // Arrays are aligned to element size, except for long double 
-          // arrays, which are aligned to 8 bytes. 
-          Type *ElementTy = A->getType()->getArrayElementType(); 
-          if (!ElementTy->isPPC_FP128Ty()) 
-            ArgAlign = DL.getTypeAllocSize(ElementTy); 
-        } else if (A->getType()->isVectorTy()) { 
-          // Vectors are naturally aligned. 
-          ArgAlign = DL.getTypeAllocSize(A->getType()); 
-        } 
-        if (ArgAlign < 8) 
-          ArgAlign = 8; 
-        VAArgOffset = alignTo(VAArgOffset, ArgAlign); 
-        if (DL.isBigEndian()) { 
-          // Adjusting the shadow for argument with size < 8 to match the placement 
-          // of bits in big endian system 
-          if (ArgSize < 8) 
-            VAArgOffset += (8 - ArgSize); 
-        } 
-        if (!IsFixed) { 
-          Base = getShadowPtrForVAArgument(A->getType(), IRB, 
-                                           VAArgOffset - VAArgBase, ArgSize); 
-          if (Base) 
-            IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment); 
-        } 
-        VAArgOffset += ArgSize; 
-        VAArgOffset = alignTo(VAArgOffset, 8); 
-      } 
-      if (IsFixed) 
-        VAArgBase = VAArgOffset; 
-    } 
- 
-    Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(), 
-                                                VAArgOffset - VAArgBase); 
-    // Here using VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creation of 
-    // a new class member i.e. it is the total size of all VarArgs. 
-    IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS); 
-  } 
- 
-  /// Compute the shadow address for a given va_arg. 
-  Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB, 
-                                   unsigned ArgOffset, unsigned ArgSize) { 
-    // Make sure we don't overflow __msan_va_arg_tls. 
-    if (ArgOffset + ArgSize > kParamTLSSize) 
-      return nullptr; 
-    Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy); 
-    Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset)); 
-    return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0), 
-                              "_msarg"); 
-  } 
- 
-  void visitVAStartInst(VAStartInst &I) override { 
-    IRBuilder<> IRB(&I); 
-    VAStartInstrumentationList.push_back(&I); 
-    Value *VAListTag = I.getArgOperand(0); 
-    Value *ShadowPtr, *OriginPtr; 
-    const Align Alignment = Align(8); 
-    std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr( 
-        VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true); 
-    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), 
-                     /* size */ 8, Alignment, false); 
-  } 
- 
-  void visitVACopyInst(VACopyInst &I) override { 
-    IRBuilder<> IRB(&I); 
-    Value *VAListTag = I.getArgOperand(0); 
-    Value *ShadowPtr, *OriginPtr; 
-    const Align Alignment = Align(8); 
-    std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr( 
-        VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true); 
-    // Unpoison the whole __va_list_tag. 
-    // FIXME: magic ABI constants. 
-    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), 
-                     /* size */ 8, Alignment, false); 
-  } 
- 
-  void finalizeInstrumentation() override { 
-    assert(!VAArgSize && !VAArgTLSCopy && 
-           "finalizeInstrumentation called twice"); 
+    unsigned VAArgBase;
+    Triple TargetTriple(F.getParent()->getTargetTriple());
+    // Parameter save area starts at 48 bytes from frame pointer for ABIv1,
+    // and 32 bytes for ABIv2.  This is usually determined by target
+    // endianness, but in theory could be overridden by function attribute.
+    if (TargetTriple.getArch() == Triple::ppc64)
+      VAArgBase = 48;
+    else
+      VAArgBase = 32;
+    unsigned VAArgOffset = VAArgBase;
+    const DataLayout &DL = F.getParent()->getDataLayout();
+    for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End;
+         ++ArgIt) {
+      Value *A = *ArgIt;
+      unsigned ArgNo = CB.getArgOperandNo(ArgIt);
+      bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
+      bool IsByVal = CB.paramHasAttr(ArgNo, Attribute::ByVal);
+      if (IsByVal) {
+        assert(A->getType()->isPointerTy());
+        Type *RealTy = CB.getParamByValType(ArgNo);
+        uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
+        MaybeAlign ArgAlign = CB.getParamAlign(ArgNo);
+        if (!ArgAlign || *ArgAlign < Align(8))
+          ArgAlign = Align(8);
+        VAArgOffset = alignTo(VAArgOffset, ArgAlign);
+        if (!IsFixed) {
+          Value *Base = getShadowPtrForVAArgument(
+              RealTy, IRB, VAArgOffset - VAArgBase, ArgSize);
+          if (Base) {
+            Value *AShadowPtr, *AOriginPtr;
+            std::tie(AShadowPtr, AOriginPtr) =
+                MSV.getShadowOriginPtr(A, IRB, IRB.getInt8Ty(),
+                                       kShadowTLSAlignment, /*isStore*/ false);
+
+            IRB.CreateMemCpy(Base, kShadowTLSAlignment, AShadowPtr,
+                             kShadowTLSAlignment, ArgSize);
+          }
+        }
+        VAArgOffset += alignTo(ArgSize, 8);
+      } else {
+        Value *Base;
+        uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
+        uint64_t ArgAlign = 8;
+        if (A->getType()->isArrayTy()) {
+          // Arrays are aligned to element size, except for long double
+          // arrays, which are aligned to 8 bytes.
+          Type *ElementTy = A->getType()->getArrayElementType();
+          if (!ElementTy->isPPC_FP128Ty())
+            ArgAlign = DL.getTypeAllocSize(ElementTy);
+        } else if (A->getType()->isVectorTy()) {
+          // Vectors are naturally aligned.
+          ArgAlign = DL.getTypeAllocSize(A->getType());
+        }
+        if (ArgAlign < 8)
+          ArgAlign = 8;
+        VAArgOffset = alignTo(VAArgOffset, ArgAlign);
+        if (DL.isBigEndian()) {
+          // Adjusting the shadow for argument with size < 8 to match the placement
+          // of bits in big endian system
+          if (ArgSize < 8)
+            VAArgOffset += (8 - ArgSize);
+        }
+        if (!IsFixed) {
+          Base = getShadowPtrForVAArgument(A->getType(), IRB,
+                                           VAArgOffset - VAArgBase, ArgSize);
+          if (Base)
+            IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
+        }
+        VAArgOffset += ArgSize;
+        VAArgOffset = alignTo(VAArgOffset, 8);
+      }
+      if (IsFixed)
+        VAArgBase = VAArgOffset;
+    }
+
+    Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(),
+                                                VAArgOffset - VAArgBase);
+    // Here using VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creation of
+    // a new class member i.e. it is the total size of all VarArgs.
+    IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS);
+  }
+
+  /// Compute the shadow address for a given va_arg.
+  Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
+                                   unsigned ArgOffset, unsigned ArgSize) {
+    // Make sure we don't overflow __msan_va_arg_tls.
+    if (ArgOffset + ArgSize > kParamTLSSize)
+      return nullptr;
+    Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
+    Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+    return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
+                              "_msarg");
+  }
+
+  void visitVAStartInst(VAStartInst &I) override {
+    IRBuilder<> IRB(&I);
+    VAStartInstrumentationList.push_back(&I);
+    Value *VAListTag = I.getArgOperand(0);
+    Value *ShadowPtr, *OriginPtr;
+    const Align Alignment = Align(8);
+    std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
+        VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
+    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+                     /* size */ 8, Alignment, false);
+  }
+
+  void visitVACopyInst(VACopyInst &I) override {
+    IRBuilder<> IRB(&I);
+    Value *VAListTag = I.getArgOperand(0);
+    Value *ShadowPtr, *OriginPtr;
+    const Align Alignment = Align(8);
+    std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
+        VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
+    // Unpoison the whole __va_list_tag.
+    // FIXME: magic ABI constants.
+    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+                     /* size */ 8, Alignment, false);
+  }
+
+  void finalizeInstrumentation() override {
+    assert(!VAArgSize && !VAArgTLSCopy &&
+           "finalizeInstrumentation called twice");
     IRBuilder<> IRB(MSV.FnPrologueEnd);
-    VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS); 
-    Value *CopySize = IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0), 
-                                    VAArgSize); 
- 
-    if (!VAStartInstrumentationList.empty()) { 
-      // If there is a va_start in this function, make a backup copy of 
-      // va_arg_tls somewhere in the function entry block. 
-      VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize); 
-      IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize); 
-    } 
- 
-    // Instrument va_start. 
-    // Copy va_list shadow from the backup copy of the TLS contents. 
-    for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) { 
-      CallInst *OrigInst = VAStartInstrumentationList[i]; 
-      IRBuilder<> IRB(OrigInst->getNextNode()); 
-      Value *VAListTag = OrigInst->getArgOperand(0); 
-      Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C); 
-      Value *RegSaveAreaPtrPtr = 
-          IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy), 
-                             PointerType::get(RegSaveAreaPtrTy, 0)); 
-      Value *RegSaveAreaPtr = 
-          IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr); 
-      Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr; 
-      const Align Alignment = Align(8); 
-      std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) = 
-          MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(), 
-                                 Alignment, /*isStore*/ true); 
-      IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment, 
-                       CopySize); 
-    } 
-  } 
-}; 
- 
-/// SystemZ-specific implementation of VarArgHelper. 
-struct VarArgSystemZHelper : public VarArgHelper { 
-  static const unsigned SystemZGpOffset = 16; 
-  static const unsigned SystemZGpEndOffset = 56; 
-  static const unsigned SystemZFpOffset = 128; 
-  static const unsigned SystemZFpEndOffset = 160; 
-  static const unsigned SystemZMaxVrArgs = 8; 
-  static const unsigned SystemZRegSaveAreaSize = 160; 
-  static const unsigned SystemZOverflowOffset = 160; 
-  static const unsigned SystemZVAListTagSize = 32; 
-  static const unsigned SystemZOverflowArgAreaPtrOffset = 16; 
-  static const unsigned SystemZRegSaveAreaPtrOffset = 24; 
- 
-  Function &F; 
-  MemorySanitizer &MS; 
-  MemorySanitizerVisitor &MSV; 
-  Value *VAArgTLSCopy = nullptr; 
-  Value *VAArgTLSOriginCopy = nullptr; 
-  Value *VAArgOverflowSize = nullptr; 
- 
-  SmallVector<CallInst *, 16> VAStartInstrumentationList; 
- 
-  enum class ArgKind { 
-    GeneralPurpose, 
-    FloatingPoint, 
-    Vector, 
-    Memory, 
-    Indirect, 
-  }; 
- 
-  enum class ShadowExtension { None, Zero, Sign }; 
- 
-  VarArgSystemZHelper(Function &F, MemorySanitizer &MS, 
-                      MemorySanitizerVisitor &MSV) 
-      : F(F), MS(MS), MSV(MSV) {} 
- 
-  ArgKind classifyArgument(Type *T, bool IsSoftFloatABI) { 
-    // T is a SystemZABIInfo::classifyArgumentType() output, and there are 
-    // only a few possibilities of what it can be. In particular, enums, single 
-    // element structs and large types have already been taken care of. 
- 
-    // Some i128 and fp128 arguments are converted to pointers only in the 
-    // back end. 
-    if (T->isIntegerTy(128) || T->isFP128Ty()) 
-      return ArgKind::Indirect; 
-    if (T->isFloatingPointTy()) 
-      return IsSoftFloatABI ? ArgKind::GeneralPurpose : ArgKind::FloatingPoint; 
-    if (T->isIntegerTy() || T->isPointerTy()) 
-      return ArgKind::GeneralPurpose; 
-    if (T->isVectorTy()) 
-      return ArgKind::Vector; 
-    return ArgKind::Memory; 
-  } 
- 
-  ShadowExtension getShadowExtension(const CallBase &CB, unsigned ArgNo) { 
-    // ABI says: "One of the simple integer types no more than 64 bits wide. 
-    // ... If such an argument is shorter than 64 bits, replace it by a full 
-    // 64-bit integer representing the same number, using sign or zero 
-    // extension". Shadow for an integer argument has the same type as the 
-    // argument itself, so it can be sign or zero extended as well. 
-    bool ZExt = CB.paramHasAttr(ArgNo, Attribute::ZExt); 
-    bool SExt = CB.paramHasAttr(ArgNo, Attribute::SExt); 
-    if (ZExt) { 
-      assert(!SExt); 
-      return ShadowExtension::Zero; 
-    } 
-    if (SExt) { 
-      assert(!ZExt); 
-      return ShadowExtension::Sign; 
-    } 
-    return ShadowExtension::None; 
-  } 
- 
-  void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override { 
-    bool IsSoftFloatABI = CB.getCalledFunction() 
-                              ->getFnAttribute("use-soft-float") 
-                              .getValueAsString() == "true"; 
-    unsigned GpOffset = SystemZGpOffset; 
-    unsigned FpOffset = SystemZFpOffset; 
-    unsigned VrIndex = 0; 
-    unsigned OverflowOffset = SystemZOverflowOffset; 
-    const DataLayout &DL = F.getParent()->getDataLayout(); 
-    for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End; 
-         ++ArgIt) { 
-      Value *A = *ArgIt; 
-      unsigned ArgNo = CB.getArgOperandNo(ArgIt); 
-      bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams(); 
-      // SystemZABIInfo does not produce ByVal parameters. 
-      assert(!CB.paramHasAttr(ArgNo, Attribute::ByVal)); 
-      Type *T = A->getType(); 
-      ArgKind AK = classifyArgument(T, IsSoftFloatABI); 
-      if (AK == ArgKind::Indirect) { 
-        T = PointerType::get(T, 0); 
-        AK = ArgKind::GeneralPurpose; 
-      } 
-      if (AK == ArgKind::GeneralPurpose && GpOffset >= SystemZGpEndOffset) 
-        AK = ArgKind::Memory; 
-      if (AK == ArgKind::FloatingPoint && FpOffset >= SystemZFpEndOffset) 
-        AK = ArgKind::Memory; 
-      if (AK == ArgKind::Vector && (VrIndex >= SystemZMaxVrArgs || !IsFixed)) 
-        AK = ArgKind::Memory; 
-      Value *ShadowBase = nullptr; 
-      Value *OriginBase = nullptr; 
-      ShadowExtension SE = ShadowExtension::None; 
-      switch (AK) { 
-      case ArgKind::GeneralPurpose: { 
-        // Always keep track of GpOffset, but store shadow only for varargs. 
-        uint64_t ArgSize = 8; 
-        if (GpOffset + ArgSize <= kParamTLSSize) { 
-          if (!IsFixed) { 
-            SE = getShadowExtension(CB, ArgNo); 
-            uint64_t GapSize = 0; 
-            if (SE == ShadowExtension::None) { 
-              uint64_t ArgAllocSize = DL.getTypeAllocSize(T); 
-              assert(ArgAllocSize <= ArgSize); 
-              GapSize = ArgSize - ArgAllocSize; 
-            } 
-            ShadowBase = getShadowAddrForVAArgument(IRB, GpOffset + GapSize); 
-            if (MS.TrackOrigins) 
-              OriginBase = getOriginPtrForVAArgument(IRB, GpOffset + GapSize); 
-          } 
-          GpOffset += ArgSize; 
-        } else { 
-          GpOffset = kParamTLSSize; 
-        } 
-        break; 
-      } 
-      case ArgKind::FloatingPoint: { 
-        // Always keep track of FpOffset, but store shadow only for varargs. 
-        uint64_t ArgSize = 8; 
-        if (FpOffset + ArgSize <= kParamTLSSize) { 
-          if (!IsFixed) { 
-            // PoP says: "A short floating-point datum requires only the 
-            // left-most 32 bit positions of a floating-point register". 
-            // Therefore, in contrast to AK_GeneralPurpose and AK_Memory, 
-            // don't extend shadow and don't mind the gap. 
-            ShadowBase = getShadowAddrForVAArgument(IRB, FpOffset); 
-            if (MS.TrackOrigins) 
-              OriginBase = getOriginPtrForVAArgument(IRB, FpOffset); 
-          } 
-          FpOffset += ArgSize; 
-        } else { 
-          FpOffset = kParamTLSSize; 
-        } 
-        break; 
-      } 
-      case ArgKind::Vector: { 
-        // Keep track of VrIndex. No need to store shadow, since vector varargs 
-        // go through AK_Memory. 
-        assert(IsFixed); 
-        VrIndex++; 
-        break; 
-      } 
-      case ArgKind::Memory: { 
-        // Keep track of OverflowOffset and store shadow only for varargs. 
-        // Ignore fixed args, since we need to copy only the vararg portion of 
-        // the overflow area shadow. 
-        if (!IsFixed) { 
-          uint64_t ArgAllocSize = DL.getTypeAllocSize(T); 
-          uint64_t ArgSize = alignTo(ArgAllocSize, 8); 
-          if (OverflowOffset + ArgSize <= kParamTLSSize) { 
-            SE = getShadowExtension(CB, ArgNo); 
-            uint64_t GapSize = 
-                SE == ShadowExtension::None ? ArgSize - ArgAllocSize : 0; 
-            ShadowBase = 
-                getShadowAddrForVAArgument(IRB, OverflowOffset + GapSize); 
-            if (MS.TrackOrigins) 
-              OriginBase = 
-                  getOriginPtrForVAArgument(IRB, OverflowOffset + GapSize); 
-            OverflowOffset += ArgSize; 
-          } else { 
-            OverflowOffset = kParamTLSSize; 
-          } 
-        } 
-        break; 
-      } 
-      case ArgKind::Indirect: 
-        llvm_unreachable("Indirect must be converted to GeneralPurpose"); 
-      } 
-      if (ShadowBase == nullptr) 
-        continue; 
-      Value *Shadow = MSV.getShadow(A); 
-      if (SE != ShadowExtension::None) 
-        Shadow = MSV.CreateShadowCast(IRB, Shadow, IRB.getInt64Ty(), 
-                                      /*Signed*/ SE == ShadowExtension::Sign); 
-      ShadowBase = IRB.CreateIntToPtr( 
-          ShadowBase, PointerType::get(Shadow->getType(), 0), "_msarg_va_s"); 
-      IRB.CreateStore(Shadow, ShadowBase); 
-      if (MS.TrackOrigins) { 
-        Value *Origin = MSV.getOrigin(A); 
-        unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType()); 
-        MSV.paintOrigin(IRB, Origin, OriginBase, StoreSize, 
-                        kMinOriginAlignment); 
-      } 
-    } 
-    Constant *OverflowSize = ConstantInt::get( 
-        IRB.getInt64Ty(), OverflowOffset - SystemZOverflowOffset); 
-    IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS); 
-  } 
- 
-  Value *getShadowAddrForVAArgument(IRBuilder<> &IRB, unsigned ArgOffset) { 
-    Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy); 
-    return IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset)); 
-  } 
- 
-  Value *getOriginPtrForVAArgument(IRBuilder<> &IRB, int ArgOffset) { 
-    Value *Base = IRB.CreatePointerCast(MS.VAArgOriginTLS, MS.IntptrTy); 
-    Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset)); 
-    return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0), 
-                              "_msarg_va_o"); 
-  } 
- 
-  void unpoisonVAListTagForInst(IntrinsicInst &I) { 
-    IRBuilder<> IRB(&I); 
-    Value *VAListTag = I.getArgOperand(0); 
-    Value *ShadowPtr, *OriginPtr; 
-    const Align Alignment = Align(8); 
-    std::tie(ShadowPtr, OriginPtr) = 
-        MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment, 
-                               /*isStore*/ true); 
-    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), 
-                     SystemZVAListTagSize, Alignment, false); 
-  } 
- 
-  void visitVAStartInst(VAStartInst &I) override { 
-    VAStartInstrumentationList.push_back(&I); 
-    unpoisonVAListTagForInst(I); 
-  } 
- 
-  void visitVACopyInst(VACopyInst &I) override { unpoisonVAListTagForInst(I); } 
- 
-  void copyRegSaveArea(IRBuilder<> &IRB, Value *VAListTag) { 
-    Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C); 
-    Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr( 
-        IRB.CreateAdd( 
-            IRB.CreatePtrToInt(VAListTag, MS.IntptrTy), 
-            ConstantInt::get(MS.IntptrTy, SystemZRegSaveAreaPtrOffset)), 
-        PointerType::get(RegSaveAreaPtrTy, 0)); 
-    Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr); 
-    Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr; 
-    const Align Alignment = Align(8); 
-    std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) = 
-        MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(), Alignment, 
-                               /*isStore*/ true); 
-    // TODO(iii): copy only fragments filled by visitCallBase() 
-    IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment, 
-                     SystemZRegSaveAreaSize); 
-    if (MS.TrackOrigins) 
-      IRB.CreateMemCpy(RegSaveAreaOriginPtr, Alignment, VAArgTLSOriginCopy, 
-                       Alignment, SystemZRegSaveAreaSize); 
-  } 
- 
-  void copyOverflowArea(IRBuilder<> &IRB, Value *VAListTag) { 
-    Type *OverflowArgAreaPtrTy = Type::getInt64PtrTy(*MS.C); 
-    Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr( 
-        IRB.CreateAdd( 
-            IRB.CreatePtrToInt(VAListTag, MS.IntptrTy), 
-            ConstantInt::get(MS.IntptrTy, SystemZOverflowArgAreaPtrOffset)), 
-        PointerType::get(OverflowArgAreaPtrTy, 0)); 
-    Value *OverflowArgAreaPtr = 
-        IRB.CreateLoad(OverflowArgAreaPtrTy, OverflowArgAreaPtrPtr); 
-    Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr; 
-    const Align Alignment = Align(8); 
-    std::tie(OverflowArgAreaShadowPtr, OverflowArgAreaOriginPtr) = 
-        MSV.getShadowOriginPtr(OverflowArgAreaPtr, IRB, IRB.getInt8Ty(), 
-                               Alignment, /*isStore*/ true); 
-    Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy, 
-                                           SystemZOverflowOffset); 
-    IRB.CreateMemCpy(OverflowArgAreaShadowPtr, Alignment, SrcPtr, Alignment, 
-                     VAArgOverflowSize); 
-    if (MS.TrackOrigins) { 
-      SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSOriginCopy, 
-                                      SystemZOverflowOffset); 
-      IRB.CreateMemCpy(OverflowArgAreaOriginPtr, Alignment, SrcPtr, Alignment, 
-                       VAArgOverflowSize); 
-    } 
-  } 
- 
-  void finalizeInstrumentation() override { 
-    assert(!VAArgOverflowSize && !VAArgTLSCopy && 
-           "finalizeInstrumentation called twice"); 
-    if (!VAStartInstrumentationList.empty()) { 
-      // If there is a va_start in this function, make a backup copy of 
-      // va_arg_tls somewhere in the function entry block. 
+    VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
+    Value *CopySize = IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0),
+                                    VAArgSize);
+
+    if (!VAStartInstrumentationList.empty()) {
+      // If there is a va_start in this function, make a backup copy of
+      // va_arg_tls somewhere in the function entry block.
+      VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
+      IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
+    }
+
+    // Instrument va_start.
+    // Copy va_list shadow from the backup copy of the TLS contents.
+    for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
+      CallInst *OrigInst = VAStartInstrumentationList[i];
+      IRBuilder<> IRB(OrigInst->getNextNode());
+      Value *VAListTag = OrigInst->getArgOperand(0);
+      Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
+      Value *RegSaveAreaPtrPtr =
+          IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
+                             PointerType::get(RegSaveAreaPtrTy, 0));
+      Value *RegSaveAreaPtr =
+          IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
+      Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
+      const Align Alignment = Align(8);
+      std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
+          MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
+                                 Alignment, /*isStore*/ true);
+      IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
+                       CopySize);
+    }
+  }
+};
+
+/// SystemZ-specific implementation of VarArgHelper.
+struct VarArgSystemZHelper : public VarArgHelper {
+  static const unsigned SystemZGpOffset = 16;
+  static const unsigned SystemZGpEndOffset = 56;
+  static const unsigned SystemZFpOffset = 128;
+  static const unsigned SystemZFpEndOffset = 160;
+  static const unsigned SystemZMaxVrArgs = 8;
+  static const unsigned SystemZRegSaveAreaSize = 160;
+  static const unsigned SystemZOverflowOffset = 160;
+  static const unsigned SystemZVAListTagSize = 32;
+  static const unsigned SystemZOverflowArgAreaPtrOffset = 16;
+  static const unsigned SystemZRegSaveAreaPtrOffset = 24;
+
+  Function &F;
+  MemorySanitizer &MS;
+  MemorySanitizerVisitor &MSV;
+  Value *VAArgTLSCopy = nullptr;
+  Value *VAArgTLSOriginCopy = nullptr;
+  Value *VAArgOverflowSize = nullptr;
+
+  SmallVector<CallInst *, 16> VAStartInstrumentationList;
+
+  enum class ArgKind {
+    GeneralPurpose,
+    FloatingPoint,
+    Vector,
+    Memory,
+    Indirect,
+  };
+
+  enum class ShadowExtension { None, Zero, Sign };
+
+  VarArgSystemZHelper(Function &F, MemorySanitizer &MS,
+                      MemorySanitizerVisitor &MSV)
+      : F(F), MS(MS), MSV(MSV) {}
+
+  ArgKind classifyArgument(Type *T, bool IsSoftFloatABI) {
+    // T is a SystemZABIInfo::classifyArgumentType() output, and there are
+    // only a few possibilities of what it can be. In particular, enums, single
+    // element structs and large types have already been taken care of.
+
+    // Some i128 and fp128 arguments are converted to pointers only in the
+    // back end.
+    if (T->isIntegerTy(128) || T->isFP128Ty())
+      return ArgKind::Indirect;
+    if (T->isFloatingPointTy())
+      return IsSoftFloatABI ? ArgKind::GeneralPurpose : ArgKind::FloatingPoint;
+    if (T->isIntegerTy() || T->isPointerTy())
+      return ArgKind::GeneralPurpose;
+    if (T->isVectorTy())
+      return ArgKind::Vector;
+    return ArgKind::Memory;
+  }
+
+  ShadowExtension getShadowExtension(const CallBase &CB, unsigned ArgNo) {
+    // ABI says: "One of the simple integer types no more than 64 bits wide.
+    // ... If such an argument is shorter than 64 bits, replace it by a full
+    // 64-bit integer representing the same number, using sign or zero
+    // extension". Shadow for an integer argument has the same type as the
+    // argument itself, so it can be sign or zero extended as well.
+    bool ZExt = CB.paramHasAttr(ArgNo, Attribute::ZExt);
+    bool SExt = CB.paramHasAttr(ArgNo, Attribute::SExt);
+    if (ZExt) {
+      assert(!SExt);
+      return ShadowExtension::Zero;
+    }
+    if (SExt) {
+      assert(!ZExt);
+      return ShadowExtension::Sign;
+    }
+    return ShadowExtension::None;
+  }
+
+  void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
+    bool IsSoftFloatABI = CB.getCalledFunction()
+                              ->getFnAttribute("use-soft-float")
+                              .getValueAsString() == "true";
+    unsigned GpOffset = SystemZGpOffset;
+    unsigned FpOffset = SystemZFpOffset;
+    unsigned VrIndex = 0;
+    unsigned OverflowOffset = SystemZOverflowOffset;
+    const DataLayout &DL = F.getParent()->getDataLayout();
+    for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End;
+         ++ArgIt) {
+      Value *A = *ArgIt;
+      unsigned ArgNo = CB.getArgOperandNo(ArgIt);
+      bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
+      // SystemZABIInfo does not produce ByVal parameters.
+      assert(!CB.paramHasAttr(ArgNo, Attribute::ByVal));
+      Type *T = A->getType();
+      ArgKind AK = classifyArgument(T, IsSoftFloatABI);
+      if (AK == ArgKind::Indirect) {
+        T = PointerType::get(T, 0);
+        AK = ArgKind::GeneralPurpose;
+      }
+      if (AK == ArgKind::GeneralPurpose && GpOffset >= SystemZGpEndOffset)
+        AK = ArgKind::Memory;
+      if (AK == ArgKind::FloatingPoint && FpOffset >= SystemZFpEndOffset)
+        AK = ArgKind::Memory;
+      if (AK == ArgKind::Vector && (VrIndex >= SystemZMaxVrArgs || !IsFixed))
+        AK = ArgKind::Memory;
+      Value *ShadowBase = nullptr;
+      Value *OriginBase = nullptr;
+      ShadowExtension SE = ShadowExtension::None;
+      switch (AK) {
+      case ArgKind::GeneralPurpose: {
+        // Always keep track of GpOffset, but store shadow only for varargs.
+        uint64_t ArgSize = 8;
+        if (GpOffset + ArgSize <= kParamTLSSize) {
+          if (!IsFixed) {
+            SE = getShadowExtension(CB, ArgNo);
+            uint64_t GapSize = 0;
+            if (SE == ShadowExtension::None) {
+              uint64_t ArgAllocSize = DL.getTypeAllocSize(T);
+              assert(ArgAllocSize <= ArgSize);
+              GapSize = ArgSize - ArgAllocSize;
+            }
+            ShadowBase = getShadowAddrForVAArgument(IRB, GpOffset + GapSize);
+            if (MS.TrackOrigins)
+              OriginBase = getOriginPtrForVAArgument(IRB, GpOffset + GapSize);
+          }
+          GpOffset += ArgSize;
+        } else {
+          GpOffset = kParamTLSSize;
+        }
+        break;
+      }
+      case ArgKind::FloatingPoint: {
+        // Always keep track of FpOffset, but store shadow only for varargs.
+        uint64_t ArgSize = 8;
+        if (FpOffset + ArgSize <= kParamTLSSize) {
+          if (!IsFixed) {
+            // PoP says: "A short floating-point datum requires only the
+            // left-most 32 bit positions of a floating-point register".
+            // Therefore, in contrast to AK_GeneralPurpose and AK_Memory,
+            // don't extend shadow and don't mind the gap.
+            ShadowBase = getShadowAddrForVAArgument(IRB, FpOffset);
+            if (MS.TrackOrigins)
+              OriginBase = getOriginPtrForVAArgument(IRB, FpOffset);
+          }
+          FpOffset += ArgSize;
+        } else {
+          FpOffset = kParamTLSSize;
+        }
+        break;
+      }
+      case ArgKind::Vector: {
+        // Keep track of VrIndex. No need to store shadow, since vector varargs
+        // go through AK_Memory.
+        assert(IsFixed);
+        VrIndex++;
+        break;
+      }
+      case ArgKind::Memory: {
+        // Keep track of OverflowOffset and store shadow only for varargs.
+        // Ignore fixed args, since we need to copy only the vararg portion of
+        // the overflow area shadow.
+        if (!IsFixed) {
+          uint64_t ArgAllocSize = DL.getTypeAllocSize(T);
+          uint64_t ArgSize = alignTo(ArgAllocSize, 8);
+          if (OverflowOffset + ArgSize <= kParamTLSSize) {
+            SE = getShadowExtension(CB, ArgNo);
+            uint64_t GapSize =
+                SE == ShadowExtension::None ? ArgSize - ArgAllocSize : 0;
+            ShadowBase =
+                getShadowAddrForVAArgument(IRB, OverflowOffset + GapSize);
+            if (MS.TrackOrigins)
+              OriginBase =
+                  getOriginPtrForVAArgument(IRB, OverflowOffset + GapSize);
+            OverflowOffset += ArgSize;
+          } else {
+            OverflowOffset = kParamTLSSize;
+          }
+        }
+        break;
+      }
+      case ArgKind::Indirect:
+        llvm_unreachable("Indirect must be converted to GeneralPurpose");
+      }
+      if (ShadowBase == nullptr)
+        continue;
+      Value *Shadow = MSV.getShadow(A);
+      if (SE != ShadowExtension::None)
+        Shadow = MSV.CreateShadowCast(IRB, Shadow, IRB.getInt64Ty(),
+                                      /*Signed*/ SE == ShadowExtension::Sign);
+      ShadowBase = IRB.CreateIntToPtr(
+          ShadowBase, PointerType::get(Shadow->getType(), 0), "_msarg_va_s");
+      IRB.CreateStore(Shadow, ShadowBase);
+      if (MS.TrackOrigins) {
+        Value *Origin = MSV.getOrigin(A);
+        unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
+        MSV.paintOrigin(IRB, Origin, OriginBase, StoreSize,
+                        kMinOriginAlignment);
+      }
+    }
+    Constant *OverflowSize = ConstantInt::get(
+        IRB.getInt64Ty(), OverflowOffset - SystemZOverflowOffset);
+    IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
+  }
+
+  Value *getShadowAddrForVAArgument(IRBuilder<> &IRB, unsigned ArgOffset) {
+    Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
+    return IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+  }
+
+  Value *getOriginPtrForVAArgument(IRBuilder<> &IRB, int ArgOffset) {
+    Value *Base = IRB.CreatePointerCast(MS.VAArgOriginTLS, MS.IntptrTy);
+    Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+    return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
+                              "_msarg_va_o");
+  }
+
+  void unpoisonVAListTagForInst(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+    Value *VAListTag = I.getArgOperand(0);
+    Value *ShadowPtr, *OriginPtr;
+    const Align Alignment = Align(8);
+    std::tie(ShadowPtr, OriginPtr) =
+        MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment,
+                               /*isStore*/ true);
+    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+                     SystemZVAListTagSize, Alignment, false);
+  }
+
+  void visitVAStartInst(VAStartInst &I) override {
+    VAStartInstrumentationList.push_back(&I);
+    unpoisonVAListTagForInst(I);
+  }
+
+  void visitVACopyInst(VACopyInst &I) override { unpoisonVAListTagForInst(I); }
+
+  void copyRegSaveArea(IRBuilder<> &IRB, Value *VAListTag) {
+    Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
+    Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr(
+        IRB.CreateAdd(
+            IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
+            ConstantInt::get(MS.IntptrTy, SystemZRegSaveAreaPtrOffset)),
+        PointerType::get(RegSaveAreaPtrTy, 0));
+    Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
+    Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
+    const Align Alignment = Align(8);
+    std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
+        MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(), Alignment,
+                               /*isStore*/ true);
+    // TODO(iii): copy only fragments filled by visitCallBase()
+    IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
+                     SystemZRegSaveAreaSize);
+    if (MS.TrackOrigins)
+      IRB.CreateMemCpy(RegSaveAreaOriginPtr, Alignment, VAArgTLSOriginCopy,
+                       Alignment, SystemZRegSaveAreaSize);
+  }
+
+  void copyOverflowArea(IRBuilder<> &IRB, Value *VAListTag) {
+    Type *OverflowArgAreaPtrTy = Type::getInt64PtrTy(*MS.C);
+    Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr(
+        IRB.CreateAdd(
+            IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
+            ConstantInt::get(MS.IntptrTy, SystemZOverflowArgAreaPtrOffset)),
+        PointerType::get(OverflowArgAreaPtrTy, 0));
+    Value *OverflowArgAreaPtr =
+        IRB.CreateLoad(OverflowArgAreaPtrTy, OverflowArgAreaPtrPtr);
+    Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr;
+    const Align Alignment = Align(8);
+    std::tie(OverflowArgAreaShadowPtr, OverflowArgAreaOriginPtr) =
+        MSV.getShadowOriginPtr(OverflowArgAreaPtr, IRB, IRB.getInt8Ty(),
+                               Alignment, /*isStore*/ true);
+    Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy,
+                                           SystemZOverflowOffset);
+    IRB.CreateMemCpy(OverflowArgAreaShadowPtr, Alignment, SrcPtr, Alignment,
+                     VAArgOverflowSize);
+    if (MS.TrackOrigins) {
+      SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSOriginCopy,
+                                      SystemZOverflowOffset);
+      IRB.CreateMemCpy(OverflowArgAreaOriginPtr, Alignment, SrcPtr, Alignment,
+                       VAArgOverflowSize);
+    }
+  }
+
+  void finalizeInstrumentation() override {
+    assert(!VAArgOverflowSize && !VAArgTLSCopy &&
+           "finalizeInstrumentation called twice");
+    if (!VAStartInstrumentationList.empty()) {
+      // If there is a va_start in this function, make a backup copy of
+      // va_arg_tls somewhere in the function entry block.
       IRBuilder<> IRB(MSV.FnPrologueEnd);
-      VAArgOverflowSize = 
-          IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS); 
-      Value *CopySize = 
-          IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, SystemZOverflowOffset), 
-                        VAArgOverflowSize); 
-      VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize); 
-      IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize); 
-      if (MS.TrackOrigins) { 
-        VAArgTLSOriginCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize); 
-        IRB.CreateMemCpy(VAArgTLSOriginCopy, Align(8), MS.VAArgOriginTLS, 
-                         Align(8), CopySize); 
-      } 
-    } 
- 
-    // Instrument va_start. 
-    // Copy va_list shadow from the backup copy of the TLS contents. 
-    for (size_t VaStartNo = 0, VaStartNum = VAStartInstrumentationList.size(); 
-         VaStartNo < VaStartNum; VaStartNo++) { 
-      CallInst *OrigInst = VAStartInstrumentationList[VaStartNo]; 
-      IRBuilder<> IRB(OrigInst->getNextNode()); 
-      Value *VAListTag = OrigInst->getArgOperand(0); 
-      copyRegSaveArea(IRB, VAListTag); 
-      copyOverflowArea(IRB, VAListTag); 
-    } 
-  } 
-}; 
- 
-/// A no-op implementation of VarArgHelper. 
-struct VarArgNoOpHelper : public VarArgHelper { 
-  VarArgNoOpHelper(Function &F, MemorySanitizer &MS, 
-                   MemorySanitizerVisitor &MSV) {} 
- 
-  void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {} 
- 
-  void visitVAStartInst(VAStartInst &I) override {} 
- 
-  void visitVACopyInst(VACopyInst &I) override {} 
- 
-  void finalizeInstrumentation() override {} 
-}; 
- 
-} // end anonymous namespace 
- 
-static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan, 
-                                        MemorySanitizerVisitor &Visitor) { 
-  // VarArg handling is only implemented on AMD64. False positives are possible 
-  // on other platforms. 
-  Triple TargetTriple(Func.getParent()->getTargetTriple()); 
-  if (TargetTriple.getArch() == Triple::x86_64) 
-    return new VarArgAMD64Helper(Func, Msan, Visitor); 
-  else if (TargetTriple.isMIPS64()) 
-    return new VarArgMIPS64Helper(Func, Msan, Visitor); 
-  else if (TargetTriple.getArch() == Triple::aarch64) 
-    return new VarArgAArch64Helper(Func, Msan, Visitor); 
-  else if (TargetTriple.getArch() == Triple::ppc64 || 
-           TargetTriple.getArch() == Triple::ppc64le) 
-    return new VarArgPowerPC64Helper(Func, Msan, Visitor); 
-  else if (TargetTriple.getArch() == Triple::systemz) 
-    return new VarArgSystemZHelper(Func, Msan, Visitor); 
-  else 
-    return new VarArgNoOpHelper(Func, Msan, Visitor); 
-} 
- 
-bool MemorySanitizer::sanitizeFunction(Function &F, TargetLibraryInfo &TLI) { 
-  if (!CompileKernel && F.getName() == kMsanModuleCtorName) 
-    return false; 
- 
-  MemorySanitizerVisitor Visitor(F, *this, TLI); 
- 
-  // Clear out readonly/readnone attributes. 
-  AttrBuilder B; 
-  B.addAttribute(Attribute::ReadOnly) 
-      .addAttribute(Attribute::ReadNone) 
-      .addAttribute(Attribute::WriteOnly) 
-      .addAttribute(Attribute::ArgMemOnly) 
-      .addAttribute(Attribute::Speculatable); 
-  F.removeAttributes(AttributeList::FunctionIndex, B); 
- 
-  return Visitor.runOnFunction(); 
-} 
+      VAArgOverflowSize =
+          IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
+      Value *CopySize =
+          IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, SystemZOverflowOffset),
+                        VAArgOverflowSize);
+      VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
+      IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
+      if (MS.TrackOrigins) {
+        VAArgTLSOriginCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
+        IRB.CreateMemCpy(VAArgTLSOriginCopy, Align(8), MS.VAArgOriginTLS,
+                         Align(8), CopySize);
+      }
+    }
+
+    // Instrument va_start.
+    // Copy va_list shadow from the backup copy of the TLS contents.
+    for (size_t VaStartNo = 0, VaStartNum = VAStartInstrumentationList.size();
+         VaStartNo < VaStartNum; VaStartNo++) {
+      CallInst *OrigInst = VAStartInstrumentationList[VaStartNo];
+      IRBuilder<> IRB(OrigInst->getNextNode());
+      Value *VAListTag = OrigInst->getArgOperand(0);
+      copyRegSaveArea(IRB, VAListTag);
+      copyOverflowArea(IRB, VAListTag);
+    }
+  }
+};
+
+/// A no-op implementation of VarArgHelper.
+struct VarArgNoOpHelper : public VarArgHelper {
+  VarArgNoOpHelper(Function &F, MemorySanitizer &MS,
+                   MemorySanitizerVisitor &MSV) {}
+
+  void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {}
+
+  void visitVAStartInst(VAStartInst &I) override {}
+
+  void visitVACopyInst(VACopyInst &I) override {}
+
+  void finalizeInstrumentation() override {}
+};
+
+} // end anonymous namespace
+
+static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
+                                        MemorySanitizerVisitor &Visitor) {
+  // VarArg handling is only implemented on AMD64. False positives are possible
+  // on other platforms.
+  Triple TargetTriple(Func.getParent()->getTargetTriple());
+  if (TargetTriple.getArch() == Triple::x86_64)
+    return new VarArgAMD64Helper(Func, Msan, Visitor);
+  else if (TargetTriple.isMIPS64())
+    return new VarArgMIPS64Helper(Func, Msan, Visitor);
+  else if (TargetTriple.getArch() == Triple::aarch64)
+    return new VarArgAArch64Helper(Func, Msan, Visitor);
+  else if (TargetTriple.getArch() == Triple::ppc64 ||
+           TargetTriple.getArch() == Triple::ppc64le)
+    return new VarArgPowerPC64Helper(Func, Msan, Visitor);
+  else if (TargetTriple.getArch() == Triple::systemz)
+    return new VarArgSystemZHelper(Func, Msan, Visitor);
+  else
+    return new VarArgNoOpHelper(Func, Msan, Visitor);
+}
+
+bool MemorySanitizer::sanitizeFunction(Function &F, TargetLibraryInfo &TLI) {
+  if (!CompileKernel && F.getName() == kMsanModuleCtorName)
+    return false;
+
+  MemorySanitizerVisitor Visitor(F, *this, TLI);
+
+  // Clear out readonly/readnone attributes.
+  AttrBuilder B;
+  B.addAttribute(Attribute::ReadOnly)
+      .addAttribute(Attribute::ReadNone)
+      .addAttribute(Attribute::WriteOnly)
+      .addAttribute(Attribute::ArgMemOnly)
+      .addAttribute(Attribute::Speculatable);
+  F.removeAttributes(AttributeList::FunctionIndex, B);
+
+  return Visitor.runOnFunction();
+}
diff --git a/contrib/libs/llvm12/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/contrib/libs/llvm12/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 002a03afad..be6c8c6310 100644
--- a/contrib/libs/llvm12/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -1,253 +1,253 @@
-//===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===// 
-// 
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 
-// 
-//===----------------------------------------------------------------------===// 
-// 
-// This file implements PGO instrumentation using a minimum spanning tree based 
-// on the following paper: 
-//   [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points 
-//   for program frequency counts. BIT Numerical Mathematics 1973, Volume 13, 
-//   Issue 3, pp 313-322 
-// The idea of the algorithm based on the fact that for each node (except for 
-// the entry and exit), the sum of incoming edge counts equals the sum of 
-// outgoing edge counts. The count of edge on spanning tree can be derived from 
-// those edges not on the spanning tree. Knuth proves this method instruments 
-// the minimum number of edges. 
-// 
-// The minimal spanning tree here is actually a maximum weight tree -- on-tree 
-// edges have higher frequencies (more likely to execute). The idea is to 
-// instrument those less frequently executed edges to reduce the runtime 
-// overhead of instrumented binaries. 
-// 
-// This file contains two passes: 
-// (1) Pass PGOInstrumentationGen which instruments the IR to generate edge 
-// count profile, and generates the instrumentation for indirect call 
-// profiling. 
-// (2) Pass PGOInstrumentationUse which reads the edge count profile and 
-// annotates the branch weights. It also reads the indirect call value 
-// profiling records and annotate the indirect call instructions. 
-// 
-// To get the precise counter information, These two passes need to invoke at 
-// the same compilation point (so they see the same IR). For pass 
-// PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For 
-// pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and 
-// the profile is opened in module level and passed to each PGOUseFunc instance. 
-// The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put 
-// in class FuncPGOInstrumentation. 
-// 
-// Class PGOEdge represents a CFG edge and some auxiliary information. Class 
-// BBInfo contains auxiliary information for each BB. These two classes are used 
-// in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived 
-// class of PGOEdge and BBInfo, respectively. They contains extra data structure 
-// used in populating profile counters. 
-// The MST implementation is in Class CFGMST (CFGMST.h). 
-// 
-//===----------------------------------------------------------------------===// 
- 
-#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" 
-#include "CFGMST.h" 
-#include "ValueProfileCollector.h" 
-#include "llvm/ADT/APInt.h" 
-#include "llvm/ADT/ArrayRef.h" 
-#include "llvm/ADT/MapVector.h" 
-#include "llvm/ADT/STLExtras.h" 
-#include "llvm/ADT/SmallVector.h" 
-#include "llvm/ADT/Statistic.h" 
-#include "llvm/ADT/StringRef.h" 
-#include "llvm/ADT/Triple.h" 
-#include "llvm/ADT/Twine.h" 
-#include "llvm/ADT/iterator.h" 
-#include "llvm/ADT/iterator_range.h" 
-#include "llvm/Analysis/BlockFrequencyInfo.h" 
-#include "llvm/Analysis/BranchProbabilityInfo.h" 
-#include "llvm/Analysis/CFG.h" 
-#include "llvm/Analysis/EHPersonalities.h" 
-#include "llvm/Analysis/LoopInfo.h" 
-#include "llvm/Analysis/OptimizationRemarkEmitter.h" 
-#include "llvm/Analysis/ProfileSummaryInfo.h" 
-#include "llvm/IR/Attributes.h" 
-#include "llvm/IR/BasicBlock.h" 
-#include "llvm/IR/CFG.h" 
-#include "llvm/IR/Comdat.h" 
-#include "llvm/IR/Constant.h" 
-#include "llvm/IR/Constants.h" 
-#include "llvm/IR/DiagnosticInfo.h" 
-#include "llvm/IR/Dominators.h" 
-#include "llvm/IR/Function.h" 
-#include "llvm/IR/GlobalAlias.h" 
-#include "llvm/IR/GlobalValue.h" 
-#include "llvm/IR/GlobalVariable.h" 
-#include "llvm/IR/IRBuilder.h" 
-#include "llvm/IR/InstVisitor.h" 
-#include "llvm/IR/InstrTypes.h" 
-#include "llvm/IR/Instruction.h" 
-#include "llvm/IR/Instructions.h" 
-#include "llvm/IR/IntrinsicInst.h" 
-#include "llvm/IR/Intrinsics.h" 
-#include "llvm/IR/LLVMContext.h" 
-#include "llvm/IR/MDBuilder.h" 
-#include "llvm/IR/Module.h" 
-#include "llvm/IR/PassManager.h" 
-#include "llvm/IR/ProfileSummary.h" 
-#include "llvm/IR/Type.h" 
-#include "llvm/IR/Value.h" 
-#include "llvm/InitializePasses.h" 
-#include "llvm/Pass.h" 
-#include "llvm/ProfileData/InstrProf.h" 
-#include "llvm/ProfileData/InstrProfReader.h" 
-#include "llvm/Support/BranchProbability.h" 
-#include "llvm/Support/CRC.h" 
-#include "llvm/Support/Casting.h" 
-#include "llvm/Support/CommandLine.h" 
-#include "llvm/Support/DOTGraphTraits.h" 
-#include "llvm/Support/Debug.h" 
-#include "llvm/Support/Error.h" 
-#include "llvm/Support/ErrorHandling.h" 
-#include "llvm/Support/GraphWriter.h" 
-#include "llvm/Support/raw_ostream.h" 
-#include "llvm/Transforms/Instrumentation.h" 
-#include "llvm/Transforms/Utils/BasicBlockUtils.h" 
-#include <algorithm> 
-#include <cassert> 
-#include <cstdint> 
-#include <memory> 
-#include <numeric> 
-#include <string> 
-#include <unordered_map> 
-#include <utility> 
-#include <vector> 
- 
-using namespace llvm; 
-using ProfileCount = Function::ProfileCount; 
-using VPCandidateInfo = ValueProfileCollector::CandidateInfo; 
- 
-#define DEBUG_TYPE "pgo-instrumentation" 
- 
-STATISTIC(NumOfPGOInstrument, "Number of edges instrumented."); 
-STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented."); 
-STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented."); 
-STATISTIC(NumOfPGOEdge, "Number of edges."); 
-STATISTIC(NumOfPGOBB, "Number of basic-blocks."); 
-STATISTIC(NumOfPGOSplit, "Number of critical edge splits."); 
-STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts."); 
-STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile."); 
-STATISTIC(NumOfPGOMissing, "Number of functions without profile."); 
-STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations."); 
-STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO."); 
-STATISTIC(NumOfCSPGOSelectInsts, 
-          "Number of select instruction instrumented in CSPGO."); 
-STATISTIC(NumOfCSPGOMemIntrinsics, 
-          "Number of mem intrinsics instrumented in CSPGO."); 
-STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO."); 
-STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO."); 
-STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO."); 
-STATISTIC(NumOfCSPGOFunc, 
-          "Number of functions having valid profile counts in CSPGO."); 
-STATISTIC(NumOfCSPGOMismatch, 
-          "Number of functions having mismatch profile in CSPGO."); 
-STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO."); 
- 
-// Command line option to specify the file to read profile from. This is 
-// mainly used for testing. 
-static cl::opt<std::string> 
-    PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, 
-                       cl::value_desc("filename"), 
-                       cl::desc("Specify the path of profile data file. This is" 
-                                "mainly for test purpose.")); 
-static cl::opt<std::string> PGOTestProfileRemappingFile( 
-    "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden, 
-    cl::value_desc("filename"), 
-    cl::desc("Specify the path of profile remapping file. This is mainly for " 
-             "test purpose.")); 
- 
-// Command line option to disable value profiling. The default is false: 
-// i.e. value profiling is enabled by default. This is for debug purpose. 
-static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false), 
-                                           cl::Hidden, 
-                                           cl::desc("Disable Value Profiling")); 
- 
-// Command line option to set the maximum number of VP annotations to write to 
-// the metadata for a single indirect call callsite. 
-static cl::opt<unsigned> MaxNumAnnotations( 
-    "icp-max-annotations", cl::init(3), cl::Hidden, cl::ZeroOrMore, 
-    cl::desc("Max number of annotations for a single indirect " 
-             "call callsite")); 
- 
-// Command line option to set the maximum number of value annotations 
-// to write to the metadata for a single memop intrinsic. 
-static cl::opt<unsigned> MaxNumMemOPAnnotations( 
-    "memop-max-annotations", cl::init(4), cl::Hidden, cl::ZeroOrMore, 
-    cl::desc("Max number of preicise value annotations for a single memop" 
-             "intrinsic")); 
- 
-// Command line option to control appending FunctionHash to the name of a COMDAT 
-// function. This is to avoid the hash mismatch caused by the preinliner. 
-static cl::opt<bool> DoComdatRenaming( 
-    "do-comdat-renaming", cl::init(false), cl::Hidden, 
-    cl::desc("Append function hash to the name of COMDAT function to avoid " 
-             "function hash mismatch due to the preinliner")); 
- 
-// Command line option to enable/disable the warning about missing profile 
-// information. 
-static cl::opt<bool> 
-    PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden, 
-                   cl::desc("Use this option to turn on/off " 
-                            "warnings about missing profile data for " 
-                            "functions.")); 
- 
-// Command line option to enable/disable the warning about a hash mismatch in 
-// the profile data. 
-static cl::opt<bool> 
-    NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden, 
-                      cl::desc("Use this option to turn off/on " 
-                               "warnings about profile cfg mismatch.")); 
- 
-// Command line option to enable/disable the warning about a hash mismatch in 
-// the profile data for Comdat functions, which often turns out to be false 
-// positive due to the pre-instrumentation inline. 
-static cl::opt<bool> 
-    NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", cl::init(true), 
-                            cl::Hidden, 
-                            cl::desc("The option is used to turn on/off " 
-                                     "warnings about hash mismatch for comdat " 
-                                     "functions.")); 
- 
-// Command line option to enable/disable select instruction instrumentation. 
-static cl::opt<bool> 
-    PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden, 
-                   cl::desc("Use this option to turn on/off SELECT " 
-                            "instruction instrumentation. ")); 
- 
-// Command line option to turn on CFG dot or text dump of raw profile counts 
-static cl::opt<PGOViewCountsType> PGOViewRawCounts( 
-    "pgo-view-raw-counts", cl::Hidden, 
-    cl::desc("A boolean option to show CFG dag or text " 
-             "with raw profile counts from " 
-             "profile data. See also option " 
-             "-pgo-view-counts. To limit graph " 
-             "display to only one function, use " 
-             "filtering option -view-bfi-func-name."), 
-    cl::values(clEnumValN(PGOVCT_None, "none", "do not show."), 
-               clEnumValN(PGOVCT_Graph, "graph", "show a graph."), 
-               clEnumValN(PGOVCT_Text, "text", "show in text."))); 
- 
-// Command line option to enable/disable memop intrinsic call.size profiling. 
-static cl::opt<bool> 
-    PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, 
-                  cl::desc("Use this option to turn on/off " 
-                           "memory intrinsic size profiling.")); 
- 
-// Emit branch probability as optimization remarks. 
-static cl::opt<bool> 
-    EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, 
-                          cl::desc("When this option is on, the annotated " 
-                                   "branch probability will be emitted as " 
-                                   "optimization remarks: -{Rpass|" 
-                                   "pass-remarks}=pgo-instrumentation")); 
- 
+//===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements PGO instrumentation using a minimum spanning tree based
+// on the following paper:
+//   [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points
+//   for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,
+//   Issue 3, pp 313-322
+// The idea of the algorithm based on the fact that for each node (except for
+// the entry and exit), the sum of incoming edge counts equals the sum of
+// outgoing edge counts. The count of edge on spanning tree can be derived from
+// those edges not on the spanning tree. Knuth proves this method instruments
+// the minimum number of edges.
+//
+// The minimal spanning tree here is actually a maximum weight tree -- on-tree
+// edges have higher frequencies (more likely to execute). The idea is to
+// instrument those less frequently executed edges to reduce the runtime
+// overhead of instrumented binaries.
+//
+// This file contains two passes:
+// (1) Pass PGOInstrumentationGen which instruments the IR to generate edge
+// count profile, and generates the instrumentation for indirect call
+// profiling.
+// (2) Pass PGOInstrumentationUse which reads the edge count profile and
+// annotates the branch weights. It also reads the indirect call value
+// profiling records and annotate the indirect call instructions.
+//
+// To get the precise counter information, These two passes need to invoke at
+// the same compilation point (so they see the same IR). For pass
+// PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For
+// pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and
+// the profile is opened in module level and passed to each PGOUseFunc instance.
+// The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put
+// in class FuncPGOInstrumentation.
+//
+// Class PGOEdge represents a CFG edge and some auxiliary information. Class
+// BBInfo contains auxiliary information for each BB. These two classes are used
+// in pass PGOInstrumentationGen. Classes PGOUseEdge and UseBBInfo are derived
+// from PGOEdge and BBInfo, respectively. They contain extra data structures
+// used in populating profile counters.
+// The MST implementation is in Class CFGMST (CFGMST.h).
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
+#include "CFGMST.h"
+#include "ValueProfileCollector.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Comdat.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/ProfileSummary.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/ProfileData/InstrProfReader.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CRC.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/DOTGraphTraits.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <memory>
+#include <numeric>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+using ProfileCount = Function::ProfileCount;
+using VPCandidateInfo = ValueProfileCollector::CandidateInfo;
+
+#define DEBUG_TYPE "pgo-instrumentation"
+
+STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
+STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented.");
+STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented.");
+STATISTIC(NumOfPGOEdge, "Number of edges.");
+STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
+STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
+STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
+STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
+STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
+STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
+STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
+STATISTIC(NumOfCSPGOSelectInsts,
+          "Number of select instruction instrumented in CSPGO.");
+STATISTIC(NumOfCSPGOMemIntrinsics,
+          "Number of mem intrinsics instrumented in CSPGO.");
+STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO.");
+STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO.");
+STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO.");
+STATISTIC(NumOfCSPGOFunc,
+          "Number of functions having valid profile counts in CSPGO.");
+STATISTIC(NumOfCSPGOMismatch,
+          "Number of functions having mismatch profile in CSPGO.");
+STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO.");
+
+// Command line option to specify the file to read profile from. This is
+// mainly used for testing.
+static cl::opt<std::string>
+    PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden,
+                       cl::value_desc("filename"),
+                       cl::desc("Specify the path of profile data file. This is"
+                                " mainly for test purpose."));
+static cl::opt<std::string> PGOTestProfileRemappingFile(
+    "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden,
+    cl::value_desc("filename"),
+    cl::desc("Specify the path of profile remapping file. This is mainly for "
+             "test purpose."));
+
+// Command line option to disable value profiling. The default is false:
+// i.e. value profiling is enabled by default. This is for debug purpose.
+static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false),
+                                           cl::Hidden,
+                                           cl::desc("Disable Value Profiling"));
+
+// Command line option to set the maximum number of VP annotations to write to
+// the metadata for a single indirect call callsite.
+static cl::opt<unsigned> MaxNumAnnotations(
+    "icp-max-annotations", cl::init(3), cl::Hidden, cl::ZeroOrMore,
+    cl::desc("Max number of annotations for a single indirect "
+             "call callsite"));
+
+// Command line option to set the maximum number of value annotations
+// to write to the metadata for a single memop intrinsic.
+static cl::opt<unsigned> MaxNumMemOPAnnotations(
+    "memop-max-annotations", cl::init(4), cl::Hidden, cl::ZeroOrMore,
+    cl::desc("Max number of precise value annotations for a single memop "
+             "intrinsic"));
+
+// Command line option to control appending FunctionHash to the name of a COMDAT
+// function. This is to avoid the hash mismatch caused by the preinliner.
+static cl::opt<bool> DoComdatRenaming(
+    "do-comdat-renaming", cl::init(false), cl::Hidden,
+    cl::desc("Append function hash to the name of COMDAT function to avoid "
+             "function hash mismatch due to the preinliner"));
+
+// Command line option to enable/disable the warning about missing profile
+// information.
+static cl::opt<bool>
+    PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden,
+                   cl::desc("Use this option to turn on/off "
+                            "warnings about missing profile data for "
+                            "functions."));
+
+// Command line option to enable/disable the warning about a hash mismatch in
+// the profile data.
+static cl::opt<bool>
+    NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden,
+                      cl::desc("Use this option to turn off/on "
+                               "warnings about profile cfg mismatch."));
+
+// Command line option to enable/disable the warning about a hash mismatch in
+// the profile data for Comdat functions, which often turns out to be false
+// positive due to the pre-instrumentation inline.
+static cl::opt<bool>
+    NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", cl::init(true),
+                            cl::Hidden,
+                            cl::desc("The option is used to turn on/off "
+                                     "warnings about hash mismatch for comdat "
+                                     "functions."));
+
+// Command line option to enable/disable select instruction instrumentation.
+static cl::opt<bool>
+    PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden,
+                   cl::desc("Use this option to turn on/off SELECT "
+                            "instruction instrumentation. "));
+
+// Command line option to turn on CFG dot or text dump of raw profile counts
+static cl::opt<PGOViewCountsType> PGOViewRawCounts(
+    "pgo-view-raw-counts", cl::Hidden,
+    cl::desc("A boolean option to show CFG dag or text "
+             "with raw profile counts from "
+             "profile data. See also option "
+             "-pgo-view-counts. To limit graph "
+             "display to only one function, use "
+             "filtering option -view-bfi-func-name."),
+    cl::values(clEnumValN(PGOVCT_None, "none", "do not show."),
+               clEnumValN(PGOVCT_Graph, "graph", "show a graph."),
+               clEnumValN(PGOVCT_Text, "text", "show in text.")));
+
+// Command line option to enable/disable memop intrinsic call.size profiling.
+static cl::opt<bool>
+    PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden,
+                  cl::desc("Use this option to turn on/off "
+                           "memory intrinsic size profiling."));
+
+// Emit branch probability as optimization remarks.
+static cl::opt<bool>
+    EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden,
+                          cl::desc("When this option is on, the annotated "
+                                   "branch probability will be emitted as "
+                                   "optimization remarks: -{Rpass|"
+                                   "pass-remarks}=pgo-instrumentation"));
+
 static cl::opt<bool> PGOInstrumentEntry(
     "pgo-instrument-entry", cl::init(false), cl::Hidden,
     cl::desc("Force to instrument function entry basicblock."));
@@ -280,394 +280,394 @@ static cl::opt<unsigned> PGOVerifyBFICutoff(
     cl::desc("Set the threshold for pgo-verify-bfi -- skip the counts whose "
              "profile count value is below."));
 
-// Command line option to turn on CFG dot dump after profile annotation. 
-// Defined in Analysis/BlockFrequencyInfo.cpp:  -pgo-view-counts 
-extern cl::opt<PGOViewCountsType> PGOViewCounts; 
- 
-// Command line option to specify the name of the function for CFG dump 
-// Defined in Analysis/BlockFrequencyInfo.cpp:  -view-bfi-func-name= 
-extern cl::opt<std::string> ViewBlockFreqFuncName; 
- 
+// Command line option to turn on CFG dot dump after profile annotation.
+// Defined in Analysis/BlockFrequencyInfo.cpp:  -pgo-view-counts
+extern cl::opt<PGOViewCountsType> PGOViewCounts;
+
+// Command line option to specify the name of the function for CFG dump
+// Defined in Analysis/BlockFrequencyInfo.cpp:  -view-bfi-func-name=
+extern cl::opt<std::string> ViewBlockFreqFuncName;
+
 static cl::opt<bool>
     PGOOldCFGHashing("pgo-instr-old-cfg-hashing", cl::init(false), cl::Hidden,
                      cl::desc("Use the old CFG function hashing"));
 
-// Return a string describing the branch condition that can be 
-// used in static branch probability heuristics: 
-static std::string getBranchCondString(Instruction *TI) { 
-  BranchInst *BI = dyn_cast<BranchInst>(TI); 
-  if (!BI || !BI->isConditional()) 
-    return std::string(); 
- 
-  Value *Cond = BI->getCondition(); 
-  ICmpInst *CI = dyn_cast<ICmpInst>(Cond); 
-  if (!CI) 
-    return std::string(); 
- 
-  std::string result; 
-  raw_string_ostream OS(result); 
-  OS << CmpInst::getPredicateName(CI->getPredicate()) << "_"; 
-  CI->getOperand(0)->getType()->print(OS, true); 
- 
-  Value *RHS = CI->getOperand(1); 
-  ConstantInt *CV = dyn_cast<ConstantInt>(RHS); 
-  if (CV) { 
-    if (CV->isZero()) 
-      OS << "_Zero"; 
-    else if (CV->isOne()) 
-      OS << "_One"; 
-    else if (CV->isMinusOne()) 
-      OS << "_MinusOne"; 
-    else 
-      OS << "_Const"; 
-  } 
-  OS.flush(); 
-  return result; 
-} 
- 
-static const char *ValueProfKindDescr[] = { 
-#define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr, 
-#include "llvm/ProfileData/InstrProfData.inc" 
-}; 
- 
-namespace { 
- 
-/// The select instruction visitor plays three roles specified 
-/// by the mode. In \c VM_counting mode, it simply counts the number of 
-/// select instructions. In \c VM_instrument mode, it inserts code to count 
-/// the number times TrueValue of select is taken. In \c VM_annotate mode, 
-/// it reads the profile data and annotate the select instruction with metadata. 
-enum VisitMode { VM_counting, VM_instrument, VM_annotate }; 
-class PGOUseFunc; 
- 
-/// Instruction Visitor class to visit select instructions. 
-struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> { 
-  Function &F; 
-  unsigned NSIs = 0;             // Number of select instructions instrumented. 
-  VisitMode Mode = VM_counting;  // Visiting mode. 
-  unsigned *CurCtrIdx = nullptr; // Pointer to current counter index. 
-  unsigned TotalNumCtrs = 0;     // Total number of counters 
-  GlobalVariable *FuncNameVar = nullptr; 
-  uint64_t FuncHash = 0; 
-  PGOUseFunc *UseFunc = nullptr; 
- 
-  SelectInstVisitor(Function &Func) : F(Func) {} 
- 
-  void countSelects(Function &Func) { 
-    NSIs = 0; 
-    Mode = VM_counting; 
-    visit(Func); 
-  } 
- 
-  // Visit the IR stream and instrument all select instructions. \p 
-  // Ind is a pointer to the counter index variable; \p TotalNC 
-  // is the total number of counters; \p FNV is the pointer to the 
-  // PGO function name var; \p FHash is the function hash. 
-  void instrumentSelects(Function &Func, unsigned *Ind, unsigned TotalNC, 
-                         GlobalVariable *FNV, uint64_t FHash) { 
-    Mode = VM_instrument; 
-    CurCtrIdx = Ind; 
-    TotalNumCtrs = TotalNC; 
-    FuncHash = FHash; 
-    FuncNameVar = FNV; 
-    visit(Func); 
-  } 
- 
-  // Visit the IR stream and annotate all select instructions. 
-  void annotateSelects(Function &Func, PGOUseFunc *UF, unsigned *Ind) { 
-    Mode = VM_annotate; 
-    UseFunc = UF; 
-    CurCtrIdx = Ind; 
-    visit(Func); 
-  } 
- 
-  void instrumentOneSelectInst(SelectInst &SI); 
-  void annotateOneSelectInst(SelectInst &SI); 
- 
-  // Visit \p SI instruction and perform tasks according to visit mode. 
-  void visitSelectInst(SelectInst &SI); 
- 
-  // Return the number of select instructions. This needs be called after 
-  // countSelects(). 
-  unsigned getNumOfSelectInsts() const { return NSIs; } 
-}; 
- 
- 
-class PGOInstrumentationGenLegacyPass : public ModulePass { 
-public: 
-  static char ID; 
- 
-  PGOInstrumentationGenLegacyPass(bool IsCS = false) 
-      : ModulePass(ID), IsCS(IsCS) { 
-    initializePGOInstrumentationGenLegacyPassPass( 
-        *PassRegistry::getPassRegistry()); 
-  } 
- 
-  StringRef getPassName() const override { return "PGOInstrumentationGenPass"; } 
- 
-private: 
-  // Is this is context-sensitive instrumentation. 
-  bool IsCS; 
-  bool runOnModule(Module &M) override; 
- 
-  void getAnalysisUsage(AnalysisUsage &AU) const override { 
-    AU.addRequired<BlockFrequencyInfoWrapperPass>(); 
-    AU.addRequired<TargetLibraryInfoWrapperPass>(); 
-  } 
-}; 
- 
-class PGOInstrumentationUseLegacyPass : public ModulePass { 
-public: 
-  static char ID; 
- 
-  // Provide the profile filename as the parameter. 
-  PGOInstrumentationUseLegacyPass(std::string Filename = "", bool IsCS = false) 
-      : ModulePass(ID), ProfileFileName(std::move(Filename)), IsCS(IsCS) { 
-    if (!PGOTestProfileFile.empty()) 
-      ProfileFileName = PGOTestProfileFile; 
-    initializePGOInstrumentationUseLegacyPassPass( 
-        *PassRegistry::getPassRegistry()); 
-  } 
- 
-  StringRef getPassName() const override { return "PGOInstrumentationUsePass"; } 
- 
-private: 
-  std::string ProfileFileName; 
-  // Is this is context-sensitive instrumentation use. 
-  bool IsCS; 
- 
-  bool runOnModule(Module &M) override; 
- 
-  void getAnalysisUsage(AnalysisUsage &AU) const override { 
-    AU.addRequired<ProfileSummaryInfoWrapperPass>(); 
-    AU.addRequired<BlockFrequencyInfoWrapperPass>(); 
-    AU.addRequired<TargetLibraryInfoWrapperPass>(); 
-  } 
-}; 
- 
-class PGOInstrumentationGenCreateVarLegacyPass : public ModulePass { 
-public: 
-  static char ID; 
-  StringRef getPassName() const override { 
-    return "PGOInstrumentationGenCreateVarPass"; 
-  } 
-  PGOInstrumentationGenCreateVarLegacyPass(std::string CSInstrName = "") 
-      : ModulePass(ID), InstrProfileOutput(CSInstrName) { 
-    initializePGOInstrumentationGenCreateVarLegacyPassPass( 
-        *PassRegistry::getPassRegistry()); 
-  } 
- 
-private: 
-  bool runOnModule(Module &M) override { 
-    createProfileFileNameVar(M, InstrProfileOutput); 
+// Return a string describing TI's branch condition (icmp predicate, operand
+// type and kind of constant RHS) for static branch probability heuristics.
+static std::string getBranchCondString(Instruction *TI) {
+  BranchInst *BI = dyn_cast<BranchInst>(TI);
+  if (!BI || !BI->isConditional())
+    return std::string(); // Only conditional branches are described.
+
+  Value *Cond = BI->getCondition();
+  ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
+  if (!CI)
+    return std::string(); // Only icmp conditions are described.
+
+  std::string result;
+  raw_string_ostream OS(result);
+  OS << CmpInst::getPredicateName(CI->getPredicate()) << "_";
+  CI->getOperand(0)->getType()->print(OS, true);
+
+  Value *RHS = CI->getOperand(1);
+  ConstantInt *CV = dyn_cast<ConstantInt>(RHS);
+  if (CV) { // Classify a constant RHS; a non-constant RHS adds no suffix.
+    if (CV->isZero())
+      OS << "_Zero";
+    else if (CV->isOne())
+      OS << "_One";
+    else if (CV->isMinusOne())
+      OS << "_MinusOne";
+    else
+      OS << "_Const";
+  }
+  OS.flush(); // Flush OS before result is returned.
+  return result;
+}
+
+static const char *ValueProfKindDescr[] = {
+#define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
+#include "llvm/ProfileData/InstrProfData.inc"
+};
+
+namespace {
+
+/// The select instruction visitor plays three roles specified
+/// by the mode. In \c VM_counting mode, it simply counts the number of
+/// select instructions. In \c VM_instrument mode, it inserts code to count
+/// the number of times TrueValue of select is taken. In \c VM_annotate mode,
+/// it reads profile data and annotates the select instruction with metadata.
+enum VisitMode { VM_counting, VM_instrument, VM_annotate };
+class PGOUseFunc;
+
+/// Instruction Visitor class to visit select instructions.
+struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
+  Function &F;
+  unsigned NSIs = 0;             // Number of select instructions instrumented.
+  VisitMode Mode = VM_counting;  // Visiting mode.
+  unsigned *CurCtrIdx = nullptr; // Pointer to current counter index.
+  unsigned TotalNumCtrs = 0;     // Total number of counters.
+  GlobalVariable *FuncNameVar = nullptr; // PGO function name variable.
+  uint64_t FuncHash = 0;                 // Function hash.
+  PGOUseFunc *UseFunc = nullptr;         // Set by annotateSelects().
+
+  SelectInstVisitor(Function &Func) : F(Func) {}
+
+  void countSelects(Function &Func) { // Resets NSIs, then visits Func.
+    NSIs = 0;
+    Mode = VM_counting;
+    visit(Func);
+  }
+
+  // Visit the IR stream and instrument all select instructions. \p
+  // Ind is a pointer to the counter index variable; \p TotalNC
+  // is the total number of counters; \p FNV is the pointer to the
+  // PGO function name var; \p FHash is the function hash.
+  void instrumentSelects(Function &Func, unsigned *Ind, unsigned TotalNC,
+                         GlobalVariable *FNV, uint64_t FHash) {
+    Mode = VM_instrument;
+    CurCtrIdx = Ind;
+    TotalNumCtrs = TotalNC;
+    FuncHash = FHash;
+    FuncNameVar = FNV;
+    visit(Func);
+  }
+
+  // Visit the IR stream and annotate all select instructions.
+  void annotateSelects(Function &Func, PGOUseFunc *UF, unsigned *Ind) {
+    Mode = VM_annotate;
+    UseFunc = UF;
+    CurCtrIdx = Ind;
+    visit(Func);
+  }
+
+  void instrumentOneSelectInst(SelectInst &SI);
+  void annotateOneSelectInst(SelectInst &SI);
+
+  // Visit \p SI instruction and perform tasks according to visit mode.
+  void visitSelectInst(SelectInst &SI);
+
+  // Return the number of select instructions. This needs to be called after
+  // countSelects().
+  unsigned getNumOfSelectInsts() const { return NSIs; }
+};
+
+
+class PGOInstrumentationGenLegacyPass : public ModulePass {
+public:
+  static char ID;
+
+  PGOInstrumentationGenLegacyPass(bool IsCS = false)
+      : ModulePass(ID), IsCS(IsCS) {
+    initializePGOInstrumentationGenLegacyPassPass(
+        *PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override { return "PGOInstrumentationGenPass"; }
+
+private:
+  // Whether this is context-sensitive instrumentation.
+  bool IsCS;
+  bool runOnModule(Module &M) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<BlockFrequencyInfoWrapperPass>();
+    AU.addRequired<TargetLibraryInfoWrapperPass>();
+  }
+};
+
+class PGOInstrumentationUseLegacyPass : public ModulePass {
+public:
+  static char ID;
+
+  // Provide the profile filename as the parameter.
+  PGOInstrumentationUseLegacyPass(std::string Filename = "", bool IsCS = false)
+      : ModulePass(ID), ProfileFileName(std::move(Filename)), IsCS(IsCS) {
+    if (!PGOTestProfileFile.empty()) // The test option overrides Filename.
+      ProfileFileName = PGOTestProfileFile;
+    initializePGOInstrumentationUseLegacyPassPass(
+        *PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override { return "PGOInstrumentationUsePass"; }
+
+private:
+  std::string ProfileFileName;
+  // Whether this is context-sensitive instrumentation use.
+  bool IsCS;
+
+  bool runOnModule(Module &M) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<ProfileSummaryInfoWrapperPass>();
+    AU.addRequired<BlockFrequencyInfoWrapperPass>();
+    AU.addRequired<TargetLibraryInfoWrapperPass>();
+  }
+};
+
+class PGOInstrumentationGenCreateVarLegacyPass : public ModulePass {
+public:
+  static char ID;
+  StringRef getPassName() const override {
+    return "PGOInstrumentationGenCreateVarPass";
+  }
+  PGOInstrumentationGenCreateVarLegacyPass(std::string CSInstrName = "")
+      : ModulePass(ID), InstrProfileOutput(CSInstrName) {
+    initializePGOInstrumentationGenCreateVarLegacyPassPass(
+        *PassRegistry::getPassRegistry());
+  }
+
+private:
+  bool runOnModule(Module &M) override {
+    createProfileFileNameVar(M, InstrProfileOutput);
     createIRLevelProfileFlagVar(M, /* IsCS */ true, PGOInstrumentEntry);
-    return false; 
-  } 
-  std::string InstrProfileOutput; 
-}; 
- 
-} // end anonymous namespace 
- 
-char PGOInstrumentationGenLegacyPass::ID = 0; 
- 
-INITIALIZE_PASS_BEGIN(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", 
-                      "PGO instrumentation.", false, false) 
-INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 
-INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) 
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 
-INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", 
-                    "PGO instrumentation.", false, false) 
- 
-ModulePass *llvm::createPGOInstrumentationGenLegacyPass(bool IsCS) { 
-  return new PGOInstrumentationGenLegacyPass(IsCS); 
-} 
- 
-char PGOInstrumentationUseLegacyPass::ID = 0; 
- 
-INITIALIZE_PASS_BEGIN(PGOInstrumentationUseLegacyPass, "pgo-instr-use", 
-                      "Read PGO instrumentation profile.", false, false) 
-INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 
-INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) 
-INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) 
-INITIALIZE_PASS_END(PGOInstrumentationUseLegacyPass, "pgo-instr-use", 
-                    "Read PGO instrumentation profile.", false, false) 
- 
-ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename, 
-                                                        bool IsCS) { 
-  return new PGOInstrumentationUseLegacyPass(Filename.str(), IsCS); 
-} 
- 
-char PGOInstrumentationGenCreateVarLegacyPass::ID = 0; 
- 
-INITIALIZE_PASS(PGOInstrumentationGenCreateVarLegacyPass, 
-                "pgo-instr-gen-create-var", 
-                "Create PGO instrumentation version variable for CSPGO.", false, 
-                false) 
- 
-ModulePass * 
-llvm::createPGOInstrumentationGenCreateVarLegacyPass(StringRef CSInstrName) { 
-  return new PGOInstrumentationGenCreateVarLegacyPass(std::string(CSInstrName)); 
-} 
- 
-namespace { 
- 
-/// An MST based instrumentation for PGO 
-/// 
-/// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO 
-/// in the function level. 
-struct PGOEdge { 
-  // This class implements the CFG edges. Note the CFG can be a multi-graph. 
-  // So there might be multiple edges with same SrcBB and DestBB. 
-  const BasicBlock *SrcBB; 
-  const BasicBlock *DestBB; 
-  uint64_t Weight; 
-  bool InMST = false; 
-  bool Removed = false; 
-  bool IsCritical = false; 
- 
-  PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) 
-      : SrcBB(Src), DestBB(Dest), Weight(W) {} 
- 
-  // Return the information string of an edge. 
-  const std::string infoString() const { 
-    return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") + 
-            (IsCritical ? "c" : " ") + "  W=" + Twine(Weight)).str(); 
-  } 
-}; 
- 
-// This class stores the auxiliary information for each BB. 
-struct BBInfo { 
-  BBInfo *Group; 
-  uint32_t Index; 
-  uint32_t Rank = 0; 
- 
-  BBInfo(unsigned IX) : Group(this), Index(IX) {} 
- 
-  // Return the information string of this object. 
-  const std::string infoString() const { 
-    return (Twine("Index=") + Twine(Index)).str(); 
-  } 
- 
-  // Empty function -- only applicable to UseBBInfo. 
-  void addOutEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {} 
- 
-  // Empty function -- only applicable to UseBBInfo. 
-  void addInEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {} 
-}; 
- 
-// This class implements the CFG edges. Note the CFG can be a multi-graph. 
-template <class Edge, class BBInfo> class FuncPGOInstrumentation { 
-private: 
-  Function &F; 
- 
-  // Is this is context-sensitive instrumentation. 
-  bool IsCS; 
- 
-  // A map that stores the Comdat group in function F. 
-  std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers; 
- 
-  ValueProfileCollector VPC; 
- 
-  void computeCFGHash(); 
-  void renameComdatFunction(); 
- 
-public: 
-  std::vector<std::vector<VPCandidateInfo>> ValueSites; 
-  SelectInstVisitor SIVisitor; 
-  std::string FuncName; 
-  GlobalVariable *FuncNameVar; 
- 
-  // CFG hash value for this function. 
-  uint64_t FunctionHash = 0; 
- 
-  // The Minimum Spanning Tree of function CFG. 
-  CFGMST<Edge, BBInfo> MST; 
- 
-  // Collect all the BBs that will be instrumented, and store them in 
-  // InstrumentBBs. 
-  void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs); 
- 
-  // Give an edge, find the BB that will be instrumented. 
-  // Return nullptr if there is no BB to be instrumented. 
-  BasicBlock *getInstrBB(Edge *E); 
- 
-  // Return the auxiliary BB information. 
-  BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); } 
- 
-  // Return the auxiliary BB information if available. 
-  BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); } 
- 
-  // Dump edges and BB information. 
-  void dumpInfo(std::string Str = "") const { 
-    MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " + 
-                              Twine(FunctionHash) + "\t" + Str); 
-  } 
- 
-  FuncPGOInstrumentation( 
-      Function &Func, TargetLibraryInfo &TLI, 
-      std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 
-      bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, 
+    return false;
+  }
+  std::string InstrProfileOutput;
+};
+
+} // end anonymous namespace
+
+char PGOInstrumentationGenLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(PGOInstrumentationGenLegacyPass, "pgo-instr-gen",
+                      "PGO instrumentation.", false, false)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen",
+                    "PGO instrumentation.", false, false)
+
+ModulePass *llvm::createPGOInstrumentationGenLegacyPass(bool IsCS) {
+  return new PGOInstrumentationGenLegacyPass(IsCS);
+}
+
+char PGOInstrumentationUseLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(PGOInstrumentationUseLegacyPass, "pgo-instr-use",
+                      "Read PGO instrumentation profile.", false, false)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_END(PGOInstrumentationUseLegacyPass, "pgo-instr-use",
+                    "Read PGO instrumentation profile.", false, false)
+
+ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename,
+                                                        bool IsCS) {
+  return new PGOInstrumentationUseLegacyPass(Filename.str(), IsCS);
+}
+
+char PGOInstrumentationGenCreateVarLegacyPass::ID = 0;
+
+INITIALIZE_PASS(PGOInstrumentationGenCreateVarLegacyPass,
+                "pgo-instr-gen-create-var",
+                "Create PGO instrumentation version variable for CSPGO.", false,
+                false)
+
+ModulePass *
+llvm::createPGOInstrumentationGenCreateVarLegacyPass(StringRef CSInstrName) {
+  return new PGOInstrumentationGenCreateVarLegacyPass(std::string(CSInstrName));
+}
+
+namespace {
+
+/// An MST based instrumentation for PGO
+///
+/// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO
+/// at the function level.
+struct PGOEdge {
+  // This class implements the CFG edges. Note the CFG can be a multi-graph.
+  // So there might be multiple edges with the same SrcBB and DestBB.
+  const BasicBlock *SrcBB;
+  const BasicBlock *DestBB;
+  uint64_t Weight;
+  bool InMST = false;
+  bool Removed = false;
+  bool IsCritical = false;
+
+  PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1)
+      : SrcBB(Src), DestBB(Dest), Weight(W) {}
+
+  // Return the edge flags (removed, not in MST, critical) and weight as text.
+  const std::string infoString() const {
+    return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
+            (IsCritical ? "c" : " ") + "  W=" + Twine(Weight)).str();
+  }
+};
+
+// This class stores the auxiliary information for each BB.
+struct BBInfo {
+  BBInfo *Group;
+  uint32_t Index;
+  uint32_t Rank = 0;
+
+  BBInfo(unsigned IX) : Group(this), Index(IX) {}
+
+  // Return the information string of this object.
+  const std::string infoString() const {
+    return (Twine("Index=") + Twine(Index)).str();
+  }
+
+  // Empty function -- only applicable to UseBBInfo.
+  void addOutEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {}
+
+  // Empty function -- only applicable to UseBBInfo.
+  void addInEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {}
+};
+
+// Holds the code shared by PGOInstrumentationGen and PGOInstrumentationUse.
+template <class Edge, class BBInfo> class FuncPGOInstrumentation {
+private:
+  Function &F;
+
+  // Whether this is context-sensitive instrumentation.
+  bool IsCS;
+
+  // A map that stores the Comdat group in function F.
+  std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
+
+  ValueProfileCollector VPC;
+
+  void computeCFGHash();
+  void renameComdatFunction();
+
+public:
+  std::vector<std::vector<VPCandidateInfo>> ValueSites;
+  SelectInstVisitor SIVisitor;
+  std::string FuncName;
+  GlobalVariable *FuncNameVar;
+
+  // CFG hash value for this function.
+  uint64_t FunctionHash = 0;
+
+  // The Minimum Spanning Tree of function CFG.
+  CFGMST<Edge, BBInfo> MST;
+
+  // Collect all the BBs that will be instrumented, and store them in
+  // InstrumentBBs.
+  void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
+
+  // Given an edge, find the BB that will be instrumented.
+  // Return nullptr if there is no BB to be instrumented.
+  BasicBlock *getInstrBB(Edge *E);
+
+  // Return the auxiliary BB information.
+  BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }
+
+  // Return the auxiliary BB information if available.
+  BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); }
+
+  // Dump edges and BB information.
+  void dumpInfo(std::string Str = "") const {
+    MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " +
+                              Twine(FunctionHash) + "\t" + Str);
+  }
+
+  FuncPGOInstrumentation(
+      Function &Func, TargetLibraryInfo &TLI,
+      std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
+      bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
       BlockFrequencyInfo *BFI = nullptr, bool IsCS = false,
       bool InstrumentFuncEntry = true)
-      : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI), 
+      : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI),
         ValueSites(IPVK_Last + 1), SIVisitor(Func),
         MST(F, InstrumentFuncEntry, BPI, BFI) {
-    // This should be done before CFG hash computation. 
-    SIVisitor.countSelects(Func); 
-    ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize); 
-    if (!IsCS) { 
-      NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 
-      NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 
-      NumOfPGOBB += MST.BBInfos.size(); 
-      ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget); 
-    } else { 
-      NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); 
-      NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); 
-      NumOfCSPGOBB += MST.BBInfos.size(); 
-    } 
- 
-    FuncName = getPGOFuncName(F); 
-    computeCFGHash(); 
-    if (!ComdatMembers.empty()) 
-      renameComdatFunction(); 
-    LLVM_DEBUG(dumpInfo("after CFGMST")); 
- 
-    for (auto &E : MST.AllEdges) { 
-      if (E->Removed) 
-        continue; 
-      IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++; 
-      if (!E->InMST) 
-        IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++; 
-    } 
- 
-    if (CreateGlobalVar) 
-      FuncNameVar = createPGOFuncNameVar(F, FuncName); 
-  } 
-}; 
- 
-} // end anonymous namespace 
- 
-// Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index 
+    // This should be done before CFG hash computation.
+    SIVisitor.countSelects(Func);
+    ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize);
+    if (!IsCS) {
+      NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
+      NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
+      NumOfPGOBB += MST.BBInfos.size();
+      ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget);
+    } else {
+      NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
+      NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
+      NumOfCSPGOBB += MST.BBInfos.size();
+    }
+
+    FuncName = getPGOFuncName(F);
+    computeCFGHash();
+    if (!ComdatMembers.empty())
+      renameComdatFunction();
+    LLVM_DEBUG(dumpInfo("after CFGMST"));
+
+    for (auto &E : MST.AllEdges) {
+      if (E->Removed)
+        continue;
+      IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++;
+      if (!E->InMST)
+        IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++;
+    }
+
+    if (CreateGlobalVar)
+      FuncNameVar = createPGOFuncNameVar(F, FuncName);
+  }
+};
+
+} // end anonymous namespace
+
+// Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
 // value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers
 // of selects, indirect calls, mem ops and edges.
-template <class Edge, class BBInfo> 
-void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { 
-  std::vector<uint8_t> Indexes; 
-  JamCRC JC; 
-  for (auto &BB : F) { 
-    const Instruction *TI = BB.getTerminator(); 
-    for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { 
-      BasicBlock *Succ = TI->getSuccessor(I); 
-      auto BI = findBBInfo(Succ); 
-      if (BI == nullptr) 
-        continue; 
-      uint32_t Index = BI->Index; 
-      for (int J = 0; J < 4; J++) 
-        Indexes.push_back((uint8_t)(Index >> (J * 8))); 
-    } 
-  } 
-  JC.update(Indexes); 
- 
+template <class Edge, class BBInfo>
+void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
+  std::vector<uint8_t> Indexes;
+  JamCRC JC;
+  for (auto &BB : F) {
+    const Instruction *TI = BB.getTerminator();
+    for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
+      BasicBlock *Succ = TI->getSuccessor(I);
+      auto BI = findBBInfo(Succ);
+      if (BI == nullptr)
+        continue;
+      uint32_t Index = BI->Index;
+      for (int J = 0; J < 4; J++)
+        Indexes.push_back((uint8_t)(Index >> (J * 8)));
+    }
+  }
+  JC.update(Indexes);
+
   JamCRC JCH;
   if (PGOOldCFGHashing) {
     // Hash format for context sensitive profile. Reserve 4 bits for other
@@ -693,956 +693,956 @@ void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
     FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC();
   }
 
-  // Reserve bit 60-63 for other information purpose. 
-  FunctionHash &= 0x0FFFFFFFFFFFFFFF; 
-  if (IsCS) 
-    NamedInstrProfRecord::setCSFlagInHash(FunctionHash); 
-  LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n" 
-                    << " CRC = " << JC.getCRC() 
-                    << ", Selects = " << SIVisitor.getNumOfSelectInsts() 
-                    << ", Edges = " << MST.AllEdges.size() << ", ICSites = " 
+  // Reserve bit 60-63 for other information purpose.
+  FunctionHash &= 0x0FFFFFFFFFFFFFFF;
+  if (IsCS)
+    NamedInstrProfRecord::setCSFlagInHash(FunctionHash);
+  LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
+                    << " CRC = " << JC.getCRC()
+                    << ", Selects = " << SIVisitor.getNumOfSelectInsts()
+                    << ", Edges = " << MST.AllEdges.size() << ", ICSites = "
                     << ValueSites[IPVK_IndirectCallTarget].size());
   if (!PGOOldCFGHashing) {
     LLVM_DEBUG(dbgs() << ", Memops = " << ValueSites[IPVK_MemOPSize].size()
                       << ", High32 CRC = " << JCH.getCRC());
   }
   LLVM_DEBUG(dbgs() << ", Hash = " << FunctionHash << "\n";);
-} 
- 
-// Check if we can safely rename this Comdat function. 
-static bool canRenameComdat( 
-    Function &F, 
-    std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 
-  if (!DoComdatRenaming || !canRenameComdatFunc(F, true)) 
-    return false; 
- 
-  // FIXME: Current only handle those Comdat groups that only containing one 
+}
+
+// Check if we can safely rename this Comdat function.
+static bool canRenameComdat(
+    Function &F,
+    std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
+  if (!DoComdatRenaming || !canRenameComdatFunc(F, true))
+    return false;
+
+  // FIXME: Currently this only handles Comdat groups that contain exactly one
   // function.
-  // (1) For a Comdat group containing multiple functions, we need to have a 
-  // unique postfix based on the hashes for each function. There is a 
-  // non-trivial code refactoring to do this efficiently. 
-  // (2) Variables can not be renamed, so we can not rename Comdat function in a 
-  // group including global vars. 
-  Comdat *C = F.getComdat(); 
-  for (auto &&CM : make_range(ComdatMembers.equal_range(C))) { 
+  // (1) For a Comdat group containing multiple functions, we need to have a
+  // unique postfix based on the hashes for each function. There is a
+  // non-trivial code refactoring to do this efficiently.
+  // (2) Variables can not be renamed, so we can not rename Comdat function in a
+  // group including global vars.
+  Comdat *C = F.getComdat();
+  for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {
     assert(!isa<GlobalAlias>(CM.second));
-    Function *FM = dyn_cast<Function>(CM.second); 
-    if (FM != &F) 
-      return false; 
-  } 
-  return true; 
-} 
- 
-// Append the CFGHash to the Comdat function name. 
-template <class Edge, class BBInfo> 
-void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() { 
-  if (!canRenameComdat(F, ComdatMembers)) 
-    return; 
-  std::string OrigName = F.getName().str(); 
-  std::string NewFuncName = 
-      Twine(F.getName() + "." + Twine(FunctionHash)).str(); 
-  F.setName(Twine(NewFuncName)); 
-  GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F); 
-  FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str(); 
-  Comdat *NewComdat; 
-  Module *M = F.getParent(); 
-  // For AvailableExternallyLinkage functions, change the linkage to 
-  // LinkOnceODR and put them into comdat. This is because after renaming, there 
-  // is no backup external copy available for the function. 
-  if (!F.hasComdat()) { 
-    assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage); 
-    NewComdat = M->getOrInsertComdat(StringRef(NewFuncName)); 
-    F.setLinkage(GlobalValue::LinkOnceODRLinkage); 
-    F.setComdat(NewComdat); 
-    return; 
-  } 
- 
-  // This function belongs to a single function Comdat group. 
-  Comdat *OrigComdat = F.getComdat(); 
-  std::string NewComdatName = 
-      Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str(); 
-  NewComdat = M->getOrInsertComdat(StringRef(NewComdatName)); 
-  NewComdat->setSelectionKind(OrigComdat->getSelectionKind()); 
- 
-  for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) { 
-    // Must be a function. 
+    Function *FM = dyn_cast<Function>(CM.second);
+    if (FM != &F)
+      return false;
+  }
+  return true;
+}
+
+// Append the CFGHash to the Comdat function name.
+template <class Edge, class BBInfo>
+void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
+  if (!canRenameComdat(F, ComdatMembers))
+    return;
+  std::string OrigName = F.getName().str();
+  std::string NewFuncName =
+      Twine(F.getName() + "." + Twine(FunctionHash)).str();
+  F.setName(Twine(NewFuncName));
+  GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F);
+  FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str();
+  Comdat *NewComdat;
+  Module *M = F.getParent();
+  // For AvailableExternallyLinkage functions, change the linkage to
+  // LinkOnceODR and put them into comdat. This is because after renaming, there
+  // is no backup external copy available for the function.
+  if (!F.hasComdat()) {
+    assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage);
+    NewComdat = M->getOrInsertComdat(StringRef(NewFuncName));
+    F.setLinkage(GlobalValue::LinkOnceODRLinkage);
+    F.setComdat(NewComdat);
+    return;
+  }
+
+  // This function belongs to a single function Comdat group.
+  Comdat *OrigComdat = F.getComdat();
+  std::string NewComdatName =
+      Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str();
+  NewComdat = M->getOrInsertComdat(StringRef(NewComdatName));
+  NewComdat->setSelectionKind(OrigComdat->getSelectionKind());
+
+  for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) {
+    // Must be a function.
     cast<Function>(CM.second)->setComdat(NewComdat);
-  } 
-} 
- 
-// Collect all the BBs that will be instruments and return them in 
-// InstrumentBBs and setup InEdges/OutEdge for UseBBInfo. 
-template <class Edge, class BBInfo> 
-void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs( 
-    std::vector<BasicBlock *> &InstrumentBBs) { 
-  // Use a worklist as we will update the vector during the iteration. 
-  std::vector<Edge *> EdgeList; 
-  EdgeList.reserve(MST.AllEdges.size()); 
-  for (auto &E : MST.AllEdges) 
-    EdgeList.push_back(E.get()); 
- 
-  for (auto &E : EdgeList) { 
-    BasicBlock *InstrBB = getInstrBB(E); 
-    if (InstrBB) 
-      InstrumentBBs.push_back(InstrBB); 
-  } 
- 
-  // Set up InEdges/OutEdges for all BBs. 
-  for (auto &E : MST.AllEdges) { 
-    if (E->Removed) 
-      continue; 
-    const BasicBlock *SrcBB = E->SrcBB; 
-    const BasicBlock *DestBB = E->DestBB; 
-    BBInfo &SrcInfo = getBBInfo(SrcBB); 
-    BBInfo &DestInfo = getBBInfo(DestBB); 
-    SrcInfo.addOutEdge(E.get()); 
-    DestInfo.addInEdge(E.get()); 
-  } 
-} 
- 
-// Given a CFG E to be instrumented, find which BB to place the instrumented 
-// code. The function will split the critical edge if necessary. 
-template <class Edge, class BBInfo> 
-BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) { 
-  if (E->InMST || E->Removed) 
-    return nullptr; 
- 
-  BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB); 
-  BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB); 
-  // For a fake edge, instrument the real BB. 
-  if (SrcBB == nullptr) 
-    return DestBB; 
-  if (DestBB == nullptr) 
-    return SrcBB; 
- 
-  auto canInstrument = [](BasicBlock *BB) -> BasicBlock * { 
-    // There are basic blocks (such as catchswitch) cannot be instrumented. 
-    // If the returned first insertion point is the end of BB, skip this BB. 
-    if (BB->getFirstInsertionPt() == BB->end()) 
-      return nullptr; 
-    return BB; 
-  }; 
- 
-  // Instrument the SrcBB if it has a single successor, 
-  // otherwise, the DestBB if this is not a critical edge. 
-  Instruction *TI = SrcBB->getTerminator(); 
-  if (TI->getNumSuccessors() <= 1) 
-    return canInstrument(SrcBB); 
-  if (!E->IsCritical) 
-    return canInstrument(DestBB); 
- 
+  }
+}
+
+// Collect all the BBs that will be instrumented and return them in
+// InstrumentBBs, and set up InEdges/OutEdges for UseBBInfo.
+template <class Edge, class BBInfo>
+void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
+    std::vector<BasicBlock *> &InstrumentBBs) {
+  // Use a worklist as we will update the vector during the iteration.
+  std::vector<Edge *> EdgeList;
+  EdgeList.reserve(MST.AllEdges.size());
+  for (auto &E : MST.AllEdges)
+    EdgeList.push_back(E.get());
+
+  for (auto &E : EdgeList) {
+    BasicBlock *InstrBB = getInstrBB(E);
+    if (InstrBB)
+      InstrumentBBs.push_back(InstrBB);
+  }
+
+  // Set up InEdges/OutEdges for all BBs.
+  for (auto &E : MST.AllEdges) {
+    if (E->Removed)
+      continue;
+    const BasicBlock *SrcBB = E->SrcBB;
+    const BasicBlock *DestBB = E->DestBB;
+    BBInfo &SrcInfo = getBBInfo(SrcBB);
+    BBInfo &DestInfo = getBBInfo(DestBB);
+    SrcInfo.addOutEdge(E.get());
+    DestInfo.addInEdge(E.get());
+  }
+}
+
+// Given a CFG edge E to be instrumented, find which BB to place the
+// instrumentation code in. The function splits the critical edge if necessary.
+template <class Edge, class BBInfo>
+BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
+  if (E->InMST || E->Removed)
+    return nullptr;
+
+  BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB);
+  BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB);
+  // For a fake edge, instrument the real BB.
+  if (SrcBB == nullptr)
+    return DestBB;
+  if (DestBB == nullptr)
+    return SrcBB;
+
+  auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
+    // Some basic blocks (such as catchswitch) cannot be instrumented.
+    // If the returned first insertion point is the end of BB, skip this BB.
+    if (BB->getFirstInsertionPt() == BB->end())
+      return nullptr;
+    return BB;
+  };
+
+  // Instrument the SrcBB if it has a single successor,
+  // otherwise, the DestBB if this is not a critical edge.
+  Instruction *TI = SrcBB->getTerminator();
+  if (TI->getNumSuccessors() <= 1)
+    return canInstrument(SrcBB);
+  if (!E->IsCritical)
+    return canInstrument(DestBB);
+
   // Some IndirectBr critical edges cannot be split by the previous
   // SplitIndirectBrCriticalEdges call. Bail out.
-  unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 
+  unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
   BasicBlock *InstrBB =
       isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum);
-  if (!InstrBB) { 
-    LLVM_DEBUG( 
-        dbgs() << "Fail to split critical edge: not instrument this edge.\n"); 
-    return nullptr; 
-  } 
-  // For a critical edge, we have to split. Instrument the newly 
-  // created BB. 
-  IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++; 
-  LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index 
-                    << " --> " << getBBInfo(DestBB).Index << "\n"); 
-  // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB. 
-  MST.addEdge(SrcBB, InstrBB, 0); 
-  // Second one: Add new edge of InstrBB->DestBB. 
-  Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0); 
-  NewEdge1.InMST = true; 
-  E->Removed = true; 
- 
-  return canInstrument(InstrBB); 
-} 
- 
-// When generating value profiling calls on Windows routines that make use of 
-// handler funclets for exception processing an operand bundle needs to attached 
-// to the called function. This routine will set \p OpBundles to contain the 
-// funclet information, if any is needed, that should be placed on the generated 
-// value profiling call for the value profile candidate call. 
-static void 
-populateEHOperandBundle(VPCandidateInfo &Cand, 
-                        DenseMap<BasicBlock *, ColorVector> &BlockColors, 
-                        SmallVectorImpl<OperandBundleDef> &OpBundles) { 
-  auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst); 
-  if (OrigCall && !isa<IntrinsicInst>(OrigCall)) { 
-    // The instrumentation call should belong to the same funclet as a 
-    // non-intrinsic call, so just copy the operand bundle, if any exists. 
-    Optional<OperandBundleUse> ParentFunclet = 
-        OrigCall->getOperandBundle(LLVMContext::OB_funclet); 
-    if (ParentFunclet) 
-      OpBundles.emplace_back(OperandBundleDef(*ParentFunclet)); 
-  } else { 
-    // Intrinsics or other instructions do not get funclet information from the 
-    // front-end. Need to use the BlockColors that was computed by the routine 
-    // colorEHFunclets to determine whether a funclet is needed. 
-    if (!BlockColors.empty()) { 
-      const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second; 
-      assert(CV.size() == 1 && "non-unique color for block!"); 
-      Instruction *EHPad = CV.front()->getFirstNonPHI(); 
-      if (EHPad->isEHPad()) 
-        OpBundles.emplace_back("funclet", EHPad); 
-    } 
-  } 
-} 
- 
-// Visit all edge and instrument the edges not in MST, and do value profiling. 
-// Critical edges will be split. 
-static void instrumentOneFunc( 
-    Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI, 
-    BlockFrequencyInfo *BFI, 
-    std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 
-    bool IsCS) { 
-  // Split indirectbr critical edges here before computing the MST rather than 
-  // later in getInstrBB() to avoid invalidating it. 
-  SplitIndirectBrCriticalEdges(F, BPI, BFI); 
- 
+  if (!InstrBB) {
+    LLVM_DEBUG(
+        dbgs() << "Fail to split critical edge: not instrument this edge.\n");
+    return nullptr;
+  }
+  // For a critical edge, we have to split. Instrument the newly
+  // created BB.
+  IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
+  LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
+                    << " --> " << getBBInfo(DestBB).Index << "\n");
+  // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
+  MST.addEdge(SrcBB, InstrBB, 0);
+  // Second one: Add new edge of InstrBB->DestBB.
+  Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
+  NewEdge1.InMST = true;
+  E->Removed = true;
+
+  return canInstrument(InstrBB);
+}
+
+// When generating value profiling calls on Windows routines that make use of
+// handler funclets for exception processing, an operand bundle must be attached
+// to the called function. This routine will set \p OpBundles to contain the
+// funclet information, if any is needed, that should be placed on the generated
+// value profiling call for the value profile candidate call.
+static void
+populateEHOperandBundle(VPCandidateInfo &Cand,
+                        DenseMap<BasicBlock *, ColorVector> &BlockColors,
+                        SmallVectorImpl<OperandBundleDef> &OpBundles) {
+  auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst);
+  if (OrigCall && !isa<IntrinsicInst>(OrigCall)) {
+    // The instrumentation call should belong to the same funclet as a
+    // non-intrinsic call, so just copy the operand bundle, if any exists.
+    Optional<OperandBundleUse> ParentFunclet =
+        OrigCall->getOperandBundle(LLVMContext::OB_funclet);
+    if (ParentFunclet)
+      OpBundles.emplace_back(OperandBundleDef(*ParentFunclet));
+  } else {
+    // Intrinsics or other instructions do not get funclet information from the
+    // front-end. Need to use the BlockColors that was computed by the routine
+    // colorEHFunclets to determine whether a funclet is needed.
+    if (!BlockColors.empty()) {
+      const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second;
+      assert(CV.size() == 1 && "non-unique color for block!");
+      Instruction *EHPad = CV.front()->getFirstNonPHI();
+      if (EHPad->isEHPad())
+        OpBundles.emplace_back("funclet", EHPad);
+    }
+  }
+}
+
+// Visit all edges and instrument the edges not in MST, and do value profiling.
+// Critical edges will be split.
+static void instrumentOneFunc(
+    Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI,
+    BlockFrequencyInfo *BFI,
+    std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
+    bool IsCS) {
+  // Split indirectbr critical edges here before computing the MST rather than
+  // later in getInstrBB() to avoid invalidating it.
+  SplitIndirectBrCriticalEdges(F, BPI, BFI);
+
   FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(
       F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry);
-  std::vector<BasicBlock *> InstrumentBBs; 
-  FuncInfo.getInstrumentBBs(InstrumentBBs); 
-  unsigned NumCounters = 
-      InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 
- 
-  uint32_t I = 0; 
-  Type *I8PtrTy = Type::getInt8PtrTy(M->getContext()); 
-  for (auto *InstrBB : InstrumentBBs) { 
-    IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt()); 
-    assert(Builder.GetInsertPoint() != InstrBB->end() && 
-           "Cannot get the Instrumentation point"); 
-    Builder.CreateCall( 
-        Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment), 
-        {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), 
-         Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters), 
-         Builder.getInt32(I++)}); 
-  } 
- 
-  // Now instrument select instructions: 
-  FuncInfo.SIVisitor.instrumentSelects(F, &I, NumCounters, FuncInfo.FuncNameVar, 
-                                       FuncInfo.FunctionHash); 
-  assert(I == NumCounters); 
- 
-  if (DisableValueProfiling) 
-    return; 
- 
-  NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size(); 
- 
-  // Intrinsic function calls do not have funclet operand bundles needed for 
-  // Windows exception handling attached to them. However, if value profiling is 
-  // inserted for one of these calls, then a funclet value will need to be set 
-  // on the instrumentation call based on the funclet coloring. 
-  DenseMap<BasicBlock *, ColorVector> BlockColors; 
-  if (F.hasPersonalityFn() && 
-      isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) 
-    BlockColors = colorEHFunclets(F); 
- 
-  // For each VP Kind, walk the VP candidates and instrument each one. 
-  for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) { 
-    unsigned SiteIndex = 0; 
-    if (Kind == IPVK_MemOPSize && !PGOInstrMemOP) 
-      continue; 
- 
-    for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) { 
-      LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind] 
-                        << " site: CallSite Index = " << SiteIndex << "\n"); 
- 
-      IRBuilder<> Builder(Cand.InsertPt); 
-      assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() && 
-             "Cannot get the Instrumentation point"); 
- 
-      Value *ToProfile = nullptr; 
-      if (Cand.V->getType()->isIntegerTy()) 
-        ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty()); 
-      else if (Cand.V->getType()->isPointerTy()) 
-        ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty()); 
-      assert(ToProfile && "value profiling Value is of unexpected type"); 
- 
-      SmallVector<OperandBundleDef, 1> OpBundles; 
-      populateEHOperandBundle(Cand, BlockColors, OpBundles); 
-      Builder.CreateCall( 
-          Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), 
-          {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), 
-           Builder.getInt64(FuncInfo.FunctionHash), ToProfile, 
-           Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)}, 
-          OpBundles); 
-    } 
-  } // IPVK_First <= Kind <= IPVK_Last 
-} 
- 
-namespace { 
- 
-// This class represents a CFG edge in profile use compilation. 
-struct PGOUseEdge : public PGOEdge { 
-  bool CountValid = false; 
-  uint64_t CountValue = 0; 
- 
-  PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) 
-      : PGOEdge(Src, Dest, W) {} 
- 
-  // Set edge count value 
-  void setEdgeCount(uint64_t Value) { 
-    CountValue = Value; 
-    CountValid = true; 
-  } 
- 
-  // Return the information string for this object. 
-  const std::string infoString() const { 
-    if (!CountValid) 
-      return PGOEdge::infoString(); 
-    return (Twine(PGOEdge::infoString()) + "  Count=" + Twine(CountValue)) 
-        .str(); 
-  } 
-}; 
- 
-using DirectEdges = SmallVector<PGOUseEdge *, 2>; 
- 
-// This class stores the auxiliary information for each BB. 
-struct UseBBInfo : public BBInfo { 
-  uint64_t CountValue = 0; 
-  bool CountValid; 
-  int32_t UnknownCountInEdge = 0; 
-  int32_t UnknownCountOutEdge = 0; 
-  DirectEdges InEdges; 
-  DirectEdges OutEdges; 
- 
-  UseBBInfo(unsigned IX) : BBInfo(IX), CountValid(false) {} 
- 
-  UseBBInfo(unsigned IX, uint64_t C) 
-      : BBInfo(IX), CountValue(C), CountValid(true) {} 
- 
-  // Set the profile count value for this BB. 
-  void setBBInfoCount(uint64_t Value) { 
-    CountValue = Value; 
-    CountValid = true; 
-  } 
- 
-  // Return the information string of this object. 
-  const std::string infoString() const { 
-    if (!CountValid) 
-      return BBInfo::infoString(); 
-    return (Twine(BBInfo::infoString()) + "  Count=" + Twine(CountValue)).str(); 
-  } 
- 
-  // Add an OutEdge and update the edge count. 
-  void addOutEdge(PGOUseEdge *E) { 
-    OutEdges.push_back(E); 
-    UnknownCountOutEdge++; 
-  } 
- 
-  // Add an InEdge and update the edge count. 
-  void addInEdge(PGOUseEdge *E) { 
-    InEdges.push_back(E); 
-    UnknownCountInEdge++; 
-  } 
-}; 
- 
-} // end anonymous namespace 
- 
-// Sum up the count values for all the edges. 
-static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) { 
-  uint64_t Total = 0; 
-  for (auto &E : Edges) { 
-    if (E->Removed) 
-      continue; 
-    Total += E->CountValue; 
-  } 
-  return Total; 
-} 
- 
-namespace { 
- 
-class PGOUseFunc { 
-public: 
-  PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI, 
-             std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers, 
-             BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin, 
+  std::vector<BasicBlock *> InstrumentBBs;
+  FuncInfo.getInstrumentBBs(InstrumentBBs);
+  unsigned NumCounters =
+      InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
+
+  uint32_t I = 0;
+  Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
+  for (auto *InstrBB : InstrumentBBs) {
+    IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
+    assert(Builder.GetInsertPoint() != InstrBB->end() &&
+           "Cannot get the Instrumentation point");
+    Builder.CreateCall(
+        Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment),
+        {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy),
+         Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters),
+         Builder.getInt32(I++)});
+  }
+
+  // Now instrument select instructions:
+  FuncInfo.SIVisitor.instrumentSelects(F, &I, NumCounters, FuncInfo.FuncNameVar,
+                                       FuncInfo.FunctionHash);
+  assert(I == NumCounters);
+
+  if (DisableValueProfiling)
+    return;
+
+  NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size();
+
+  // Intrinsic function calls do not have funclet operand bundles needed for
+  // Windows exception handling attached to them. However, if value profiling is
+  // inserted for one of these calls, then a funclet value will need to be set
+  // on the instrumentation call based on the funclet coloring.
+  DenseMap<BasicBlock *, ColorVector> BlockColors;
+  if (F.hasPersonalityFn() &&
+      isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
+    BlockColors = colorEHFunclets(F);
+
+  // For each VP Kind, walk the VP candidates and instrument each one.
+  for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
+    unsigned SiteIndex = 0;
+    if (Kind == IPVK_MemOPSize && !PGOInstrMemOP)
+      continue;
+
+    for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) {
+      LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind]
+                        << " site: CallSite Index = " << SiteIndex << "\n");
+
+      IRBuilder<> Builder(Cand.InsertPt);
+      assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() &&
+             "Cannot get the Instrumentation point");
+
+      Value *ToProfile = nullptr;
+      if (Cand.V->getType()->isIntegerTy())
+        ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty());
+      else if (Cand.V->getType()->isPointerTy())
+        ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty());
+      assert(ToProfile && "value profiling Value is of unexpected type");
+
+      SmallVector<OperandBundleDef, 1> OpBundles;
+      populateEHOperandBundle(Cand, BlockColors, OpBundles);
+      Builder.CreateCall(
+          Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
+          {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy),
+           Builder.getInt64(FuncInfo.FunctionHash), ToProfile,
+           Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)},
+          OpBundles);
+    }
+  } // IPVK_First <= Kind <= IPVK_Last
+}
+
+namespace {
+
+// This class represents a CFG edge in profile use compilation.
+struct PGOUseEdge : public PGOEdge {
+  bool CountValid = false;
+  uint64_t CountValue = 0;
+
+  PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1)
+      : PGOEdge(Src, Dest, W) {}
+
+  // Set edge count value
+  void setEdgeCount(uint64_t Value) {
+    CountValue = Value;
+    CountValid = true;
+  }
+
+  // Return the information string for this object.
+  const std::string infoString() const {
+    if (!CountValid)
+      return PGOEdge::infoString();
+    return (Twine(PGOEdge::infoString()) + "  Count=" + Twine(CountValue))
+        .str();
+  }
+};
+
+using DirectEdges = SmallVector<PGOUseEdge *, 2>;
+
+// This class stores the auxiliary information for each BB.
+struct UseBBInfo : public BBInfo {
+  uint64_t CountValue = 0;
+  bool CountValid;
+  int32_t UnknownCountInEdge = 0;
+  int32_t UnknownCountOutEdge = 0;
+  DirectEdges InEdges;
+  DirectEdges OutEdges;
+
+  UseBBInfo(unsigned IX) : BBInfo(IX), CountValid(false) {}
+
+  UseBBInfo(unsigned IX, uint64_t C)
+      : BBInfo(IX), CountValue(C), CountValid(true) {}
+
+  // Set the profile count value for this BB.
+  void setBBInfoCount(uint64_t Value) {
+    CountValue = Value;
+    CountValid = true;
+  }
+
+  // Return the information string of this object.
+  const std::string infoString() const {
+    if (!CountValid)
+      return BBInfo::infoString();
+    return (Twine(BBInfo::infoString()) + "  Count=" + Twine(CountValue)).str();
+  }
+
+  // Add an OutEdge and update the edge count.
+  void addOutEdge(PGOUseEdge *E) {
+    OutEdges.push_back(E);
+    UnknownCountOutEdge++;
+  }
+
+  // Add an InEdge and update the edge count.
+  void addInEdge(PGOUseEdge *E) {
+    InEdges.push_back(E);
+    UnknownCountInEdge++;
+  }
+};
+
+} // end anonymous namespace
+
+// Sum up the count values for all the edges.
+static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) {
+  uint64_t Total = 0;
+  for (auto &E : Edges) {
+    if (E->Removed)
+      continue;
+    Total += E->CountValue;
+  }
+  return Total;
+}
+
+namespace {
+
+class PGOUseFunc {
+public:
+  PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI,
+             std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
+             BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin,
              ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry)
-      : F(Func), M(Modu), BFI(BFIin), PSI(PSI), 
+      : F(Func), M(Modu), BFI(BFIin), PSI(PSI),
         FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS,
                  InstrumentFuncEntry),
-        FreqAttr(FFA_Normal), IsCS(IsCS) {} 
- 
-  // Read counts for the instrumented BB from profile. 
+        FreqAttr(FFA_Normal), IsCS(IsCS) {}
+
+  // Read counts for the instrumented BB from profile.
   bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
                     bool &AllMinusOnes);
- 
-  // Populate the counts for all BBs. 
-  void populateCounters(); 
- 
-  // Set the branch weights based on the count values. 
-  void setBranchWeights(); 
- 
-  // Annotate the value profile call sites for all value kind. 
-  void annotateValueSites(); 
- 
-  // Annotate the value profile call sites for one value kind. 
-  void annotateValueSites(uint32_t Kind); 
- 
-  // Annotate the irreducible loop header weights. 
-  void annotateIrrLoopHeaderWeights(); 
- 
-  // The hotness of the function from the profile count. 
-  enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot }; 
- 
-  // Return the function hotness from the profile. 
-  FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; } 
- 
-  // Return the function hash. 
-  uint64_t getFuncHash() const { return FuncInfo.FunctionHash; } 
- 
-  // Return the profile record for this function; 
-  InstrProfRecord &getProfileRecord() { return ProfileRecord; } 
- 
-  // Return the auxiliary BB information. 
-  UseBBInfo &getBBInfo(const BasicBlock *BB) const { 
-    return FuncInfo.getBBInfo(BB); 
-  } 
- 
-  // Return the auxiliary BB information if available. 
-  UseBBInfo *findBBInfo(const BasicBlock *BB) const { 
-    return FuncInfo.findBBInfo(BB); 
-  } 
- 
-  Function &getFunc() const { return F; } 
- 
-  void dumpInfo(std::string Str = "") const { 
-    FuncInfo.dumpInfo(Str); 
-  } 
- 
-  uint64_t getProgramMaxCount() const { return ProgramMaxCount; } 
-private: 
-  Function &F; 
-  Module *M; 
-  BlockFrequencyInfo *BFI; 
-  ProfileSummaryInfo *PSI; 
- 
-  // This member stores the shared information with class PGOGenFunc. 
-  FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo; 
- 
-  // The maximum count value in the profile. This is only used in PGO use 
-  // compilation. 
-  uint64_t ProgramMaxCount; 
- 
-  // Position of counter that remains to be read. 
-  uint32_t CountPosition = 0; 
- 
-  // Total size of the profile count for this function. 
-  uint32_t ProfileCountSize = 0; 
- 
-  // ProfileRecord for this function. 
-  InstrProfRecord ProfileRecord; 
- 
-  // Function hotness info derived from profile. 
-  FuncFreqAttr FreqAttr; 
- 
-  // Is to use the context sensitive profile. 
-  bool IsCS; 
- 
-  // Find the Instrumented BB and set the value. Return false on error. 
-  bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile); 
- 
-  // Set the edge counter value for the unknown edge -- there should be only 
-  // one unknown edge. 
-  void setEdgeCount(DirectEdges &Edges, uint64_t Value); 
- 
-  // Return FuncName string; 
-  const std::string getFuncName() const { return FuncInfo.FuncName; } 
- 
-  // Set the hot/cold inline hints based on the count values. 
-  // FIXME: This function should be removed once the functionality in 
-  // the inliner is implemented. 
-  void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) { 
-    if (PSI->isHotCount(EntryCount)) 
-      FreqAttr = FFA_Hot; 
-    else if (PSI->isColdCount(MaxCount)) 
-      FreqAttr = FFA_Cold; 
-  } 
-}; 
- 
-} // end anonymous namespace 
- 
-// Visit all the edges and assign the count value for the instrumented 
-// edges and the BB. Return false on error. 
-bool PGOUseFunc::setInstrumentedCounts( 
-    const std::vector<uint64_t> &CountFromProfile) { 
- 
-  std::vector<BasicBlock *> InstrumentBBs; 
-  FuncInfo.getInstrumentBBs(InstrumentBBs); 
-  unsigned NumCounters = 
-      InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts(); 
-  // The number of counters here should match the number of counters 
-  // in profile. Return if they mismatch. 
-  if (NumCounters != CountFromProfile.size()) { 
-    return false; 
-  } 
+
+  // Populate the counts for all BBs.
+  void populateCounters();
+
+  // Set the branch weights based on the count values.
+  void setBranchWeights();
+
+  // Annotate the value profile call sites for all value kinds.
+  void annotateValueSites();
+
+  // Annotate the value profile call sites for one value kind.
+  void annotateValueSites(uint32_t Kind);
+
+  // Annotate the irreducible loop header weights.
+  void annotateIrrLoopHeaderWeights();
+
+  // The hotness of the function from the profile count.
+  enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot };
+
+  // Return the function hotness from the profile.
+  FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; }
+
+  // Return the function hash.
+  uint64_t getFuncHash() const { return FuncInfo.FunctionHash; }
+
+  // Return the profile record for this function;
+  InstrProfRecord &getProfileRecord() { return ProfileRecord; }
+
+  // Return the auxiliary BB information.
+  UseBBInfo &getBBInfo(const BasicBlock *BB) const {
+    return FuncInfo.getBBInfo(BB);
+  }
+
+  // Return the auxiliary BB information if available.
+  UseBBInfo *findBBInfo(const BasicBlock *BB) const {
+    return FuncInfo.findBBInfo(BB);
+  }
+
+  Function &getFunc() const { return F; }
+
+  void dumpInfo(std::string Str = "") const {
+    FuncInfo.dumpInfo(Str);
+  }
+
+  uint64_t getProgramMaxCount() const { return ProgramMaxCount; }
+private:
+  Function &F;
+  Module *M;
+  BlockFrequencyInfo *BFI;
+  ProfileSummaryInfo *PSI;
+
+  // This member stores the shared information with class PGOGenFunc.
+  FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo;
+
+  // The maximum count value in the profile. This is only used in PGO use
+  // compilation.
+  uint64_t ProgramMaxCount;
+
+  // Position of counter that remains to be read.
+  uint32_t CountPosition = 0;
+
+  // Total size of the profile count for this function.
+  uint32_t ProfileCountSize = 0;
+
+  // ProfileRecord for this function.
+  InstrProfRecord ProfileRecord;
+
+  // Function hotness info derived from profile.
+  FuncFreqAttr FreqAttr;
+
+  // Whether to use the context-sensitive profile.
+  bool IsCS;
+
+  // Find the Instrumented BB and set the value. Return false on error.
+  bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
+
+  // Set the edge counter value for the unknown edge -- there should be only
+  // one unknown edge.
+  void setEdgeCount(DirectEdges &Edges, uint64_t Value);
+
+  // Return FuncName string;
+  const std::string getFuncName() const { return FuncInfo.FuncName; }
+
+  // Set the hot/cold inline hints based on the count values.
+  // FIXME: This function should be removed once the functionality in
+  // the inliner is implemented.
+  void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {
+    if (PSI->isHotCount(EntryCount))
+      FreqAttr = FFA_Hot;
+    else if (PSI->isColdCount(MaxCount))
+      FreqAttr = FFA_Cold;
+  }
+};
+
+} // end anonymous namespace
+
+// Visit all the edges and assign the count value for the instrumented
+// edges and the BB. Return false on error.
+bool PGOUseFunc::setInstrumentedCounts(
+    const std::vector<uint64_t> &CountFromProfile) {
+
+  std::vector<BasicBlock *> InstrumentBBs;
+  FuncInfo.getInstrumentBBs(InstrumentBBs);
+  unsigned NumCounters =
+      InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
+  // The number of counters here should match the number of counters
+  // in profile. Return if they mismatch.
+  if (NumCounters != CountFromProfile.size()) {
+    return false;
+  }
   auto *FuncEntry = &*F.begin();
 
-  // Set the profile count to the Instrumented BBs. 
-  uint32_t I = 0; 
-  for (BasicBlock *InstrBB : InstrumentBBs) { 
-    uint64_t CountValue = CountFromProfile[I++]; 
-    UseBBInfo &Info = getBBInfo(InstrBB); 
+  // Set the profile count to the Instrumented BBs.
+  uint32_t I = 0;
+  for (BasicBlock *InstrBB : InstrumentBBs) {
+    uint64_t CountValue = CountFromProfile[I++];
+    UseBBInfo &Info = getBBInfo(InstrBB);
     // If we reach here, we know that we have some nonzero count
     // values in this function. The entry count should not be 0.
     // Fix it if necessary.
     if (InstrBB == FuncEntry && CountValue == 0)
       CountValue = 1;
-    Info.setBBInfoCount(CountValue); 
-  } 
-  ProfileCountSize = CountFromProfile.size(); 
-  CountPosition = I; 
- 
-  // Set the edge count and update the count of unknown edges for BBs. 
-  auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void { 
-    E->setEdgeCount(Value); 
-    this->getBBInfo(E->SrcBB).UnknownCountOutEdge--; 
-    this->getBBInfo(E->DestBB).UnknownCountInEdge--; 
-  }; 
- 
-  // Set the profile count the Instrumented edges. There are BBs that not in 
-  // MST but not instrumented. Need to set the edge count value so that we can 
-  // populate the profile counts later. 
-  for (auto &E : FuncInfo.MST.AllEdges) { 
-    if (E->Removed || E->InMST) 
-      continue; 
-    const BasicBlock *SrcBB = E->SrcBB; 
-    UseBBInfo &SrcInfo = getBBInfo(SrcBB); 
- 
-    // If only one out-edge, the edge profile count should be the same as BB 
-    // profile count. 
-    if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1) 
-      setEdgeCount(E.get(), SrcInfo.CountValue); 
-    else { 
-      const BasicBlock *DestBB = E->DestBB; 
-      UseBBInfo &DestInfo = getBBInfo(DestBB); 
-      // If only one in-edge, the edge profile count should be the same as BB 
-      // profile count. 
-      if (DestInfo.CountValid && DestInfo.InEdges.size() == 1) 
-        setEdgeCount(E.get(), DestInfo.CountValue); 
-    } 
-    if (E->CountValid) 
-      continue; 
-    // E's count should have been set from profile. If not, this meenas E skips 
-    // the instrumentation. We set the count to 0. 
-    setEdgeCount(E.get(), 0); 
-  } 
-  return true; 
-} 
- 
-// Set the count value for the unknown edge. There should be one and only one 
-// unknown edge in Edges vector. 
-void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) { 
-  for (auto &E : Edges) { 
-    if (E->CountValid) 
-      continue; 
-    E->setEdgeCount(Value); 
- 
-    getBBInfo(E->SrcBB).UnknownCountOutEdge--; 
-    getBBInfo(E->DestBB).UnknownCountInEdge--; 
-    return; 
-  } 
-  llvm_unreachable("Cannot find the unknown count edge"); 
-} 
- 
-// Read the profile from ProfileFileName and assign the value to the 
-// instrumented BB and the edges. This function also updates ProgramMaxCount. 
-// Return true if the profile are successfully read, and false on errors. 
+    Info.setBBInfoCount(CountValue);
+  }
+  ProfileCountSize = CountFromProfile.size();
+  CountPosition = I;
+
+  // Set the edge count and update the count of unknown edges for BBs.
+  auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void {
+    E->setEdgeCount(Value);
+    this->getBBInfo(E->SrcBB).UnknownCountOutEdge--;
+    this->getBBInfo(E->DestBB).UnknownCountInEdge--;
+  };
+
+  // Set the profile count for the instrumented edges. There are BBs that are
+  // not in the MST but not instrumented. Need to set the edge count value so
+  // that we can populate the profile counts later.
+  for (auto &E : FuncInfo.MST.AllEdges) {
+    if (E->Removed || E->InMST)
+      continue;
+    const BasicBlock *SrcBB = E->SrcBB;
+    UseBBInfo &SrcInfo = getBBInfo(SrcBB);
+
+    // If only one out-edge, the edge profile count should be the same as BB
+    // profile count.
+    if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1)
+      setEdgeCount(E.get(), SrcInfo.CountValue);
+    else {
+      const BasicBlock *DestBB = E->DestBB;
+      UseBBInfo &DestInfo = getBBInfo(DestBB);
+      // If only one in-edge, the edge profile count should be the same as BB
+      // profile count.
+      if (DestInfo.CountValid && DestInfo.InEdges.size() == 1)
+        setEdgeCount(E.get(), DestInfo.CountValue);
+    }
+    if (E->CountValid)
+      continue;
+    // E's count should have been set from profile. If not, this means E skips
+    // the instrumentation. We set the count to 0.
+    setEdgeCount(E.get(), 0);
+  }
+  return true;
+}
+
+// Set the count value for the unknown edge. There should be one and only one
+// unknown edge in Edges vector.
+void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
+  for (auto &E : Edges) {
+    if (E->CountValid)
+      continue;
+    E->setEdgeCount(Value);
+
+    getBBInfo(E->SrcBB).UnknownCountOutEdge--;
+    getBBInfo(E->DestBB).UnknownCountInEdge--;
+    return;
+  }
+  llvm_unreachable("Cannot find the unknown count edge");
+}
+
+// Read the profile from ProfileFileName and assign the value to the
+// instrumented BB and the edges. This function also updates ProgramMaxCount.
+// Return true if the profile is successfully read, and false on errors.
 bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
                               bool &AllMinusOnes) {
-  auto &Ctx = M->getContext(); 
-  Expected<InstrProfRecord> Result = 
-      PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash); 
-  if (Error E = Result.takeError()) { 
-    handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { 
-      auto Err = IPE.get(); 
-      bool SkipWarning = false; 
-      LLVM_DEBUG(dbgs() << "Error in reading profile for Func " 
-                        << FuncInfo.FuncName << ": "); 
-      if (Err == instrprof_error::unknown_function) { 
-        IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++; 
-        SkipWarning = !PGOWarnMissing; 
-        LLVM_DEBUG(dbgs() << "unknown function"); 
-      } else if (Err == instrprof_error::hash_mismatch || 
-                 Err == instrprof_error::malformed) { 
-        IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++; 
-        SkipWarning = 
-            NoPGOWarnMismatch || 
-            (NoPGOWarnMismatchComdat && 
-             (F.hasComdat() || 
-              F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); 
-        LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")"); 
-      } 
- 
-      LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n"); 
-      if (SkipWarning) 
-        return; 
- 
-      std::string Msg = IPE.message() + std::string(" ") + F.getName().str() + 
-                        std::string(" Hash = ") + 
-                        std::to_string(FuncInfo.FunctionHash); 
- 
-      Ctx.diagnose( 
-          DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); 
-    }); 
-    return false; 
-  } 
-  ProfileRecord = std::move(Result.get()); 
-  std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts; 
- 
-  IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++; 
-  LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n"); 
+  auto &Ctx = M->getContext();
+  Expected<InstrProfRecord> Result =
+      PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash);
+  if (Error E = Result.takeError()) {
+    handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
+      auto Err = IPE.get();
+      bool SkipWarning = false;
+      LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
+                        << FuncInfo.FuncName << ": ");
+      if (Err == instrprof_error::unknown_function) {
+        IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
+        SkipWarning = !PGOWarnMissing;
+        LLVM_DEBUG(dbgs() << "unknown function");
+      } else if (Err == instrprof_error::hash_mismatch ||
+                 Err == instrprof_error::malformed) {
+        IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
+        SkipWarning =
+            NoPGOWarnMismatch ||
+            (NoPGOWarnMismatchComdat &&
+             (F.hasComdat() ||
+              F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
+        LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
+      }
+
+      LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");
+      if (SkipWarning)
+        return;
+
+      std::string Msg = IPE.message() + std::string(" ") + F.getName().str() +
+                        std::string(" Hash = ") +
+                        std::to_string(FuncInfo.FunctionHash);
+
+      Ctx.diagnose(
+          DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
+    });
+    return false;
+  }
+  ProfileRecord = std::move(Result.get());
+  std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts;
+
+  IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
+  LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
   AllMinusOnes = (CountFromProfile.size() > 0);
-  uint64_t ValueSum = 0; 
-  for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) { 
-    LLVM_DEBUG(dbgs() << "  " << I << ": " << CountFromProfile[I] << "\n"); 
-    ValueSum += CountFromProfile[I]; 
+  uint64_t ValueSum = 0;
+  for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
+    LLVM_DEBUG(dbgs() << "  " << I << ": " << CountFromProfile[I] << "\n");
+    ValueSum += CountFromProfile[I];
     if (CountFromProfile[I] != (uint64_t)-1)
       AllMinusOnes = false;
-  } 
-  AllZeros = (ValueSum == 0); 
- 
-  LLVM_DEBUG(dbgs() << "SUM =  " << ValueSum << "\n"); 
- 
-  getBBInfo(nullptr).UnknownCountOutEdge = 2; 
-  getBBInfo(nullptr).UnknownCountInEdge = 2; 
- 
-  if (!setInstrumentedCounts(CountFromProfile)) { 
-    LLVM_DEBUG( 
-        dbgs() << "Inconsistent number of counts, skipping this function"); 
-    Ctx.diagnose(DiagnosticInfoPGOProfile( 
-        M->getName().data(), 
-        Twine("Inconsistent number of counts in ") + F.getName().str() 
-        + Twine(": the profile may be stale or there is a function name collision."), 
-        DS_Warning)); 
-    return false; 
-  } 
-  ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS); 
-  return true; 
-} 
- 
-// Populate the counters from instrumented BBs to all BBs. 
-// In the end of this operation, all BBs should have a valid count value. 
-void PGOUseFunc::populateCounters() { 
-  bool Changes = true; 
-  unsigned NumPasses = 0; 
-  while (Changes) { 
-    NumPasses++; 
-    Changes = false; 
- 
-    // For efficient traversal, it's better to start from the end as most 
-    // of the instrumented edges are at the end. 
-    for (auto &BB : reverse(F)) { 
-      UseBBInfo *Count = findBBInfo(&BB); 
-      if (Count == nullptr) 
-        continue; 
-      if (!Count->CountValid) { 
-        if (Count->UnknownCountOutEdge == 0) { 
-          Count->CountValue = sumEdgeCount(Count->OutEdges); 
-          Count->CountValid = true; 
-          Changes = true; 
-        } else if (Count->UnknownCountInEdge == 0) { 
-          Count->CountValue = sumEdgeCount(Count->InEdges); 
-          Count->CountValid = true; 
-          Changes = true; 
-        } 
-      } 
-      if (Count->CountValid) { 
-        if (Count->UnknownCountOutEdge == 1) { 
-          uint64_t Total = 0; 
-          uint64_t OutSum = sumEdgeCount(Count->OutEdges); 
-          // If the one of the successor block can early terminate (no-return), 
-          // we can end up with situation where out edge sum count is larger as 
-          // the source BB's count is collected by a post-dominated block. 
-          if (Count->CountValue > OutSum) 
-            Total = Count->CountValue - OutSum; 
-          setEdgeCount(Count->OutEdges, Total); 
-          Changes = true; 
-        } 
-        if (Count->UnknownCountInEdge == 1) { 
-          uint64_t Total = 0; 
-          uint64_t InSum = sumEdgeCount(Count->InEdges); 
-          if (Count->CountValue > InSum) 
-            Total = Count->CountValue - InSum; 
-          setEdgeCount(Count->InEdges, Total); 
-          Changes = true; 
-        } 
-      } 
-    } 
-  } 
- 
-  LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n"); 
-#ifndef NDEBUG 
-  // Assert every BB has a valid counter. 
-  for (auto &BB : F) { 
-    auto BI = findBBInfo(&BB); 
-    if (BI == nullptr) 
-      continue; 
-    assert(BI->CountValid && "BB count is not valid"); 
-  } 
-#endif 
-  uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue; 
-  uint64_t FuncMaxCount = FuncEntryCount; 
-  for (auto &BB : F) { 
-    auto BI = findBBInfo(&BB); 
-    if (BI == nullptr) 
-      continue; 
-    FuncMaxCount = std::max(FuncMaxCount, BI->CountValue); 
-  } 
+  }
+  AllZeros = (ValueSum == 0);
+
+  LLVM_DEBUG(dbgs() << "SUM =  " << ValueSum << "\n");
+
+  getBBInfo(nullptr).UnknownCountOutEdge = 2;
+  getBBInfo(nullptr).UnknownCountInEdge = 2;
+
+  if (!setInstrumentedCounts(CountFromProfile)) {
+    LLVM_DEBUG(
+        dbgs() << "Inconsistent number of counts, skipping this function");
+    Ctx.diagnose(DiagnosticInfoPGOProfile(
+        M->getName().data(),
+        Twine("Inconsistent number of counts in ") + F.getName().str()
+        + Twine(": the profile may be stale or there is a function name collision."),
+        DS_Warning));
+    return false;
+  }
+  ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS);
+  return true;
+}
+
+// Populate the counters from instrumented BBs to all BBs.
+// At the end of this operation, all BBs should have a valid count value.
+void PGOUseFunc::populateCounters() {
+  bool Changes = true;
+  unsigned NumPasses = 0;
+  while (Changes) {
+    NumPasses++;
+    Changes = false;
+
+    // For efficient traversal, it's better to start from the end as most
+    // of the instrumented edges are at the end.
+    for (auto &BB : reverse(F)) {
+      UseBBInfo *Count = findBBInfo(&BB);
+      if (Count == nullptr)
+        continue;
+      if (!Count->CountValid) {
+        if (Count->UnknownCountOutEdge == 0) {
+          Count->CountValue = sumEdgeCount(Count->OutEdges);
+          Count->CountValid = true;
+          Changes = true;
+        } else if (Count->UnknownCountInEdge == 0) {
+          Count->CountValue = sumEdgeCount(Count->InEdges);
+          Count->CountValid = true;
+          Changes = true;
+        }
+      }
+      if (Count->CountValid) {
+        if (Count->UnknownCountOutEdge == 1) {
+          uint64_t Total = 0;
+          uint64_t OutSum = sumEdgeCount(Count->OutEdges);
+          // If one of the successor blocks can terminate early (no-return), we
+          // can end up with a situation where the out-edge sum count is larger,
+          // as the source BB's count is collected by a post-dominated block.
+          if (Count->CountValue > OutSum)
+            Total = Count->CountValue - OutSum;
+          setEdgeCount(Count->OutEdges, Total);
+          Changes = true;
+        }
+        if (Count->UnknownCountInEdge == 1) {
+          uint64_t Total = 0;
+          uint64_t InSum = sumEdgeCount(Count->InEdges);
+          if (Count->CountValue > InSum)
+            Total = Count->CountValue - InSum;
+          setEdgeCount(Count->InEdges, Total);
+          Changes = true;
+        }
+      }
+    }
+  }
+
+  LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");
+#ifndef NDEBUG
+  // Assert every BB has a valid counter.
+  for (auto &BB : F) {
+    auto BI = findBBInfo(&BB);
+    if (BI == nullptr)
+      continue;
+    assert(BI->CountValid && "BB count is not valid");
+  }
+#endif
+  uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue;
+  uint64_t FuncMaxCount = FuncEntryCount;
+  for (auto &BB : F) {
+    auto BI = findBBInfo(&BB);
+    if (BI == nullptr)
+      continue;
+    FuncMaxCount = std::max(FuncMaxCount, BI->CountValue);
+  }
 
   // Fix the obviously inconsistent entry count.
   if (FuncMaxCount > 0 && FuncEntryCount == 0)
     FuncEntryCount = 1;
   F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real));
-  markFunctionAttributes(FuncEntryCount, FuncMaxCount); 
- 
-  // Now annotate select instructions 
-  FuncInfo.SIVisitor.annotateSelects(F, this, &CountPosition); 
-  assert(CountPosition == ProfileCountSize); 
- 
-  LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile.")); 
-} 
- 
-// Assign the scaled count values to the BB with multiple out edges. 
-void PGOUseFunc::setBranchWeights() { 
-  // Generate MD_prof metadata for every branch instruction. 
-  LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName() 
-                    << " IsCS=" << IsCS << "\n"); 
-  for (auto &BB : F) { 
-    Instruction *TI = BB.getTerminator(); 
-    if (TI->getNumSuccessors() < 2) 
-      continue; 
-    if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || 
-          isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI))) 
-      continue; 
- 
-    if (getBBInfo(&BB).CountValue == 0) 
-      continue; 
- 
-    // We have a non-zero Branch BB. 
-    const UseBBInfo &BBCountInfo = getBBInfo(&BB); 
-    unsigned Size = BBCountInfo.OutEdges.size(); 
-    SmallVector<uint64_t, 2> EdgeCounts(Size, 0); 
-    uint64_t MaxCount = 0; 
-    for (unsigned s = 0; s < Size; s++) { 
-      const PGOUseEdge *E = BBCountInfo.OutEdges[s]; 
-      const BasicBlock *SrcBB = E->SrcBB; 
-      const BasicBlock *DestBB = E->DestBB; 
-      if (DestBB == nullptr) 
-        continue; 
-      unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); 
-      uint64_t EdgeCount = E->CountValue; 
-      if (EdgeCount > MaxCount) 
-        MaxCount = EdgeCount; 
-      EdgeCounts[SuccNum] = EdgeCount; 
-    } 
-    setProfMetadata(M, TI, EdgeCounts, MaxCount); 
-  } 
-} 
- 
-static bool isIndirectBrTarget(BasicBlock *BB) { 
-  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { 
-    if (isa<IndirectBrInst>((*PI)->getTerminator())) 
-      return true; 
-  } 
-  return false; 
-} 
- 
-void PGOUseFunc::annotateIrrLoopHeaderWeights() { 
-  LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n"); 
-  // Find irr loop headers 
-  for (auto &BB : F) { 
-    // As a heuristic also annotate indrectbr targets as they have a high chance 
-    // to become an irreducible loop header after the indirectbr tail 
-    // duplication. 
-    if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) { 
-      Instruction *TI = BB.getTerminator(); 
-      const UseBBInfo &BBCountInfo = getBBInfo(&BB); 
-      setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue); 
-    } 
-  } 
-} 
- 
-void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) { 
-  Module *M = F.getParent(); 
-  IRBuilder<> Builder(&SI); 
-  Type *Int64Ty = Builder.getInt64Ty(); 
-  Type *I8PtrTy = Builder.getInt8PtrTy(); 
-  auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty); 
-  Builder.CreateCall( 
-      Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step), 
-      {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), 
-       Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs), 
-       Builder.getInt32(*CurCtrIdx), Step}); 
-  ++(*CurCtrIdx); 
-} 
- 
-void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) { 
-  std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts; 
-  assert(*CurCtrIdx < CountFromProfile.size() && 
-         "Out of bound access of counters"); 
-  uint64_t SCounts[2]; 
-  SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count 
-  ++(*CurCtrIdx); 
-  uint64_t TotalCount = 0; 
-  auto BI = UseFunc->findBBInfo(SI.getParent()); 
-  if (BI != nullptr) 
-    TotalCount = BI->CountValue; 
-  // False Count 
-  SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0); 
-  uint64_t MaxCount = std::max(SCounts[0], SCounts[1]); 
-  if (MaxCount) 
-    setProfMetadata(F.getParent(), &SI, SCounts, MaxCount); 
-} 
- 
-void SelectInstVisitor::visitSelectInst(SelectInst &SI) { 
-  if (!PGOInstrSelect) 
-    return; 
-  // FIXME: do not handle this yet. 
-  if (SI.getCondition()->getType()->isVectorTy()) 
-    return; 
- 
-  switch (Mode) { 
-  case VM_counting: 
-    NSIs++; 
-    return; 
-  case VM_instrument: 
-    instrumentOneSelectInst(SI); 
-    return; 
-  case VM_annotate: 
-    annotateOneSelectInst(SI); 
-    return; 
-  } 
- 
-  llvm_unreachable("Unknown visiting mode"); 
-} 
- 
-// Traverse all valuesites and annotate the instructions for all value kind. 
-void PGOUseFunc::annotateValueSites() { 
-  if (DisableValueProfiling) 
-    return; 
- 
-  // Create the PGOFuncName meta data. 
-  createPGOFuncNameMetadata(F, FuncInfo.FuncName); 
- 
-  for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 
-    annotateValueSites(Kind); 
-} 
- 
-// Annotate the instructions for a specific value kind. 
-void PGOUseFunc::annotateValueSites(uint32_t Kind) { 
-  assert(Kind <= IPVK_Last); 
-  unsigned ValueSiteIndex = 0; 
-  auto &ValueSites = FuncInfo.ValueSites[Kind]; 
-  unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind); 
-  if (NumValueSites != ValueSites.size()) { 
-    auto &Ctx = M->getContext(); 
-    Ctx.diagnose(DiagnosticInfoPGOProfile( 
-        M->getName().data(), 
-        Twine("Inconsistent number of value sites for ") + 
-            Twine(ValueProfKindDescr[Kind]) + 
-            Twine(" profiling in \"") + F.getName().str() + 
-            Twine("\", possibly due to the use of a stale profile."), 
-        DS_Warning)); 
-    return; 
-  } 
- 
-  for (VPCandidateInfo &I : ValueSites) { 
-    LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind 
-                      << "): Index = " << ValueSiteIndex << " out of " 
-                      << NumValueSites << "\n"); 
-    annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord, 
-                      static_cast<InstrProfValueKind>(Kind), ValueSiteIndex, 
-                      Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations 
-                                             : MaxNumAnnotations); 
-    ValueSiteIndex++; 
-  } 
-} 
- 
-// Collect the set of members for each Comdat in module M and store 
-// in ComdatMembers. 
-static void collectComdatMembers( 
-    Module &M, 
-    std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) { 
-  if (!DoComdatRenaming) 
-    return; 
-  for (Function &F : M) 
-    if (Comdat *C = F.getComdat()) 
-      ComdatMembers.insert(std::make_pair(C, &F)); 
-  for (GlobalVariable &GV : M.globals()) 
-    if (Comdat *C = GV.getComdat()) 
-      ComdatMembers.insert(std::make_pair(C, &GV)); 
-  for (GlobalAlias &GA : M.aliases()) 
-    if (Comdat *C = GA.getComdat()) 
-      ComdatMembers.insert(std::make_pair(C, &GA)); 
-} 
- 
-static bool InstrumentAllFunctions( 
-    Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI, 
-    function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 
-    function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) { 
-  // For the context-sensitve instrumentation, we should have a separated pass 
-  // (before LTO/ThinLTO linking) to create these variables. 
-  if (!IsCS) 
+  markFunctionAttributes(FuncEntryCount, FuncMaxCount);
+
+  // Now annotate select instructions
+  FuncInfo.SIVisitor.annotateSelects(F, this, &CountPosition);
+  assert(CountPosition == ProfileCountSize);
+
+  LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile."));
+}
+
+// Assign the scaled count values to the BB with multiple out edges.
+void PGOUseFunc::setBranchWeights() {
+  // Generate MD_prof metadata for every branch instruction.
+  LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName()
+                    << " IsCS=" << IsCS << "\n");
+  for (auto &BB : F) {
+    Instruction *TI = BB.getTerminator();
+    if (TI->getNumSuccessors() < 2)
+      continue;
+    if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) ||
+          isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI)))
+      continue;
+
+    if (getBBInfo(&BB).CountValue == 0)
+      continue;
+
+    // We have a non-zero Branch BB.
+    const UseBBInfo &BBCountInfo = getBBInfo(&BB);
+    unsigned Size = BBCountInfo.OutEdges.size();
+    SmallVector<uint64_t, 2> EdgeCounts(Size, 0);
+    uint64_t MaxCount = 0;
+    for (unsigned s = 0; s < Size; s++) {
+      const PGOUseEdge *E = BBCountInfo.OutEdges[s];
+      const BasicBlock *SrcBB = E->SrcBB;
+      const BasicBlock *DestBB = E->DestBB;
+      if (DestBB == nullptr)
+        continue;
+      unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
+      uint64_t EdgeCount = E->CountValue;
+      if (EdgeCount > MaxCount)
+        MaxCount = EdgeCount;
+      EdgeCounts[SuccNum] = EdgeCount;
+    }
+    setProfMetadata(M, TI, EdgeCounts, MaxCount);
+  }
+}
+
+static bool isIndirectBrTarget(BasicBlock *BB) {
+  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+    if (isa<IndirectBrInst>((*PI)->getTerminator()))
+      return true;
+  }
+  return false;
+}
+
+void PGOUseFunc::annotateIrrLoopHeaderWeights() {
+  LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n");
+  // Find irr loop headers
+  for (auto &BB : F) {
+    // As a heuristic also annotate indrectbr targets as they have a high chance
+    // to become an irreducible loop header after the indirectbr tail
+    // duplication.
+    if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) {
+      Instruction *TI = BB.getTerminator();
+      const UseBBInfo &BBCountInfo = getBBInfo(&BB);
+      setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue);
+    }
+  }
+}
+
+void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
+  Module *M = F.getParent();
+  IRBuilder<> Builder(&SI);
+  Type *Int64Ty = Builder.getInt64Ty();
+  Type *I8PtrTy = Builder.getInt8PtrTy();
+  auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
+  Builder.CreateCall(
+      Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step),
+      {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
+       Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs),
+       Builder.getInt32(*CurCtrIdx), Step});
+  ++(*CurCtrIdx);
+}
+
+void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) {
+  std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts;
+  assert(*CurCtrIdx < CountFromProfile.size() &&
+         "Out of bound access of counters");
+  uint64_t SCounts[2];
+  SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count
+  ++(*CurCtrIdx);
+  uint64_t TotalCount = 0;
+  auto BI = UseFunc->findBBInfo(SI.getParent());
+  if (BI != nullptr)
+    TotalCount = BI->CountValue;
+  // False Count
+  SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0);
+  uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
+  if (MaxCount)
+    setProfMetadata(F.getParent(), &SI, SCounts, MaxCount);
+}
+
+void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
+  if (!PGOInstrSelect)
+    return;
+  // FIXME: do not handle this yet.
+  if (SI.getCondition()->getType()->isVectorTy())
+    return;
+
+  switch (Mode) {
+  case VM_counting:
+    NSIs++;
+    return;
+  case VM_instrument:
+    instrumentOneSelectInst(SI);
+    return;
+  case VM_annotate:
+    annotateOneSelectInst(SI);
+    return;
+  }
+
+  llvm_unreachable("Unknown visiting mode");
+}
+
+// Traverse all valuesites and annotate the instructions for all value kind.
+void PGOUseFunc::annotateValueSites() {
+  if (DisableValueProfiling)
+    return;
+
+  // Create the PGOFuncName meta data.
+  createPGOFuncNameMetadata(F, FuncInfo.FuncName);
+
+  for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
+    annotateValueSites(Kind);
+}
+
+// Annotate the instructions for a specific value kind.
+void PGOUseFunc::annotateValueSites(uint32_t Kind) {
+  assert(Kind <= IPVK_Last);
+  unsigned ValueSiteIndex = 0;
+  auto &ValueSites = FuncInfo.ValueSites[Kind];
+  unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind);
+  if (NumValueSites != ValueSites.size()) {
+    auto &Ctx = M->getContext();
+    Ctx.diagnose(DiagnosticInfoPGOProfile(
+        M->getName().data(),
+        Twine("Inconsistent number of value sites for ") +
+            Twine(ValueProfKindDescr[Kind]) +
+            Twine(" profiling in \"") + F.getName().str() +
+            Twine("\", possibly due to the use of a stale profile."),
+        DS_Warning));
+    return;
+  }
+
+  for (VPCandidateInfo &I : ValueSites) {
+    LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind
+                      << "): Index = " << ValueSiteIndex << " out of "
+                      << NumValueSites << "\n");
+    annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord,
+                      static_cast<InstrProfValueKind>(Kind), ValueSiteIndex,
+                      Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations
+                                             : MaxNumAnnotations);
+    ValueSiteIndex++;
+  }
+}
+
+// Collect the set of members for each Comdat in module M and store
+// in ComdatMembers.
+static void collectComdatMembers(
+    Module &M,
+    std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
+  if (!DoComdatRenaming)
+    return;
+  for (Function &F : M)
+    if (Comdat *C = F.getComdat())
+      ComdatMembers.insert(std::make_pair(C, &F));
+  for (GlobalVariable &GV : M.globals())
+    if (Comdat *C = GV.getComdat())
+      ComdatMembers.insert(std::make_pair(C, &GV));
+  for (GlobalAlias &GA : M.aliases())
+    if (Comdat *C = GA.getComdat())
+      ComdatMembers.insert(std::make_pair(C, &GA));
+}
+
+static bool InstrumentAllFunctions(
+    Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
+    function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
+    function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
+  // For the context-sensitve instrumentation, we should have a separated pass
+  // (before LTO/ThinLTO linking) to create these variables.
+  if (!IsCS)
     createIRLevelProfileFlagVar(M, /* IsCS */ false, PGOInstrumentEntry);
-  std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 
-  collectComdatMembers(M, ComdatMembers); 
- 
-  for (auto &F : M) { 
-    if (F.isDeclaration()) 
-      continue; 
+  std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
+  collectComdatMembers(M, ComdatMembers);
+
+  for (auto &F : M) {
+    if (F.isDeclaration())
+      continue;
     if (F.hasFnAttribute(llvm::Attribute::NoProfile))
       continue;
-    auto &TLI = LookupTLI(F); 
-    auto *BPI = LookupBPI(F); 
-    auto *BFI = LookupBFI(F); 
-    instrumentOneFunc(F, &M, TLI, BPI, BFI, ComdatMembers, IsCS); 
-  } 
-  return true; 
-} 
- 
-PreservedAnalyses 
-PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) { 
-  createProfileFileNameVar(M, CSInstrName); 
+    auto &TLI = LookupTLI(F);
+    auto *BPI = LookupBPI(F);
+    auto *BFI = LookupBFI(F);
+    instrumentOneFunc(F, &M, TLI, BPI, BFI, ComdatMembers, IsCS);
+  }
+  return true;
+}
+
+PreservedAnalyses
+PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) {
+  createProfileFileNameVar(M, CSInstrName);
   createIRLevelProfileFlagVar(M, /* IsCS */ true, PGOInstrumentEntry);
-  return PreservedAnalyses::all(); 
-} 
- 
-bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) { 
-  if (skipModule(M)) 
-    return false; 
- 
-  auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & { 
-    return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 
-  }; 
-  auto LookupBPI = [this](Function &F) { 
-    return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI(); 
-  }; 
-  auto LookupBFI = [this](Function &F) { 
-    return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); 
-  }; 
-  return InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS); 
-} 
- 
-PreservedAnalyses PGOInstrumentationGen::run(Module &M, 
-                                             ModuleAnalysisManager &AM) { 
-  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 
-  auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 
-    return FAM.getResult<TargetLibraryAnalysis>(F); 
-  }; 
-  auto LookupBPI = [&FAM](Function &F) { 
-    return &FAM.getResult<BranchProbabilityAnalysis>(F); 
-  }; 
-  auto LookupBFI = [&FAM](Function &F) { 
-    return &FAM.getResult<BlockFrequencyAnalysis>(F); 
-  }; 
- 
-  if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS)) 
-    return PreservedAnalyses::all(); 
- 
-  return PreservedAnalyses::none(); 
-} 
- 
+  return PreservedAnalyses::all();
+}
+
+bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) {
+  if (skipModule(M))
+    return false;
+
+  auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & {
+    return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+  };
+  auto LookupBPI = [this](Function &F) {
+    return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI();
+  };
+  auto LookupBFI = [this](Function &F) {
+    return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
+  };
+  return InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS);
+}
+
+PreservedAnalyses PGOInstrumentationGen::run(Module &M,
+                                             ModuleAnalysisManager &AM) {
+  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+  auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
+    return FAM.getResult<TargetLibraryAnalysis>(F);
+  };
+  auto LookupBPI = [&FAM](Function &F) {
+    return &FAM.getResult<BranchProbabilityAnalysis>(F);
+  };
+  auto LookupBFI = [&FAM](Function &F) {
+    return &FAM.getResult<BlockFrequencyAnalysis>(F);
+  };
+
+  if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS))
+    return PreservedAnalyses::all();
+
+  return PreservedAnalyses::none();
+}
+
 // Using the ratio b/w sums of profile count values and BFI count values to
 // adjust the func entry count.
 static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI,
@@ -1766,69 +1766,69 @@ static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI,
     });
 }
 
-static bool annotateAllFunctions( 
-    Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, 
-    function_ref<TargetLibraryInfo &(Function &)> LookupTLI, 
-    function_ref<BranchProbabilityInfo *(Function &)> LookupBPI, 
-    function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, 
-    ProfileSummaryInfo *PSI, bool IsCS) { 
-  LLVM_DEBUG(dbgs() << "Read in profile counters: "); 
-  auto &Ctx = M.getContext(); 
-  // Read the counter array from file. 
-  auto ReaderOrErr = 
-      IndexedInstrProfReader::create(ProfileFileName, ProfileRemappingFileName); 
-  if (Error E = ReaderOrErr.takeError()) { 
-    handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) { 
-      Ctx.diagnose( 
-          DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message())); 
-    }); 
-    return false; 
-  } 
- 
-  std::unique_ptr<IndexedInstrProfReader> PGOReader = 
-      std::move(ReaderOrErr.get()); 
-  if (!PGOReader) { 
-    Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(), 
-                                          StringRef("Cannot get PGOReader"))); 
-    return false; 
-  } 
-  if (!PGOReader->hasCSIRLevelProfile() && IsCS) 
-    return false; 
- 
-  // TODO: might need to change the warning once the clang option is finalized. 
-  if (!PGOReader->isIRLevelProfile()) { 
-    Ctx.diagnose(DiagnosticInfoPGOProfile( 
-        ProfileFileName.data(), "Not an IR level instrumentation profile")); 
-    return false; 
-  } 
- 
-  // Add the profile summary (read from the header of the indexed summary) here 
-  // so that we can use it below when reading counters (which checks if the 
-  // function should be marked with a cold or inlinehint attribute). 
-  M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()), 
-                      IsCS ? ProfileSummary::PSK_CSInstr 
-                           : ProfileSummary::PSK_Instr); 
-  PSI->refresh(); 
- 
-  std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; 
-  collectComdatMembers(M, ComdatMembers); 
-  std::vector<Function *> HotFunctions; 
-  std::vector<Function *> ColdFunctions; 
+static bool annotateAllFunctions(
+    Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
+    function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
+    function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
+    function_ref<BlockFrequencyInfo *(Function &)> LookupBFI,
+    ProfileSummaryInfo *PSI, bool IsCS) {
+  LLVM_DEBUG(dbgs() << "Read in profile counters: ");
+  auto &Ctx = M.getContext();
+  // Read the counter array from file.
+  auto ReaderOrErr =
+      IndexedInstrProfReader::create(ProfileFileName, ProfileRemappingFileName);
+  if (Error E = ReaderOrErr.takeError()) {
+    handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
+      Ctx.diagnose(
+          DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message()));
+    });
+    return false;
+  }
+
+  std::unique_ptr<IndexedInstrProfReader> PGOReader =
+      std::move(ReaderOrErr.get());
+  if (!PGOReader) {
+    Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(),
+                                          StringRef("Cannot get PGOReader")));
+    return false;
+  }
+  if (!PGOReader->hasCSIRLevelProfile() && IsCS)
+    return false;
+
+  // TODO: might need to change the warning once the clang option is finalized.
+  if (!PGOReader->isIRLevelProfile()) {
+    Ctx.diagnose(DiagnosticInfoPGOProfile(
+        ProfileFileName.data(), "Not an IR level instrumentation profile"));
+    return false;
+  }
+
+  // Add the profile summary (read from the header of the indexed summary) here
+  // so that we can use it below when reading counters (which checks if the
+  // function should be marked with a cold or inlinehint attribute).
+  M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
+                      IsCS ? ProfileSummary::PSK_CSInstr
+                           : ProfileSummary::PSK_Instr);
+  PSI->refresh();
+
+  std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
+  collectComdatMembers(M, ComdatMembers);
+  std::vector<Function *> HotFunctions;
+  std::vector<Function *> ColdFunctions;
 
   // If the profile marked as always instrument the entry BB, do the
   // same. Note this can be overwritten by the internal option in CFGMST.h
   bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
   if (PGOInstrumentEntry.getNumOccurrences() > 0)
     InstrumentFuncEntry = PGOInstrumentEntry;
-  for (auto &F : M) { 
-    if (F.isDeclaration()) 
-      continue; 
-    auto &TLI = LookupTLI(F); 
-    auto *BPI = LookupBPI(F); 
-    auto *BFI = LookupBFI(F); 
-    // Split indirectbr critical edges here before computing the MST rather than 
-    // later in getInstrBB() to avoid invalidating it. 
-    SplitIndirectBrCriticalEdges(F, BPI, BFI); 
+  for (auto &F : M) {
+    if (F.isDeclaration())
+      continue;
+    auto &TLI = LookupTLI(F);
+    auto *BPI = LookupBPI(F);
+    auto *BFI = LookupBFI(F);
+    // Split indirectbr critical edges here before computing the MST rather than
+    // later in getInstrBB() to avoid invalidating it.
+    SplitIndirectBrCriticalEdges(F, BPI, BFI);
     PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
                     InstrumentFuncEntry);
     // When AllMinusOnes is true, it means the profile for the function
@@ -1836,15 +1836,15 @@ static bool annotateAllFunctions(
     // entry count of the function to be multiple times of hot threshold
     // and drop all its internal counters.
     bool AllMinusOnes = false;
-    bool AllZeros = false; 
+    bool AllZeros = false;
     if (!Func.readCounters(PGOReader.get(), AllZeros, AllMinusOnes))
-      continue; 
-    if (AllZeros) { 
-      F.setEntryCount(ProfileCount(0, Function::PCT_Real)); 
-      if (Func.getProgramMaxCount() != 0) 
-        ColdFunctions.push_back(&F); 
-      continue; 
-    } 
+      continue;
+    if (AllZeros) {
+      F.setEntryCount(ProfileCount(0, Function::PCT_Real));
+      if (Func.getProgramMaxCount() != 0)
+        ColdFunctions.push_back(&F);
+      continue;
+    }
     const unsigned MultiplyFactor = 3;
     if (AllMinusOnes) {
       uint64_t HotThreshold = PSI->getHotCountThreshold();
@@ -1854,43 +1854,43 @@ static bool annotateAllFunctions(
       HotFunctions.push_back(&F);
       continue;
     }
-    Func.populateCounters(); 
-    Func.setBranchWeights(); 
-    Func.annotateValueSites(); 
-    Func.annotateIrrLoopHeaderWeights(); 
-    PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr(); 
-    if (FreqAttr == PGOUseFunc::FFA_Cold) 
-      ColdFunctions.push_back(&F); 
-    else if (FreqAttr == PGOUseFunc::FFA_Hot) 
-      HotFunctions.push_back(&F); 
-    if (PGOViewCounts != PGOVCT_None && 
-        (ViewBlockFreqFuncName.empty() || 
-         F.getName().equals(ViewBlockFreqFuncName))) { 
-      LoopInfo LI{DominatorTree(F)}; 
-      std::unique_ptr<BranchProbabilityInfo> NewBPI = 
-          std::make_unique<BranchProbabilityInfo>(F, LI); 
-      std::unique_ptr<BlockFrequencyInfo> NewBFI = 
-          std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI); 
-      if (PGOViewCounts == PGOVCT_Graph) 
-        NewBFI->view(); 
-      else if (PGOViewCounts == PGOVCT_Text) { 
-        dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n"; 
-        NewBFI->print(dbgs()); 
-      } 
-    } 
-    if (PGOViewRawCounts != PGOVCT_None && 
-        (ViewBlockFreqFuncName.empty() || 
-         F.getName().equals(ViewBlockFreqFuncName))) { 
-      if (PGOViewRawCounts == PGOVCT_Graph) 
-        if (ViewBlockFreqFuncName.empty()) 
-          WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 
-        else 
-          ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName()); 
-      else if (PGOViewRawCounts == PGOVCT_Text) { 
-        dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n"; 
-        Func.dumpInfo(); 
-      } 
-    } 
+    Func.populateCounters();
+    Func.setBranchWeights();
+    Func.annotateValueSites();
+    Func.annotateIrrLoopHeaderWeights();
+    PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();
+    if (FreqAttr == PGOUseFunc::FFA_Cold)
+      ColdFunctions.push_back(&F);
+    else if (FreqAttr == PGOUseFunc::FFA_Hot)
+      HotFunctions.push_back(&F);
+    if (PGOViewCounts != PGOVCT_None &&
+        (ViewBlockFreqFuncName.empty() ||
+         F.getName().equals(ViewBlockFreqFuncName))) {
+      LoopInfo LI{DominatorTree(F)};
+      std::unique_ptr<BranchProbabilityInfo> NewBPI =
+          std::make_unique<BranchProbabilityInfo>(F, LI);
+      std::unique_ptr<BlockFrequencyInfo> NewBFI =
+          std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI);
+      if (PGOViewCounts == PGOVCT_Graph)
+        NewBFI->view();
+      else if (PGOViewCounts == PGOVCT_Text) {
+        dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n";
+        NewBFI->print(dbgs());
+      }
+    }
+    if (PGOViewRawCounts != PGOVCT_None &&
+        (ViewBlockFreqFuncName.empty() ||
+         F.getName().equals(ViewBlockFreqFuncName))) {
+      if (PGOViewRawCounts == PGOVCT_Graph)
+        if (ViewBlockFreqFuncName.empty())
+          WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
+        else
+          ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
+      else if (PGOViewRawCounts == PGOVCT_Text) {
+        dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n";
+        Func.dumpInfo();
+      }
+    }
 
     if (PGOVerifyBFI || PGOVerifyHotBFI || PGOFixEntryCount) {
       LoopInfo LI{DominatorTree(F)};
@@ -1908,18 +1908,18 @@ static bool annotateAllFunctions(
       }
       verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold);
     }
-  } 
- 
-  // Set function hotness attribute from the profile. 
-  // We have to apply these attributes at the end because their presence 
-  // can affect the BranchProbabilityInfo of any callers, resulting in an 
-  // inconsistent MST between prof-gen and prof-use. 
-  for (auto &F : HotFunctions) { 
-    F->addFnAttr(Attribute::InlineHint); 
-    LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName() 
-                      << "\n"); 
-  } 
-  for (auto &F : ColdFunctions) { 
+  }
+
+  // Set function hotness attribute from the profile.
+  // We have to apply these attributes at the end because their presence
+  // can affect the BranchProbabilityInfo of any callers, resulting in an
+  // inconsistent MST between prof-gen and prof-use.
+  for (auto &F : HotFunctions) {
+    F->addFnAttr(Attribute::InlineHint);
+    LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName()
+                      << "\n");
+  }
+  for (auto &F : ColdFunctions) {
     // Only set when there is no Attribute::Hot set by the user. For Hot
     // attribute, user's annotation has the precedence over the profile.
     if (F->hasFnAttribute(Attribute::Hot)) {
@@ -1931,190 +1931,190 @@ static bool annotateAllFunctions(
           DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
       continue;
     }
-    F->addFnAttr(Attribute::Cold); 
-    LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() 
-                      << "\n"); 
-  } 
-  return true; 
-} 
- 
-PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename, 
-                                             std::string RemappingFilename, 
-                                             bool IsCS) 
-    : ProfileFileName(std::move(Filename)), 
-      ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) { 
-  if (!PGOTestProfileFile.empty()) 
-    ProfileFileName = PGOTestProfileFile; 
-  if (!PGOTestProfileRemappingFile.empty()) 
-    ProfileRemappingFileName = PGOTestProfileRemappingFile; 
-} 
- 
-PreservedAnalyses PGOInstrumentationUse::run(Module &M, 
-                                             ModuleAnalysisManager &AM) { 
- 
-  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 
-  auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 
-    return FAM.getResult<TargetLibraryAnalysis>(F); 
-  }; 
-  auto LookupBPI = [&FAM](Function &F) { 
-    return &FAM.getResult<BranchProbabilityAnalysis>(F); 
-  }; 
-  auto LookupBFI = [&FAM](Function &F) { 
-    return &FAM.getResult<BlockFrequencyAnalysis>(F); 
-  }; 
- 
-  auto *PSI = &AM.getResult<ProfileSummaryAnalysis>(M); 
- 
-  if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, 
-                            LookupTLI, LookupBPI, LookupBFI, PSI, IsCS)) 
-    return PreservedAnalyses::all(); 
- 
-  return PreservedAnalyses::none(); 
-} 
- 
-bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) { 
-  if (skipModule(M)) 
-    return false; 
- 
-  auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & { 
-    return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 
-  }; 
-  auto LookupBPI = [this](Function &F) { 
-    return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI(); 
-  }; 
-  auto LookupBFI = [this](Function &F) { 
-    return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI(); 
-  }; 
- 
-  auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 
-  return annotateAllFunctions(M, ProfileFileName, "", LookupTLI, LookupBPI, 
-                              LookupBFI, PSI, IsCS); 
-} 
- 
-static std::string getSimpleNodeName(const BasicBlock *Node) { 
-  if (!Node->getName().empty()) 
-    return std::string(Node->getName()); 
- 
-  std::string SimpleNodeName; 
-  raw_string_ostream OS(SimpleNodeName); 
-  Node->printAsOperand(OS, false); 
-  return OS.str(); 
-} 
- 
-void llvm::setProfMetadata(Module *M, Instruction *TI, 
-                           ArrayRef<uint64_t> EdgeCounts, 
-                           uint64_t MaxCount) { 
-  MDBuilder MDB(M->getContext()); 
-  assert(MaxCount > 0 && "Bad max count"); 
-  uint64_t Scale = calculateCountScale(MaxCount); 
-  SmallVector<unsigned, 4> Weights; 
-  for (const auto &ECI : EdgeCounts) 
-    Weights.push_back(scaleBranchCount(ECI, Scale)); 
- 
-  LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W 
-                                           : Weights) { 
-    dbgs() << W << " "; 
-  } dbgs() << "\n";); 
- 
-  TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); 
-  if (EmitBranchProbability) { 
-    std::string BrCondStr = getBranchCondString(TI); 
-    if (BrCondStr.empty()) 
-      return; 
- 
-    uint64_t WSum = 
-        std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0, 
-                        [](uint64_t w1, uint64_t w2) { return w1 + w2; }); 
-    uint64_t TotalCount = 
-        std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0, 
-                        [](uint64_t c1, uint64_t c2) { return c1 + c2; }); 
-    Scale = calculateCountScale(WSum); 
-    BranchProbability BP(scaleBranchCount(Weights[0], Scale), 
-                         scaleBranchCount(WSum, Scale)); 
-    std::string BranchProbStr; 
-    raw_string_ostream OS(BranchProbStr); 
-    OS << BP; 
-    OS << " (total count : " << TotalCount << ")"; 
-    OS.flush(); 
-    Function *F = TI->getParent()->getParent(); 
-    OptimizationRemarkEmitter ORE(F); 
-    ORE.emit([&]() { 
-      return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI) 
-             << BrCondStr << " is true with probability : " << BranchProbStr; 
-    }); 
-  } 
-} 
- 
-namespace llvm { 
- 
-void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count) { 
-  MDBuilder MDB(M->getContext()); 
-  TI->setMetadata(llvm::LLVMContext::MD_irr_loop, 
-                  MDB.createIrrLoopHeaderWeight(Count)); 
-} 
- 
-template <> struct GraphTraits<PGOUseFunc *> { 
-  using NodeRef = const BasicBlock *; 
-  using ChildIteratorType = const_succ_iterator; 
-  using nodes_iterator = pointer_iterator<Function::const_iterator>; 
- 
-  static NodeRef getEntryNode(const PGOUseFunc *G) { 
-    return &G->getFunc().front(); 
-  } 
- 
-  static ChildIteratorType child_begin(const NodeRef N) { 
-    return succ_begin(N); 
-  } 
- 
-  static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); } 
- 
-  static nodes_iterator nodes_begin(const PGOUseFunc *G) { 
-    return nodes_iterator(G->getFunc().begin()); 
-  } 
- 
-  static nodes_iterator nodes_end(const PGOUseFunc *G) { 
-    return nodes_iterator(G->getFunc().end()); 
-  } 
-}; 
- 
-template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits { 
-  explicit DOTGraphTraits(bool isSimple = false) 
-      : DefaultDOTGraphTraits(isSimple) {} 
- 
-  static std::string getGraphName(const PGOUseFunc *G) { 
-    return std::string(G->getFunc().getName()); 
-  } 
- 
-  std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) { 
-    std::string Result; 
-    raw_string_ostream OS(Result); 
- 
-    OS << getSimpleNodeName(Node) << ":\\l"; 
-    UseBBInfo *BI = Graph->findBBInfo(Node); 
-    OS << "Count : "; 
-    if (BI && BI->CountValid) 
-      OS << BI->CountValue << "\\l"; 
-    else 
-      OS << "Unknown\\l"; 
- 
-    if (!PGOInstrSelect) 
-      return Result; 
- 
-    for (auto BI = Node->begin(); BI != Node->end(); ++BI) { 
-      auto *I = &*BI; 
-      if (!isa<SelectInst>(I)) 
-        continue; 
-      // Display scaled counts for SELECT instruction: 
-      OS << "SELECT : { T = "; 
-      uint64_t TC, FC; 
-      bool HasProf = I->extractProfMetadata(TC, FC); 
-      if (!HasProf) 
-        OS << "Unknown, F = Unknown }\\l"; 
-      else 
-        OS << TC << ", F = " << FC << " }\\l"; 
-    } 
-    return Result; 
-  } 
-}; 
- 
-} // end namespace llvm 
+    F->addFnAttr(Attribute::Cold);
+    LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName()
+                      << "\n");
+  }
+  return true;
+}
+
+PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename,
+                                             std::string RemappingFilename,
+                                             bool IsCS)
+    : ProfileFileName(std::move(Filename)),
+      ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) {
+  if (!PGOTestProfileFile.empty())
+    ProfileFileName = PGOTestProfileFile;
+  if (!PGOTestProfileRemappingFile.empty())
+    ProfileRemappingFileName = PGOTestProfileRemappingFile;
+}
+
+PreservedAnalyses PGOInstrumentationUse::run(Module &M,
+                                             ModuleAnalysisManager &AM) {
+
+  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+  auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
+    return FAM.getResult<TargetLibraryAnalysis>(F);
+  };
+  auto LookupBPI = [&FAM](Function &F) {
+    return &FAM.getResult<BranchProbabilityAnalysis>(F);
+  };
+  auto LookupBFI = [&FAM](Function &F) {
+    return &FAM.getResult<BlockFrequencyAnalysis>(F);
+  };
+
+  auto *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
+
+  if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName,
+                            LookupTLI, LookupBPI, LookupBFI, PSI, IsCS))
+    return PreservedAnalyses::all();
+
+  return PreservedAnalyses::none();
+}
+
+bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) {
+  if (skipModule(M))
+    return false;
+
+  auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & {
+    return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+  };
+  auto LookupBPI = [this](Function &F) {
+    return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI();
+  };
+  auto LookupBFI = [this](Function &F) {
+    return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
+  };
+
+  auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+  return annotateAllFunctions(M, ProfileFileName, "", LookupTLI, LookupBPI,
+                              LookupBFI, PSI, IsCS);
+}
+
+static std::string getSimpleNodeName(const BasicBlock *Node) {
+  if (!Node->getName().empty())
+    return std::string(Node->getName());
+
+  std::string SimpleNodeName;
+  raw_string_ostream OS(SimpleNodeName);
+  Node->printAsOperand(OS, false);
+  return OS.str();
+}
+
+void llvm::setProfMetadata(Module *M, Instruction *TI,
+                           ArrayRef<uint64_t> EdgeCounts,
+                           uint64_t MaxCount) {
+  MDBuilder MDB(M->getContext());
+  assert(MaxCount > 0 && "Bad max count");
+  uint64_t Scale = calculateCountScale(MaxCount);
+  SmallVector<unsigned, 4> Weights;
+  for (const auto &ECI : EdgeCounts)
+    Weights.push_back(scaleBranchCount(ECI, Scale));
+
+  LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W
+                                           : Weights) {
+    dbgs() << W << " ";
+  } dbgs() << "\n";);
+
+  TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
+  if (EmitBranchProbability) {
+    std::string BrCondStr = getBranchCondString(TI);
+    if (BrCondStr.empty())
+      return;
+
+    uint64_t WSum =
+        std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0,
+                        [](uint64_t w1, uint64_t w2) { return w1 + w2; });
+    uint64_t TotalCount =
+        std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0,
+                        [](uint64_t c1, uint64_t c2) { return c1 + c2; });
+    Scale = calculateCountScale(WSum);
+    BranchProbability BP(scaleBranchCount(Weights[0], Scale),
+                         scaleBranchCount(WSum, Scale));
+    std::string BranchProbStr;
+    raw_string_ostream OS(BranchProbStr);
+    OS << BP;
+    OS << " (total count : " << TotalCount << ")";
+    OS.flush();
+    Function *F = TI->getParent()->getParent();
+    OptimizationRemarkEmitter ORE(F);
+    ORE.emit([&]() {
+      return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI)
+             << BrCondStr << " is true with probability : " << BranchProbStr;
+    });
+  }
+}
+
+namespace llvm {
+
+void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count) {
+  MDBuilder MDB(M->getContext());
+  TI->setMetadata(llvm::LLVMContext::MD_irr_loop,
+                  MDB.createIrrLoopHeaderWeight(Count));
+}
+
+template <> struct GraphTraits<PGOUseFunc *> {
+  using NodeRef = const BasicBlock *;
+  using ChildIteratorType = const_succ_iterator;
+  using nodes_iterator = pointer_iterator<Function::const_iterator>;
+
+  static NodeRef getEntryNode(const PGOUseFunc *G) {
+    return &G->getFunc().front();
+  }
+
+  static ChildIteratorType child_begin(const NodeRef N) {
+    return succ_begin(N);
+  }
+
+  static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); }
+
+  static nodes_iterator nodes_begin(const PGOUseFunc *G) {
+    return nodes_iterator(G->getFunc().begin());
+  }
+
+  static nodes_iterator nodes_end(const PGOUseFunc *G) {
+    return nodes_iterator(G->getFunc().end());
+  }
+};
+
+template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits {
+  explicit DOTGraphTraits(bool isSimple = false)
+      : DefaultDOTGraphTraits(isSimple) {}
+
+  static std::string getGraphName(const PGOUseFunc *G) {
+    return std::string(G->getFunc().getName());
+  }
+
+  std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) {
+    std::string Result;
+    raw_string_ostream OS(Result);
+
+    OS << getSimpleNodeName(Node) << ":\\l";
+    UseBBInfo *BI = Graph->findBBInfo(Node);
+    OS << "Count : ";
+    if (BI && BI->CountValid)
+      OS << BI->CountValue << "\\l";
+    else
+      OS << "Unknown\\l";
+
+    if (!PGOInstrSelect)
+      return Result;
+
+    for (auto BI = Node->begin(); BI != Node->end(); ++BI) {
+      auto *I = &*BI;
+      if (!isa<SelectInst>(I))
+        continue;
+      // Display scaled counts for SELECT instruction:
+      OS << "SELECT : { T = ";
+      uint64_t TC, FC;
+      bool HasProf = I->extractProfMetadata(TC, FC);
+      if (!HasProf)
+        OS << "Unknown, F = Unknown }\\l";
+      else
+        OS << TC << ", F = " << FC << " }\\l";
+    }
+    return Result;
+  }
+};
+
+} // end namespace llvm
diff --git a/contrib/libs/llvm12/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp b/contrib/libs/llvm12/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
index edc72d79eb..55a93b6152 100644
--- a/contrib/libs/llvm12/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
@@ -1,527 +1,527 @@
-//===-- PGOMemOPSizeOpt.cpp - Optimizations based on value profiling ===// 
-// 
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 
-// 
-//===----------------------------------------------------------------------===// 
-// 
-// This file implements the transformation that optimizes memory intrinsics 
-// such as memcpy using the size value profile. When memory intrinsic size 
-// value profile metadata is available, a single memory intrinsic is expanded 
-// to a sequence of guarded specialized versions that are called with the 
-// hottest size(s), for later expansion into more optimal inline sequences. 
-// 
-//===----------------------------------------------------------------------===// 
- 
-#include "llvm/ADT/ArrayRef.h" 
-#include "llvm/ADT/Statistic.h" 
-#include "llvm/ADT/StringRef.h" 
-#include "llvm/ADT/Twine.h" 
-#include "llvm/Analysis/BlockFrequencyInfo.h" 
-#include "llvm/Analysis/DomTreeUpdater.h" 
-#include "llvm/Analysis/GlobalsModRef.h" 
-#include "llvm/Analysis/OptimizationRemarkEmitter.h" 
+//===-- PGOMemOPSizeOpt.cpp - Optimizations based on value profiling ===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the transformation that optimizes memory intrinsics
+// such as memcpy using the size value profile. When memory intrinsic size
+// value profile metadata is available, a single memory intrinsic is expanded
+// to a sequence of guarded specialized versions that are called with the
+// hottest size(s), for later expansion into more optimal inline sequences.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/IR/BasicBlock.h" 
-#include "llvm/IR/DerivedTypes.h" 
-#include "llvm/IR/Dominators.h" 
-#include "llvm/IR/Function.h" 
-#include "llvm/IR/IRBuilder.h" 
-#include "llvm/IR/InstVisitor.h" 
-#include "llvm/IR/InstrTypes.h" 
-#include "llvm/IR/Instruction.h" 
-#include "llvm/IR/Instructions.h" 
-#include "llvm/IR/LLVMContext.h" 
-#include "llvm/IR/PassManager.h" 
-#include "llvm/IR/Type.h" 
-#include "llvm/InitializePasses.h" 
-#include "llvm/Pass.h" 
-#include "llvm/PassRegistry.h" 
-#include "llvm/ProfileData/InstrProf.h" 
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/ProfileData/InstrProf.h"
 #define INSTR_PROF_VALUE_PROF_MEMOP_API
 #include "llvm/ProfileData/InstrProfData.inc"
-#include "llvm/Support/Casting.h" 
-#include "llvm/Support/CommandLine.h" 
-#include "llvm/Support/Debug.h" 
-#include "llvm/Support/ErrorHandling.h" 
-#include "llvm/Support/MathExtras.h" 
-#include "llvm/Transforms/Instrumentation.h" 
-#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" 
-#include "llvm/Transforms/Utils/BasicBlockUtils.h" 
-#include <cassert> 
-#include <cstdint> 
-#include <vector> 
- 
-using namespace llvm; 
- 
-#define DEBUG_TYPE "pgo-memop-opt" 
- 
-STATISTIC(NumOfPGOMemOPOpt, "Number of memop intrinsics optimized."); 
-STATISTIC(NumOfPGOMemOPAnnotate, "Number of memop intrinsics annotated."); 
- 
-// The minimum call count to optimize memory intrinsic calls. 
-static cl::opt<unsigned> 
-    MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::ZeroOrMore, 
-                        cl::init(1000), 
-                        cl::desc("The minimum count to optimize memory " 
-                                 "intrinsic calls")); 
- 
-// Command line option to disable memory intrinsic optimization. The default is 
-// false. This is for debug purpose. 
-static cl::opt<bool> DisableMemOPOPT("disable-memop-opt", cl::init(false), 
-                                     cl::Hidden, cl::desc("Disable optimize")); 
- 
-// The percent threshold to optimize memory intrinsic calls. 
-static cl::opt<unsigned> 
-    MemOPPercentThreshold("pgo-memop-percent-threshold", cl::init(40), 
-                          cl::Hidden, cl::ZeroOrMore, 
-                          cl::desc("The percentage threshold for the " 
-                                   "memory intrinsic calls optimization")); 
- 
-// Maximum number of versions for optimizing memory intrinsic call. 
-static cl::opt<unsigned> 
-    MemOPMaxVersion("pgo-memop-max-version", cl::init(3), cl::Hidden, 
-                    cl::ZeroOrMore, 
-                    cl::desc("The max version for the optimized memory " 
-                             " intrinsic calls")); 
- 
-// Scale the counts from the annotation using the BB count value. 
-static cl::opt<bool> 
-    MemOPScaleCount("pgo-memop-scale-count", cl::init(true), cl::Hidden, 
-                    cl::desc("Scale the memop size counts using the basic " 
-                             " block count value")); 
- 
-cl::opt<bool> 
-    MemOPOptMemcmpBcmp("pgo-memop-optimize-memcmp-bcmp", cl::init(true), 
-                       cl::Hidden, 
-                       cl::desc("Size-specialize memcmp and bcmp calls")); 
- 
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <cassert>
+#include <cstdint>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "pgo-memop-opt"
+
+STATISTIC(NumOfPGOMemOPOpt, "Number of memop intrinsics optimized.");
+STATISTIC(NumOfPGOMemOPAnnotate, "Number of memop intrinsics annotated.");
+
+// The minimum call count to optimize memory intrinsic calls.
+static cl::opt<unsigned>
+    MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::ZeroOrMore,
+                        cl::init(1000),
+                        cl::desc("The minimum count to optimize memory "
+                                 "intrinsic calls"));
+
+// Command line option to disable memory intrinsic optimization. The default is
+// false. This is for debug purpose.
+static cl::opt<bool> DisableMemOPOPT("disable-memop-opt", cl::init(false),
+                                     cl::Hidden, cl::desc("Disable optimize"));
+
+// The percent threshold to optimize memory intrinsic calls.
+static cl::opt<unsigned>
+    MemOPPercentThreshold("pgo-memop-percent-threshold", cl::init(40),
+                          cl::Hidden, cl::ZeroOrMore,
+                          cl::desc("The percentage threshold for the "
+                                   "memory intrinsic calls optimization"));
+
+// Maximum number of versions for optimizing memory intrinsic call.
+static cl::opt<unsigned>
+    MemOPMaxVersion("pgo-memop-max-version", cl::init(3), cl::Hidden,
+                    cl::ZeroOrMore,
+                    cl::desc("The max version for the optimized memory "
+                             " intrinsic calls"));
+
+// Scale the counts from the annotation using the BB count value.
+static cl::opt<bool>
+    MemOPScaleCount("pgo-memop-scale-count", cl::init(true), cl::Hidden,
+                    cl::desc("Scale the memop size counts using the basic "
+                             " block count value"));
+
+cl::opt<bool>
+    MemOPOptMemcmpBcmp("pgo-memop-optimize-memcmp-bcmp", cl::init(true),
+                       cl::Hidden,
+                       cl::desc("Size-specialize memcmp and bcmp calls"));
+
 static cl::opt<unsigned>
     MemOpMaxOptSize("memop-value-prof-max-opt-size", cl::Hidden, cl::init(128),
                     cl::desc("Optimize the memop size <= this value"));
 
-namespace { 
-class PGOMemOPSizeOptLegacyPass : public FunctionPass { 
-public: 
-  static char ID; 
- 
-  PGOMemOPSizeOptLegacyPass() : FunctionPass(ID) { 
-    initializePGOMemOPSizeOptLegacyPassPass(*PassRegistry::getPassRegistry()); 
-  } 
- 
-  StringRef getPassName() const override { return "PGOMemOPSize"; } 
- 
-private: 
-  bool runOnFunction(Function &F) override; 
-  void getAnalysisUsage(AnalysisUsage &AU) const override { 
-    AU.addRequired<BlockFrequencyInfoWrapperPass>(); 
-    AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); 
-    AU.addPreserved<GlobalsAAWrapperPass>(); 
-    AU.addPreserved<DominatorTreeWrapperPass>(); 
-    AU.addRequired<TargetLibraryInfoWrapperPass>(); 
-  } 
-}; 
-} // end anonymous namespace 
- 
-char PGOMemOPSizeOptLegacyPass::ID = 0; 
-INITIALIZE_PASS_BEGIN(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt", 
-                      "Optimize memory intrinsic using its size value profile", 
-                      false, false) 
-INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 
-INITIALIZE_PASS_END(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt", 
-                    "Optimize memory intrinsic using its size value profile", 
-                    false, false) 
- 
-FunctionPass *llvm::createPGOMemOPSizeOptLegacyPass() { 
-  return new PGOMemOPSizeOptLegacyPass(); 
-} 
- 
-namespace { 
- 
-static const char *getMIName(const MemIntrinsic *MI) { 
-  switch (MI->getIntrinsicID()) { 
-  case Intrinsic::memcpy: 
-    return "memcpy"; 
-  case Intrinsic::memmove: 
-    return "memmove"; 
-  case Intrinsic::memset: 
-    return "memset"; 
-  default: 
-    return "unknown"; 
-  } 
-} 
- 
-// A class that abstracts a memop (memcpy, memmove, memset, memcmp and bcmp). 
-struct MemOp { 
-  Instruction *I; 
-  MemOp(MemIntrinsic *MI) : I(MI) {} 
-  MemOp(CallInst *CI) : I(CI) {} 
-  MemIntrinsic *asMI() { return dyn_cast<MemIntrinsic>(I); } 
-  CallInst *asCI() { return cast<CallInst>(I); } 
-  MemOp clone() { 
-    if (auto MI = asMI()) 
-      return MemOp(cast<MemIntrinsic>(MI->clone())); 
-    return MemOp(cast<CallInst>(asCI()->clone())); 
-  } 
-  Value *getLength() { 
-    if (auto MI = asMI()) 
-      return MI->getLength(); 
-    return asCI()->getArgOperand(2); 
-  } 
-  void setLength(Value *Length) { 
-    if (auto MI = asMI()) 
-      return MI->setLength(Length); 
-    asCI()->setArgOperand(2, Length); 
-  } 
-  StringRef getFuncName() { 
-    if (auto MI = asMI()) 
-      return MI->getCalledFunction()->getName(); 
-    return asCI()->getCalledFunction()->getName(); 
-  } 
-  bool isMemmove() { 
-    if (auto MI = asMI()) 
-      if (MI->getIntrinsicID() == Intrinsic::memmove) 
-        return true; 
-    return false; 
-  } 
-  bool isMemcmp(TargetLibraryInfo &TLI) { 
-    LibFunc Func; 
-    if (asMI() == nullptr && TLI.getLibFunc(*asCI(), Func) && 
-        Func == LibFunc_memcmp) { 
-      return true; 
-    } 
-    return false; 
-  } 
-  bool isBcmp(TargetLibraryInfo &TLI) { 
-    LibFunc Func; 
-    if (asMI() == nullptr && TLI.getLibFunc(*asCI(), Func) && 
-        Func == LibFunc_bcmp) { 
-      return true; 
-    } 
-    return false; 
-  } 
-  const char *getName(TargetLibraryInfo &TLI) { 
-    if (auto MI = asMI()) 
-      return getMIName(MI); 
-    LibFunc Func; 
-    if (TLI.getLibFunc(*asCI(), Func)) { 
-      if (Func == LibFunc_memcmp) 
-        return "memcmp"; 
-      if (Func == LibFunc_bcmp) 
-        return "bcmp"; 
-    } 
-    llvm_unreachable("Must be MemIntrinsic or memcmp/bcmp CallInst"); 
-    return nullptr; 
-  } 
-}; 
- 
-class MemOPSizeOpt : public InstVisitor<MemOPSizeOpt> { 
-public: 
-  MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI, 
-               OptimizationRemarkEmitter &ORE, DominatorTree *DT, 
-               TargetLibraryInfo &TLI) 
-      : Func(Func), BFI(BFI), ORE(ORE), DT(DT), TLI(TLI), Changed(false) { 
-    ValueDataArray = 
-        std::make_unique<InstrProfValueData[]>(MemOPMaxVersion + 2); 
-  } 
-  bool isChanged() const { return Changed; } 
-  void perform() { 
-    WorkList.clear(); 
-    visit(Func); 
- 
-    for (auto &MO : WorkList) { 
-      ++NumOfPGOMemOPAnnotate; 
-      if (perform(MO)) { 
-        Changed = true; 
-        ++NumOfPGOMemOPOpt; 
-        LLVM_DEBUG(dbgs() << "MemOP call: " << MO.getFuncName() 
-                          << "is Transformed.\n"); 
-      } 
-    } 
-  } 
- 
-  void visitMemIntrinsic(MemIntrinsic &MI) { 
-    Value *Length = MI.getLength(); 
-    // Not perform on constant length calls. 
-    if (dyn_cast<ConstantInt>(Length)) 
-      return; 
-    WorkList.push_back(MemOp(&MI)); 
-  } 
- 
-  void visitCallInst(CallInst &CI) { 
-    LibFunc Func; 
-    if (TLI.getLibFunc(CI, Func) && 
-        (Func == LibFunc_memcmp || Func == LibFunc_bcmp) && 
+namespace {
+class PGOMemOPSizeOptLegacyPass : public FunctionPass {
+public:
+  static char ID;
+
+  PGOMemOPSizeOptLegacyPass() : FunctionPass(ID) {
+    initializePGOMemOPSizeOptLegacyPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override { return "PGOMemOPSize"; }
+
+private:
+  bool runOnFunction(Function &F) override;
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<BlockFrequencyInfoWrapperPass>();
+    AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
+    AU.addPreserved<GlobalsAAWrapperPass>();
+    AU.addPreserved<DominatorTreeWrapperPass>();
+    AU.addRequired<TargetLibraryInfoWrapperPass>();
+  }
+};
+} // end anonymous namespace
+
+char PGOMemOPSizeOptLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt",
+                      "Optimize memory intrinsic using its size value profile",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt",
+                    "Optimize memory intrinsic using its size value profile",
+                    false, false)
+
+FunctionPass *llvm::createPGOMemOPSizeOptLegacyPass() {
+  return new PGOMemOPSizeOptLegacyPass();
+}
+
+namespace {
+
+static const char *getMIName(const MemIntrinsic *MI) {
+  switch (MI->getIntrinsicID()) {
+  case Intrinsic::memcpy:
+    return "memcpy";
+  case Intrinsic::memmove:
+    return "memmove";
+  case Intrinsic::memset:
+    return "memset";
+  default:
+    return "unknown";
+  }
+}
+
+// A class that abstracts a memop (memcpy, memmove, memset, memcmp and bcmp).
+struct MemOp {
+  Instruction *I;
+  MemOp(MemIntrinsic *MI) : I(MI) {}
+  MemOp(CallInst *CI) : I(CI) {}
+  MemIntrinsic *asMI() { return dyn_cast<MemIntrinsic>(I); }
+  CallInst *asCI() { return cast<CallInst>(I); }
+  MemOp clone() {
+    if (auto MI = asMI())
+      return MemOp(cast<MemIntrinsic>(MI->clone()));
+    return MemOp(cast<CallInst>(asCI()->clone()));
+  }
+  Value *getLength() {
+    if (auto MI = asMI())
+      return MI->getLength();
+    return asCI()->getArgOperand(2);
+  }
+  void setLength(Value *Length) {
+    if (auto MI = asMI())
+      return MI->setLength(Length);
+    asCI()->setArgOperand(2, Length);
+  }
+  StringRef getFuncName() {
+    if (auto MI = asMI())
+      return MI->getCalledFunction()->getName();
+    return asCI()->getCalledFunction()->getName();
+  }
+  bool isMemmove() {
+    if (auto MI = asMI())
+      if (MI->getIntrinsicID() == Intrinsic::memmove)
+        return true;
+    return false;
+  }
+  bool isMemcmp(TargetLibraryInfo &TLI) {
+    LibFunc Func;
+    if (asMI() == nullptr && TLI.getLibFunc(*asCI(), Func) &&
+        Func == LibFunc_memcmp) {
+      return true;
+    }
+    return false;
+  }
+  bool isBcmp(TargetLibraryInfo &TLI) {
+    LibFunc Func;
+    if (asMI() == nullptr && TLI.getLibFunc(*asCI(), Func) &&
+        Func == LibFunc_bcmp) {
+      return true;
+    }
+    return false;
+  }
+  const char *getName(TargetLibraryInfo &TLI) {
+    if (auto MI = asMI())
+      return getMIName(MI);
+    LibFunc Func;
+    if (TLI.getLibFunc(*asCI(), Func)) {
+      if (Func == LibFunc_memcmp)
+        return "memcmp";
+      if (Func == LibFunc_bcmp)
+        return "bcmp";
+    }
+    llvm_unreachable("Must be MemIntrinsic or memcmp/bcmp CallInst");
+    return nullptr;
+  }
+};
+
+class MemOPSizeOpt : public InstVisitor<MemOPSizeOpt> {
+public:
+  MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI,
+               OptimizationRemarkEmitter &ORE, DominatorTree *DT,
+               TargetLibraryInfo &TLI)
+      : Func(Func), BFI(BFI), ORE(ORE), DT(DT), TLI(TLI), Changed(false) {
+    ValueDataArray =
+        std::make_unique<InstrProfValueData[]>(MemOPMaxVersion + 2);
+  }
+  bool isChanged() const { return Changed; }
+  void perform() {
+    WorkList.clear();
+    visit(Func);
+
+    for (auto &MO : WorkList) {
+      ++NumOfPGOMemOPAnnotate;
+      if (perform(MO)) {
+        Changed = true;
+        ++NumOfPGOMemOPOpt;
+        LLVM_DEBUG(dbgs() << "MemOP call: " << MO.getFuncName()
+                          << "is Transformed.\n");
+      }
+    }
+  }
+
+  void visitMemIntrinsic(MemIntrinsic &MI) {
+    Value *Length = MI.getLength();
+    // Not perform on constant length calls.
+    if (dyn_cast<ConstantInt>(Length))
+      return;
+    WorkList.push_back(MemOp(&MI));
+  }
+
+  void visitCallInst(CallInst &CI) {
+    LibFunc Func;
+    if (TLI.getLibFunc(CI, Func) &&
+        (Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
         !isa<ConstantInt>(CI.getArgOperand(2))) {
-      WorkList.push_back(MemOp(&CI)); 
-    } 
-  } 
- 
-private: 
-  Function &Func; 
-  BlockFrequencyInfo &BFI; 
-  OptimizationRemarkEmitter &ORE; 
-  DominatorTree *DT; 
-  TargetLibraryInfo &TLI; 
-  bool Changed; 
-  std::vector<MemOp> WorkList; 
-  // The space to read the profile annotation. 
-  std::unique_ptr<InstrProfValueData[]> ValueDataArray; 
-  bool perform(MemOp MO); 
-}; 
- 
-static bool isProfitable(uint64_t Count, uint64_t TotalCount) { 
-  assert(Count <= TotalCount); 
-  if (Count < MemOPCountThreshold) 
-    return false; 
-  if (Count < TotalCount * MemOPPercentThreshold / 100) 
-    return false; 
-  return true; 
-} 
- 
-static inline uint64_t getScaledCount(uint64_t Count, uint64_t Num, 
-                                      uint64_t Denom) { 
-  if (!MemOPScaleCount) 
-    return Count; 
-  bool Overflowed; 
-  uint64_t ScaleCount = SaturatingMultiply(Count, Num, &Overflowed); 
-  return ScaleCount / Denom; 
-} 
- 
-bool MemOPSizeOpt::perform(MemOp MO) { 
-  assert(MO.I); 
-  if (MO.isMemmove()) 
-    return false; 
-  if (!MemOPOptMemcmpBcmp && (MO.isMemcmp(TLI) || MO.isBcmp(TLI))) 
-    return false; 
- 
-  uint32_t NumVals, MaxNumPromotions = MemOPMaxVersion + 2; 
-  uint64_t TotalCount; 
-  if (!getValueProfDataFromInst(*MO.I, IPVK_MemOPSize, MaxNumPromotions, 
-                                ValueDataArray.get(), NumVals, TotalCount)) 
-    return false; 
- 
-  uint64_t ActualCount = TotalCount; 
-  uint64_t SavedTotalCount = TotalCount; 
-  if (MemOPScaleCount) { 
-    auto BBEdgeCount = BFI.getBlockProfileCount(MO.I->getParent()); 
-    if (!BBEdgeCount) 
-      return false; 
-    ActualCount = *BBEdgeCount; 
-  } 
- 
-  ArrayRef<InstrProfValueData> VDs(ValueDataArray.get(), NumVals); 
-  LLVM_DEBUG(dbgs() << "Read one memory intrinsic profile with count " 
-                    << ActualCount << "\n"); 
-  LLVM_DEBUG( 
-      for (auto &VD 
-           : VDs) { dbgs() << "  (" << VD.Value << "," << VD.Count << ")\n"; }); 
- 
-  if (ActualCount < MemOPCountThreshold) 
-    return false; 
-  // Skip if the total value profiled count is 0, in which case we can't 
-  // scale up the counts properly (and there is no profitable transformation). 
-  if (TotalCount == 0) 
-    return false; 
- 
-  TotalCount = ActualCount; 
-  if (MemOPScaleCount) 
-    LLVM_DEBUG(dbgs() << "Scale counts: numerator = " << ActualCount 
-                      << " denominator = " << SavedTotalCount << "\n"); 
- 
-  // Keeping track of the count of the default case: 
-  uint64_t RemainCount = TotalCount; 
-  uint64_t SavedRemainCount = SavedTotalCount; 
-  SmallVector<uint64_t, 16> SizeIds; 
-  SmallVector<uint64_t, 16> CaseCounts; 
-  uint64_t MaxCount = 0; 
-  unsigned Version = 0; 
-  // Default case is in the front -- save the slot here. 
-  CaseCounts.push_back(0); 
-  for (auto &VD : VDs) { 
-    int64_t V = VD.Value; 
-    uint64_t C = VD.Count; 
-    if (MemOPScaleCount) 
-      C = getScaledCount(C, ActualCount, SavedTotalCount); 
- 
+      WorkList.push_back(MemOp(&CI));
+    }
+  }
+
+private:
+  Function &Func;
+  BlockFrequencyInfo &BFI;
+  OptimizationRemarkEmitter &ORE;
+  DominatorTree *DT;
+  TargetLibraryInfo &TLI;
+  bool Changed;
+  std::vector<MemOp> WorkList;
+  // The space to read the profile annotation.
+  std::unique_ptr<InstrProfValueData[]> ValueDataArray;
+  bool perform(MemOp MO);
+};
+
+static bool isProfitable(uint64_t Count, uint64_t TotalCount) {
+  assert(Count <= TotalCount);
+  if (Count < MemOPCountThreshold)
+    return false;
+  if (Count < TotalCount * MemOPPercentThreshold / 100)
+    return false;
+  return true;
+}
+
+static inline uint64_t getScaledCount(uint64_t Count, uint64_t Num,
+                                      uint64_t Denom) {
+  if (!MemOPScaleCount)
+    return Count;
+  bool Overflowed;
+  uint64_t ScaleCount = SaturatingMultiply(Count, Num, &Overflowed);
+  return ScaleCount / Denom;
+}
+
+bool MemOPSizeOpt::perform(MemOp MO) {
+  assert(MO.I);
+  if (MO.isMemmove())
+    return false;
+  if (!MemOPOptMemcmpBcmp && (MO.isMemcmp(TLI) || MO.isBcmp(TLI)))
+    return false;
+
+  uint32_t NumVals, MaxNumPromotions = MemOPMaxVersion + 2;
+  uint64_t TotalCount;
+  if (!getValueProfDataFromInst(*MO.I, IPVK_MemOPSize, MaxNumPromotions,
+                                ValueDataArray.get(), NumVals, TotalCount))
+    return false;
+
+  uint64_t ActualCount = TotalCount;
+  uint64_t SavedTotalCount = TotalCount;
+  if (MemOPScaleCount) {
+    auto BBEdgeCount = BFI.getBlockProfileCount(MO.I->getParent());
+    if (!BBEdgeCount)
+      return false;
+    ActualCount = *BBEdgeCount;
+  }
+
+  ArrayRef<InstrProfValueData> VDs(ValueDataArray.get(), NumVals);
+  LLVM_DEBUG(dbgs() << "Read one memory intrinsic profile with count "
+                    << ActualCount << "\n");
+  LLVM_DEBUG(
+      for (auto &VD
+           : VDs) { dbgs() << "  (" << VD.Value << "," << VD.Count << ")\n"; });
+
+  if (ActualCount < MemOPCountThreshold)
+    return false;
+  // Skip if the total value profiled count is 0, in which case we can't
+  // scale up the counts properly (and there is no profitable transformation).
+  if (TotalCount == 0)
+    return false;
+
+  TotalCount = ActualCount;
+  if (MemOPScaleCount)
+    LLVM_DEBUG(dbgs() << "Scale counts: numerator = " << ActualCount
+                      << " denominator = " << SavedTotalCount << "\n");
+
+  // Keeping track of the count of the default case:
+  uint64_t RemainCount = TotalCount;
+  uint64_t SavedRemainCount = SavedTotalCount;
+  SmallVector<uint64_t, 16> SizeIds;
+  SmallVector<uint64_t, 16> CaseCounts;
+  uint64_t MaxCount = 0;
+  unsigned Version = 0;
+  // Default case is in the front -- save the slot here.
+  CaseCounts.push_back(0);
+  for (auto &VD : VDs) {
+    int64_t V = VD.Value;
+    uint64_t C = VD.Count;
+    if (MemOPScaleCount)
+      C = getScaledCount(C, ActualCount, SavedTotalCount);
+
     if (!InstrProfIsSingleValRange(V) || V > MemOpMaxOptSize)
-      continue; 
- 
-    // ValueCounts are sorted on the count. Break at the first un-profitable 
-    // value. 
-    if (!isProfitable(C, RemainCount)) 
-      break; 
- 
-    SizeIds.push_back(V); 
-    CaseCounts.push_back(C); 
-    if (C > MaxCount) 
-      MaxCount = C; 
- 
-    assert(RemainCount >= C); 
-    RemainCount -= C; 
-    assert(SavedRemainCount >= VD.Count); 
-    SavedRemainCount -= VD.Count; 
- 
-    if (++Version > MemOPMaxVersion && MemOPMaxVersion != 0) 
-      break; 
-  } 
- 
-  if (Version == 0) 
-    return false; 
- 
-  CaseCounts[0] = RemainCount; 
-  if (RemainCount > MaxCount) 
-    MaxCount = RemainCount; 
- 
-  uint64_t SumForOpt = TotalCount - RemainCount; 
- 
-  LLVM_DEBUG(dbgs() << "Optimize one memory intrinsic call to " << Version 
-                    << " Versions (covering " << SumForOpt << " out of " 
-                    << TotalCount << ")\n"); 
- 
-  // mem_op(..., size) 
-  // ==> 
-  // switch (size) { 
-  //   case s1: 
-  //      mem_op(..., s1); 
-  //      goto merge_bb; 
-  //   case s2: 
-  //      mem_op(..., s2); 
-  //      goto merge_bb; 
-  //   ... 
-  //   default: 
-  //      mem_op(..., size); 
-  //      goto merge_bb; 
-  // } 
-  // merge_bb: 
- 
-  BasicBlock *BB = MO.I->getParent(); 
-  LLVM_DEBUG(dbgs() << "\n\n== Basic Block Before ==\n"); 
-  LLVM_DEBUG(dbgs() << *BB << "\n"); 
-  auto OrigBBFreq = BFI.getBlockFreq(BB); 
- 
-  BasicBlock *DefaultBB = SplitBlock(BB, MO.I, DT); 
-  BasicBlock::iterator It(*MO.I); 
-  ++It; 
-  assert(It != DefaultBB->end()); 
-  BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It), DT); 
-  MergeBB->setName("MemOP.Merge"); 
-  BFI.setBlockFreq(MergeBB, OrigBBFreq.getFrequency()); 
-  DefaultBB->setName("MemOP.Default"); 
- 
-  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); 
-  auto &Ctx = Func.getContext(); 
-  IRBuilder<> IRB(BB); 
-  BB->getTerminator()->eraseFromParent(); 
-  Value *SizeVar = MO.getLength(); 
-  SwitchInst *SI = IRB.CreateSwitch(SizeVar, DefaultBB, SizeIds.size()); 
-  Type *MemOpTy = MO.I->getType(); 
-  PHINode *PHI = nullptr; 
-  if (!MemOpTy->isVoidTy()) { 
-    // Insert a phi for the return values at the merge block. 
-    IRBuilder<> IRBM(MergeBB->getFirstNonPHI()); 
-    PHI = IRBM.CreatePHI(MemOpTy, SizeIds.size() + 1, "MemOP.RVMerge"); 
-    MO.I->replaceAllUsesWith(PHI); 
-    PHI->addIncoming(MO.I, DefaultBB); 
-  } 
- 
-  // Clear the value profile data. 
-  MO.I->setMetadata(LLVMContext::MD_prof, nullptr); 
-  // If all promoted, we don't need the MD.prof metadata. 
-  if (SavedRemainCount > 0 || Version != NumVals) 
-    // Otherwise we need update with the un-promoted records back. 
-    annotateValueSite(*Func.getParent(), *MO.I, VDs.slice(Version), 
-                      SavedRemainCount, IPVK_MemOPSize, NumVals); 
- 
-  LLVM_DEBUG(dbgs() << "\n\n== Basic Block After==\n"); 
- 
-  std::vector<DominatorTree::UpdateType> Updates; 
-  if (DT) 
-    Updates.reserve(2 * SizeIds.size()); 
- 
-  for (uint64_t SizeId : SizeIds) { 
-    BasicBlock *CaseBB = BasicBlock::Create( 
-        Ctx, Twine("MemOP.Case.") + Twine(SizeId), &Func, DefaultBB); 
-    MemOp NewMO = MO.clone(); 
-    // Fix the argument. 
-    auto *SizeType = dyn_cast<IntegerType>(NewMO.getLength()->getType()); 
-    assert(SizeType && "Expected integer type size argument."); 
-    ConstantInt *CaseSizeId = ConstantInt::get(SizeType, SizeId); 
-    NewMO.setLength(CaseSizeId); 
-    CaseBB->getInstList().push_back(NewMO.I); 
-    IRBuilder<> IRBCase(CaseBB); 
-    IRBCase.CreateBr(MergeBB); 
-    SI->addCase(CaseSizeId, CaseBB); 
-    if (!MemOpTy->isVoidTy()) 
-      PHI->addIncoming(NewMO.I, CaseBB); 
-    if (DT) { 
-      Updates.push_back({DominatorTree::Insert, CaseBB, MergeBB}); 
-      Updates.push_back({DominatorTree::Insert, BB, CaseBB}); 
-    } 
-    LLVM_DEBUG(dbgs() << *CaseBB << "\n"); 
-  } 
-  DTU.applyUpdates(Updates); 
-  Updates.clear(); 
- 
-  setProfMetadata(Func.getParent(), SI, CaseCounts, MaxCount); 
- 
-  LLVM_DEBUG(dbgs() << *BB << "\n"); 
-  LLVM_DEBUG(dbgs() << *DefaultBB << "\n"); 
-  LLVM_DEBUG(dbgs() << *MergeBB << "\n"); 
- 
-  ORE.emit([&]() { 
-    using namespace ore; 
-    return OptimizationRemark(DEBUG_TYPE, "memopt-opt", MO.I) 
-           << "optimized " << NV("Memop", MO.getName(TLI)) << " with count " 
-           << NV("Count", SumForOpt) << " out of " << NV("Total", TotalCount) 
-           << " for " << NV("Versions", Version) << " versions"; 
-  }); 
- 
-  return true; 
-} 
-} // namespace 
- 
-static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI, 
-                                OptimizationRemarkEmitter &ORE, 
-                                DominatorTree *DT, TargetLibraryInfo &TLI) { 
-  if (DisableMemOPOPT) 
-    return false; 
- 
-  if (F.hasFnAttribute(Attribute::OptimizeForSize)) 
-    return false; 
-  MemOPSizeOpt MemOPSizeOpt(F, BFI, ORE, DT, TLI); 
-  MemOPSizeOpt.perform(); 
-  return MemOPSizeOpt.isChanged(); 
-} 
- 
-bool PGOMemOPSizeOptLegacyPass::runOnFunction(Function &F) { 
-  BlockFrequencyInfo &BFI = 
-      getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(); 
-  auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); 
-  auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); 
-  DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; 
-  TargetLibraryInfo &TLI = 
-      getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 
-  return PGOMemOPSizeOptImpl(F, BFI, ORE, DT, TLI); 
-} 
- 
-namespace llvm { 
-char &PGOMemOPSizeOptID = PGOMemOPSizeOptLegacyPass::ID; 
- 
-PreservedAnalyses PGOMemOPSizeOpt::run(Function &F, 
-                                       FunctionAnalysisManager &FAM) { 
-  auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F); 
-  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); 
-  auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F); 
-  auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F); 
-  bool Changed = PGOMemOPSizeOptImpl(F, BFI, ORE, DT, TLI); 
-  if (!Changed) 
-    return PreservedAnalyses::all(); 
-  auto PA = PreservedAnalyses(); 
-  PA.preserve<GlobalsAA>(); 
-  PA.preserve<DominatorTreeAnalysis>(); 
-  return PA; 
-} 
-} // namespace llvm 
+      continue;
+
+    // ValueCounts are sorted on the count. Break at the first un-profitable
+    // value.
+    if (!isProfitable(C, RemainCount))
+      break;
+
+    SizeIds.push_back(V);
+    CaseCounts.push_back(C);
+    if (C > MaxCount)
+      MaxCount = C;
+
+    assert(RemainCount >= C);
+    RemainCount -= C;
+    assert(SavedRemainCount >= VD.Count);
+    SavedRemainCount -= VD.Count;
+
+    if (++Version > MemOPMaxVersion && MemOPMaxVersion != 0)
+      break;
+  }
+
+  if (Version == 0)
+    return false;
+
+  CaseCounts[0] = RemainCount;
+  if (RemainCount > MaxCount)
+    MaxCount = RemainCount;
+
+  uint64_t SumForOpt = TotalCount - RemainCount;
+
+  LLVM_DEBUG(dbgs() << "Optimize one memory intrinsic call to " << Version
+                    << " Versions (covering " << SumForOpt << " out of "
+                    << TotalCount << ")\n");
+
+  // mem_op(..., size)
+  // ==>
+  // switch (size) {
+  //   case s1:
+  //      mem_op(..., s1);
+  //      goto merge_bb;
+  //   case s2:
+  //      mem_op(..., s2);
+  //      goto merge_bb;
+  //   ...
+  //   default:
+  //      mem_op(..., size);
+  //      goto merge_bb;
+  // }
+  // merge_bb:
+
+  BasicBlock *BB = MO.I->getParent();
+  LLVM_DEBUG(dbgs() << "\n\n== Basic Block Before ==\n");
+  LLVM_DEBUG(dbgs() << *BB << "\n");
+  auto OrigBBFreq = BFI.getBlockFreq(BB);
+
+  BasicBlock *DefaultBB = SplitBlock(BB, MO.I, DT);
+  BasicBlock::iterator It(*MO.I);
+  ++It;
+  assert(It != DefaultBB->end());
+  BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It), DT);
+  MergeBB->setName("MemOP.Merge");
+  BFI.setBlockFreq(MergeBB, OrigBBFreq.getFrequency());
+  DefaultBB->setName("MemOP.Default");
+
+  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+  auto &Ctx = Func.getContext();
+  IRBuilder<> IRB(BB);
+  BB->getTerminator()->eraseFromParent();
+  Value *SizeVar = MO.getLength();
+  SwitchInst *SI = IRB.CreateSwitch(SizeVar, DefaultBB, SizeIds.size());
+  Type *MemOpTy = MO.I->getType();
+  PHINode *PHI = nullptr;
+  if (!MemOpTy->isVoidTy()) {
+    // Insert a phi for the return values at the merge block.
+    IRBuilder<> IRBM(MergeBB->getFirstNonPHI());
+    PHI = IRBM.CreatePHI(MemOpTy, SizeIds.size() + 1, "MemOP.RVMerge");
+    MO.I->replaceAllUsesWith(PHI);
+    PHI->addIncoming(MO.I, DefaultBB);
+  }
+
+  // Clear the value profile data.
+  MO.I->setMetadata(LLVMContext::MD_prof, nullptr);
+  // If all promoted, we don't need the MD.prof metadata.
+  if (SavedRemainCount > 0 || Version != NumVals)
+    // Otherwise we need update with the un-promoted records back.
+    annotateValueSite(*Func.getParent(), *MO.I, VDs.slice(Version),
+                      SavedRemainCount, IPVK_MemOPSize, NumVals);
+
+  LLVM_DEBUG(dbgs() << "\n\n== Basic Block After==\n");
+
+  std::vector<DominatorTree::UpdateType> Updates;
+  if (DT)
+    Updates.reserve(2 * SizeIds.size());
+
+  for (uint64_t SizeId : SizeIds) {
+    BasicBlock *CaseBB = BasicBlock::Create(
+        Ctx, Twine("MemOP.Case.") + Twine(SizeId), &Func, DefaultBB);
+    MemOp NewMO = MO.clone();
+    // Fix the argument.
+    auto *SizeType = dyn_cast<IntegerType>(NewMO.getLength()->getType());
+    assert(SizeType && "Expected integer type size argument.");
+    ConstantInt *CaseSizeId = ConstantInt::get(SizeType, SizeId);
+    NewMO.setLength(CaseSizeId);
+    CaseBB->getInstList().push_back(NewMO.I);
+    IRBuilder<> IRBCase(CaseBB);
+    IRBCase.CreateBr(MergeBB);
+    SI->addCase(CaseSizeId, CaseBB);
+    if (!MemOpTy->isVoidTy())
+      PHI->addIncoming(NewMO.I, CaseBB);
+    if (DT) {
+      Updates.push_back({DominatorTree::Insert, CaseBB, MergeBB});
+      Updates.push_back({DominatorTree::Insert, BB, CaseBB});
+    }
+    LLVM_DEBUG(dbgs() << *CaseBB << "\n");
+  }
+  DTU.applyUpdates(Updates);
+  Updates.clear();
+
+  setProfMetadata(Func.getParent(), SI, CaseCounts, MaxCount);
+
+  LLVM_DEBUG(dbgs() << *BB << "\n");
+  LLVM_DEBUG(dbgs() << *DefaultBB << "\n");
+  LLVM_DEBUG(dbgs() << *MergeBB << "\n");
+
+  ORE.emit([&]() {
+    using namespace ore;
+    return OptimizationRemark(DEBUG_TYPE, "memopt-opt", MO.I)
+           << "optimized " << NV("Memop", MO.getName(TLI)) << " with count "
+           << NV("Count", SumForOpt) << " out of " << NV("Total", TotalCount)
+           << " for " << NV("Versions", Version) << " versions";
+  });
+
+  return true;
+}
+} // namespace
+
+// Common driver used by both pass-manager entry points: runs MemOPSizeOpt on F
+// unless the pass is disabled or F carries the OptimizeForSize attribute.
+static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI,
+                                OptimizationRemarkEmitter &ORE,
+                                DominatorTree *DT, TargetLibraryInfo &TLI) {
+  if (DisableMemOPOPT || F.hasFnAttribute(Attribute::OptimizeForSize))
+    return false;
+
+  MemOPSizeOpt Optimizer(F, BFI, ORE, DT, TLI);
+  Optimizer.perform();
+  return Optimizer.isChanged();
+}
+
+// Legacy-PM entry: gather analyses (dominator tree only if already available).
+bool PGOMemOPSizeOptLegacyPass::runOnFunction(Function &F) {
+  auto &BlockFreq = getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
+  auto &Remarks = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+  DominatorTree *DomTree = nullptr;
+  if (auto *Wrapper = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
+    DomTree = &Wrapper->getDomTree();
+  auto &LibInfo = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+  return PGOMemOPSizeOptImpl(F, BlockFreq, Remarks, DomTree, LibInfo);
+}
+
+namespace llvm {
+char &PGOMemOPSizeOptID = PGOMemOPSizeOptLegacyPass::ID;
+
+// New-PM entry.  The dominator tree is used only if it is already cached.
+PreservedAnalyses PGOMemOPSizeOpt::run(Function &F,
+                                       FunctionAnalysisManager &FAM) {
+  auto &BlockFreq = FAM.getResult<BlockFrequencyAnalysis>(F);
+  auto &Remarks = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+  auto *DomTree = FAM.getCachedResult<DominatorTreeAnalysis>(F);
+  auto &LibInfo = FAM.getResult<TargetLibraryAnalysis>(F);
+  if (!PGOMemOPSizeOptImpl(F, BlockFreq, Remarks, DomTree, LibInfo))
+    return PreservedAnalyses::all();
+  PreservedAnalyses Preserved;
+  Preserved.preserve<GlobalsAA>();
+  Preserved.preserve<DominatorTreeAnalysis>();
+  return Preserved;
+}
+} // namespace llvm
diff --git a/contrib/libs/llvm12/lib/Transforms/Instrumentation/PoisonChecking.cpp b/contrib/libs/llvm12/lib/Transforms/Instrumentation/PoisonChecking.cpp
index bb822f7b27..fc52672618 100644
--- a/contrib/libs/llvm12/lib/Transforms/Instrumentation/PoisonChecking.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Instrumentation/PoisonChecking.cpp
@@ -1,359 +1,359 @@
-//===- PoisonChecking.cpp - -----------------------------------------------===// 
-// 
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 
-// 
-//===----------------------------------------------------------------------===// 
-// 
-// Implements a transform pass which instruments IR such that poison semantics 
-// are made explicit.  That is, it provides a (possibly partial) executable 
-// semantics for every instruction w.r.t. poison as specified in the LLVM 
-// LangRef.  There are obvious parallels to the sanitizer tools, but this pass 
-// is focused purely on the semantics of LLVM IR, not any particular source 
-// language.   If you're looking for something to see if your C/C++ contains 
-// UB, this is not it. 
-// 
-// The rewritten semantics of each instruction will include the following 
-// components: 
-// 
-// 1) The original instruction, unmodified. 
-// 2) A propagation rule which translates dynamic information about the poison 
-//    state of each input to whether the dynamic output of the instruction 
-//    produces poison. 
-// 3) A creation rule which validates any poison producing flags on the 
-//    instruction itself (e.g. checks for overflow on nsw). 
-// 4) A check rule which traps (to a handler function) if this instruction must 
-//    execute undefined behavior given the poison state of it's inputs. 
-// 
-// This is a must analysis based transform; that is, the resulting code may 
-// produce a false negative result (not report UB when actually exists 
-// according to the LangRef spec), but should never produce a false positive 
-// (report UB where it doesn't exist). 
-// 
-// Use cases for this pass include: 
-// - Understanding (and testing!) the implications of the definition of poison 
-//   from the LangRef. 
-// - Validating the output of a IR fuzzer to ensure that all programs produced 
-//   are well defined on the specific input used. 
-// - Finding/confirming poison specific miscompiles by checking the poison 
-//   status of an input/IR pair is the same before and after an optimization 
-//   transform. 
-// - Checking that a bugpoint reduction does not introduce UB which didn't 
-//   exist in the original program being reduced. 
-// 
-// The major sources of inaccuracy are currently: 
-// - Most validation rules not yet implemented for instructions with poison 
-//   relavant flags.  At the moment, only nsw/nuw on add/sub are supported. 
-// - UB which is control dependent on a branch on poison is not yet 
-//   reported. Currently, only data flow dependence is modeled. 
-// - Poison which is propagated through memory is not modeled.  As such, 
-//   storing poison to memory and then reloading it will cause a false negative 
-//   as we consider the reloaded value to not be poisoned. 
-// - Poison propagation across function boundaries is not modeled.  At the 
-//   moment, all arguments and return values are assumed not to be poison. 
-// - Undef is not modeled.  In particular, the optimizer's freedom to pick 
-//   concrete values for undef bits so as to maximize potential for producing 
-//   poison is not modeled. 
-// 
-//===----------------------------------------------------------------------===// 
- 
-#include "llvm/Transforms/Instrumentation/PoisonChecking.h" 
-#include "llvm/ADT/DenseMap.h" 
-#include "llvm/ADT/Statistic.h" 
-#include "llvm/Analysis/MemoryBuiltins.h" 
-#include "llvm/Analysis/ValueTracking.h" 
-#include "llvm/IR/IRBuilder.h" 
-#include "llvm/IR/InstVisitor.h" 
-#include "llvm/IR/IntrinsicInst.h" 
-#include "llvm/IR/PatternMatch.h" 
-#include "llvm/Support/CommandLine.h" 
-#include "llvm/Support/Debug.h" 
- 
-using namespace llvm; 
- 
-#define DEBUG_TYPE "poison-checking" 
- 
-static cl::opt<bool> 
-LocalCheck("poison-checking-function-local", 
-           cl::init(false), 
-           cl::desc("Check that returns are non-poison (for testing)")); 
- 
- 
-static bool isConstantFalse(Value* V) { 
-  assert(V->getType()->isIntegerTy(1)); 
-  if (auto *CI = dyn_cast<ConstantInt>(V)) 
-    return CI->isZero(); 
-  return false; 
-} 
- 
-static Value *buildOrChain(IRBuilder<> &B, ArrayRef<Value*> Ops) { 
-  if (Ops.size() == 0) 
-    return B.getFalse(); 
-  unsigned i = 0; 
-  for (; i < Ops.size() && isConstantFalse(Ops[i]); i++) {} 
-  if (i == Ops.size()) 
-    return B.getFalse(); 
-  Value *Accum = Ops[i++]; 
-  for (; i < Ops.size(); i++) 
-    if (!isConstantFalse(Ops[i])) 
-      Accum = B.CreateOr(Accum, Ops[i]); 
-  return Accum; 
-} 
- 
-static void generateCreationChecksForBinOp(Instruction &I, 
-                                           SmallVectorImpl<Value*> &Checks) { 
-  assert(isa<BinaryOperator>(I)); 
- 
-  IRBuilder<> B(&I); 
-  Value *LHS = I.getOperand(0); 
-  Value *RHS = I.getOperand(1); 
-  switch (I.getOpcode()) { 
-  default: 
-    return; 
-  case Instruction::Add: { 
-    if (I.hasNoSignedWrap()) { 
-      auto *OverflowOp = 
-        B.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow, LHS, RHS); 
-      Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); 
-    } 
-    if (I.hasNoUnsignedWrap()) { 
-      auto *OverflowOp = 
-        B.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, LHS, RHS); 
-      Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); 
-    } 
-    break; 
-  } 
-  case Instruction::Sub: { 
-    if (I.hasNoSignedWrap()) { 
-      auto *OverflowOp = 
-        B.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow, LHS, RHS); 
-      Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); 
-    } 
-    if (I.hasNoUnsignedWrap()) { 
-      auto *OverflowOp = 
-        B.CreateBinaryIntrinsic(Intrinsic::usub_with_overflow, LHS, RHS); 
-      Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); 
-    } 
-    break; 
-  } 
-  case Instruction::Mul: { 
-    if (I.hasNoSignedWrap()) { 
-      auto *OverflowOp = 
-        B.CreateBinaryIntrinsic(Intrinsic::smul_with_overflow, LHS, RHS); 
-      Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); 
-    } 
-    if (I.hasNoUnsignedWrap()) { 
-      auto *OverflowOp = 
-        B.CreateBinaryIntrinsic(Intrinsic::umul_with_overflow, LHS, RHS); 
-      Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); 
-    } 
-    break; 
-  } 
-  case Instruction::UDiv: { 
-    if (I.isExact()) { 
-      auto *Check = 
-        B.CreateICmp(ICmpInst::ICMP_NE, B.CreateURem(LHS, RHS), 
-                     ConstantInt::get(LHS->getType(), 0)); 
-      Checks.push_back(Check); 
-    } 
-    break; 
-  } 
-  case Instruction::SDiv: { 
-    if (I.isExact()) { 
-      auto *Check = 
-        B.CreateICmp(ICmpInst::ICMP_NE, B.CreateSRem(LHS, RHS), 
-                     ConstantInt::get(LHS->getType(), 0)); 
-      Checks.push_back(Check); 
-    } 
-    break; 
-  } 
-  case Instruction::AShr: 
-  case Instruction::LShr: 
-  case Instruction::Shl: { 
-    Value *ShiftCheck = 
-      B.CreateICmp(ICmpInst::ICMP_UGE, RHS, 
-                   ConstantInt::get(RHS->getType(), 
-                                    LHS->getType()->getScalarSizeInBits())); 
-    Checks.push_back(ShiftCheck); 
-    break; 
-  } 
-  }; 
-} 
- 
-/// Given an instruction which can produce poison on non-poison inputs 
-/// (i.e. canCreatePoison returns true), generate runtime checks to produce 
-/// boolean indicators of when poison would result. 
-static void generateCreationChecks(Instruction &I, 
-                                   SmallVectorImpl<Value*> &Checks) { 
-  IRBuilder<> B(&I); 
-  if (isa<BinaryOperator>(I) && !I.getType()->isVectorTy()) 
-    generateCreationChecksForBinOp(I, Checks); 
- 
-  // Handle non-binops separately 
-  switch (I.getOpcode()) { 
-  default: 
-    // Note there are a couple of missing cases here, once implemented, this 
-    // should become an llvm_unreachable. 
-    break; 
-  case Instruction::ExtractElement: { 
-    Value *Vec = I.getOperand(0); 
-    auto *VecVTy = dyn_cast<FixedVectorType>(Vec->getType()); 
-    if (!VecVTy) 
-      break; 
-    Value *Idx = I.getOperand(1); 
-    unsigned NumElts = VecVTy->getNumElements(); 
-    Value *Check = 
-      B.CreateICmp(ICmpInst::ICMP_UGE, Idx, 
-                   ConstantInt::get(Idx->getType(), NumElts)); 
-    Checks.push_back(Check); 
-    break; 
-  } 
-  case Instruction::InsertElement: { 
-    Value *Vec = I.getOperand(0); 
-    auto *VecVTy = dyn_cast<FixedVectorType>(Vec->getType()); 
-    if (!VecVTy) 
-      break; 
-    Value *Idx = I.getOperand(2); 
-    unsigned NumElts = VecVTy->getNumElements(); 
-    Value *Check = 
-      B.CreateICmp(ICmpInst::ICMP_UGE, Idx, 
-                   ConstantInt::get(Idx->getType(), NumElts)); 
-    Checks.push_back(Check); 
-    break; 
-  } 
-  }; 
-} 
- 
-static Value *getPoisonFor(DenseMap<Value *, Value *> &ValToPoison, Value *V) { 
-  auto Itr = ValToPoison.find(V); 
-  if (Itr != ValToPoison.end()) 
-    return Itr->second; 
-  if (isa<Constant>(V)) { 
-    return ConstantInt::getFalse(V->getContext()); 
-  } 
-  // Return false for unknwon values - this implements a non-strict mode where 
-  // unhandled IR constructs are simply considered to never produce poison.  At 
-  // some point in the future, we probably want a "strict mode" for testing if 
-  // nothing else. 
-  return ConstantInt::getFalse(V->getContext()); 
-} 
- 
-static void CreateAssert(IRBuilder<> &B, Value *Cond) { 
-  assert(Cond->getType()->isIntegerTy(1)); 
-  if (auto *CI = dyn_cast<ConstantInt>(Cond)) 
-    if (CI->isAllOnesValue()) 
-      return; 
- 
-  Module *M = B.GetInsertBlock()->getModule(); 
-  M->getOrInsertFunction("__poison_checker_assert", 
-                         Type::getVoidTy(M->getContext()), 
-                         Type::getInt1Ty(M->getContext())); 
-  Function *TrapFunc = M->getFunction("__poison_checker_assert"); 
-  B.CreateCall(TrapFunc, Cond); 
-} 
- 
-static void CreateAssertNot(IRBuilder<> &B, Value *Cond) { 
-  assert(Cond->getType()->isIntegerTy(1)); 
-  CreateAssert(B, B.CreateNot(Cond)); 
-} 
- 
-static bool rewrite(Function &F) { 
-  auto * const Int1Ty = Type::getInt1Ty(F.getContext()); 
- 
-  DenseMap<Value *, Value *> ValToPoison; 
- 
-  for (BasicBlock &BB : F) 
-    for (auto I = BB.begin(); isa<PHINode>(&*I); I++) { 
-      auto *OldPHI = cast<PHINode>(&*I); 
-      auto *NewPHI = PHINode::Create(Int1Ty, OldPHI->getNumIncomingValues()); 
-      for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) 
-        NewPHI->addIncoming(UndefValue::get(Int1Ty), 
-                            OldPHI->getIncomingBlock(i)); 
-      NewPHI->insertBefore(OldPHI); 
-      ValToPoison[OldPHI] = NewPHI; 
-    } 
- 
-  for (BasicBlock &BB : F) 
-    for (Instruction &I : BB) { 
-      if (isa<PHINode>(I)) continue; 
- 
-      IRBuilder<> B(cast<Instruction>(&I)); 
- 
-      // Note: There are many more sources of documented UB, but this pass only 
-      // attempts to find UB triggered by propagation of poison. 
+//===- PoisonChecking.cpp - -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements a transform pass which instruments IR such that poison semantics
+// are made explicit.  That is, it provides a (possibly partial) executable
+// semantics for every instruction w.r.t. poison as specified in the LLVM
+// LangRef.  There are obvious parallels to the sanitizer tools, but this pass
+// is focused purely on the semantics of LLVM IR, not any particular source
+// language.   If you're looking for something to see if your C/C++ contains
+// UB, this is not it.
+//
+// The rewritten semantics of each instruction will include the following
+// components:
+//
+// 1) The original instruction, unmodified.
+// 2) A propagation rule which translates dynamic information about the poison
+//    state of each input to whether the dynamic output of the instruction
+//    produces poison.
+// 3) A creation rule which validates any poison producing flags on the
+//    instruction itself (e.g. checks for overflow on nsw).
+// 4) A check rule which traps (to a handler function) if this instruction must
+//    execute undefined behavior given the poison state of its inputs.
+//
+// This is a must analysis based transform; that is, the resulting code may
+// produce a false negative result (not report UB when it actually exists
+// according to the LangRef spec), but should never produce a false positive
+// (report UB where it doesn't exist).
+//
+// Use cases for this pass include:
+// - Understanding (and testing!) the implications of the definition of poison
+//   from the LangRef.
+// - Validating the output of an IR fuzzer to ensure that all programs produced
+//   are well defined on the specific input used.
+// - Finding/confirming poison specific miscompiles by checking the poison
+//   status of an input/IR pair is the same before and after an optimization
+//   transform.
+// - Checking that a bugpoint reduction does not introduce UB which didn't
+//   exist in the original program being reduced.
+//
+// The major sources of inaccuracy are currently:
+// - Most validation rules not yet implemented for instructions with poison
+//   relevant flags.  Only nsw/nuw on add/sub/mul and exact on udiv/sdiv work.
+// - UB which is control dependent on a branch on poison is not yet
+//   reported. Currently, only data flow dependence is modeled.
+// - Poison which is propagated through memory is not modeled.  As such,
+//   storing poison to memory and then reloading it will cause a false negative
+//   as we consider the reloaded value to not be poisoned.
+// - Poison propagation across function boundaries is not modeled.  At the
+//   moment, all arguments and return values are assumed not to be poison.
+// - Undef is not modeled.  In particular, the optimizer's freedom to pick
+//   concrete values for undef bits so as to maximize potential for producing
+//   poison is not modeled.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/PoisonChecking.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "poison-checking"
+
+static cl::opt<bool>
+LocalCheck("poison-checking-function-local",
+           cl::init(false), // off by default; rewrite() reads it for returns
+           cl::desc("Check that returns are non-poison (for testing)"));
+
+
+// True iff V (which must be an i1) is the constant 'false'.
+static bool isConstantFalse(Value* V) {
+  assert(V->getType()->isIntegerTy(1));
+  const auto *Const = dyn_cast<ConstantInt>(V);
+  return Const && Const->isZero();
+}
+
+// OR together, in order, every operand that is not a literal 'false'; yields
+// the constant 'false' when no such operand exists (including empty Ops).
+static Value *buildOrChain(IRBuilder<> &B, ArrayRef<Value*> Ops) {
+  Value *Result = nullptr;
+  for (Value *Op : Ops) {
+    if (isConstantFalse(Op))
+      continue;
+    Result = Result ? B.CreateOr(Result, Op) : Op;
+  }
+  if (!Result)
+    return B.getFalse();
+  return Result;
+}
+
+static void generateCreationChecksForBinOp(Instruction &I,
+                                           SmallVectorImpl<Value*> &Checks) {
+  assert(isa<BinaryOperator>(I));
+  // Emit, before I, one i1 per way I can create poison; append each to Checks.
+  IRBuilder<> B(&I);
+  Value *LHS = I.getOperand(0);
+  Value *RHS = I.getOperand(1);
+  switch (I.getOpcode()) {
+  default: // every other opcode gets no creation check here
+    return;
+  case Instruction::Add: { // nsw/nuw: overflow bit of {s,u}add.with.overflow
+    if (I.hasNoSignedWrap()) {
+      auto *OverflowOp =
+        B.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow, LHS, RHS);
+      Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); // field 1 = flag
+    }
+    if (I.hasNoUnsignedWrap()) {
+      auto *OverflowOp =
+        B.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, LHS, RHS);
+      Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
+    }
+    break;
+  }
+  case Instruction::Sub: { // nsw/nuw: overflow bit of {s,u}sub.with.overflow
+    if (I.hasNoSignedWrap()) {
+      auto *OverflowOp =
+        B.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow, LHS, RHS);
+      Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
+    }
+    if (I.hasNoUnsignedWrap()) {
+      auto *OverflowOp =
+        B.CreateBinaryIntrinsic(Intrinsic::usub_with_overflow, LHS, RHS);
+      Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
+    }
+    break;
+  }
+  case Instruction::Mul: { // nsw/nuw: overflow bit of {s,u}mul.with.overflow
+    if (I.hasNoSignedWrap()) {
+      auto *OverflowOp =
+        B.CreateBinaryIntrinsic(Intrinsic::smul_with_overflow, LHS, RHS);
+      Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
+    }
+    if (I.hasNoUnsignedWrap()) {
+      auto *OverflowOp =
+        B.CreateBinaryIntrinsic(Intrinsic::umul_with_overflow, LHS, RHS);
+      Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
+    }
+    break;
+  }
+  case Instruction::UDiv: { // exact: flagged when the remainder is non-zero
+    if (I.isExact()) {
+      auto *Check =
+        B.CreateICmp(ICmpInst::ICMP_NE, B.CreateURem(LHS, RHS),
+                     ConstantInt::get(LHS->getType(), 0));
+      Checks.push_back(Check);
+    }
+    break;
+  }
+  case Instruction::SDiv: { // exact: flagged when the remainder is non-zero
+    if (I.isExact()) {
+      auto *Check =
+        B.CreateICmp(ICmpInst::ICMP_NE, B.CreateSRem(LHS, RHS),
+                     ConstantInt::get(LHS->getType(), 0));
+      Checks.push_back(Check);
+    }
+    break;
+  }
+  case Instruction::AShr:
+  case Instruction::LShr:
+  case Instruction::Shl: { // flagged when shift amount >= bit width of LHS
+    Value *ShiftCheck =
+      B.CreateICmp(ICmpInst::ICMP_UGE, RHS,
+                   ConstantInt::get(RHS->getType(),
+                                    LHS->getType()->getScalarSizeInBits()));
+    Checks.push_back(ShiftCheck); // emitted regardless of any flags on I
+    break;
+  }
+  };
+}
+
+/// Given an instruction \p I which can produce poison on non-poison inputs
+/// (i.e. canCreatePoison returns true), emit runtime checks before it and
+/// append to \p Checks one i1 per condition under which poison would result.
+static void generateCreationChecks(Instruction &I,
+                                   SmallVectorImpl<Value*> &Checks) {
+  IRBuilder<> B(&I);
+  if (isa<BinaryOperator>(I) && !I.getType()->isVectorTy()) // scalar binops only
+    generateCreationChecksForBinOp(I, Checks);
+
+  // Handle non-binops separately
+  switch (I.getOpcode()) {
+  default:
+    // Note there are a couple of missing cases here, once implemented, this
+    // should become an llvm_unreachable.
+    break;
+  case Instruction::ExtractElement: { // flagged when index >= element count
+    Value *Vec = I.getOperand(0);
+    auto *VecVTy = dyn_cast<FixedVectorType>(Vec->getType());
+    if (!VecVTy) // not a fixed-width vector: no check is generated
+      break;
+    Value *Idx = I.getOperand(1); // the index is operand 1 here
+    unsigned NumElts = VecVTy->getNumElements();
+    Value *Check =
+      B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
+                   ConstantInt::get(Idx->getType(), NumElts)); // NOTE(review): assumes NumElts fits Idx's type - confirm
+    Checks.push_back(Check);
+    break;
+  }
+  case Instruction::InsertElement: { // flagged when index >= element count
+    Value *Vec = I.getOperand(0);
+    auto *VecVTy = dyn_cast<FixedVectorType>(Vec->getType());
+    if (!VecVTy) // not a fixed-width vector: no check is generated
+      break;
+    Value *Idx = I.getOperand(2); // the index is operand 2 here
+    unsigned NumElts = VecVTy->getNumElements();
+    Value *Check =
+      B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
+                   ConstantInt::get(Idx->getType(), NumElts)); // NOTE(review): assumes NumElts fits Idx's type - confirm
+    Checks.push_back(Check);
+    break;
+  }
+  };
+}
+
+// Look up the i1 poison flag recorded for V; anything without an entry
+// (constants included) is reported as the constant 'false'.
+static Value *getPoisonFor(DenseMap<Value *, Value *> &ValToPoison, Value *V) {
+  auto Entry = ValToPoison.find(V);
+  const bool Tracked = Entry != ValToPoison.end();
+  // Return false for unknown values - this implements a non-strict mode where
+  // unhandled IR constructs are simply considered to never produce poison.  At
+  // some point in the future, we probably want a "strict mode" for testing if
+  // nothing else.
+  if (!Tracked)
+    return ConstantInt::getFalse(V->getContext());
+  return Entry->second;
+}
+
+// Emit a call to __poison_checker_assert(Cond) unless Cond is constant true.
+static void CreateAssert(IRBuilder<> &B, Value *Cond) {
+  assert(Cond->getType()->isIntegerTy(1));
+  if (auto *CI = dyn_cast<ConstantInt>(Cond))
+    if (CI->isAllOnesValue())
+      return;
+  Module *M = B.GetInsertBlock()->getModule();
+  // Use the returned callee; a second by-name lookup can be null or mistyped.
+  FunctionCallee TrapFunc = M->getOrInsertFunction(
+      "__poison_checker_assert", Type::getVoidTy(M->getContext()),
+      Type::getInt1Ty(M->getContext()));
+  B.CreateCall(TrapFunc, Cond);
+}
+
+static void CreateAssertNot(IRBuilder<> &B, Value *Cond) { // Cond must be i1
+  assert(Cond->getType()->isIntegerTy(1));
+  CreateAssert(B, B.CreateNot(Cond)); // build !Cond, then assert it via CreateAssert
+}
+
+static bool rewrite(Function &F) {
+  auto * const Int1Ty = Type::getInt1Ty(F.getContext());
+
+  DenseMap<Value *, Value *> ValToPoison;
+
+  for (BasicBlock &BB : F)
+    for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
+      auto *OldPHI = cast<PHINode>(&*I);
+      auto *NewPHI = PHINode::Create(Int1Ty, OldPHI->getNumIncomingValues());
+      for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++)
+        NewPHI->addIncoming(UndefValue::get(Int1Ty),
+                            OldPHI->getIncomingBlock(i));
+      NewPHI->insertBefore(OldPHI);
+      ValToPoison[OldPHI] = NewPHI;
+    }
+
+  for (BasicBlock &BB : F)
+    for (Instruction &I : BB) {
+      if (isa<PHINode>(I)) continue;
+
+      IRBuilder<> B(cast<Instruction>(&I));
+
+      // Note: There are many more sources of documented UB, but this pass only
+      // attempts to find UB triggered by propagation of poison.
       SmallPtrSet<const Value *, 4> NonPoisonOps;
       getGuaranteedNonPoisonOps(&I, NonPoisonOps);
       for (const Value *Op : NonPoisonOps)
         CreateAssertNot(B, getPoisonFor(ValToPoison, const_cast<Value *>(Op)));
- 
-      if (LocalCheck) 
-        if (auto *RI = dyn_cast<ReturnInst>(&I)) 
-          if (RI->getNumOperands() != 0) { 
-            Value *Op = RI->getOperand(0); 
-            CreateAssertNot(B, getPoisonFor(ValToPoison, Op)); 
-          } 
- 
-      SmallVector<Value*, 4> Checks; 
+
+      if (LocalCheck)
+        if (auto *RI = dyn_cast<ReturnInst>(&I))
+          if (RI->getNumOperands() != 0) {
+            Value *Op = RI->getOperand(0);
+            CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
+          }
+
+      SmallVector<Value*, 4> Checks;
       if (propagatesPoison(cast<Operator>(&I)))
-        for (Value *V : I.operands()) 
-          Checks.push_back(getPoisonFor(ValToPoison, V)); 
- 
+        for (Value *V : I.operands())
+          Checks.push_back(getPoisonFor(ValToPoison, V));
+
       if (canCreatePoison(cast<Operator>(&I)))
-        generateCreationChecks(I, Checks); 
-      ValToPoison[&I] = buildOrChain(B, Checks); 
-    } 
- 
-  for (BasicBlock &BB : F) 
-    for (auto I = BB.begin(); isa<PHINode>(&*I); I++) { 
-      auto *OldPHI = cast<PHINode>(&*I); 
-      if (!ValToPoison.count(OldPHI)) 
-        continue; // skip the newly inserted phis 
-      auto *NewPHI = cast<PHINode>(ValToPoison[OldPHI]); 
-      for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) { 
-        auto *OldVal = OldPHI->getIncomingValue(i); 
-        NewPHI->setIncomingValue(i, getPoisonFor(ValToPoison, OldVal)); 
-      } 
-    } 
-  return true; 
-} 
- 
- 
-PreservedAnalyses PoisonCheckingPass::run(Module &M, 
-                                          ModuleAnalysisManager &AM) { 
-  bool Changed = false; 
-  for (auto &F : M) 
-    Changed |= rewrite(F); 
- 
-  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); 
-} 
- 
-PreservedAnalyses PoisonCheckingPass::run(Function &F, 
-                                          FunctionAnalysisManager &AM) { 
-  return rewrite(F) ? PreservedAnalyses::none() : PreservedAnalyses::all(); 
-} 
- 
-/* Major TODO Items: 
-   - Control dependent poison UB 
-   - Strict mode - (i.e. must analyze every operand) 
-     - Poison through memory 
-     - Function ABIs 
-     - Full coverage of intrinsics, etc.. (ouch) 
- 
-   Instructions w/Unclear Semantics: 
-   - shufflevector - It would seem reasonable for an out of bounds mask element 
-     to produce poison, but the LangRef does not state. 
-   - all binary ops w/vector operands - The likely interpretation would be that 
-     any element overflowing should produce poison for the entire result, but 
-     the LangRef does not state. 
-   - Floating point binary ops w/fmf flags other than (nnan, noinfs).  It seems 
-     strange that only certian flags should be documented as producing poison. 
- 
-   Cases of clear poison semantics not yet implemented: 
-   - Exact flags on ashr/lshr produce poison 
-   - NSW/NUW flags on shl produce poison 
-   - Inbounds flag on getelementptr produce poison 
-   - fptosi/fptoui (out of bounds input) produce poison 
-   - Scalable vector types for insertelement/extractelement 
-   - Floating point binary ops w/fmf nnan/noinfs flags produce poison 
- */ 
+        generateCreationChecks(I, Checks);
+      ValToPoison[&I] = buildOrChain(B, Checks);
+    }
+
+  for (BasicBlock &BB : F)
+    for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
+      auto *OldPHI = cast<PHINode>(&*I);
+      if (!ValToPoison.count(OldPHI))
+        continue; // skip the newly inserted phis
+      auto *NewPHI = cast<PHINode>(ValToPoison[OldPHI]);
+      for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) {
+        auto *OldVal = OldPHI->getIncomingValue(i);
+        NewPHI->setIncomingValue(i, getPoisonFor(ValToPoison, OldVal));
+      }
+    }
+  return true;
+}
+
+
+// Module-level entry point: runs rewrite() on every function in M.
+PreservedAnalyses PoisonCheckingPass::run(Module &M,
+                                          ModuleAnalysisManager &AM) {
+  bool AnyRewritten = false;
+  for (Function &Fn : M)
+    AnyRewritten = rewrite(Fn) || AnyRewritten;
+  return AnyRewritten ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+
+PreservedAnalyses PoisonCheckingPass::run(Function &F,
+                                          FunctionAnalysisManager &AM) { // AM is not used
+  return rewrite(F) ? PreservedAnalyses::none() : PreservedAnalyses::all(); // nothing preserved if rewrite() reports a change
+}
+
+/* Major TODO Items:
+   - Control dependent poison UB
+   - Strict mode - (i.e. must analyze every operand)
+     - Poison through memory
+     - Function ABIs
+     - Full coverage of intrinsics, etc.. (ouch)
+
+   Instructions w/Unclear Semantics:
+   - shufflevector - It would seem reasonable for an out of bounds mask element
+     to produce poison, but the LangRef does not state.
+   - all binary ops w/vector operands - The likely interpretation would be that
+     any element overflowing should produce poison for the entire result, but
+     the LangRef does not state.
+   - Floating point binary ops w/fmf flags other than (nnan, noinfs).  It seems
+     strange that only certain flags should be documented as producing poison.
+
+   Cases of clear poison semantics not yet implemented:
+   - Exact flags on ashr/lshr produce poison
+   - NSW/NUW flags on shl produce poison
+   - Inbounds flag on getelementptr produce poison
+   - fptosi/fptoui (out of bounds input) produce poison
+   - Scalable vector types for insertelement/extractelement
+   - Floating point binary ops w/fmf nnan/noinfs flags produce poison
+ */
diff --git a/contrib/libs/llvm12/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/contrib/libs/llvm12/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index 656cf6267b..2d4b079394 100644
--- a/contrib/libs/llvm12/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -1,50 +1,50 @@
-//===-- SanitizerCoverage.cpp - coverage instrumentation for sanitizers ---===// 
-// 
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 
-// 
-//===----------------------------------------------------------------------===// 
-// 
-// Coverage instrumentation done on LLVM IR level, works with Sanitizers. 
-// 
-//===----------------------------------------------------------------------===// 
- 
-#include "llvm/Transforms/Instrumentation/SanitizerCoverage.h" 
-#include "llvm/ADT/ArrayRef.h" 
-#include "llvm/ADT/SmallVector.h" 
-#include "llvm/Analysis/EHPersonalities.h" 
-#include "llvm/Analysis/PostDominators.h" 
-#include "llvm/IR/CFG.h" 
-#include "llvm/IR/Constant.h" 
-#include "llvm/IR/DataLayout.h" 
-#include "llvm/IR/DebugInfo.h" 
-#include "llvm/IR/Dominators.h" 
-#include "llvm/IR/Function.h" 
-#include "llvm/IR/GlobalVariable.h" 
-#include "llvm/IR/IRBuilder.h" 
-#include "llvm/IR/InlineAsm.h" 
-#include "llvm/IR/IntrinsicInst.h" 
-#include "llvm/IR/Intrinsics.h" 
-#include "llvm/IR/LLVMContext.h" 
-#include "llvm/IR/MDBuilder.h" 
-#include "llvm/IR/Mangler.h" 
-#include "llvm/IR/Module.h" 
-#include "llvm/IR/Type.h" 
-#include "llvm/InitializePasses.h" 
-#include "llvm/Support/CommandLine.h" 
-#include "llvm/Support/Debug.h" 
-#include "llvm/Support/SpecialCaseList.h" 
-#include "llvm/Support/VirtualFileSystem.h" 
-#include "llvm/Support/raw_ostream.h" 
-#include "llvm/Transforms/Instrumentation.h" 
-#include "llvm/Transforms/Utils/BasicBlockUtils.h" 
-#include "llvm/Transforms/Utils/ModuleUtils.h" 
- 
-using namespace llvm; 
- 
-#define DEBUG_TYPE "sancov" 
- 
+//===-- SanitizerCoverage.cpp - coverage instrumentation for sanitizers ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Coverage instrumentation done on LLVM IR level, works with Sanitizers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/SanitizerCoverage.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/SpecialCaseList.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "sancov"
+
 const char SanCovTracePCIndirName[] = "__sanitizer_cov_trace_pc_indir";
 const char SanCovTracePCName[] = "__sanitizer_cov_trace_pc";
 const char SanCovTraceCmp1[] = "__sanitizer_cov_trace_cmp1";
@@ -60,935 +60,935 @@ const char SanCovTraceDiv8[] = "__sanitizer_cov_trace_div8";
 const char SanCovTraceGep[] = "__sanitizer_cov_trace_gep";
 const char SanCovTraceSwitchName[] = "__sanitizer_cov_trace_switch";
 const char SanCovModuleCtorTracePcGuardName[] =
-    "sancov.module_ctor_trace_pc_guard"; 
+    "sancov.module_ctor_trace_pc_guard";
 const char SanCovModuleCtor8bitCountersName[] =
-    "sancov.module_ctor_8bit_counters"; 
+    "sancov.module_ctor_8bit_counters";
 const char SanCovModuleCtorBoolFlagName[] = "sancov.module_ctor_bool_flag";
-static const uint64_t SanCtorAndDtorPriority = 2; 
- 
+static const uint64_t SanCtorAndDtorPriority = 2;
+
 const char SanCovTracePCGuardName[] = "__sanitizer_cov_trace_pc_guard";
 const char SanCovTracePCGuardInitName[] = "__sanitizer_cov_trace_pc_guard_init";
 const char SanCov8bitCountersInitName[] = "__sanitizer_cov_8bit_counters_init";
 const char SanCovBoolFlagInitName[] = "__sanitizer_cov_bool_flag_init";
 const char SanCovPCsInitName[] = "__sanitizer_cov_pcs_init";
- 
+
 const char SanCovGuardsSectionName[] = "sancov_guards";
 const char SanCovCountersSectionName[] = "sancov_cntrs";
 const char SanCovBoolFlagSectionName[] = "sancov_bools";
 const char SanCovPCsSectionName[] = "sancov_pcs";
- 
+
 const char SanCovLowestStackName[] = "__sancov_lowest_stack";
- 
-static cl::opt<int> ClCoverageLevel( 
-    "sanitizer-coverage-level", 
-    cl::desc("Sanitizer Coverage. 0: none, 1: entry block, 2: all blocks, " 
-             "3: all blocks and critical edges"), 
-    cl::Hidden, cl::init(0)); 
- 
-static cl::opt<bool> ClTracePC("sanitizer-coverage-trace-pc", 
-                               cl::desc("Experimental pc tracing"), cl::Hidden, 
-                               cl::init(false)); 
- 
-static cl::opt<bool> ClTracePCGuard("sanitizer-coverage-trace-pc-guard", 
-                                    cl::desc("pc tracing with a guard"), 
-                                    cl::Hidden, cl::init(false)); 
- 
-// If true, we create a global variable that contains PCs of all instrumented 
-// BBs, put this global into a named section, and pass this section's bounds 
-// to __sanitizer_cov_pcs_init. 
-// This way the coverage instrumentation does not need to acquire the PCs 
-// at run-time. Works with trace-pc-guard, inline-8bit-counters, and 
-// inline-bool-flag. 
-static cl::opt<bool> ClCreatePCTable("sanitizer-coverage-pc-table", 
-                                     cl::desc("create a static PC table"), 
-                                     cl::Hidden, cl::init(false)); 
- 
-static cl::opt<bool> 
-    ClInline8bitCounters("sanitizer-coverage-inline-8bit-counters", 
-                         cl::desc("increments 8-bit counter for every edge"), 
-                         cl::Hidden, cl::init(false)); 
- 
-static cl::opt<bool> 
-    ClInlineBoolFlag("sanitizer-coverage-inline-bool-flag", 
-                     cl::desc("sets a boolean flag for every edge"), cl::Hidden, 
-                     cl::init(false)); 
- 
-static cl::opt<bool> 
-    ClCMPTracing("sanitizer-coverage-trace-compares", 
-                 cl::desc("Tracing of CMP and similar instructions"), 
-                 cl::Hidden, cl::init(false)); 
- 
-static cl::opt<bool> ClDIVTracing("sanitizer-coverage-trace-divs", 
-                                  cl::desc("Tracing of DIV instructions"), 
-                                  cl::Hidden, cl::init(false)); 
- 
-static cl::opt<bool> ClGEPTracing("sanitizer-coverage-trace-geps", 
-                                  cl::desc("Tracing of GEP instructions"), 
-                                  cl::Hidden, cl::init(false)); 
- 
-static cl::opt<bool> 
-    ClPruneBlocks("sanitizer-coverage-prune-blocks", 
-                  cl::desc("Reduce the number of instrumented blocks"), 
-                  cl::Hidden, cl::init(true)); 
- 
-static cl::opt<bool> ClStackDepth("sanitizer-coverage-stack-depth", 
-                                  cl::desc("max stack depth tracing"), 
-                                  cl::Hidden, cl::init(false)); 
- 
-namespace { 
- 
-SanitizerCoverageOptions getOptions(int LegacyCoverageLevel) { 
-  SanitizerCoverageOptions Res; 
-  switch (LegacyCoverageLevel) { 
-  case 0: 
-    Res.CoverageType = SanitizerCoverageOptions::SCK_None; 
-    break; 
-  case 1: 
-    Res.CoverageType = SanitizerCoverageOptions::SCK_Function; 
-    break; 
-  case 2: 
-    Res.CoverageType = SanitizerCoverageOptions::SCK_BB; 
-    break; 
-  case 3: 
-    Res.CoverageType = SanitizerCoverageOptions::SCK_Edge; 
-    break; 
-  case 4: 
-    Res.CoverageType = SanitizerCoverageOptions::SCK_Edge; 
-    Res.IndirectCalls = true; 
-    break; 
-  } 
-  return Res; 
-} 
- 
-SanitizerCoverageOptions OverrideFromCL(SanitizerCoverageOptions Options) { 
-  // Sets CoverageType and IndirectCalls. 
-  SanitizerCoverageOptions CLOpts = getOptions(ClCoverageLevel); 
-  Options.CoverageType = std::max(Options.CoverageType, CLOpts.CoverageType); 
-  Options.IndirectCalls |= CLOpts.IndirectCalls; 
-  Options.TraceCmp |= ClCMPTracing; 
-  Options.TraceDiv |= ClDIVTracing; 
-  Options.TraceGep |= ClGEPTracing; 
-  Options.TracePC |= ClTracePC; 
-  Options.TracePCGuard |= ClTracePCGuard; 
-  Options.Inline8bitCounters |= ClInline8bitCounters; 
-  Options.InlineBoolFlag |= ClInlineBoolFlag; 
-  Options.PCTable |= ClCreatePCTable; 
-  Options.NoPrune |= !ClPruneBlocks; 
-  Options.StackDepth |= ClStackDepth; 
-  if (!Options.TracePCGuard && !Options.TracePC && 
-      !Options.Inline8bitCounters && !Options.StackDepth && 
-      !Options.InlineBoolFlag) 
-    Options.TracePCGuard = true; // TracePCGuard is default. 
-  return Options; 
-} 
- 
-using DomTreeCallback = function_ref<const DominatorTree *(Function &F)>; 
-using PostDomTreeCallback = 
-    function_ref<const PostDominatorTree *(Function &F)>; 
- 
-class ModuleSanitizerCoverage { 
-public: 
-  ModuleSanitizerCoverage( 
-      const SanitizerCoverageOptions &Options = SanitizerCoverageOptions(), 
-      const SpecialCaseList *Allowlist = nullptr, 
-      const SpecialCaseList *Blocklist = nullptr) 
-      : Options(OverrideFromCL(Options)), Allowlist(Allowlist), 
-        Blocklist(Blocklist) {} 
-  bool instrumentModule(Module &M, DomTreeCallback DTCallback, 
-                        PostDomTreeCallback PDTCallback); 
- 
-private: 
-  void instrumentFunction(Function &F, DomTreeCallback DTCallback, 
-                          PostDomTreeCallback PDTCallback); 
-  void InjectCoverageForIndirectCalls(Function &F, 
-                                      ArrayRef<Instruction *> IndirCalls); 
-  void InjectTraceForCmp(Function &F, ArrayRef<Instruction *> CmpTraceTargets); 
-  void InjectTraceForDiv(Function &F, 
-                         ArrayRef<BinaryOperator *> DivTraceTargets); 
-  void InjectTraceForGep(Function &F, 
-                         ArrayRef<GetElementPtrInst *> GepTraceTargets); 
-  void InjectTraceForSwitch(Function &F, 
-                            ArrayRef<Instruction *> SwitchTraceTargets); 
-  bool InjectCoverage(Function &F, ArrayRef<BasicBlock *> AllBlocks, 
-                      bool IsLeafFunc = true); 
-  GlobalVariable *CreateFunctionLocalArrayInSection(size_t NumElements, 
-                                                    Function &F, Type *Ty, 
-                                                    const char *Section); 
-  GlobalVariable *CreatePCArray(Function &F, ArrayRef<BasicBlock *> AllBlocks); 
-  void CreateFunctionLocalArrays(Function &F, ArrayRef<BasicBlock *> AllBlocks); 
-  void InjectCoverageAtBlock(Function &F, BasicBlock &BB, size_t Idx, 
-                             bool IsLeafFunc = true); 
-  Function *CreateInitCallsForSections(Module &M, const char *CtorName, 
-                                       const char *InitFunctionName, Type *Ty, 
-                                       const char *Section); 
-  std::pair<Value *, Value *> CreateSecStartEnd(Module &M, const char *Section, 
-                                                Type *Ty); 
- 
-  void SetNoSanitizeMetadata(Instruction *I) { 
-    I->setMetadata(I->getModule()->getMDKindID("nosanitize"), 
-                   MDNode::get(*C, None)); 
-  } 
- 
-  std::string getSectionName(const std::string &Section) const; 
-  std::string getSectionStart(const std::string &Section) const; 
-  std::string getSectionEnd(const std::string &Section) const; 
-  FunctionCallee SanCovTracePCIndir; 
-  FunctionCallee SanCovTracePC, SanCovTracePCGuard; 
-  FunctionCallee SanCovTraceCmpFunction[4]; 
-  FunctionCallee SanCovTraceConstCmpFunction[4]; 
-  FunctionCallee SanCovTraceDivFunction[2]; 
-  FunctionCallee SanCovTraceGepFunction; 
-  FunctionCallee SanCovTraceSwitchFunction; 
-  GlobalVariable *SanCovLowestStack; 
-  Type *IntptrTy, *IntptrPtrTy, *Int64Ty, *Int64PtrTy, *Int32Ty, *Int32PtrTy, 
-      *Int16Ty, *Int8Ty, *Int8PtrTy, *Int1Ty, *Int1PtrTy; 
-  Module *CurModule; 
-  std::string CurModuleUniqueId; 
-  Triple TargetTriple; 
-  LLVMContext *C; 
-  const DataLayout *DL; 
- 
-  GlobalVariable *FunctionGuardArray;  // for trace-pc-guard. 
-  GlobalVariable *Function8bitCounterArray;  // for inline-8bit-counters. 
-  GlobalVariable *FunctionBoolArray;         // for inline-bool-flag. 
-  GlobalVariable *FunctionPCsArray;  // for pc-table. 
-  SmallVector<GlobalValue *, 20> GlobalsToAppendToUsed; 
-  SmallVector<GlobalValue *, 20> GlobalsToAppendToCompilerUsed; 
- 
-  SanitizerCoverageOptions Options; 
- 
-  const SpecialCaseList *Allowlist; 
-  const SpecialCaseList *Blocklist; 
-}; 
- 
-class ModuleSanitizerCoverageLegacyPass : public ModulePass { 
-public: 
-  ModuleSanitizerCoverageLegacyPass( 
-      const SanitizerCoverageOptions &Options = SanitizerCoverageOptions(), 
-      const std::vector<std::string> &AllowlistFiles = 
-          std::vector<std::string>(), 
-      const std::vector<std::string> &BlocklistFiles = 
-          std::vector<std::string>()) 
-      : ModulePass(ID), Options(Options) { 
-    if (AllowlistFiles.size() > 0) 
-      Allowlist = SpecialCaseList::createOrDie(AllowlistFiles, 
-                                               *vfs::getRealFileSystem()); 
-    if (BlocklistFiles.size() > 0) 
-      Blocklist = SpecialCaseList::createOrDie(BlocklistFiles, 
-                                               *vfs::getRealFileSystem()); 
-    initializeModuleSanitizerCoverageLegacyPassPass( 
-        *PassRegistry::getPassRegistry()); 
-  } 
-  bool runOnModule(Module &M) override { 
-    ModuleSanitizerCoverage ModuleSancov(Options, Allowlist.get(), 
-                                         Blocklist.get()); 
-    auto DTCallback = [this](Function &F) -> const DominatorTree * { 
-      return &this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree(); 
-    }; 
-    auto PDTCallback = [this](Function &F) -> const PostDominatorTree * { 
-      return &this->getAnalysis<PostDominatorTreeWrapperPass>(F) 
-                  .getPostDomTree(); 
-    }; 
-    return ModuleSancov.instrumentModule(M, DTCallback, PDTCallback); 
-  } 
- 
-  static char ID; // Pass identification, replacement for typeid 
-  StringRef getPassName() const override { return "ModuleSanitizerCoverage"; } 
- 
-  void getAnalysisUsage(AnalysisUsage &AU) const override { 
-    AU.addRequired<DominatorTreeWrapperPass>(); 
-    AU.addRequired<PostDominatorTreeWrapperPass>(); 
-  } 
- 
-private: 
-  SanitizerCoverageOptions Options; 
- 
-  std::unique_ptr<SpecialCaseList> Allowlist; 
-  std::unique_ptr<SpecialCaseList> Blocklist; 
-}; 
- 
-} // namespace 
- 
-PreservedAnalyses ModuleSanitizerCoveragePass::run(Module &M, 
-                                                   ModuleAnalysisManager &MAM) { 
-  ModuleSanitizerCoverage ModuleSancov(Options, Allowlist.get(), 
-                                       Blocklist.get()); 
-  auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 
-  auto DTCallback = [&FAM](Function &F) -> const DominatorTree * { 
-    return &FAM.getResult<DominatorTreeAnalysis>(F); 
-  }; 
-  auto PDTCallback = [&FAM](Function &F) -> const PostDominatorTree * { 
-    return &FAM.getResult<PostDominatorTreeAnalysis>(F); 
-  }; 
-  if (ModuleSancov.instrumentModule(M, DTCallback, PDTCallback)) 
-    return PreservedAnalyses::none(); 
-  return PreservedAnalyses::all(); 
-} 
- 
-std::pair<Value *, Value *> 
-ModuleSanitizerCoverage::CreateSecStartEnd(Module &M, const char *Section, 
-                                           Type *Ty) { 
+
+static cl::opt<int> ClCoverageLevel(
+    "sanitizer-coverage-level",
+    cl::desc("Sanitizer Coverage. 0: none, 1: entry block, 2: all blocks, "
+             "3: all blocks and critical edges"),
+    cl::Hidden, cl::init(0));
+
+static cl::opt<bool> ClTracePC("sanitizer-coverage-trace-pc",
+                               cl::desc("Experimental pc tracing"), cl::Hidden,
+                               cl::init(false));
+
+static cl::opt<bool> ClTracePCGuard("sanitizer-coverage-trace-pc-guard",
+                                    cl::desc("pc tracing with a guard"),
+                                    cl::Hidden, cl::init(false));
+
+// If true, we create a global variable that contains PCs of all instrumented
+// BBs, put this global into a named section, and pass this section's bounds
+// to __sanitizer_cov_pcs_init.
+// This way the coverage instrumentation does not need to acquire the PCs
+// at run-time. Works with trace-pc-guard, inline-8bit-counters, and
+// inline-bool-flag.
+static cl::opt<bool> ClCreatePCTable("sanitizer-coverage-pc-table",
+                                     cl::desc("create a static PC table"),
+                                     cl::Hidden, cl::init(false));
+
+static cl::opt<bool>
+    ClInline8bitCounters("sanitizer-coverage-inline-8bit-counters",
+                         cl::desc("increments 8-bit counter for every edge"),
+                         cl::Hidden, cl::init(false));
+
+static cl::opt<bool>
+    ClInlineBoolFlag("sanitizer-coverage-inline-bool-flag",
+                     cl::desc("sets a boolean flag for every edge"), cl::Hidden,
+                     cl::init(false));
+
+static cl::opt<bool>
+    ClCMPTracing("sanitizer-coverage-trace-compares",
+                 cl::desc("Tracing of CMP and similar instructions"),
+                 cl::Hidden, cl::init(false));
+
+static cl::opt<bool> ClDIVTracing("sanitizer-coverage-trace-divs",
+                                  cl::desc("Tracing of DIV instructions"),
+                                  cl::Hidden, cl::init(false));
+
+static cl::opt<bool> ClGEPTracing("sanitizer-coverage-trace-geps",
+                                  cl::desc("Tracing of GEP instructions"),
+                                  cl::Hidden, cl::init(false));
+
+static cl::opt<bool>
+    ClPruneBlocks("sanitizer-coverage-prune-blocks",
+                  cl::desc("Reduce the number of instrumented blocks"),
+                  cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClStackDepth("sanitizer-coverage-stack-depth",
+                                  cl::desc("max stack depth tracing"),
+                                  cl::Hidden, cl::init(false));
+
+namespace {
+
+SanitizerCoverageOptions getOptions(int LegacyCoverageLevel) {
+  SanitizerCoverageOptions Res;
+  switch (LegacyCoverageLevel) {
+  case 0:
+    Res.CoverageType = SanitizerCoverageOptions::SCK_None;
+    break;
+  case 1:
+    Res.CoverageType = SanitizerCoverageOptions::SCK_Function;
+    break;
+  case 2:
+    Res.CoverageType = SanitizerCoverageOptions::SCK_BB;
+    break;
+  case 3:
+    Res.CoverageType = SanitizerCoverageOptions::SCK_Edge;
+    break;
+  case 4:
+    Res.CoverageType = SanitizerCoverageOptions::SCK_Edge;
+    Res.IndirectCalls = true;
+    break;
+  }
+  return Res;
+}
+
+SanitizerCoverageOptions OverrideFromCL(SanitizerCoverageOptions Options) {
+  // Sets CoverageType and IndirectCalls.
+  SanitizerCoverageOptions CLOpts = getOptions(ClCoverageLevel);
+  Options.CoverageType = std::max(Options.CoverageType, CLOpts.CoverageType);
+  Options.IndirectCalls |= CLOpts.IndirectCalls;
+  Options.TraceCmp |= ClCMPTracing;
+  Options.TraceDiv |= ClDIVTracing;
+  Options.TraceGep |= ClGEPTracing;
+  Options.TracePC |= ClTracePC;
+  Options.TracePCGuard |= ClTracePCGuard;
+  Options.Inline8bitCounters |= ClInline8bitCounters;
+  Options.InlineBoolFlag |= ClInlineBoolFlag;
+  Options.PCTable |= ClCreatePCTable;
+  Options.NoPrune |= !ClPruneBlocks;
+  Options.StackDepth |= ClStackDepth;
+  if (!Options.TracePCGuard && !Options.TracePC &&
+      !Options.Inline8bitCounters && !Options.StackDepth &&
+      !Options.InlineBoolFlag)
+    Options.TracePCGuard = true; // TracePCGuard is default.
+  return Options;
+}
+
+using DomTreeCallback = function_ref<const DominatorTree *(Function &F)>;
+using PostDomTreeCallback =
+    function_ref<const PostDominatorTree *(Function &F)>;
+
+class ModuleSanitizerCoverage {
+public:
+  ModuleSanitizerCoverage(
+      const SanitizerCoverageOptions &Options = SanitizerCoverageOptions(),
+      const SpecialCaseList *Allowlist = nullptr,
+      const SpecialCaseList *Blocklist = nullptr)
+      : Options(OverrideFromCL(Options)), Allowlist(Allowlist),
+        Blocklist(Blocklist) {}
+  bool instrumentModule(Module &M, DomTreeCallback DTCallback,
+                        PostDomTreeCallback PDTCallback);
+
+private:
+  void instrumentFunction(Function &F, DomTreeCallback DTCallback,
+                          PostDomTreeCallback PDTCallback);
+  void InjectCoverageForIndirectCalls(Function &F,
+                                      ArrayRef<Instruction *> IndirCalls);
+  void InjectTraceForCmp(Function &F, ArrayRef<Instruction *> CmpTraceTargets);
+  void InjectTraceForDiv(Function &F,
+                         ArrayRef<BinaryOperator *> DivTraceTargets);
+  void InjectTraceForGep(Function &F,
+                         ArrayRef<GetElementPtrInst *> GepTraceTargets);
+  void InjectTraceForSwitch(Function &F,
+                            ArrayRef<Instruction *> SwitchTraceTargets);
+  bool InjectCoverage(Function &F, ArrayRef<BasicBlock *> AllBlocks,
+                      bool IsLeafFunc = true);
+  GlobalVariable *CreateFunctionLocalArrayInSection(size_t NumElements,
+                                                    Function &F, Type *Ty,
+                                                    const char *Section);
+  GlobalVariable *CreatePCArray(Function &F, ArrayRef<BasicBlock *> AllBlocks);
+  void CreateFunctionLocalArrays(Function &F, ArrayRef<BasicBlock *> AllBlocks);
+  void InjectCoverageAtBlock(Function &F, BasicBlock &BB, size_t Idx,
+                             bool IsLeafFunc = true);
+  Function *CreateInitCallsForSections(Module &M, const char *CtorName,
+                                       const char *InitFunctionName, Type *Ty,
+                                       const char *Section);
+  std::pair<Value *, Value *> CreateSecStartEnd(Module &M, const char *Section,
+                                                Type *Ty);
+
+  void SetNoSanitizeMetadata(Instruction *I) {
+    I->setMetadata(I->getModule()->getMDKindID("nosanitize"),
+                   MDNode::get(*C, None));
+  }
+
+  std::string getSectionName(const std::string &Section) const;
+  std::string getSectionStart(const std::string &Section) const;
+  std::string getSectionEnd(const std::string &Section) const;
+  FunctionCallee SanCovTracePCIndir;
+  FunctionCallee SanCovTracePC, SanCovTracePCGuard;
+  FunctionCallee SanCovTraceCmpFunction[4];
+  FunctionCallee SanCovTraceConstCmpFunction[4];
+  FunctionCallee SanCovTraceDivFunction[2];
+  FunctionCallee SanCovTraceGepFunction;
+  FunctionCallee SanCovTraceSwitchFunction;
+  GlobalVariable *SanCovLowestStack;
+  Type *IntptrTy, *IntptrPtrTy, *Int64Ty, *Int64PtrTy, *Int32Ty, *Int32PtrTy,
+      *Int16Ty, *Int8Ty, *Int8PtrTy, *Int1Ty, *Int1PtrTy;
+  Module *CurModule;
+  std::string CurModuleUniqueId;
+  Triple TargetTriple;
+  LLVMContext *C;
+  const DataLayout *DL;
+
+  GlobalVariable *FunctionGuardArray;  // for trace-pc-guard.
+  GlobalVariable *Function8bitCounterArray;  // for inline-8bit-counters.
+  GlobalVariable *FunctionBoolArray;         // for inline-bool-flag.
+  GlobalVariable *FunctionPCsArray;  // for pc-table.
+  SmallVector<GlobalValue *, 20> GlobalsToAppendToUsed;
+  SmallVector<GlobalValue *, 20> GlobalsToAppendToCompilerUsed;
+
+  SanitizerCoverageOptions Options;
+
+  const SpecialCaseList *Allowlist;
+  const SpecialCaseList *Blocklist;
+};
+
+class ModuleSanitizerCoverageLegacyPass : public ModulePass {
+public:
+  ModuleSanitizerCoverageLegacyPass(
+      const SanitizerCoverageOptions &Options = SanitizerCoverageOptions(),
+      const std::vector<std::string> &AllowlistFiles =
+          std::vector<std::string>(),
+      const std::vector<std::string> &BlocklistFiles =
+          std::vector<std::string>())
+      : ModulePass(ID), Options(Options) {
+    if (AllowlistFiles.size() > 0)
+      Allowlist = SpecialCaseList::createOrDie(AllowlistFiles,
+                                               *vfs::getRealFileSystem());
+    if (BlocklistFiles.size() > 0)
+      Blocklist = SpecialCaseList::createOrDie(BlocklistFiles,
+                                               *vfs::getRealFileSystem());
+    initializeModuleSanitizerCoverageLegacyPassPass(
+        *PassRegistry::getPassRegistry());
+  }
+  bool runOnModule(Module &M) override {
+    ModuleSanitizerCoverage ModuleSancov(Options, Allowlist.get(),
+                                         Blocklist.get());
+    auto DTCallback = [this](Function &F) -> const DominatorTree * {
+      return &this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
+    };
+    auto PDTCallback = [this](Function &F) -> const PostDominatorTree * {
+      return &this->getAnalysis<PostDominatorTreeWrapperPass>(F)
+                  .getPostDomTree();
+    };
+    return ModuleSancov.instrumentModule(M, DTCallback, PDTCallback);
+  }
+
+  static char ID; // Pass identification, replacement for typeid
+  StringRef getPassName() const override { return "ModuleSanitizerCoverage"; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addRequired<PostDominatorTreeWrapperPass>();
+  }
+
+private:
+  SanitizerCoverageOptions Options;
+
+  std::unique_ptr<SpecialCaseList> Allowlist;
+  std::unique_ptr<SpecialCaseList> Blocklist;
+};
+
+} // namespace
+
+PreservedAnalyses ModuleSanitizerCoveragePass::run(Module &M,
+                                                   ModuleAnalysisManager &MAM) {
+  ModuleSanitizerCoverage ModuleSancov(Options, Allowlist.get(),
+                                       Blocklist.get());
+  auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+  auto DTCallback = [&FAM](Function &F) -> const DominatorTree * {
+    return &FAM.getResult<DominatorTreeAnalysis>(F);
+  };
+  auto PDTCallback = [&FAM](Function &F) -> const PostDominatorTree * {
+    return &FAM.getResult<PostDominatorTreeAnalysis>(F);
+  };
+  if (ModuleSancov.instrumentModule(M, DTCallback, PDTCallback))
+    return PreservedAnalyses::none();
+  return PreservedAnalyses::all();
+}
+
+std::pair<Value *, Value *>
+ModuleSanitizerCoverage::CreateSecStartEnd(Module &M, const char *Section,
+                                           Type *Ty) {
   GlobalVariable *SecStart = new GlobalVariable(
       M, Ty->getPointerElementType(), false, GlobalVariable::ExternalLinkage,
       nullptr, getSectionStart(Section));
-  SecStart->setVisibility(GlobalValue::HiddenVisibility); 
+  SecStart->setVisibility(GlobalValue::HiddenVisibility);
   GlobalVariable *SecEnd = new GlobalVariable(
       M, Ty->getPointerElementType(), false, GlobalVariable::ExternalLinkage,
       nullptr, getSectionEnd(Section));
-  SecEnd->setVisibility(GlobalValue::HiddenVisibility); 
-  IRBuilder<> IRB(M.getContext()); 
-  if (!TargetTriple.isOSBinFormatCOFF()) 
+  SecEnd->setVisibility(GlobalValue::HiddenVisibility);
+  IRBuilder<> IRB(M.getContext());
+  if (!TargetTriple.isOSBinFormatCOFF())
     return std::make_pair(SecStart, SecEnd);
- 
-  // Account for the fact that on windows-msvc __start_* symbols actually 
-  // point to a uint64_t before the start of the array. 
-  auto SecStartI8Ptr = IRB.CreatePointerCast(SecStart, Int8PtrTy); 
-  auto GEP = IRB.CreateGEP(Int8Ty, SecStartI8Ptr, 
-                           ConstantInt::get(IntptrTy, sizeof(uint64_t))); 
+
+  // Account for the fact that on windows-msvc __start_* symbols actually
+  // point to a uint64_t before the start of the array.
+  auto SecStartI8Ptr = IRB.CreatePointerCast(SecStart, Int8PtrTy);
+  auto GEP = IRB.CreateGEP(Int8Ty, SecStartI8Ptr,
+                           ConstantInt::get(IntptrTy, sizeof(uint64_t)));
   return std::make_pair(IRB.CreatePointerCast(GEP, Ty), SecEnd);
-} 
- 
-Function *ModuleSanitizerCoverage::CreateInitCallsForSections( 
-    Module &M, const char *CtorName, const char *InitFunctionName, Type *Ty, 
-    const char *Section) { 
-  auto SecStartEnd = CreateSecStartEnd(M, Section, Ty); 
-  auto SecStart = SecStartEnd.first; 
-  auto SecEnd = SecStartEnd.second; 
-  Function *CtorFunc; 
-  std::tie(CtorFunc, std::ignore) = createSanitizerCtorAndInitFunctions( 
-      M, CtorName, InitFunctionName, {Ty, Ty}, {SecStart, SecEnd}); 
-  assert(CtorFunc->getName() == CtorName); 
- 
-  if (TargetTriple.supportsCOMDAT()) { 
-    // Use comdat to dedup CtorFunc. 
-    CtorFunc->setComdat(M.getOrInsertComdat(CtorName)); 
-    appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority, CtorFunc); 
-  } else { 
-    appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority); 
-  } 
- 
-  if (TargetTriple.isOSBinFormatCOFF()) { 
-    // In COFF files, if the contructors are set as COMDAT (they are because 
-    // COFF supports COMDAT) and the linker flag /OPT:REF (strip unreferenced 
-    // functions and data) is used, the constructors get stripped. To prevent 
-    // this, give the constructors weak ODR linkage and ensure the linker knows 
-    // to include the sancov constructor. This way the linker can deduplicate 
-    // the constructors but always leave one copy. 
-    CtorFunc->setLinkage(GlobalValue::WeakODRLinkage); 
-    appendToUsed(M, CtorFunc); 
-  } 
-  return CtorFunc; 
-} 
- 
-bool ModuleSanitizerCoverage::instrumentModule( 
-    Module &M, DomTreeCallback DTCallback, PostDomTreeCallback PDTCallback) { 
-  if (Options.CoverageType == SanitizerCoverageOptions::SCK_None) 
-    return false; 
-  if (Allowlist && 
-      !Allowlist->inSection("coverage", "src", M.getSourceFileName())) 
-    return false; 
-  if (Blocklist && 
-      Blocklist->inSection("coverage", "src", M.getSourceFileName())) 
-    return false; 
-  C = &(M.getContext()); 
-  DL = &M.getDataLayout(); 
-  CurModule = &M; 
-  CurModuleUniqueId = getUniqueModuleId(CurModule); 
-  TargetTriple = Triple(M.getTargetTriple()); 
-  FunctionGuardArray = nullptr; 
-  Function8bitCounterArray = nullptr; 
-  FunctionBoolArray = nullptr; 
-  FunctionPCsArray = nullptr; 
-  IntptrTy = Type::getIntNTy(*C, DL->getPointerSizeInBits()); 
-  IntptrPtrTy = PointerType::getUnqual(IntptrTy); 
-  Type *VoidTy = Type::getVoidTy(*C); 
-  IRBuilder<> IRB(*C); 
-  Int64PtrTy = PointerType::getUnqual(IRB.getInt64Ty()); 
-  Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty()); 
-  Int8PtrTy = PointerType::getUnqual(IRB.getInt8Ty()); 
-  Int1PtrTy = PointerType::getUnqual(IRB.getInt1Ty()); 
-  Int64Ty = IRB.getInt64Ty(); 
-  Int32Ty = IRB.getInt32Ty(); 
-  Int16Ty = IRB.getInt16Ty(); 
-  Int8Ty = IRB.getInt8Ty(); 
-  Int1Ty = IRB.getInt1Ty(); 
- 
-  SanCovTracePCIndir = 
-      M.getOrInsertFunction(SanCovTracePCIndirName, VoidTy, IntptrTy); 
+}
+
+Function *ModuleSanitizerCoverage::CreateInitCallsForSections(
+    Module &M, const char *CtorName, const char *InitFunctionName, Type *Ty,
+    const char *Section) {
+  auto SecStartEnd = CreateSecStartEnd(M, Section, Ty);
+  auto SecStart = SecStartEnd.first;
+  auto SecEnd = SecStartEnd.second;
+  Function *CtorFunc;
+  std::tie(CtorFunc, std::ignore) = createSanitizerCtorAndInitFunctions(
+      M, CtorName, InitFunctionName, {Ty, Ty}, {SecStart, SecEnd});
+  assert(CtorFunc->getName() == CtorName);
+
+  if (TargetTriple.supportsCOMDAT()) {
+    // Use comdat to dedup CtorFunc.
+    CtorFunc->setComdat(M.getOrInsertComdat(CtorName));
+    appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority, CtorFunc);
+  } else {
+    appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority);
+  }
+
+  if (TargetTriple.isOSBinFormatCOFF()) {
+    // In COFF files, if the contructors are set as COMDAT (they are because
+    // COFF supports COMDAT) and the linker flag /OPT:REF (strip unreferenced
+    // functions and data) is used, the constructors get stripped. To prevent
+    // this, give the constructors weak ODR linkage and ensure the linker knows
+    // to include the sancov constructor. This way the linker can deduplicate
+    // the constructors but always leave one copy.
+    CtorFunc->setLinkage(GlobalValue::WeakODRLinkage);
+    appendToUsed(M, CtorFunc);
+  }
+  return CtorFunc;
+}
+
+bool ModuleSanitizerCoverage::instrumentModule(
+    Module &M, DomTreeCallback DTCallback, PostDomTreeCallback PDTCallback) {
+  if (Options.CoverageType == SanitizerCoverageOptions::SCK_None)
+    return false;
+  if (Allowlist &&
+      !Allowlist->inSection("coverage", "src", M.getSourceFileName()))
+    return false;
+  if (Blocklist &&
+      Blocklist->inSection("coverage", "src", M.getSourceFileName()))
+    return false;
+  C = &(M.getContext());
+  DL = &M.getDataLayout();
+  CurModule = &M;
+  CurModuleUniqueId = getUniqueModuleId(CurModule);
+  TargetTriple = Triple(M.getTargetTriple());
+  FunctionGuardArray = nullptr;
+  Function8bitCounterArray = nullptr;
+  FunctionBoolArray = nullptr;
+  FunctionPCsArray = nullptr;
+  IntptrTy = Type::getIntNTy(*C, DL->getPointerSizeInBits());
+  IntptrPtrTy = PointerType::getUnqual(IntptrTy);
+  Type *VoidTy = Type::getVoidTy(*C);
+  IRBuilder<> IRB(*C);
+  Int64PtrTy = PointerType::getUnqual(IRB.getInt64Ty());
+  Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty());
+  Int8PtrTy = PointerType::getUnqual(IRB.getInt8Ty());
+  Int1PtrTy = PointerType::getUnqual(IRB.getInt1Ty());
+  Int64Ty = IRB.getInt64Ty();
+  Int32Ty = IRB.getInt32Ty();
+  Int16Ty = IRB.getInt16Ty();
+  Int8Ty = IRB.getInt8Ty();
+  Int1Ty = IRB.getInt1Ty();
+
+  SanCovTracePCIndir =
+      M.getOrInsertFunction(SanCovTracePCIndirName, VoidTy, IntptrTy);
   // Make sure smaller parameters are zero-extended to i64 if required by the
   // target ABI.
-  AttributeList SanCovTraceCmpZeroExtAL; 
+  AttributeList SanCovTraceCmpZeroExtAL;
   SanCovTraceCmpZeroExtAL =
       SanCovTraceCmpZeroExtAL.addParamAttribute(*C, 0, Attribute::ZExt);
   SanCovTraceCmpZeroExtAL =
       SanCovTraceCmpZeroExtAL.addParamAttribute(*C, 1, Attribute::ZExt);
- 
-  SanCovTraceCmpFunction[0] = 
-      M.getOrInsertFunction(SanCovTraceCmp1, SanCovTraceCmpZeroExtAL, VoidTy, 
-                            IRB.getInt8Ty(), IRB.getInt8Ty()); 
-  SanCovTraceCmpFunction[1] = 
-      M.getOrInsertFunction(SanCovTraceCmp2, SanCovTraceCmpZeroExtAL, VoidTy, 
-                            IRB.getInt16Ty(), IRB.getInt16Ty()); 
-  SanCovTraceCmpFunction[2] = 
-      M.getOrInsertFunction(SanCovTraceCmp4, SanCovTraceCmpZeroExtAL, VoidTy, 
-                            IRB.getInt32Ty(), IRB.getInt32Ty()); 
-  SanCovTraceCmpFunction[3] = 
-      M.getOrInsertFunction(SanCovTraceCmp8, VoidTy, Int64Ty, Int64Ty); 
- 
-  SanCovTraceConstCmpFunction[0] = M.getOrInsertFunction( 
-      SanCovTraceConstCmp1, SanCovTraceCmpZeroExtAL, VoidTy, Int8Ty, Int8Ty); 
-  SanCovTraceConstCmpFunction[1] = M.getOrInsertFunction( 
-      SanCovTraceConstCmp2, SanCovTraceCmpZeroExtAL, VoidTy, Int16Ty, Int16Ty); 
-  SanCovTraceConstCmpFunction[2] = M.getOrInsertFunction( 
-      SanCovTraceConstCmp4, SanCovTraceCmpZeroExtAL, VoidTy, Int32Ty, Int32Ty); 
-  SanCovTraceConstCmpFunction[3] = 
-      M.getOrInsertFunction(SanCovTraceConstCmp8, VoidTy, Int64Ty, Int64Ty); 
- 
-  { 
-    AttributeList AL; 
+
+  SanCovTraceCmpFunction[0] =
+      M.getOrInsertFunction(SanCovTraceCmp1, SanCovTraceCmpZeroExtAL, VoidTy,
+                            IRB.getInt8Ty(), IRB.getInt8Ty());
+  SanCovTraceCmpFunction[1] =
+      M.getOrInsertFunction(SanCovTraceCmp2, SanCovTraceCmpZeroExtAL, VoidTy,
+                            IRB.getInt16Ty(), IRB.getInt16Ty());
+  SanCovTraceCmpFunction[2] =
+      M.getOrInsertFunction(SanCovTraceCmp4, SanCovTraceCmpZeroExtAL, VoidTy,
+                            IRB.getInt32Ty(), IRB.getInt32Ty());
+  SanCovTraceCmpFunction[3] =
+      M.getOrInsertFunction(SanCovTraceCmp8, VoidTy, Int64Ty, Int64Ty);
+
+  SanCovTraceConstCmpFunction[0] = M.getOrInsertFunction(
+      SanCovTraceConstCmp1, SanCovTraceCmpZeroExtAL, VoidTy, Int8Ty, Int8Ty);
+  SanCovTraceConstCmpFunction[1] = M.getOrInsertFunction(
+      SanCovTraceConstCmp2, SanCovTraceCmpZeroExtAL, VoidTy, Int16Ty, Int16Ty);
+  SanCovTraceConstCmpFunction[2] = M.getOrInsertFunction(
+      SanCovTraceConstCmp4, SanCovTraceCmpZeroExtAL, VoidTy, Int32Ty, Int32Ty);
+  SanCovTraceConstCmpFunction[3] =
+      M.getOrInsertFunction(SanCovTraceConstCmp8, VoidTy, Int64Ty, Int64Ty);
+
+  {
+    AttributeList AL;
     AL = AL.addParamAttribute(*C, 0, Attribute::ZExt);
-    SanCovTraceDivFunction[0] = 
-        M.getOrInsertFunction(SanCovTraceDiv4, AL, VoidTy, IRB.getInt32Ty()); 
-  } 
-  SanCovTraceDivFunction[1] = 
-      M.getOrInsertFunction(SanCovTraceDiv8, VoidTy, Int64Ty); 
-  SanCovTraceGepFunction = 
-      M.getOrInsertFunction(SanCovTraceGep, VoidTy, IntptrTy); 
-  SanCovTraceSwitchFunction = 
-      M.getOrInsertFunction(SanCovTraceSwitchName, VoidTy, Int64Ty, Int64PtrTy); 
- 
-  Constant *SanCovLowestStackConstant = 
-      M.getOrInsertGlobal(SanCovLowestStackName, IntptrTy); 
-  SanCovLowestStack = dyn_cast<GlobalVariable>(SanCovLowestStackConstant); 
-  if (!SanCovLowestStack) { 
-    C->emitError(StringRef("'") + SanCovLowestStackName + 
-                 "' should not be declared by the user"); 
-    return true; 
-  } 
-  SanCovLowestStack->setThreadLocalMode( 
-      GlobalValue::ThreadLocalMode::InitialExecTLSModel); 
-  if (Options.StackDepth && !SanCovLowestStack->isDeclaration()) 
-    SanCovLowestStack->setInitializer(Constant::getAllOnesValue(IntptrTy)); 
- 
-  SanCovTracePC = M.getOrInsertFunction(SanCovTracePCName, VoidTy); 
-  SanCovTracePCGuard = 
-      M.getOrInsertFunction(SanCovTracePCGuardName, VoidTy, Int32PtrTy); 
- 
-  for (auto &F : M) 
-    instrumentFunction(F, DTCallback, PDTCallback); 
- 
-  Function *Ctor = nullptr; 
- 
-  if (FunctionGuardArray) 
-    Ctor = CreateInitCallsForSections(M, SanCovModuleCtorTracePcGuardName, 
-                                      SanCovTracePCGuardInitName, Int32PtrTy, 
-                                      SanCovGuardsSectionName); 
-  if (Function8bitCounterArray) 
-    Ctor = CreateInitCallsForSections(M, SanCovModuleCtor8bitCountersName, 
-                                      SanCov8bitCountersInitName, Int8PtrTy, 
-                                      SanCovCountersSectionName); 
-  if (FunctionBoolArray) { 
-    Ctor = CreateInitCallsForSections(M, SanCovModuleCtorBoolFlagName, 
-                                      SanCovBoolFlagInitName, Int1PtrTy, 
-                                      SanCovBoolFlagSectionName); 
-  } 
-  if (Ctor && Options.PCTable) { 
-    auto SecStartEnd = CreateSecStartEnd(M, SanCovPCsSectionName, IntptrPtrTy); 
-    FunctionCallee InitFunction = declareSanitizerInitFunction( 
-        M, SanCovPCsInitName, {IntptrPtrTy, IntptrPtrTy}); 
-    IRBuilder<> IRBCtor(Ctor->getEntryBlock().getTerminator()); 
-    IRBCtor.CreateCall(InitFunction, {SecStartEnd.first, SecStartEnd.second}); 
-  } 
-  // We don't reference these arrays directly in any of our runtime functions, 
-  // so we need to prevent them from being dead stripped. 
-  if (TargetTriple.isOSBinFormatMachO()) 
-    appendToUsed(M, GlobalsToAppendToUsed); 
-  appendToCompilerUsed(M, GlobalsToAppendToCompilerUsed); 
-  return true; 
-} 
- 
-// True if block has successors and it dominates all of them. 
-static bool isFullDominator(const BasicBlock *BB, const DominatorTree *DT) { 
+    SanCovTraceDivFunction[0] =
+        M.getOrInsertFunction(SanCovTraceDiv4, AL, VoidTy, IRB.getInt32Ty());
+  }
+  SanCovTraceDivFunction[1] =
+      M.getOrInsertFunction(SanCovTraceDiv8, VoidTy, Int64Ty);
+  SanCovTraceGepFunction =
+      M.getOrInsertFunction(SanCovTraceGep, VoidTy, IntptrTy);
+  SanCovTraceSwitchFunction =
+      M.getOrInsertFunction(SanCovTraceSwitchName, VoidTy, Int64Ty, Int64PtrTy);
+
+  Constant *SanCovLowestStackConstant =
+      M.getOrInsertGlobal(SanCovLowestStackName, IntptrTy);
+  SanCovLowestStack = dyn_cast<GlobalVariable>(SanCovLowestStackConstant);
+  if (!SanCovLowestStack) {
+    C->emitError(StringRef("'") + SanCovLowestStackName +
+                 "' should not be declared by the user");
+    return true;
+  }
+  SanCovLowestStack->setThreadLocalMode(
+      GlobalValue::ThreadLocalMode::InitialExecTLSModel);
+  if (Options.StackDepth && !SanCovLowestStack->isDeclaration())
+    SanCovLowestStack->setInitializer(Constant::getAllOnesValue(IntptrTy));
+
+  SanCovTracePC = M.getOrInsertFunction(SanCovTracePCName, VoidTy);
+  SanCovTracePCGuard =
+      M.getOrInsertFunction(SanCovTracePCGuardName, VoidTy, Int32PtrTy);
+
+  for (auto &F : M)
+    instrumentFunction(F, DTCallback, PDTCallback);
+
+  Function *Ctor = nullptr;
+
+  if (FunctionGuardArray)
+    Ctor = CreateInitCallsForSections(M, SanCovModuleCtorTracePcGuardName,
+                                      SanCovTracePCGuardInitName, Int32PtrTy,
+                                      SanCovGuardsSectionName);
+  if (Function8bitCounterArray)
+    Ctor = CreateInitCallsForSections(M, SanCovModuleCtor8bitCountersName,
+                                      SanCov8bitCountersInitName, Int8PtrTy,
+                                      SanCovCountersSectionName);
+  if (FunctionBoolArray) {
+    Ctor = CreateInitCallsForSections(M, SanCovModuleCtorBoolFlagName,
+                                      SanCovBoolFlagInitName, Int1PtrTy,
+                                      SanCovBoolFlagSectionName);
+  }
+  if (Ctor && Options.PCTable) {
+    auto SecStartEnd = CreateSecStartEnd(M, SanCovPCsSectionName, IntptrPtrTy);
+    FunctionCallee InitFunction = declareSanitizerInitFunction(
+        M, SanCovPCsInitName, {IntptrPtrTy, IntptrPtrTy});
+    IRBuilder<> IRBCtor(Ctor->getEntryBlock().getTerminator());
+    IRBCtor.CreateCall(InitFunction, {SecStartEnd.first, SecStartEnd.second});
+  }
+  // We don't reference these arrays directly in any of our runtime functions,
+  // so we need to prevent them from being dead stripped.
+  if (TargetTriple.isOSBinFormatMachO())
+    appendToUsed(M, GlobalsToAppendToUsed);
+  appendToCompilerUsed(M, GlobalsToAppendToCompilerUsed);
+  return true;
+}
+
+// True if block has successors and it dominates all of them.
+static bool isFullDominator(const BasicBlock *BB, const DominatorTree *DT) {
   if (succ_empty(BB))
-    return false; 
- 
+    return false;
+
   return llvm::all_of(successors(BB), [&](const BasicBlock *SUCC) {
     return DT->dominates(BB, SUCC);
   });
-} 
- 
-// True if block has predecessors and it postdominates all of them. 
-static bool isFullPostDominator(const BasicBlock *BB, 
-                                const PostDominatorTree *PDT) { 
+}
+
+// True if block has predecessors and it postdominates all of them.
+static bool isFullPostDominator(const BasicBlock *BB,
+                                const PostDominatorTree *PDT) {
   if (pred_empty(BB))
-    return false; 
- 
+    return false;
+
   return llvm::all_of(predecessors(BB), [&](const BasicBlock *PRED) {
     return PDT->dominates(BB, PRED);
   });
-} 
- 
-static bool shouldInstrumentBlock(const Function &F, const BasicBlock *BB, 
-                                  const DominatorTree *DT, 
-                                  const PostDominatorTree *PDT, 
-                                  const SanitizerCoverageOptions &Options) { 
-  // Don't insert coverage for blocks containing nothing but unreachable: we 
-  // will never call __sanitizer_cov() for them, so counting them in 
-  // NumberOfInstrumentedBlocks() might complicate calculation of code coverage 
-  // percentage. Also, unreachable instructions frequently have no debug 
-  // locations. 
-  if (isa<UnreachableInst>(BB->getFirstNonPHIOrDbgOrLifetime())) 
-    return false; 
- 
-  // Don't insert coverage into blocks without a valid insertion point 
-  // (catchswitch blocks). 
-  if (BB->getFirstInsertionPt() == BB->end()) 
-    return false; 
- 
-  if (Options.NoPrune || &F.getEntryBlock() == BB) 
-    return true; 
- 
-  if (Options.CoverageType == SanitizerCoverageOptions::SCK_Function && 
-      &F.getEntryBlock() != BB) 
-    return false; 
- 
-  // Do not instrument full dominators, or full post-dominators with multiple 
-  // predecessors. 
-  return !isFullDominator(BB, DT) 
-    && !(isFullPostDominator(BB, PDT) && !BB->getSinglePredecessor()); 
-} 
- 
- 
-// Returns true iff From->To is a backedge. 
-// A twist here is that we treat From->To as a backedge if 
-//   * To dominates From or 
-//   * To->UniqueSuccessor dominates From 
-static bool IsBackEdge(BasicBlock *From, BasicBlock *To, 
-                       const DominatorTree *DT) { 
-  if (DT->dominates(To, From)) 
-    return true; 
-  if (auto Next = To->getUniqueSuccessor()) 
-    if (DT->dominates(Next, From)) 
-      return true; 
-  return false; 
-} 
- 
-// Prunes uninteresting Cmp instrumentation: 
-//   * CMP instructions that feed into loop backedge branch. 
-// 
-// Note that Cmp pruning is controlled by the same flag as the 
-// BB pruning. 
-static bool IsInterestingCmp(ICmpInst *CMP, const DominatorTree *DT, 
-                             const SanitizerCoverageOptions &Options) { 
-  if (!Options.NoPrune) 
-    if (CMP->hasOneUse()) 
-      if (auto BR = dyn_cast<BranchInst>(CMP->user_back())) 
-        for (BasicBlock *B : BR->successors()) 
-          if (IsBackEdge(BR->getParent(), B, DT)) 
-            return false; 
-  return true; 
-} 
- 
-void ModuleSanitizerCoverage::instrumentFunction( 
-    Function &F, DomTreeCallback DTCallback, PostDomTreeCallback PDTCallback) { 
-  if (F.empty()) 
-    return; 
-  if (F.getName().find(".module_ctor") != std::string::npos) 
-    return; // Should not instrument sanitizer init functions. 
-  if (F.getName().startswith("__sanitizer_")) 
-    return; // Don't instrument __sanitizer_* callbacks. 
-  // Don't touch available_externally functions, their actual body is elewhere. 
-  if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) 
-    return; 
-  // Don't instrument MSVC CRT configuration helpers. They may run before normal 
-  // initialization. 
-  if (F.getName() == "__local_stdio_printf_options" || 
-      F.getName() == "__local_stdio_scanf_options") 
-    return; 
-  if (isa<UnreachableInst>(F.getEntryBlock().getTerminator())) 
-    return; 
-  // Don't instrument functions using SEH for now. Splitting basic blocks like 
-  // we do for coverage breaks WinEHPrepare. 
-  // FIXME: Remove this when SEH no longer uses landingpad pattern matching. 
-  if (F.hasPersonalityFn() && 
-      isAsynchronousEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) 
-    return; 
-  if (Allowlist && !Allowlist->inSection("coverage", "fun", F.getName())) 
-    return; 
-  if (Blocklist && Blocklist->inSection("coverage", "fun", F.getName())) 
-    return; 
-  if (Options.CoverageType >= SanitizerCoverageOptions::SCK_Edge) 
-    SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions().setIgnoreUnreachableDests()); 
-  SmallVector<Instruction *, 8> IndirCalls; 
-  SmallVector<BasicBlock *, 16> BlocksToInstrument; 
-  SmallVector<Instruction *, 8> CmpTraceTargets; 
-  SmallVector<Instruction *, 8> SwitchTraceTargets; 
-  SmallVector<BinaryOperator *, 8> DivTraceTargets; 
-  SmallVector<GetElementPtrInst *, 8> GepTraceTargets; 
- 
-  const DominatorTree *DT = DTCallback(F); 
-  const PostDominatorTree *PDT = PDTCallback(F); 
-  bool IsLeafFunc = true; 
- 
-  for (auto &BB : F) { 
-    if (shouldInstrumentBlock(F, &BB, DT, PDT, Options)) 
-      BlocksToInstrument.push_back(&BB); 
-    for (auto &Inst : BB) { 
-      if (Options.IndirectCalls) { 
-        CallBase *CB = dyn_cast<CallBase>(&Inst); 
-        if (CB && !CB->getCalledFunction()) 
-          IndirCalls.push_back(&Inst); 
-      } 
-      if (Options.TraceCmp) { 
-        if (ICmpInst *CMP = dyn_cast<ICmpInst>(&Inst)) 
-          if (IsInterestingCmp(CMP, DT, Options)) 
-            CmpTraceTargets.push_back(&Inst); 
-        if (isa<SwitchInst>(&Inst)) 
-          SwitchTraceTargets.push_back(&Inst); 
-      } 
-      if (Options.TraceDiv) 
-        if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&Inst)) 
-          if (BO->getOpcode() == Instruction::SDiv || 
-              BO->getOpcode() == Instruction::UDiv) 
-            DivTraceTargets.push_back(BO); 
-      if (Options.TraceGep) 
-        if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Inst)) 
-          GepTraceTargets.push_back(GEP); 
-      if (Options.StackDepth) 
-        if (isa<InvokeInst>(Inst) || 
-            (isa<CallInst>(Inst) && !isa<IntrinsicInst>(Inst))) 
-          IsLeafFunc = false; 
-    } 
-  } 
- 
-  InjectCoverage(F, BlocksToInstrument, IsLeafFunc); 
-  InjectCoverageForIndirectCalls(F, IndirCalls); 
-  InjectTraceForCmp(F, CmpTraceTargets); 
-  InjectTraceForSwitch(F, SwitchTraceTargets); 
-  InjectTraceForDiv(F, DivTraceTargets); 
-  InjectTraceForGep(F, GepTraceTargets); 
-} 
- 
-GlobalVariable *ModuleSanitizerCoverage::CreateFunctionLocalArrayInSection( 
-    size_t NumElements, Function &F, Type *Ty, const char *Section) { 
-  ArrayType *ArrayTy = ArrayType::get(Ty, NumElements); 
-  auto Array = new GlobalVariable( 
-      *CurModule, ArrayTy, false, GlobalVariable::PrivateLinkage, 
-      Constant::getNullValue(ArrayTy), "__sancov_gen_"); 
- 
-  if (TargetTriple.supportsCOMDAT() && !F.isInterposable()) 
-    if (auto Comdat = 
-            GetOrCreateFunctionComdat(F, TargetTriple, CurModuleUniqueId)) 
-      Array->setComdat(Comdat); 
-  Array->setSection(getSectionName(Section)); 
-  Array->setAlignment(Align(DL->getTypeStoreSize(Ty).getFixedSize())); 
-  GlobalsToAppendToUsed.push_back(Array); 
-  GlobalsToAppendToCompilerUsed.push_back(Array); 
-  MDNode *MD = MDNode::get(F.getContext(), ValueAsMetadata::get(&F)); 
-  Array->addMetadata(LLVMContext::MD_associated, *MD); 
- 
-  return Array; 
-} 
- 
-GlobalVariable * 
-ModuleSanitizerCoverage::CreatePCArray(Function &F, 
-                                       ArrayRef<BasicBlock *> AllBlocks) { 
-  size_t N = AllBlocks.size(); 
-  assert(N); 
-  SmallVector<Constant *, 32> PCs; 
-  IRBuilder<> IRB(&*F.getEntryBlock().getFirstInsertionPt()); 
-  for (size_t i = 0; i < N; i++) { 
-    if (&F.getEntryBlock() == AllBlocks[i]) { 
-      PCs.push_back((Constant *)IRB.CreatePointerCast(&F, IntptrPtrTy)); 
-      PCs.push_back((Constant *)IRB.CreateIntToPtr( 
-          ConstantInt::get(IntptrTy, 1), IntptrPtrTy)); 
-    } else { 
-      PCs.push_back((Constant *)IRB.CreatePointerCast( 
-          BlockAddress::get(AllBlocks[i]), IntptrPtrTy)); 
-      PCs.push_back((Constant *)IRB.CreateIntToPtr( 
-          ConstantInt::get(IntptrTy, 0), IntptrPtrTy)); 
-    } 
-  } 
-  auto *PCArray = CreateFunctionLocalArrayInSection(N * 2, F, IntptrPtrTy, 
-                                                    SanCovPCsSectionName); 
-  PCArray->setInitializer( 
-      ConstantArray::get(ArrayType::get(IntptrPtrTy, N * 2), PCs)); 
-  PCArray->setConstant(true); 
- 
-  return PCArray; 
-} 
- 
-void ModuleSanitizerCoverage::CreateFunctionLocalArrays( 
-    Function &F, ArrayRef<BasicBlock *> AllBlocks) { 
-  if (Options.TracePCGuard) 
-    FunctionGuardArray = CreateFunctionLocalArrayInSection( 
-        AllBlocks.size(), F, Int32Ty, SanCovGuardsSectionName); 
- 
-  if (Options.Inline8bitCounters) 
-    Function8bitCounterArray = CreateFunctionLocalArrayInSection( 
-        AllBlocks.size(), F, Int8Ty, SanCovCountersSectionName); 
-  if (Options.InlineBoolFlag) 
-    FunctionBoolArray = CreateFunctionLocalArrayInSection( 
-        AllBlocks.size(), F, Int1Ty, SanCovBoolFlagSectionName); 
- 
-  if (Options.PCTable) 
-    FunctionPCsArray = CreatePCArray(F, AllBlocks); 
-} 
- 
-bool ModuleSanitizerCoverage::InjectCoverage(Function &F, 
-                                             ArrayRef<BasicBlock *> AllBlocks, 
-                                             bool IsLeafFunc) { 
-  if (AllBlocks.empty()) return false; 
-  CreateFunctionLocalArrays(F, AllBlocks); 
-  for (size_t i = 0, N = AllBlocks.size(); i < N; i++) 
-    InjectCoverageAtBlock(F, *AllBlocks[i], i, IsLeafFunc); 
-  return true; 
-} 
- 
-// On every indirect call we call a run-time function 
-// __sanitizer_cov_indir_call* with two parameters: 
-//   - callee address, 
-//   - global cache array that contains CacheSize pointers (zero-initialized). 
-//     The cache is used to speed up recording the caller-callee pairs. 
-// The address of the caller is passed implicitly via caller PC. 
-// CacheSize is encoded in the name of the run-time function. 
-void ModuleSanitizerCoverage::InjectCoverageForIndirectCalls( 
-    Function &F, ArrayRef<Instruction *> IndirCalls) { 
-  if (IndirCalls.empty()) 
-    return; 
-  assert(Options.TracePC || Options.TracePCGuard || 
-         Options.Inline8bitCounters || Options.InlineBoolFlag); 
-  for (auto I : IndirCalls) { 
-    IRBuilder<> IRB(I); 
-    CallBase &CB = cast<CallBase>(*I); 
-    Value *Callee = CB.getCalledOperand(); 
-    if (isa<InlineAsm>(Callee)) 
-      continue; 
-    IRB.CreateCall(SanCovTracePCIndir, IRB.CreatePointerCast(Callee, IntptrTy)); 
-  } 
-} 
- 
-// For every switch statement we insert a call: 
-// __sanitizer_cov_trace_switch(CondValue, 
-//      {NumCases, ValueSizeInBits, Case0Value, Case1Value, Case2Value, ... }) 
- 
-void ModuleSanitizerCoverage::InjectTraceForSwitch( 
-    Function &, ArrayRef<Instruction *> SwitchTraceTargets) { 
-  for (auto I : SwitchTraceTargets) { 
-    if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) { 
-      IRBuilder<> IRB(I); 
-      SmallVector<Constant *, 16> Initializers; 
-      Value *Cond = SI->getCondition(); 
-      if (Cond->getType()->getScalarSizeInBits() > 
-          Int64Ty->getScalarSizeInBits()) 
-        continue; 
-      Initializers.push_back(ConstantInt::get(Int64Ty, SI->getNumCases())); 
-      Initializers.push_back( 
-          ConstantInt::get(Int64Ty, Cond->getType()->getScalarSizeInBits())); 
-      if (Cond->getType()->getScalarSizeInBits() < 
-          Int64Ty->getScalarSizeInBits()) 
-        Cond = IRB.CreateIntCast(Cond, Int64Ty, false); 
-      for (auto It : SI->cases()) { 
-        Constant *C = It.getCaseValue(); 
-        if (C->getType()->getScalarSizeInBits() < 
-            Int64Ty->getScalarSizeInBits()) 
-          C = ConstantExpr::getCast(CastInst::ZExt, It.getCaseValue(), Int64Ty); 
-        Initializers.push_back(C); 
-      } 
+}
+
+static bool shouldInstrumentBlock(const Function &F, const BasicBlock *BB,
+                                  const DominatorTree *DT,
+                                  const PostDominatorTree *PDT,
+                                  const SanitizerCoverageOptions &Options) {
+  // Don't insert coverage for blocks containing nothing but unreachable: we
+  // will never call __sanitizer_cov() for them, so counting them in
+  // NumberOfInstrumentedBlocks() might complicate calculation of code coverage
+  // percentage. Also, unreachable instructions frequently have no debug
+  // locations.
+  if (isa<UnreachableInst>(BB->getFirstNonPHIOrDbgOrLifetime()))
+    return false;
+
+  // Don't insert coverage into blocks without a valid insertion point
+  // (catchswitch blocks).
+  if (BB->getFirstInsertionPt() == BB->end())
+    return false;
+
+  if (Options.NoPrune || &F.getEntryBlock() == BB)
+    return true;
+
+  if (Options.CoverageType == SanitizerCoverageOptions::SCK_Function &&
+      &F.getEntryBlock() != BB)
+    return false;
+
+  // Do not instrument full dominators, or full post-dominators with multiple
+  // predecessors.
+  return !isFullDominator(BB, DT)
+    && !(isFullPostDominator(BB, PDT) && !BB->getSinglePredecessor());
+}
+
+
+// Returns true iff From->To is a backedge.
+// A twist here is that we treat From->To as a backedge if
+//   * To dominates From or
+//   * To->UniqueSuccessor dominates From
+static bool IsBackEdge(BasicBlock *From, BasicBlock *To,
+                       const DominatorTree *DT) {
+  if (DT->dominates(To, From))
+    return true;
+  if (auto Next = To->getUniqueSuccessor())
+    if (DT->dominates(Next, From))
+      return true;
+  return false;
+}
+
+// Prunes uninteresting Cmp instrumentation:
+//   * CMP instructions that feed into loop backedge branch.
+//
+// Note that Cmp pruning is controlled by the same flag as the
+// BB pruning.
+static bool IsInterestingCmp(ICmpInst *CMP, const DominatorTree *DT,
+                             const SanitizerCoverageOptions &Options) {
+  if (!Options.NoPrune)
+    if (CMP->hasOneUse())
+      if (auto BR = dyn_cast<BranchInst>(CMP->user_back()))
+        for (BasicBlock *B : BR->successors())
+          if (IsBackEdge(BR->getParent(), B, DT))
+            return false;
+  return true;
+}
+
+void ModuleSanitizerCoverage::instrumentFunction(
+    Function &F, DomTreeCallback DTCallback, PostDomTreeCallback PDTCallback) {
+  if (F.empty())
+    return;
+  if (F.getName().find(".module_ctor") != std::string::npos)
+    return; // Should not instrument sanitizer init functions.
+  if (F.getName().startswith("__sanitizer_"))
+    return; // Don't instrument __sanitizer_* callbacks.
+  // Don't touch available_externally functions, their actual body is elewhere.
+  if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
+    return;
+  // Don't instrument MSVC CRT configuration helpers. They may run before normal
+  // initialization.
+  if (F.getName() == "__local_stdio_printf_options" ||
+      F.getName() == "__local_stdio_scanf_options")
+    return;
+  if (isa<UnreachableInst>(F.getEntryBlock().getTerminator()))
+    return;
+  // Don't instrument functions using SEH for now. Splitting basic blocks like
+  // we do for coverage breaks WinEHPrepare.
+  // FIXME: Remove this when SEH no longer uses landingpad pattern matching.
+  if (F.hasPersonalityFn() &&
+      isAsynchronousEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
+    return;
+  if (Allowlist && !Allowlist->inSection("coverage", "fun", F.getName()))
+    return;
+  if (Blocklist && Blocklist->inSection("coverage", "fun", F.getName()))
+    return;
+  if (Options.CoverageType >= SanitizerCoverageOptions::SCK_Edge)
+    SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions().setIgnoreUnreachableDests());
+  SmallVector<Instruction *, 8> IndirCalls;
+  SmallVector<BasicBlock *, 16> BlocksToInstrument;
+  SmallVector<Instruction *, 8> CmpTraceTargets;
+  SmallVector<Instruction *, 8> SwitchTraceTargets;
+  SmallVector<BinaryOperator *, 8> DivTraceTargets;
+  SmallVector<GetElementPtrInst *, 8> GepTraceTargets;
+
+  const DominatorTree *DT = DTCallback(F);
+  const PostDominatorTree *PDT = PDTCallback(F);
+  bool IsLeafFunc = true;
+
+  for (auto &BB : F) {
+    if (shouldInstrumentBlock(F, &BB, DT, PDT, Options))
+      BlocksToInstrument.push_back(&BB);
+    for (auto &Inst : BB) {
+      if (Options.IndirectCalls) {
+        CallBase *CB = dyn_cast<CallBase>(&Inst);
+        if (CB && !CB->getCalledFunction())
+          IndirCalls.push_back(&Inst);
+      }
+      if (Options.TraceCmp) {
+        if (ICmpInst *CMP = dyn_cast<ICmpInst>(&Inst))
+          if (IsInterestingCmp(CMP, DT, Options))
+            CmpTraceTargets.push_back(&Inst);
+        if (isa<SwitchInst>(&Inst))
+          SwitchTraceTargets.push_back(&Inst);
+      }
+      if (Options.TraceDiv)
+        if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&Inst))
+          if (BO->getOpcode() == Instruction::SDiv ||
+              BO->getOpcode() == Instruction::UDiv)
+            DivTraceTargets.push_back(BO);
+      if (Options.TraceGep)
+        if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Inst))
+          GepTraceTargets.push_back(GEP);
+      if (Options.StackDepth)
+        if (isa<InvokeInst>(Inst) ||
+            (isa<CallInst>(Inst) && !isa<IntrinsicInst>(Inst)))
+          IsLeafFunc = false;
+    }
+  }
+
+  InjectCoverage(F, BlocksToInstrument, IsLeafFunc);
+  InjectCoverageForIndirectCalls(F, IndirCalls);
+  InjectTraceForCmp(F, CmpTraceTargets);
+  InjectTraceForSwitch(F, SwitchTraceTargets);
+  InjectTraceForDiv(F, DivTraceTargets);
+  InjectTraceForGep(F, GepTraceTargets);
+}
+
+GlobalVariable *ModuleSanitizerCoverage::CreateFunctionLocalArrayInSection(
+    size_t NumElements, Function &F, Type *Ty, const char *Section) {
+  ArrayType *ArrayTy = ArrayType::get(Ty, NumElements);
+  auto Array = new GlobalVariable(
+      *CurModule, ArrayTy, false, GlobalVariable::PrivateLinkage,
+      Constant::getNullValue(ArrayTy), "__sancov_gen_");
+
+  if (TargetTriple.supportsCOMDAT() && !F.isInterposable())
+    if (auto Comdat =
+            GetOrCreateFunctionComdat(F, TargetTriple, CurModuleUniqueId))
+      Array->setComdat(Comdat);
+  Array->setSection(getSectionName(Section));
+  Array->setAlignment(Align(DL->getTypeStoreSize(Ty).getFixedSize()));
+  GlobalsToAppendToUsed.push_back(Array);
+  GlobalsToAppendToCompilerUsed.push_back(Array);
+  MDNode *MD = MDNode::get(F.getContext(), ValueAsMetadata::get(&F));
+  Array->addMetadata(LLVMContext::MD_associated, *MD);
+
+  return Array;
+}
+
+GlobalVariable *
+ModuleSanitizerCoverage::CreatePCArray(Function &F,
+                                       ArrayRef<BasicBlock *> AllBlocks) {
+  size_t N = AllBlocks.size();
+  assert(N);
+  SmallVector<Constant *, 32> PCs;
+  IRBuilder<> IRB(&*F.getEntryBlock().getFirstInsertionPt());
+  for (size_t i = 0; i < N; i++) {
+    if (&F.getEntryBlock() == AllBlocks[i]) {
+      PCs.push_back((Constant *)IRB.CreatePointerCast(&F, IntptrPtrTy));
+      PCs.push_back((Constant *)IRB.CreateIntToPtr(
+          ConstantInt::get(IntptrTy, 1), IntptrPtrTy));
+    } else {
+      PCs.push_back((Constant *)IRB.CreatePointerCast(
+          BlockAddress::get(AllBlocks[i]), IntptrPtrTy));
+      PCs.push_back((Constant *)IRB.CreateIntToPtr(
+          ConstantInt::get(IntptrTy, 0), IntptrPtrTy));
+    }
+  }
+  auto *PCArray = CreateFunctionLocalArrayInSection(N * 2, F, IntptrPtrTy,
+                                                    SanCovPCsSectionName);
+  PCArray->setInitializer(
+      ConstantArray::get(ArrayType::get(IntptrPtrTy, N * 2), PCs));
+  PCArray->setConstant(true);
+
+  return PCArray;
+}
+
+void ModuleSanitizerCoverage::CreateFunctionLocalArrays(
+    Function &F, ArrayRef<BasicBlock *> AllBlocks) {
+  if (Options.TracePCGuard)
+    FunctionGuardArray = CreateFunctionLocalArrayInSection(
+        AllBlocks.size(), F, Int32Ty, SanCovGuardsSectionName);
+
+  if (Options.Inline8bitCounters)
+    Function8bitCounterArray = CreateFunctionLocalArrayInSection(
+        AllBlocks.size(), F, Int8Ty, SanCovCountersSectionName);
+  if (Options.InlineBoolFlag)
+    FunctionBoolArray = CreateFunctionLocalArrayInSection(
+        AllBlocks.size(), F, Int1Ty, SanCovBoolFlagSectionName);
+
+  if (Options.PCTable)
+    FunctionPCsArray = CreatePCArray(F, AllBlocks);
+}
+
+bool ModuleSanitizerCoverage::InjectCoverage(Function &F,
+                                             ArrayRef<BasicBlock *> AllBlocks,
+                                             bool IsLeafFunc) {
+  if (AllBlocks.empty()) return false;
+  CreateFunctionLocalArrays(F, AllBlocks);
+  for (size_t i = 0, N = AllBlocks.size(); i < N; i++)
+    InjectCoverageAtBlock(F, *AllBlocks[i], i, IsLeafFunc);
+  return true;
+}
+
+// On every indirect call we call a run-time function
+// __sanitizer_cov_indir_call* with two parameters:
+//   - callee address,
+//   - global cache array that contains CacheSize pointers (zero-initialized).
+//     The cache is used to speed up recording the caller-callee pairs.
+// The address of the caller is passed implicitly via caller PC.
+// CacheSize is encoded in the name of the run-time function.
+void ModuleSanitizerCoverage::InjectCoverageForIndirectCalls(
+    Function &F, ArrayRef<Instruction *> IndirCalls) {
+  if (IndirCalls.empty())
+    return;
+  assert(Options.TracePC || Options.TracePCGuard ||
+         Options.Inline8bitCounters || Options.InlineBoolFlag);
+  for (auto I : IndirCalls) {
+    IRBuilder<> IRB(I);
+    CallBase &CB = cast<CallBase>(*I);
+    Value *Callee = CB.getCalledOperand();
+    if (isa<InlineAsm>(Callee))
+      continue;
+    IRB.CreateCall(SanCovTracePCIndir, IRB.CreatePointerCast(Callee, IntptrTy));
+  }
+}
+
+// For every switch statement we insert a call:
+// __sanitizer_cov_trace_switch(CondValue,
+//      {NumCases, ValueSizeInBits, Case0Value, Case1Value, Case2Value, ... })
+
+void ModuleSanitizerCoverage::InjectTraceForSwitch(
+    Function &, ArrayRef<Instruction *> SwitchTraceTargets) {
+  for (auto I : SwitchTraceTargets) {
+    if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
+      IRBuilder<> IRB(I);
+      SmallVector<Constant *, 16> Initializers;
+      Value *Cond = SI->getCondition();
+      if (Cond->getType()->getScalarSizeInBits() >
+          Int64Ty->getScalarSizeInBits())
+        continue;
+      Initializers.push_back(ConstantInt::get(Int64Ty, SI->getNumCases()));
+      Initializers.push_back(
+          ConstantInt::get(Int64Ty, Cond->getType()->getScalarSizeInBits()));
+      if (Cond->getType()->getScalarSizeInBits() <
+          Int64Ty->getScalarSizeInBits())
+        Cond = IRB.CreateIntCast(Cond, Int64Ty, false);
+      for (auto It : SI->cases()) {
+        Constant *C = It.getCaseValue();
+        if (C->getType()->getScalarSizeInBits() <
+            Int64Ty->getScalarSizeInBits())
+          C = ConstantExpr::getCast(CastInst::ZExt, It.getCaseValue(), Int64Ty);
+        Initializers.push_back(C);
+      }
       llvm::sort(drop_begin(Initializers, 2),
-                 [](const Constant *A, const Constant *B) { 
-                   return cast<ConstantInt>(A)->getLimitedValue() < 
-                          cast<ConstantInt>(B)->getLimitedValue(); 
-                 }); 
-      ArrayType *ArrayOfInt64Ty = ArrayType::get(Int64Ty, Initializers.size()); 
-      GlobalVariable *GV = new GlobalVariable( 
-          *CurModule, ArrayOfInt64Ty, false, GlobalVariable::InternalLinkage, 
-          ConstantArray::get(ArrayOfInt64Ty, Initializers), 
-          "__sancov_gen_cov_switch_values"); 
-      IRB.CreateCall(SanCovTraceSwitchFunction, 
-                     {Cond, IRB.CreatePointerCast(GV, Int64PtrTy)}); 
-    } 
-  } 
-} 
- 
-void ModuleSanitizerCoverage::InjectTraceForDiv( 
-    Function &, ArrayRef<BinaryOperator *> DivTraceTargets) { 
-  for (auto BO : DivTraceTargets) { 
-    IRBuilder<> IRB(BO); 
-    Value *A1 = BO->getOperand(1); 
-    if (isa<ConstantInt>(A1)) continue; 
-    if (!A1->getType()->isIntegerTy()) 
-      continue; 
-    uint64_t TypeSize = DL->getTypeStoreSizeInBits(A1->getType()); 
-    int CallbackIdx = TypeSize == 32 ? 0 : 
-        TypeSize == 64 ? 1 : -1; 
-    if (CallbackIdx < 0) continue; 
-    auto Ty = Type::getIntNTy(*C, TypeSize); 
-    IRB.CreateCall(SanCovTraceDivFunction[CallbackIdx], 
-                   {IRB.CreateIntCast(A1, Ty, true)}); 
-  } 
-} 
- 
-void ModuleSanitizerCoverage::InjectTraceForGep( 
-    Function &, ArrayRef<GetElementPtrInst *> GepTraceTargets) { 
-  for (auto GEP : GepTraceTargets) { 
-    IRBuilder<> IRB(GEP); 
-    for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I) 
-      if (!isa<ConstantInt>(*I) && (*I)->getType()->isIntegerTy()) 
-        IRB.CreateCall(SanCovTraceGepFunction, 
-                       {IRB.CreateIntCast(*I, IntptrTy, true)}); 
-  } 
-} 
- 
-void ModuleSanitizerCoverage::InjectTraceForCmp( 
-    Function &, ArrayRef<Instruction *> CmpTraceTargets) { 
-  for (auto I : CmpTraceTargets) { 
-    if (ICmpInst *ICMP = dyn_cast<ICmpInst>(I)) { 
-      IRBuilder<> IRB(ICMP); 
-      Value *A0 = ICMP->getOperand(0); 
-      Value *A1 = ICMP->getOperand(1); 
-      if (!A0->getType()->isIntegerTy()) 
-        continue; 
-      uint64_t TypeSize = DL->getTypeStoreSizeInBits(A0->getType()); 
-      int CallbackIdx = TypeSize == 8 ? 0 : 
-                        TypeSize == 16 ? 1 : 
-                        TypeSize == 32 ? 2 : 
-                        TypeSize == 64 ? 3 : -1; 
-      if (CallbackIdx < 0) continue; 
-      // __sanitizer_cov_trace_cmp((type_size << 32) | predicate, A0, A1); 
-      auto CallbackFunc = SanCovTraceCmpFunction[CallbackIdx]; 
-      bool FirstIsConst = isa<ConstantInt>(A0); 
-      bool SecondIsConst = isa<ConstantInt>(A1); 
-      // If both are const, then we don't need such a comparison. 
-      if (FirstIsConst && SecondIsConst) continue; 
-      // If only one is const, then make it the first callback argument. 
-      if (FirstIsConst || SecondIsConst) { 
-        CallbackFunc = SanCovTraceConstCmpFunction[CallbackIdx]; 
-        if (SecondIsConst) 
-          std::swap(A0, A1); 
-      } 
- 
-      auto Ty = Type::getIntNTy(*C, TypeSize); 
-      IRB.CreateCall(CallbackFunc, {IRB.CreateIntCast(A0, Ty, true), 
-              IRB.CreateIntCast(A1, Ty, true)}); 
-    } 
-  } 
-} 
- 
-void ModuleSanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB, 
-                                                    size_t Idx, 
-                                                    bool IsLeafFunc) { 
-  BasicBlock::iterator IP = BB.getFirstInsertionPt(); 
-  bool IsEntryBB = &BB == &F.getEntryBlock(); 
-  DebugLoc EntryLoc; 
-  if (IsEntryBB) { 
-    if (auto SP = F.getSubprogram()) 
+                 [](const Constant *A, const Constant *B) {
+                   return cast<ConstantInt>(A)->getLimitedValue() <
+                          cast<ConstantInt>(B)->getLimitedValue();
+                 });
+      ArrayType *ArrayOfInt64Ty = ArrayType::get(Int64Ty, Initializers.size());
+      GlobalVariable *GV = new GlobalVariable(
+          *CurModule, ArrayOfInt64Ty, false, GlobalVariable::InternalLinkage,
+          ConstantArray::get(ArrayOfInt64Ty, Initializers),
+          "__sancov_gen_cov_switch_values");
+      IRB.CreateCall(SanCovTraceSwitchFunction,
+                     {Cond, IRB.CreatePointerCast(GV, Int64PtrTy)});
+    }
+  }
+}
+
+void ModuleSanitizerCoverage::InjectTraceForDiv(
+    Function &, ArrayRef<BinaryOperator *> DivTraceTargets) {
+  for (auto BO : DivTraceTargets) {
+    IRBuilder<> IRB(BO);
+    Value *A1 = BO->getOperand(1);
+    if (isa<ConstantInt>(A1)) continue;
+    if (!A1->getType()->isIntegerTy())
+      continue;
+    uint64_t TypeSize = DL->getTypeStoreSizeInBits(A1->getType());
+    int CallbackIdx = TypeSize == 32 ? 0 :
+        TypeSize == 64 ? 1 : -1;
+    if (CallbackIdx < 0) continue;
+    auto Ty = Type::getIntNTy(*C, TypeSize);
+    IRB.CreateCall(SanCovTraceDivFunction[CallbackIdx],
+                   {IRB.CreateIntCast(A1, Ty, true)});
+  }
+}
+
+void ModuleSanitizerCoverage::InjectTraceForGep(
+    Function &, ArrayRef<GetElementPtrInst *> GepTraceTargets) {
+  for (auto GEP : GepTraceTargets) {
+    IRBuilder<> IRB(GEP);
+    for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I)
+      if (!isa<ConstantInt>(*I) && (*I)->getType()->isIntegerTy())
+        IRB.CreateCall(SanCovTraceGepFunction,
+                       {IRB.CreateIntCast(*I, IntptrTy, true)});
+  }
+}
+
+void ModuleSanitizerCoverage::InjectTraceForCmp(
+    Function &, ArrayRef<Instruction *> CmpTraceTargets) {
+  for (auto I : CmpTraceTargets) {
+    if (ICmpInst *ICMP = dyn_cast<ICmpInst>(I)) {
+      IRBuilder<> IRB(ICMP);
+      Value *A0 = ICMP->getOperand(0);
+      Value *A1 = ICMP->getOperand(1);
+      if (!A0->getType()->isIntegerTy())
+        continue;
+      uint64_t TypeSize = DL->getTypeStoreSizeInBits(A0->getType());
+      int CallbackIdx = TypeSize == 8 ? 0 :
+                        TypeSize == 16 ? 1 :
+                        TypeSize == 32 ? 2 :
+                        TypeSize == 64 ? 3 : -1;
+      if (CallbackIdx < 0) continue;
+      // __sanitizer_cov_trace_cmp((type_size << 32) | predicate, A0, A1);
+      auto CallbackFunc = SanCovTraceCmpFunction[CallbackIdx];
+      bool FirstIsConst = isa<ConstantInt>(A0);
+      bool SecondIsConst = isa<ConstantInt>(A1);
+      // If both are const, then we don't need such a comparison.
+      if (FirstIsConst && SecondIsConst) continue;
+      // If only one is const, then make it the first callback argument.
+      if (FirstIsConst || SecondIsConst) {
+        CallbackFunc = SanCovTraceConstCmpFunction[CallbackIdx];
+        if (SecondIsConst)
+          std::swap(A0, A1);
+      }
+
+      auto Ty = Type::getIntNTy(*C, TypeSize);
+      IRB.CreateCall(CallbackFunc, {IRB.CreateIntCast(A0, Ty, true),
+              IRB.CreateIntCast(A1, Ty, true)});
+    }
+  }
+}
+
+void ModuleSanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
+                                                    size_t Idx,
+                                                    bool IsLeafFunc) {
+  BasicBlock::iterator IP = BB.getFirstInsertionPt();
+  bool IsEntryBB = &BB == &F.getEntryBlock();
+  DebugLoc EntryLoc;
+  if (IsEntryBB) {
+    if (auto SP = F.getSubprogram())
       EntryLoc = DILocation::get(SP->getContext(), SP->getScopeLine(), 0, SP);
-    // Keep static allocas and llvm.localescape calls in the entry block.  Even 
-    // if we aren't splitting the block, it's nice for allocas to be before 
-    // calls. 
-    IP = PrepareToSplitEntryBlock(BB, IP); 
-  } else { 
-    EntryLoc = IP->getDebugLoc(); 
-  } 
- 
-  IRBuilder<> IRB(&*IP); 
-  IRB.SetCurrentDebugLocation(EntryLoc); 
-  if (Options.TracePC) { 
-    IRB.CreateCall(SanCovTracePC) 
-        ->setCannotMerge(); // gets the PC using GET_CALLER_PC. 
-  } 
-  if (Options.TracePCGuard) { 
-    auto GuardPtr = IRB.CreateIntToPtr( 
-        IRB.CreateAdd(IRB.CreatePointerCast(FunctionGuardArray, IntptrTy), 
-                      ConstantInt::get(IntptrTy, Idx * 4)), 
-        Int32PtrTy); 
-    IRB.CreateCall(SanCovTracePCGuard, GuardPtr)->setCannotMerge(); 
-  } 
-  if (Options.Inline8bitCounters) { 
-    auto CounterPtr = IRB.CreateGEP( 
-        Function8bitCounterArray->getValueType(), Function8bitCounterArray, 
-        {ConstantInt::get(IntptrTy, 0), ConstantInt::get(IntptrTy, Idx)}); 
-    auto Load = IRB.CreateLoad(Int8Ty, CounterPtr); 
-    auto Inc = IRB.CreateAdd(Load, ConstantInt::get(Int8Ty, 1)); 
-    auto Store = IRB.CreateStore(Inc, CounterPtr); 
-    SetNoSanitizeMetadata(Load); 
-    SetNoSanitizeMetadata(Store); 
-  } 
-  if (Options.InlineBoolFlag) { 
-    auto FlagPtr = IRB.CreateGEP( 
-        FunctionBoolArray->getValueType(), FunctionBoolArray, 
-        {ConstantInt::get(IntptrTy, 0), ConstantInt::get(IntptrTy, Idx)}); 
-    auto Load = IRB.CreateLoad(Int1Ty, FlagPtr); 
-    auto ThenTerm = 
-        SplitBlockAndInsertIfThen(IRB.CreateIsNull(Load), &*IP, false); 
-    IRBuilder<> ThenIRB(ThenTerm); 
-    auto Store = ThenIRB.CreateStore(ConstantInt::getTrue(Int1Ty), FlagPtr); 
-    SetNoSanitizeMetadata(Load); 
-    SetNoSanitizeMetadata(Store); 
-  } 
-  if (Options.StackDepth && IsEntryBB && !IsLeafFunc) { 
-    // Check stack depth.  If it's the deepest so far, record it. 
-    Module *M = F.getParent(); 
-    Function *GetFrameAddr = Intrinsic::getDeclaration( 
-        M, Intrinsic::frameaddress, 
-        IRB.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace())); 
-    auto FrameAddrPtr = 
-        IRB.CreateCall(GetFrameAddr, {Constant::getNullValue(Int32Ty)}); 
-    auto FrameAddrInt = IRB.CreatePtrToInt(FrameAddrPtr, IntptrTy); 
-    auto LowestStack = IRB.CreateLoad(IntptrTy, SanCovLowestStack); 
-    auto IsStackLower = IRB.CreateICmpULT(FrameAddrInt, LowestStack); 
-    auto ThenTerm = SplitBlockAndInsertIfThen(IsStackLower, &*IP, false); 
-    IRBuilder<> ThenIRB(ThenTerm); 
-    auto Store = ThenIRB.CreateStore(FrameAddrInt, SanCovLowestStack); 
-    SetNoSanitizeMetadata(LowestStack); 
-    SetNoSanitizeMetadata(Store); 
-  } 
-} 
- 
-std::string 
-ModuleSanitizerCoverage::getSectionName(const std::string &Section) const { 
-  if (TargetTriple.isOSBinFormatCOFF()) { 
-    if (Section == SanCovCountersSectionName) 
-      return ".SCOV$CM"; 
-    if (Section == SanCovBoolFlagSectionName) 
-      return ".SCOV$BM"; 
-    if (Section == SanCovPCsSectionName) 
-      return ".SCOVP$M"; 
-    return ".SCOV$GM"; // For SanCovGuardsSectionName. 
-  } 
-  if (TargetTriple.isOSBinFormatMachO()) 
-    return "__DATA,__" + Section; 
-  return "__" + Section; 
-} 
- 
-std::string 
-ModuleSanitizerCoverage::getSectionStart(const std::string &Section) const { 
-  if (TargetTriple.isOSBinFormatMachO()) 
-    return "\1section$start$__DATA$__" + Section; 
-  return "__start___" + Section; 
-} 
- 
-std::string 
-ModuleSanitizerCoverage::getSectionEnd(const std::string &Section) const { 
-  if (TargetTriple.isOSBinFormatMachO()) 
-    return "\1section$end$__DATA$__" + Section; 
-  return "__stop___" + Section; 
-} 
- 
-char ModuleSanitizerCoverageLegacyPass::ID = 0; 
-INITIALIZE_PASS_BEGIN(ModuleSanitizerCoverageLegacyPass, "sancov", 
-                      "Pass for instrumenting coverage on functions", false, 
-                      false) 
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 
-INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) 
-INITIALIZE_PASS_END(ModuleSanitizerCoverageLegacyPass, "sancov", 
-                    "Pass for instrumenting coverage on functions", false, 
-                    false) 
-ModulePass *llvm::createModuleSanitizerCoverageLegacyPassPass( 
-    const SanitizerCoverageOptions &Options, 
-    const std::vector<std::string> &AllowlistFiles, 
-    const std::vector<std::string> &BlocklistFiles) { 
-  return new ModuleSanitizerCoverageLegacyPass(Options, AllowlistFiles, 
-                                               BlocklistFiles); 
-} 
+    // Keep static allocas and llvm.localescape calls in the entry block.  Even
+    // if we aren't splitting the block, it's nice for allocas to be before
+    // calls.
+    IP = PrepareToSplitEntryBlock(BB, IP);
+  } else {
+    EntryLoc = IP->getDebugLoc();
+  }
+
+  IRBuilder<> IRB(&*IP);
+  IRB.SetCurrentDebugLocation(EntryLoc);
+  if (Options.TracePC) {
+    IRB.CreateCall(SanCovTracePC)
+        ->setCannotMerge(); // gets the PC using GET_CALLER_PC.
+  }
+  if (Options.TracePCGuard) {
+    auto GuardPtr = IRB.CreateIntToPtr(
+        IRB.CreateAdd(IRB.CreatePointerCast(FunctionGuardArray, IntptrTy),
+                      ConstantInt::get(IntptrTy, Idx * 4)),
+        Int32PtrTy);
+    IRB.CreateCall(SanCovTracePCGuard, GuardPtr)->setCannotMerge();
+  }
+  if (Options.Inline8bitCounters) {
+    auto CounterPtr = IRB.CreateGEP(
+        Function8bitCounterArray->getValueType(), Function8bitCounterArray,
+        {ConstantInt::get(IntptrTy, 0), ConstantInt::get(IntptrTy, Idx)});
+    auto Load = IRB.CreateLoad(Int8Ty, CounterPtr);
+    auto Inc = IRB.CreateAdd(Load, ConstantInt::get(Int8Ty, 1));
+    auto Store = IRB.CreateStore(Inc, CounterPtr);
+    SetNoSanitizeMetadata(Load);
+    SetNoSanitizeMetadata(Store);
+  }
+  if (Options.InlineBoolFlag) {
+    auto FlagPtr = IRB.CreateGEP(
+        FunctionBoolArray->getValueType(), FunctionBoolArray,
+        {ConstantInt::get(IntptrTy, 0), ConstantInt::get(IntptrTy, Idx)});
+    auto Load = IRB.CreateLoad(Int1Ty, FlagPtr);
+    auto ThenTerm =
+        SplitBlockAndInsertIfThen(IRB.CreateIsNull(Load), &*IP, false);
+    IRBuilder<> ThenIRB(ThenTerm);
+    auto Store = ThenIRB.CreateStore(ConstantInt::getTrue(Int1Ty), FlagPtr);
+    SetNoSanitizeMetadata(Load);
+    SetNoSanitizeMetadata(Store);
+  }
+  if (Options.StackDepth && IsEntryBB && !IsLeafFunc) {
+    // Check stack depth.  If it's the deepest so far, record it.
+    Module *M = F.getParent();
+    Function *GetFrameAddr = Intrinsic::getDeclaration(
+        M, Intrinsic::frameaddress,
+        IRB.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace()));
+    auto FrameAddrPtr =
+        IRB.CreateCall(GetFrameAddr, {Constant::getNullValue(Int32Ty)});
+    auto FrameAddrInt = IRB.CreatePtrToInt(FrameAddrPtr, IntptrTy);
+    auto LowestStack = IRB.CreateLoad(IntptrTy, SanCovLowestStack);
+    auto IsStackLower = IRB.CreateICmpULT(FrameAddrInt, LowestStack);
+    auto ThenTerm = SplitBlockAndInsertIfThen(IsStackLower, &*IP, false);
+    IRBuilder<> ThenIRB(ThenTerm);
+    auto Store = ThenIRB.CreateStore(FrameAddrInt, SanCovLowestStack);
+    SetNoSanitizeMetadata(LowestStack);
+    SetNoSanitizeMetadata(Store);
+  }
+}
+
+std::string
+ModuleSanitizerCoverage::getSectionName(const std::string &Section) const {
+  if (TargetTriple.isOSBinFormatCOFF()) {
+    if (Section == SanCovCountersSectionName)
+      return ".SCOV$CM";
+    if (Section == SanCovBoolFlagSectionName)
+      return ".SCOV$BM";
+    if (Section == SanCovPCsSectionName)
+      return ".SCOVP$M";
+    return ".SCOV$GM"; // For SanCovGuardsSectionName.
+  }
+  if (TargetTriple.isOSBinFormatMachO())
+    return "__DATA,__" + Section;
+  return "__" + Section;
+}
+
+std::string
+ModuleSanitizerCoverage::getSectionStart(const std::string &Section) const {
+  if (TargetTriple.isOSBinFormatMachO())
+    return "\1section$start$__DATA$__" + Section;
+  return "__start___" + Section;
+}
+
+std::string
+ModuleSanitizerCoverage::getSectionEnd(const std::string &Section) const {
+  if (TargetTriple.isOSBinFormatMachO())
+    return "\1section$end$__DATA$__" + Section;
+  return "__stop___" + Section;
+}
+
+char ModuleSanitizerCoverageLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(ModuleSanitizerCoverageLegacyPass, "sancov",
+                      "Pass for instrumenting coverage on functions", false,
+                      false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
+INITIALIZE_PASS_END(ModuleSanitizerCoverageLegacyPass, "sancov",
+                    "Pass for instrumenting coverage on functions", false,
+                    false)
+ModulePass *llvm::createModuleSanitizerCoverageLegacyPassPass(
+    const SanitizerCoverageOptions &Options,
+    const std::vector<std::string> &AllowlistFiles,
+    const std::vector<std::string> &BlocklistFiles) {
+  return new ModuleSanitizerCoverageLegacyPass(Options, AllowlistFiles,
+                                               BlocklistFiles);
+}
diff --git a/contrib/libs/llvm12/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/contrib/libs/llvm12/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index 0b53ff8a83..783878cf1e 100644
--- a/contrib/libs/llvm12/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -1,113 +1,113 @@
-//===-- ThreadSanitizer.cpp - race detector -------------------------------===// 
-// 
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 
-// 
-//===----------------------------------------------------------------------===// 
-// 
-// This file is a part of ThreadSanitizer, a race detector. 
-// 
-// The tool is under development, for the details about previous versions see 
-// http://code.google.com/p/data-race-test 
-// 
-// The instrumentation phase is quite simple: 
-//   - Insert calls to run-time library before every memory access. 
-//      - Optimizations may apply to avoid instrumenting some of the accesses. 
-//   - Insert calls at function entry/exit. 
-// The rest is handled by the run-time library. 
-//===----------------------------------------------------------------------===// 
- 
-#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h" 
+//===-- ThreadSanitizer.cpp - race detector -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer, a race detector.
+//
+// The tool is under development, for the details about previous versions see
+// http://code.google.com/p/data-race-test
+//
+// The instrumentation phase is quite simple:
+//   - Insert calls to run-time library before every memory access.
+//      - Optimizations may apply to avoid instrumenting some of the accesses.
+//   - Insert calls at function entry/exit.
+// The rest is handled by the run-time library.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Optional.h"
-#include "llvm/ADT/SmallString.h" 
-#include "llvm/ADT/SmallVector.h" 
-#include "llvm/ADT/Statistic.h" 
-#include "llvm/ADT/StringExtras.h" 
-#include "llvm/Analysis/CaptureTracking.h" 
-#include "llvm/Analysis/TargetLibraryInfo.h" 
-#include "llvm/Analysis/ValueTracking.h" 
-#include "llvm/IR/DataLayout.h" 
-#include "llvm/IR/Function.h" 
-#include "llvm/IR/IRBuilder.h" 
-#include "llvm/IR/IntrinsicInst.h" 
-#include "llvm/IR/Intrinsics.h" 
-#include "llvm/IR/LLVMContext.h" 
-#include "llvm/IR/Metadata.h" 
-#include "llvm/IR/Module.h" 
-#include "llvm/IR/Type.h" 
-#include "llvm/InitializePasses.h" 
-#include "llvm/ProfileData/InstrProf.h" 
-#include "llvm/Support/CommandLine.h" 
-#include "llvm/Support/Debug.h" 
-#include "llvm/Support/MathExtras.h" 
-#include "llvm/Support/raw_ostream.h" 
-#include "llvm/Transforms/Instrumentation.h" 
-#include "llvm/Transforms/Utils/BasicBlockUtils.h" 
-#include "llvm/Transforms/Utils/EscapeEnumerator.h" 
-#include "llvm/Transforms/Utils/Local.h" 
-#include "llvm/Transforms/Utils/ModuleUtils.h" 
- 
-using namespace llvm; 
- 
-#define DEBUG_TYPE "tsan" 
- 
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/EscapeEnumerator.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "tsan"
+
 static cl::opt<bool> ClInstrumentMemoryAccesses(
-    "tsan-instrument-memory-accesses", cl::init(true), 
-    cl::desc("Instrument memory accesses"), cl::Hidden); 
+    "tsan-instrument-memory-accesses", cl::init(true),
+    cl::desc("Instrument memory accesses"), cl::Hidden);
 static cl::opt<bool>
     ClInstrumentFuncEntryExit("tsan-instrument-func-entry-exit", cl::init(true),
                               cl::desc("Instrument function entry and exit"),
                               cl::Hidden);
 static cl::opt<bool> ClHandleCxxExceptions(
-    "tsan-handle-cxx-exceptions", cl::init(true), 
-    cl::desc("Handle C++ exceptions (insert cleanup blocks for unwinding)"), 
-    cl::Hidden); 
+    "tsan-handle-cxx-exceptions", cl::init(true),
+    cl::desc("Handle C++ exceptions (insert cleanup blocks for unwinding)"),
+    cl::Hidden);
 static cl::opt<bool> ClInstrumentAtomics("tsan-instrument-atomics",
                                          cl::init(true),
                                          cl::desc("Instrument atomics"),
                                          cl::Hidden);
 static cl::opt<bool> ClInstrumentMemIntrinsics(
-    "tsan-instrument-memintrinsics", cl::init(true), 
-    cl::desc("Instrument memintrinsics (memset/memcpy/memmove)"), cl::Hidden); 
+    "tsan-instrument-memintrinsics", cl::init(true),
+    cl::desc("Instrument memintrinsics (memset/memcpy/memmove)"), cl::Hidden);
 static cl::opt<bool> ClDistinguishVolatile(
-    "tsan-distinguish-volatile", cl::init(false), 
-    cl::desc("Emit special instrumentation for accesses to volatiles"), 
-    cl::Hidden); 
+    "tsan-distinguish-volatile", cl::init(false),
+    cl::desc("Emit special instrumentation for accesses to volatiles"),
+    cl::Hidden);
 static cl::opt<bool> ClInstrumentReadBeforeWrite(
-    "tsan-instrument-read-before-write", cl::init(false), 
-    cl::desc("Do not eliminate read instrumentation for read-before-writes"), 
-    cl::Hidden); 
+    "tsan-instrument-read-before-write", cl::init(false),
+    cl::desc("Do not eliminate read instrumentation for read-before-writes"),
+    cl::Hidden);
 static cl::opt<bool> ClCompoundReadBeforeWrite(
     "tsan-compound-read-before-write", cl::init(false),
     cl::desc("Emit special compound instrumentation for reads-before-writes"),
     cl::Hidden);
- 
-STATISTIC(NumInstrumentedReads, "Number of instrumented reads"); 
-STATISTIC(NumInstrumentedWrites, "Number of instrumented writes"); 
-STATISTIC(NumOmittedReadsBeforeWrite, 
-          "Number of reads ignored due to following writes"); 
-STATISTIC(NumAccessesWithBadSize, "Number of accesses with bad size"); 
-STATISTIC(NumInstrumentedVtableWrites, "Number of vtable ptr writes"); 
-STATISTIC(NumInstrumentedVtableReads, "Number of vtable ptr reads"); 
-STATISTIC(NumOmittedReadsFromConstantGlobals, 
-          "Number of reads from constant globals"); 
-STATISTIC(NumOmittedReadsFromVtable, "Number of vtable reads"); 
-STATISTIC(NumOmittedNonCaptured, "Number of accesses ignored due to capturing"); 
- 
+
+STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
+STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
+STATISTIC(NumOmittedReadsBeforeWrite,
+          "Number of reads ignored due to following writes");
+STATISTIC(NumAccessesWithBadSize, "Number of accesses with bad size");
+STATISTIC(NumInstrumentedVtableWrites, "Number of vtable ptr writes");
+STATISTIC(NumInstrumentedVtableReads, "Number of vtable ptr reads");
+STATISTIC(NumOmittedReadsFromConstantGlobals,
+          "Number of reads from constant globals");
+STATISTIC(NumOmittedReadsFromVtable, "Number of vtable reads");
+STATISTIC(NumOmittedNonCaptured, "Number of accesses ignored due to capturing");
+
 const char kTsanModuleCtorName[] = "tsan.module_ctor";
 const char kTsanInitName[] = "__tsan_init";
- 
-namespace { 
- 
-/// ThreadSanitizer: instrument the code in module to find races. 
-/// 
-/// Instantiating ThreadSanitizer inserts the tsan runtime library API function 
-/// declarations into the module if they don't exist already. Instantiating 
-/// ensures the __tsan_init function is in the list of global constructors for 
-/// the module. 
-struct ThreadSanitizer { 
+
+namespace {
+
+/// ThreadSanitizer: instrument the code in module to find races.
+///
+/// Instantiating ThreadSanitizer inserts the tsan runtime library API function
+/// declarations into the module if they don't exist already. Instantiating
+/// ensures the __tsan_init function is in the list of global constructors for
+/// the module.
+struct ThreadSanitizer {
   ThreadSanitizer() {
     // Sanity check options and warn user.
     if (ClInstrumentReadBeforeWrite && ClCompoundReadBeforeWrite) {
@@ -117,9 +117,9 @@ struct ThreadSanitizer {
     }
   }
 
-  bool sanitizeFunction(Function &F, const TargetLibraryInfo &TLI); 
- 
-private: 
+  bool sanitizeFunction(Function &F, const TargetLibraryInfo &TLI);
+
+private:
   // Internal Instruction wrapper that contains more information about the
   // Instruction from prior analysis.
   struct InstructionInfo {
@@ -133,172 +133,172 @@ private:
     unsigned Flags = 0;
   };
 
-  void initialize(Module &M); 
+  void initialize(Module &M);
   bool instrumentLoadOrStore(const InstructionInfo &II, const DataLayout &DL);
-  bool instrumentAtomic(Instruction *I, const DataLayout &DL); 
-  bool instrumentMemIntrinsic(Instruction *I); 
-  void chooseInstructionsToInstrument(SmallVectorImpl<Instruction *> &Local, 
+  bool instrumentAtomic(Instruction *I, const DataLayout &DL);
+  bool instrumentMemIntrinsic(Instruction *I);
+  void chooseInstructionsToInstrument(SmallVectorImpl<Instruction *> &Local,
                                       SmallVectorImpl<InstructionInfo> &All,
-                                      const DataLayout &DL); 
-  bool addrPointsToConstantData(Value *Addr); 
-  int getMemoryAccessFuncIndex(Value *Addr, const DataLayout &DL); 
-  void InsertRuntimeIgnores(Function &F); 
- 
-  Type *IntptrTy; 
-  FunctionCallee TsanFuncEntry; 
-  FunctionCallee TsanFuncExit; 
-  FunctionCallee TsanIgnoreBegin; 
-  FunctionCallee TsanIgnoreEnd; 
-  // Accesses sizes are powers of two: 1, 2, 4, 8, 16. 
-  static const size_t kNumberOfAccessSizes = 5; 
-  FunctionCallee TsanRead[kNumberOfAccessSizes]; 
-  FunctionCallee TsanWrite[kNumberOfAccessSizes]; 
-  FunctionCallee TsanUnalignedRead[kNumberOfAccessSizes]; 
-  FunctionCallee TsanUnalignedWrite[kNumberOfAccessSizes]; 
-  FunctionCallee TsanVolatileRead[kNumberOfAccessSizes]; 
-  FunctionCallee TsanVolatileWrite[kNumberOfAccessSizes]; 
-  FunctionCallee TsanUnalignedVolatileRead[kNumberOfAccessSizes]; 
-  FunctionCallee TsanUnalignedVolatileWrite[kNumberOfAccessSizes]; 
+                                      const DataLayout &DL);
+  bool addrPointsToConstantData(Value *Addr);
+  int getMemoryAccessFuncIndex(Value *Addr, const DataLayout &DL);
+  void InsertRuntimeIgnores(Function &F);
+
+  Type *IntptrTy;
+  FunctionCallee TsanFuncEntry;
+  FunctionCallee TsanFuncExit;
+  FunctionCallee TsanIgnoreBegin;
+  FunctionCallee TsanIgnoreEnd;
+  // Accesses sizes are powers of two: 1, 2, 4, 8, 16.
+  static const size_t kNumberOfAccessSizes = 5;
+  FunctionCallee TsanRead[kNumberOfAccessSizes];
+  FunctionCallee TsanWrite[kNumberOfAccessSizes];
+  FunctionCallee TsanUnalignedRead[kNumberOfAccessSizes];
+  FunctionCallee TsanUnalignedWrite[kNumberOfAccessSizes];
+  FunctionCallee TsanVolatileRead[kNumberOfAccessSizes];
+  FunctionCallee TsanVolatileWrite[kNumberOfAccessSizes];
+  FunctionCallee TsanUnalignedVolatileRead[kNumberOfAccessSizes];
+  FunctionCallee TsanUnalignedVolatileWrite[kNumberOfAccessSizes];
   FunctionCallee TsanCompoundRW[kNumberOfAccessSizes];
   FunctionCallee TsanUnalignedCompoundRW[kNumberOfAccessSizes];
-  FunctionCallee TsanAtomicLoad[kNumberOfAccessSizes]; 
-  FunctionCallee TsanAtomicStore[kNumberOfAccessSizes]; 
-  FunctionCallee TsanAtomicRMW[AtomicRMWInst::LAST_BINOP + 1] 
-                              [kNumberOfAccessSizes]; 
-  FunctionCallee TsanAtomicCAS[kNumberOfAccessSizes]; 
-  FunctionCallee TsanAtomicThreadFence; 
-  FunctionCallee TsanAtomicSignalFence; 
-  FunctionCallee TsanVptrUpdate; 
-  FunctionCallee TsanVptrLoad; 
-  FunctionCallee MemmoveFn, MemcpyFn, MemsetFn; 
-}; 
- 
-struct ThreadSanitizerLegacyPass : FunctionPass { 
-  ThreadSanitizerLegacyPass() : FunctionPass(ID) { 
-    initializeThreadSanitizerLegacyPassPass(*PassRegistry::getPassRegistry()); 
-  } 
-  StringRef getPassName() const override; 
-  void getAnalysisUsage(AnalysisUsage &AU) const override; 
-  bool runOnFunction(Function &F) override; 
-  bool doInitialization(Module &M) override; 
-  static char ID; // Pass identification, replacement for typeid. 
-private: 
-  Optional<ThreadSanitizer> TSan; 
-}; 
- 
-void insertModuleCtor(Module &M) { 
-  getOrCreateSanitizerCtorAndInitFunctions( 
-      M, kTsanModuleCtorName, kTsanInitName, /*InitArgTypes=*/{}, 
-      /*InitArgs=*/{}, 
-      // This callback is invoked when the functions are created the first 
-      // time. Hook them into the global ctors list in that case: 
-      [&](Function *Ctor, FunctionCallee) { appendToGlobalCtors(M, Ctor, 0); }); 
-} 
- 
-}  // namespace 
- 
-PreservedAnalyses ThreadSanitizerPass::run(Function &F, 
-                                           FunctionAnalysisManager &FAM) { 
-  ThreadSanitizer TSan; 
-  if (TSan.sanitizeFunction(F, FAM.getResult<TargetLibraryAnalysis>(F))) 
-    return PreservedAnalyses::none(); 
-  return PreservedAnalyses::all(); 
-} 
- 
-PreservedAnalyses ThreadSanitizerPass::run(Module &M, 
-                                           ModuleAnalysisManager &MAM) { 
-  insertModuleCtor(M); 
-  return PreservedAnalyses::none(); 
-} 
- 
-char ThreadSanitizerLegacyPass::ID = 0; 
-INITIALIZE_PASS_BEGIN(ThreadSanitizerLegacyPass, "tsan", 
-                      "ThreadSanitizer: detects data races.", false, false) 
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 
-INITIALIZE_PASS_END(ThreadSanitizerLegacyPass, "tsan", 
-                    "ThreadSanitizer: detects data races.", false, false) 
- 
-StringRef ThreadSanitizerLegacyPass::getPassName() const { 
-  return "ThreadSanitizerLegacyPass"; 
-} 
- 
-void ThreadSanitizerLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const { 
-  AU.addRequired<TargetLibraryInfoWrapperPass>(); 
-} 
- 
-bool ThreadSanitizerLegacyPass::doInitialization(Module &M) { 
-  insertModuleCtor(M); 
-  TSan.emplace(); 
-  return true; 
-} 
- 
-bool ThreadSanitizerLegacyPass::runOnFunction(Function &F) { 
-  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 
-  TSan->sanitizeFunction(F, TLI); 
-  return true; 
-} 
- 
-FunctionPass *llvm::createThreadSanitizerLegacyPassPass() { 
-  return new ThreadSanitizerLegacyPass(); 
-} 
- 
-void ThreadSanitizer::initialize(Module &M) { 
-  const DataLayout &DL = M.getDataLayout(); 
-  IntptrTy = DL.getIntPtrType(M.getContext()); 
- 
-  IRBuilder<> IRB(M.getContext()); 
-  AttributeList Attr; 
-  Attr = Attr.addAttribute(M.getContext(), AttributeList::FunctionIndex, 
-                           Attribute::NoUnwind); 
-  // Initialize the callbacks. 
-  TsanFuncEntry = M.getOrInsertFunction("__tsan_func_entry", Attr, 
-                                        IRB.getVoidTy(), IRB.getInt8PtrTy()); 
-  TsanFuncExit = 
-      M.getOrInsertFunction("__tsan_func_exit", Attr, IRB.getVoidTy()); 
-  TsanIgnoreBegin = M.getOrInsertFunction("__tsan_ignore_thread_begin", Attr, 
-                                          IRB.getVoidTy()); 
-  TsanIgnoreEnd = 
-      M.getOrInsertFunction("__tsan_ignore_thread_end", Attr, IRB.getVoidTy()); 
-  IntegerType *OrdTy = IRB.getInt32Ty(); 
-  for (size_t i = 0; i < kNumberOfAccessSizes; ++i) { 
-    const unsigned ByteSize = 1U << i; 
-    const unsigned BitSize = ByteSize * 8; 
-    std::string ByteSizeStr = utostr(ByteSize); 
-    std::string BitSizeStr = utostr(BitSize); 
-    SmallString<32> ReadName("__tsan_read" + ByteSizeStr); 
-    TsanRead[i] = M.getOrInsertFunction(ReadName, Attr, IRB.getVoidTy(), 
-                                        IRB.getInt8PtrTy()); 
- 
-    SmallString<32> WriteName("__tsan_write" + ByteSizeStr); 
-    TsanWrite[i] = M.getOrInsertFunction(WriteName, Attr, IRB.getVoidTy(), 
-                                         IRB.getInt8PtrTy()); 
- 
-    SmallString<64> UnalignedReadName("__tsan_unaligned_read" + ByteSizeStr); 
-    TsanUnalignedRead[i] = M.getOrInsertFunction( 
-        UnalignedReadName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy()); 
- 
-    SmallString<64> UnalignedWriteName("__tsan_unaligned_write" + ByteSizeStr); 
-    TsanUnalignedWrite[i] = M.getOrInsertFunction( 
-        UnalignedWriteName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy()); 
- 
-    SmallString<64> VolatileReadName("__tsan_volatile_read" + ByteSizeStr); 
-    TsanVolatileRead[i] = M.getOrInsertFunction( 
-        VolatileReadName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy()); 
- 
-    SmallString<64> VolatileWriteName("__tsan_volatile_write" + ByteSizeStr); 
-    TsanVolatileWrite[i] = M.getOrInsertFunction( 
-        VolatileWriteName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy()); 
- 
-    SmallString<64> UnalignedVolatileReadName("__tsan_unaligned_volatile_read" + 
-                                              ByteSizeStr); 
-    TsanUnalignedVolatileRead[i] = M.getOrInsertFunction( 
-        UnalignedVolatileReadName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy()); 
- 
-    SmallString<64> UnalignedVolatileWriteName( 
-        "__tsan_unaligned_volatile_write" + ByteSizeStr); 
-    TsanUnalignedVolatileWrite[i] = M.getOrInsertFunction( 
-        UnalignedVolatileWriteName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy()); 
- 
+  FunctionCallee TsanAtomicLoad[kNumberOfAccessSizes];
+  FunctionCallee TsanAtomicStore[kNumberOfAccessSizes];
+  FunctionCallee TsanAtomicRMW[AtomicRMWInst::LAST_BINOP + 1]
+                              [kNumberOfAccessSizes];
+  FunctionCallee TsanAtomicCAS[kNumberOfAccessSizes];
+  FunctionCallee TsanAtomicThreadFence;
+  FunctionCallee TsanAtomicSignalFence;
+  FunctionCallee TsanVptrUpdate;
+  FunctionCallee TsanVptrLoad;
+  FunctionCallee MemmoveFn, MemcpyFn, MemsetFn;
+};
+
+struct ThreadSanitizerLegacyPass : FunctionPass {
+  ThreadSanitizerLegacyPass() : FunctionPass(ID) {
+    initializeThreadSanitizerLegacyPassPass(*PassRegistry::getPassRegistry());
+  }
+  StringRef getPassName() const override;
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+  bool runOnFunction(Function &F) override;
+  bool doInitialization(Module &M) override;
+  static char ID; // Pass identification, replacement for typeid.
+private:
+  Optional<ThreadSanitizer> TSan;
+};
+
+void insertModuleCtor(Module &M) {
+  getOrCreateSanitizerCtorAndInitFunctions(
+      M, kTsanModuleCtorName, kTsanInitName, /*InitArgTypes=*/{},
+      /*InitArgs=*/{},
+      // This callback is invoked when the functions are created the first
+      // time. Hook them into the global ctors list in that case:
+      [&](Function *Ctor, FunctionCallee) { appendToGlobalCtors(M, Ctor, 0); });
+}
+
+}  // namespace
+
+PreservedAnalyses ThreadSanitizerPass::run(Function &F,
+                                           FunctionAnalysisManager &FAM) {
+  ThreadSanitizer TSan;
+  if (TSan.sanitizeFunction(F, FAM.getResult<TargetLibraryAnalysis>(F)))
+    return PreservedAnalyses::none();
+  return PreservedAnalyses::all();
+}
+
+PreservedAnalyses ThreadSanitizerPass::run(Module &M,
+                                           ModuleAnalysisManager &MAM) {
+  insertModuleCtor(M);
+  return PreservedAnalyses::none();
+}
+
+char ThreadSanitizerLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(ThreadSanitizerLegacyPass, "tsan",
+                      "ThreadSanitizer: detects data races.", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(ThreadSanitizerLegacyPass, "tsan",
+                    "ThreadSanitizer: detects data races.", false, false)
+
+StringRef ThreadSanitizerLegacyPass::getPassName() const {
+  return "ThreadSanitizerLegacyPass";
+}
+
+void ThreadSanitizerLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<TargetLibraryInfoWrapperPass>();
+}
+
+bool ThreadSanitizerLegacyPass::doInitialization(Module &M) {
+  insertModuleCtor(M);
+  TSan.emplace();
+  return true;
+}
+
+bool ThreadSanitizerLegacyPass::runOnFunction(Function &F) {
+  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+  TSan->sanitizeFunction(F, TLI);
+  return true;
+}
+
+FunctionPass *llvm::createThreadSanitizerLegacyPassPass() {
+  return new ThreadSanitizerLegacyPass();
+}
+
+void ThreadSanitizer::initialize(Module &M) {
+  const DataLayout &DL = M.getDataLayout();
+  IntptrTy = DL.getIntPtrType(M.getContext());
+
+  IRBuilder<> IRB(M.getContext());
+  AttributeList Attr;
+  Attr = Attr.addAttribute(M.getContext(), AttributeList::FunctionIndex,
+                           Attribute::NoUnwind);
+  // Initialize the callbacks.
+  TsanFuncEntry = M.getOrInsertFunction("__tsan_func_entry", Attr,
+                                        IRB.getVoidTy(), IRB.getInt8PtrTy());
+  TsanFuncExit =
+      M.getOrInsertFunction("__tsan_func_exit", Attr, IRB.getVoidTy());
+  TsanIgnoreBegin = M.getOrInsertFunction("__tsan_ignore_thread_begin", Attr,
+                                          IRB.getVoidTy());
+  TsanIgnoreEnd =
+      M.getOrInsertFunction("__tsan_ignore_thread_end", Attr, IRB.getVoidTy());
+  IntegerType *OrdTy = IRB.getInt32Ty();
+  for (size_t i = 0; i < kNumberOfAccessSizes; ++i) {
+    const unsigned ByteSize = 1U << i;
+    const unsigned BitSize = ByteSize * 8;
+    std::string ByteSizeStr = utostr(ByteSize);
+    std::string BitSizeStr = utostr(BitSize);
+    SmallString<32> ReadName("__tsan_read" + ByteSizeStr);
+    TsanRead[i] = M.getOrInsertFunction(ReadName, Attr, IRB.getVoidTy(),
+                                        IRB.getInt8PtrTy());
+
+    SmallString<32> WriteName("__tsan_write" + ByteSizeStr);
+    TsanWrite[i] = M.getOrInsertFunction(WriteName, Attr, IRB.getVoidTy(),
+                                         IRB.getInt8PtrTy());
+
+    SmallString<64> UnalignedReadName("__tsan_unaligned_read" + ByteSizeStr);
+    TsanUnalignedRead[i] = M.getOrInsertFunction(
+        UnalignedReadName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy());
+
+    SmallString<64> UnalignedWriteName("__tsan_unaligned_write" + ByteSizeStr);
+    TsanUnalignedWrite[i] = M.getOrInsertFunction(
+        UnalignedWriteName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy());
+
+    SmallString<64> VolatileReadName("__tsan_volatile_read" + ByteSizeStr);
+    TsanVolatileRead[i] = M.getOrInsertFunction(
+        VolatileReadName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy());
+
+    SmallString<64> VolatileWriteName("__tsan_volatile_write" + ByteSizeStr);
+    TsanVolatileWrite[i] = M.getOrInsertFunction(
+        VolatileWriteName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy());
+
+    SmallString<64> UnalignedVolatileReadName("__tsan_unaligned_volatile_read" +
+                                              ByteSizeStr);
+    TsanUnalignedVolatileRead[i] = M.getOrInsertFunction(
+        UnalignedVolatileReadName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy());
+
+    SmallString<64> UnalignedVolatileWriteName(
+        "__tsan_unaligned_volatile_write" + ByteSizeStr);
+    TsanUnalignedVolatileWrite[i] = M.getOrInsertFunction(
+        UnalignedVolatileWriteName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy());
+
     SmallString<64> CompoundRWName("__tsan_read_write" + ByteSizeStr);
     TsanCompoundRW[i] = M.getOrInsertFunction(
         CompoundRWName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy());
@@ -308,145 +308,145 @@ void ThreadSanitizer::initialize(Module &M) {
     TsanUnalignedCompoundRW[i] = M.getOrInsertFunction(
         UnalignedCompoundRWName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy());
 
-    Type *Ty = Type::getIntNTy(M.getContext(), BitSize); 
-    Type *PtrTy = Ty->getPointerTo(); 
-    SmallString<32> AtomicLoadName("__tsan_atomic" + BitSizeStr + "_load"); 
-    TsanAtomicLoad[i] = 
-        M.getOrInsertFunction(AtomicLoadName, Attr, Ty, PtrTy, OrdTy); 
- 
-    SmallString<32> AtomicStoreName("__tsan_atomic" + BitSizeStr + "_store"); 
-    TsanAtomicStore[i] = M.getOrInsertFunction( 
-        AtomicStoreName, Attr, IRB.getVoidTy(), PtrTy, Ty, OrdTy); 
- 
-    for (unsigned Op = AtomicRMWInst::FIRST_BINOP; 
-         Op <= AtomicRMWInst::LAST_BINOP; ++Op) { 
-      TsanAtomicRMW[Op][i] = nullptr; 
-      const char *NamePart = nullptr; 
-      if (Op == AtomicRMWInst::Xchg) 
-        NamePart = "_exchange"; 
-      else if (Op == AtomicRMWInst::Add) 
-        NamePart = "_fetch_add"; 
-      else if (Op == AtomicRMWInst::Sub) 
-        NamePart = "_fetch_sub"; 
-      else if (Op == AtomicRMWInst::And) 
-        NamePart = "_fetch_and"; 
-      else if (Op == AtomicRMWInst::Or) 
-        NamePart = "_fetch_or"; 
-      else if (Op == AtomicRMWInst::Xor) 
-        NamePart = "_fetch_xor"; 
-      else if (Op == AtomicRMWInst::Nand) 
-        NamePart = "_fetch_nand"; 
-      else 
-        continue; 
-      SmallString<32> RMWName("__tsan_atomic" + itostr(BitSize) + NamePart); 
-      TsanAtomicRMW[Op][i] = 
-          M.getOrInsertFunction(RMWName, Attr, Ty, PtrTy, Ty, OrdTy); 
-    } 
- 
-    SmallString<32> AtomicCASName("__tsan_atomic" + BitSizeStr + 
-                                  "_compare_exchange_val"); 
-    TsanAtomicCAS[i] = M.getOrInsertFunction(AtomicCASName, Attr, Ty, PtrTy, Ty, 
-                                             Ty, OrdTy, OrdTy); 
-  } 
-  TsanVptrUpdate = 
-      M.getOrInsertFunction("__tsan_vptr_update", Attr, IRB.getVoidTy(), 
-                            IRB.getInt8PtrTy(), IRB.getInt8PtrTy()); 
-  TsanVptrLoad = M.getOrInsertFunction("__tsan_vptr_read", Attr, 
-                                       IRB.getVoidTy(), IRB.getInt8PtrTy()); 
-  TsanAtomicThreadFence = M.getOrInsertFunction("__tsan_atomic_thread_fence", 
-                                                Attr, IRB.getVoidTy(), OrdTy); 
-  TsanAtomicSignalFence = M.getOrInsertFunction("__tsan_atomic_signal_fence", 
-                                                Attr, IRB.getVoidTy(), OrdTy); 
- 
-  MemmoveFn = 
-      M.getOrInsertFunction("memmove", Attr, IRB.getInt8PtrTy(), 
-                            IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy); 
-  MemcpyFn = 
-      M.getOrInsertFunction("memcpy", Attr, IRB.getInt8PtrTy(), 
-                            IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy); 
-  MemsetFn = 
-      M.getOrInsertFunction("memset", Attr, IRB.getInt8PtrTy(), 
-                            IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy); 
-} 
- 
-static bool isVtableAccess(Instruction *I) { 
-  if (MDNode *Tag = I->getMetadata(LLVMContext::MD_tbaa)) 
-    return Tag->isTBAAVtableAccess(); 
-  return false; 
-} 
- 
-// Do not instrument known races/"benign races" that come from compiler 
-// instrumentatin. The user has no way of suppressing them. 
-static bool shouldInstrumentReadWriteFromAddress(const Module *M, Value *Addr) { 
-  // Peel off GEPs and BitCasts. 
-  Addr = Addr->stripInBoundsOffsets(); 
- 
-  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) { 
-    if (GV->hasSection()) { 
-      StringRef SectionName = GV->getSection(); 
-      // Check if the global is in the PGO counters section. 
-      auto OF = Triple(M->getTargetTriple()).getObjectFormat(); 
-      if (SectionName.endswith( 
-              getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false))) 
-        return false; 
-    } 
- 
-    // Check if the global is private gcov data. 
-    if (GV->getName().startswith("__llvm_gcov") || 
-        GV->getName().startswith("__llvm_gcda")) 
-      return false; 
-  } 
- 
-  // Do not instrument acesses from different address spaces; we cannot deal 
-  // with them. 
-  if (Addr) { 
-    Type *PtrTy = cast<PointerType>(Addr->getType()->getScalarType()); 
-    if (PtrTy->getPointerAddressSpace() != 0) 
-      return false; 
-  } 
- 
-  return true; 
-} 
- 
-bool ThreadSanitizer::addrPointsToConstantData(Value *Addr) { 
-  // If this is a GEP, just analyze its pointer operand. 
-  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Addr)) 
-    Addr = GEP->getPointerOperand(); 
- 
-  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) { 
-    if (GV->isConstant()) { 
-      // Reads from constant globals can not race with any writes. 
-      NumOmittedReadsFromConstantGlobals++; 
-      return true; 
-    } 
-  } else if (LoadInst *L = dyn_cast<LoadInst>(Addr)) { 
-    if (isVtableAccess(L)) { 
-      // Reads from a vtable pointer can not race with any writes. 
-      NumOmittedReadsFromVtable++; 
-      return true; 
-    } 
-  } 
-  return false; 
-} 
- 
-// Instrumenting some of the accesses may be proven redundant. 
-// Currently handled: 
-//  - read-before-write (within same BB, no calls between) 
-//  - not captured variables 
-// 
-// We do not handle some of the patterns that should not survive 
-// after the classic compiler optimizations. 
-// E.g. two reads from the same temp should be eliminated by CSE, 
-// two writes should be eliminated by DSE, etc. 
-// 
-// 'Local' is a vector of insns within the same BB (no calls between). 
-// 'All' is a vector of insns that will be instrumented. 
-void ThreadSanitizer::chooseInstructionsToInstrument( 
+    Type *Ty = Type::getIntNTy(M.getContext(), BitSize);
+    Type *PtrTy = Ty->getPointerTo();
+    SmallString<32> AtomicLoadName("__tsan_atomic" + BitSizeStr + "_load");
+    TsanAtomicLoad[i] =
+        M.getOrInsertFunction(AtomicLoadName, Attr, Ty, PtrTy, OrdTy);
+
+    SmallString<32> AtomicStoreName("__tsan_atomic" + BitSizeStr + "_store");
+    TsanAtomicStore[i] = M.getOrInsertFunction(
+        AtomicStoreName, Attr, IRB.getVoidTy(), PtrTy, Ty, OrdTy);
+
+    for (unsigned Op = AtomicRMWInst::FIRST_BINOP;
+         Op <= AtomicRMWInst::LAST_BINOP; ++Op) {
+      TsanAtomicRMW[Op][i] = nullptr;
+      const char *NamePart = nullptr;
+      if (Op == AtomicRMWInst::Xchg)
+        NamePart = "_exchange";
+      else if (Op == AtomicRMWInst::Add)
+        NamePart = "_fetch_add";
+      else if (Op == AtomicRMWInst::Sub)
+        NamePart = "_fetch_sub";
+      else if (Op == AtomicRMWInst::And)
+        NamePart = "_fetch_and";
+      else if (Op == AtomicRMWInst::Or)
+        NamePart = "_fetch_or";
+      else if (Op == AtomicRMWInst::Xor)
+        NamePart = "_fetch_xor";
+      else if (Op == AtomicRMWInst::Nand)
+        NamePart = "_fetch_nand";
+      else
+        continue;
+      SmallString<32> RMWName("__tsan_atomic" + itostr(BitSize) + NamePart);
+      TsanAtomicRMW[Op][i] =
+          M.getOrInsertFunction(RMWName, Attr, Ty, PtrTy, Ty, OrdTy);
+    }
+
+    SmallString<32> AtomicCASName("__tsan_atomic" + BitSizeStr +
+                                  "_compare_exchange_val");
+    TsanAtomicCAS[i] = M.getOrInsertFunction(AtomicCASName, Attr, Ty, PtrTy, Ty,
+                                             Ty, OrdTy, OrdTy);
+  }
+  TsanVptrUpdate =
+      M.getOrInsertFunction("__tsan_vptr_update", Attr, IRB.getVoidTy(),
+                            IRB.getInt8PtrTy(), IRB.getInt8PtrTy());
+  TsanVptrLoad = M.getOrInsertFunction("__tsan_vptr_read", Attr,
+                                       IRB.getVoidTy(), IRB.getInt8PtrTy());
+  TsanAtomicThreadFence = M.getOrInsertFunction("__tsan_atomic_thread_fence",
+                                                Attr, IRB.getVoidTy(), OrdTy);
+  TsanAtomicSignalFence = M.getOrInsertFunction("__tsan_atomic_signal_fence",
+                                                Attr, IRB.getVoidTy(), OrdTy);
+
+  MemmoveFn =
+      M.getOrInsertFunction("memmove", Attr, IRB.getInt8PtrTy(),
+                            IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy);
+  MemcpyFn =
+      M.getOrInsertFunction("memcpy", Attr, IRB.getInt8PtrTy(),
+                            IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy);
+  MemsetFn =
+      M.getOrInsertFunction("memset", Attr, IRB.getInt8PtrTy(),
+                            IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy);
+}
+
+static bool isVtableAccess(Instruction *I) {
+  if (MDNode *Tag = I->getMetadata(LLVMContext::MD_tbaa))
+    return Tag->isTBAAVtableAccess();
+  return false;
+}
+
+// Do not instrument known races/"benign races" that come from compiler
+// instrumentatin. The user has no way of suppressing them.
+static bool shouldInstrumentReadWriteFromAddress(const Module *M, Value *Addr) {
+  // Peel off GEPs and BitCasts.
+  Addr = Addr->stripInBoundsOffsets();
+
+  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
+    if (GV->hasSection()) {
+      StringRef SectionName = GV->getSection();
+      // Check if the global is in the PGO counters section.
+      auto OF = Triple(M->getTargetTriple()).getObjectFormat();
+      if (SectionName.endswith(
+              getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false)))
+        return false;
+    }
+
+    // Check if the global is private gcov data.
+    if (GV->getName().startswith("__llvm_gcov") ||
+        GV->getName().startswith("__llvm_gcda"))
+      return false;
+  }
+
+  // Do not instrument acesses from different address spaces; we cannot deal
+  // with them.
+  if (Addr) {
+    Type *PtrTy = cast<PointerType>(Addr->getType()->getScalarType());
+    if (PtrTy->getPointerAddressSpace() != 0)
+      return false;
+  }
+
+  return true;
+}
+
+bool ThreadSanitizer::addrPointsToConstantData(Value *Addr) {
+  // If this is a GEP, just analyze its pointer operand.
+  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Addr))
+    Addr = GEP->getPointerOperand();
+
+  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
+    if (GV->isConstant()) {
+      // Reads from constant globals can not race with any writes.
+      NumOmittedReadsFromConstantGlobals++;
+      return true;
+    }
+  } else if (LoadInst *L = dyn_cast<LoadInst>(Addr)) {
+    if (isVtableAccess(L)) {
+      // Reads from a vtable pointer can not race with any writes.
+      NumOmittedReadsFromVtable++;
+      return true;
+    }
+  }
+  return false;
+}
+
+// Instrumenting some of the accesses may be proven redundant.
+// Currently handled:
+//  - read-before-write (within same BB, no calls between)
+//  - not captured variables
+//
+// We do not handle some of the patterns that should not survive
+// after the classic compiler optimizations.
+// E.g. two reads from the same temp should be eliminated by CSE,
+// two writes should be eliminated by DSE, etc.
+//
+// 'Local' is a vector of insns within the same BB (no calls between).
+// 'All' is a vector of insns that will be instrumented.
+void ThreadSanitizer::chooseInstructionsToInstrument(
     SmallVectorImpl<Instruction *> &Local,
     SmallVectorImpl<InstructionInfo> &All, const DataLayout &DL) {
   DenseMap<Value *, size_t> WriteTargets; // Map of addresses to index in All
-  // Iterate from the end. 
-  for (Instruction *I : reverse(Local)) { 
+  // Iterate from the end.
+  for (Instruction *I : reverse(Local)) {
     const bool IsWrite = isa<StoreInst>(*I);
     Value *Addr = IsWrite ? cast<StoreInst>(I)->getPointerOperand()
                           : cast<LoadInst>(I)->getPointerOperand();
@@ -470,22 +470,22 @@ void ThreadSanitizer::chooseInstructionsToInstrument(
           NumOmittedReadsBeforeWrite++;
           continue;
         }
-      } 
+      }
 
-      if (addrPointsToConstantData(Addr)) { 
-        // Addr points to some constant data -- it can not race with any writes. 
-        continue; 
-      } 
-    } 
+      if (addrPointsToConstantData(Addr)) {
+        // Addr points to some constant data -- it can not race with any writes.
+        continue;
+      }
+    }
 
     if (isa<AllocaInst>(getUnderlyingObject(Addr)) &&
-        !PointerMayBeCaptured(Addr, true, true)) { 
-      // The variable is addressable but not captured, so it cannot be 
-      // referenced from a different thread and participate in a data race 
-      // (see llvm/Analysis/CaptureTracking.h for details). 
-      NumOmittedNonCaptured++; 
-      continue; 
-    } 
+        !PointerMayBeCaptured(Addr, true, true)) {
+      // The variable is addressable but not captured, so it cannot be
+      // referenced from a different thread and participate in a data race
+      // (see llvm/Analysis/CaptureTracking.h for details).
+      NumOmittedNonCaptured++;
+      continue;
+    }
 
     // Instrument this instruction.
     All.emplace_back(I);
@@ -494,160 +494,160 @@ void ThreadSanitizer::chooseInstructionsToInstrument(
       // write target, and we can override any previous entry if it exists.
       WriteTargets[Addr] = All.size() - 1;
     }
-  } 
-  Local.clear(); 
-} 
- 
-static bool isAtomic(Instruction *I) { 
-  // TODO: Ask TTI whether synchronization scope is between threads. 
-  if (LoadInst *LI = dyn_cast<LoadInst>(I)) 
-    return LI->isAtomic() && LI->getSyncScopeID() != SyncScope::SingleThread; 
-  if (StoreInst *SI = dyn_cast<StoreInst>(I)) 
-    return SI->isAtomic() && SI->getSyncScopeID() != SyncScope::SingleThread; 
-  if (isa<AtomicRMWInst>(I)) 
-    return true; 
-  if (isa<AtomicCmpXchgInst>(I)) 
-    return true; 
-  if (isa<FenceInst>(I)) 
-    return true; 
-  return false; 
-} 
- 
-void ThreadSanitizer::InsertRuntimeIgnores(Function &F) { 
-  IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI()); 
-  IRB.CreateCall(TsanIgnoreBegin); 
-  EscapeEnumerator EE(F, "tsan_ignore_cleanup", ClHandleCxxExceptions); 
-  while (IRBuilder<> *AtExit = EE.Next()) { 
-    AtExit->CreateCall(TsanIgnoreEnd); 
-  } 
-} 
- 
-bool ThreadSanitizer::sanitizeFunction(Function &F, 
-                                       const TargetLibraryInfo &TLI) { 
-  // This is required to prevent instrumenting call to __tsan_init from within 
-  // the module constructor. 
-  if (F.getName() == kTsanModuleCtorName) 
-    return false; 
-  // Naked functions can not have prologue/epilogue 
-  // (__tsan_func_entry/__tsan_func_exit) generated, so don't instrument them at 
-  // all. 
-  if (F.hasFnAttribute(Attribute::Naked)) 
-    return false; 
-  initialize(*F.getParent()); 
+  }
+  Local.clear();
+}
+
+static bool isAtomic(Instruction *I) {
+  // TODO: Ask TTI whether synchronization scope is between threads.
+  if (LoadInst *LI = dyn_cast<LoadInst>(I))
+    return LI->isAtomic() && LI->getSyncScopeID() != SyncScope::SingleThread;
+  if (StoreInst *SI = dyn_cast<StoreInst>(I))
+    return SI->isAtomic() && SI->getSyncScopeID() != SyncScope::SingleThread;
+  if (isa<AtomicRMWInst>(I))
+    return true;
+  if (isa<AtomicCmpXchgInst>(I))
+    return true;
+  if (isa<FenceInst>(I))
+    return true;
+  return false;
+}
+
+void ThreadSanitizer::InsertRuntimeIgnores(Function &F) {
+  IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
+  IRB.CreateCall(TsanIgnoreBegin);
+  EscapeEnumerator EE(F, "tsan_ignore_cleanup", ClHandleCxxExceptions);
+  while (IRBuilder<> *AtExit = EE.Next()) {
+    AtExit->CreateCall(TsanIgnoreEnd);
+  }
+}
+
+bool ThreadSanitizer::sanitizeFunction(Function &F,
+                                       const TargetLibraryInfo &TLI) {
+  // This is required to prevent instrumenting call to __tsan_init from within
+  // the module constructor.
+  if (F.getName() == kTsanModuleCtorName)
+    return false;
+  // Naked functions can not have prologue/epilogue
+  // (__tsan_func_entry/__tsan_func_exit) generated, so don't instrument them at
+  // all.
+  if (F.hasFnAttribute(Attribute::Naked))
+    return false;
+  initialize(*F.getParent());
   SmallVector<InstructionInfo, 8> AllLoadsAndStores;
-  SmallVector<Instruction*, 8> LocalLoadsAndStores; 
-  SmallVector<Instruction*, 8> AtomicAccesses; 
-  SmallVector<Instruction*, 8> MemIntrinCalls; 
-  bool Res = false; 
-  bool HasCalls = false; 
-  bool SanitizeFunction = F.hasFnAttribute(Attribute::SanitizeThread); 
-  const DataLayout &DL = F.getParent()->getDataLayout(); 
- 
-  // Traverse all instructions, collect loads/stores/returns, check for calls. 
-  for (auto &BB : F) { 
-    for (auto &Inst : BB) { 
-      if (isAtomic(&Inst)) 
-        AtomicAccesses.push_back(&Inst); 
-      else if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst)) 
-        LocalLoadsAndStores.push_back(&Inst); 
-      else if (isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) { 
-        if (CallInst *CI = dyn_cast<CallInst>(&Inst)) 
-          maybeMarkSanitizerLibraryCallNoBuiltin(CI, &TLI); 
-        if (isa<MemIntrinsic>(Inst)) 
-          MemIntrinCalls.push_back(&Inst); 
-        HasCalls = true; 
-        chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores, 
-                                       DL); 
-      } 
-    } 
-    chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores, DL); 
-  } 
- 
-  // We have collected all loads and stores. 
-  // FIXME: many of these accesses do not need to be checked for races 
-  // (e.g. variables that do not escape, etc). 
- 
-  // Instrument memory accesses only if we want to report bugs in the function. 
-  if (ClInstrumentMemoryAccesses && SanitizeFunction) 
+  SmallVector<Instruction*, 8> LocalLoadsAndStores;
+  SmallVector<Instruction*, 8> AtomicAccesses;
+  SmallVector<Instruction*, 8> MemIntrinCalls;
+  bool Res = false;
+  bool HasCalls = false;
+  bool SanitizeFunction = F.hasFnAttribute(Attribute::SanitizeThread);
+  const DataLayout &DL = F.getParent()->getDataLayout();
+
+  // Traverse all instructions, collect loads/stores/returns, check for calls.
+  for (auto &BB : F) {
+    for (auto &Inst : BB) {
+      if (isAtomic(&Inst))
+        AtomicAccesses.push_back(&Inst);
+      else if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst))
+        LocalLoadsAndStores.push_back(&Inst);
+      else if (isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) {
+        if (CallInst *CI = dyn_cast<CallInst>(&Inst))
+          maybeMarkSanitizerLibraryCallNoBuiltin(CI, &TLI);
+        if (isa<MemIntrinsic>(Inst))
+          MemIntrinCalls.push_back(&Inst);
+        HasCalls = true;
+        chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores,
+                                       DL);
+      }
+    }
+    chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores, DL);
+  }
+
+  // We have collected all loads and stores.
+  // FIXME: many of these accesses do not need to be checked for races
+  // (e.g. variables that do not escape, etc).
+
+  // Instrument memory accesses only if we want to report bugs in the function.
+  if (ClInstrumentMemoryAccesses && SanitizeFunction)
     for (const auto &II : AllLoadsAndStores) {
       Res |= instrumentLoadOrStore(II, DL);
-    } 
- 
-  // Instrument atomic memory accesses in any case (they can be used to 
-  // implement synchronization). 
-  if (ClInstrumentAtomics) 
-    for (auto Inst : AtomicAccesses) { 
-      Res |= instrumentAtomic(Inst, DL); 
-    } 
- 
-  if (ClInstrumentMemIntrinsics && SanitizeFunction) 
-    for (auto Inst : MemIntrinCalls) { 
-      Res |= instrumentMemIntrinsic(Inst); 
-    } 
- 
-  if (F.hasFnAttribute("sanitize_thread_no_checking_at_run_time")) { 
-    assert(!F.hasFnAttribute(Attribute::SanitizeThread)); 
-    if (HasCalls) 
-      InsertRuntimeIgnores(F); 
-  } 
- 
-  // Instrument function entry/exit points if there were instrumented accesses. 
-  if ((Res || HasCalls) && ClInstrumentFuncEntryExit) { 
-    IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI()); 
-    Value *ReturnAddress = IRB.CreateCall( 
-        Intrinsic::getDeclaration(F.getParent(), Intrinsic::returnaddress), 
-        IRB.getInt32(0)); 
-    IRB.CreateCall(TsanFuncEntry, ReturnAddress); 
- 
-    EscapeEnumerator EE(F, "tsan_cleanup", ClHandleCxxExceptions); 
-    while (IRBuilder<> *AtExit = EE.Next()) { 
-      AtExit->CreateCall(TsanFuncExit, {}); 
-    } 
-    Res = true; 
-  } 
-  return Res; 
-} 
- 
+    }
+
+  // Instrument atomic memory accesses in any case (they can be used to
+  // implement synchronization).
+  if (ClInstrumentAtomics)
+    for (auto Inst : AtomicAccesses) {
+      Res |= instrumentAtomic(Inst, DL);
+    }
+
+  if (ClInstrumentMemIntrinsics && SanitizeFunction)
+    for (auto Inst : MemIntrinCalls) {
+      Res |= instrumentMemIntrinsic(Inst);
+    }
+
+  if (F.hasFnAttribute("sanitize_thread_no_checking_at_run_time")) {
+    assert(!F.hasFnAttribute(Attribute::SanitizeThread));
+    if (HasCalls)
+      InsertRuntimeIgnores(F);
+  }
+
+  // Instrument function entry/exit points if there were instrumented accesses.
+  if ((Res || HasCalls) && ClInstrumentFuncEntryExit) {
+    IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
+    Value *ReturnAddress = IRB.CreateCall(
+        Intrinsic::getDeclaration(F.getParent(), Intrinsic::returnaddress),
+        IRB.getInt32(0));
+    IRB.CreateCall(TsanFuncEntry, ReturnAddress);
+
+    EscapeEnumerator EE(F, "tsan_cleanup", ClHandleCxxExceptions);
+    while (IRBuilder<> *AtExit = EE.Next()) {
+      AtExit->CreateCall(TsanFuncExit, {});
+    }
+    Res = true;
+  }
+  return Res;
+}
+
 bool ThreadSanitizer::instrumentLoadOrStore(const InstructionInfo &II,
-                                            const DataLayout &DL) { 
+                                            const DataLayout &DL) {
   IRBuilder<> IRB(II.Inst);
   const bool IsWrite = isa<StoreInst>(*II.Inst);
   Value *Addr = IsWrite ? cast<StoreInst>(II.Inst)->getPointerOperand()
                         : cast<LoadInst>(II.Inst)->getPointerOperand();
- 
-  // swifterror memory addresses are mem2reg promoted by instruction selection. 
-  // As such they cannot have regular uses like an instrumentation function and 
-  // it makes no sense to track them as memory. 
-  if (Addr->isSwiftError()) 
-    return false; 
- 
-  int Idx = getMemoryAccessFuncIndex(Addr, DL); 
-  if (Idx < 0) 
-    return false; 
+
+  // swifterror memory addresses are mem2reg promoted by instruction selection.
+  // As such they cannot have regular uses like an instrumentation function and
+  // it makes no sense to track them as memory.
+  if (Addr->isSwiftError())
+    return false;
+
+  int Idx = getMemoryAccessFuncIndex(Addr, DL);
+  if (Idx < 0)
+    return false;
   if (IsWrite && isVtableAccess(II.Inst)) {
     LLVM_DEBUG(dbgs() << "  VPTR : " << *II.Inst << "\n");
     Value *StoredValue = cast<StoreInst>(II.Inst)->getValueOperand();
-    // StoredValue may be a vector type if we are storing several vptrs at once. 
-    // In this case, just take the first element of the vector since this is 
-    // enough to find vptr races. 
-    if (isa<VectorType>(StoredValue->getType())) 
-      StoredValue = IRB.CreateExtractElement( 
-          StoredValue, ConstantInt::get(IRB.getInt32Ty(), 0)); 
-    if (StoredValue->getType()->isIntegerTy()) 
-      StoredValue = IRB.CreateIntToPtr(StoredValue, IRB.getInt8PtrTy()); 
-    // Call TsanVptrUpdate. 
-    IRB.CreateCall(TsanVptrUpdate, 
-                   {IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()), 
-                    IRB.CreatePointerCast(StoredValue, IRB.getInt8PtrTy())}); 
-    NumInstrumentedVtableWrites++; 
-    return true; 
-  } 
+    // StoredValue may be a vector type if we are storing several vptrs at once.
+    // In this case, just take the first element of the vector since this is
+    // enough to find vptr races.
+    if (isa<VectorType>(StoredValue->getType()))
+      StoredValue = IRB.CreateExtractElement(
+          StoredValue, ConstantInt::get(IRB.getInt32Ty(), 0));
+    if (StoredValue->getType()->isIntegerTy())
+      StoredValue = IRB.CreateIntToPtr(StoredValue, IRB.getInt8PtrTy());
+    // Call TsanVptrUpdate.
+    IRB.CreateCall(TsanVptrUpdate,
+                   {IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
+                    IRB.CreatePointerCast(StoredValue, IRB.getInt8PtrTy())});
+    NumInstrumentedVtableWrites++;
+    return true;
+  }
   if (!IsWrite && isVtableAccess(II.Inst)) {
-    IRB.CreateCall(TsanVptrLoad, 
-                   IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy())); 
-    NumInstrumentedVtableReads++; 
-    return true; 
-  } 
+    IRB.CreateCall(TsanVptrLoad,
+                   IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()));
+    NumInstrumentedVtableReads++;
+    return true;
+  }
 
   const unsigned Alignment = IsWrite ? cast<StoreInst>(II.Inst)->getAlignment()
                                      : cast<LoadInst>(II.Inst)->getAlignment();
@@ -658,191 +658,191 @@ bool ThreadSanitizer::instrumentLoadOrStore(const InstructionInfo &II,
                                    : cast<LoadInst>(II.Inst)->isVolatile());
   assert((!IsVolatile || !IsCompoundRW) && "Compound volatile invalid!");
 
-  Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType(); 
-  const uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy); 
-  FunctionCallee OnAccessFunc = nullptr; 
-  if (Alignment == 0 || Alignment >= 8 || (Alignment % (TypeSize / 8)) == 0) { 
+  Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
+  const uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy);
+  FunctionCallee OnAccessFunc = nullptr;
+  if (Alignment == 0 || Alignment >= 8 || (Alignment % (TypeSize / 8)) == 0) {
     if (IsCompoundRW)
       OnAccessFunc = TsanCompoundRW[Idx];
     else if (IsVolatile)
-      OnAccessFunc = IsWrite ? TsanVolatileWrite[Idx] : TsanVolatileRead[Idx]; 
-    else 
-      OnAccessFunc = IsWrite ? TsanWrite[Idx] : TsanRead[Idx]; 
-  } else { 
+      OnAccessFunc = IsWrite ? TsanVolatileWrite[Idx] : TsanVolatileRead[Idx];
+    else
+      OnAccessFunc = IsWrite ? TsanWrite[Idx] : TsanRead[Idx];
+  } else {
     if (IsCompoundRW)
       OnAccessFunc = TsanUnalignedCompoundRW[Idx];
     else if (IsVolatile)
-      OnAccessFunc = IsWrite ? TsanUnalignedVolatileWrite[Idx] 
-                             : TsanUnalignedVolatileRead[Idx]; 
-    else 
-      OnAccessFunc = IsWrite ? TsanUnalignedWrite[Idx] : TsanUnalignedRead[Idx]; 
-  } 
-  IRB.CreateCall(OnAccessFunc, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy())); 
+      OnAccessFunc = IsWrite ? TsanUnalignedVolatileWrite[Idx]
+                             : TsanUnalignedVolatileRead[Idx];
+    else
+      OnAccessFunc = IsWrite ? TsanUnalignedWrite[Idx] : TsanUnalignedRead[Idx];
+  }
+  IRB.CreateCall(OnAccessFunc, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()));
   if (IsCompoundRW || IsWrite)
     NumInstrumentedWrites++;
   if (IsCompoundRW || !IsWrite)
     NumInstrumentedReads++;
-  return true; 
-} 
- 
-static ConstantInt *createOrdering(IRBuilder<> *IRB, AtomicOrdering ord) { 
-  uint32_t v = 0; 
-  switch (ord) { 
-    case AtomicOrdering::NotAtomic: 
-      llvm_unreachable("unexpected atomic ordering!"); 
-    case AtomicOrdering::Unordered:              LLVM_FALLTHROUGH; 
-    case AtomicOrdering::Monotonic:              v = 0; break; 
-    // Not specified yet: 
-    // case AtomicOrdering::Consume:                v = 1; break; 
-    case AtomicOrdering::Acquire:                v = 2; break; 
-    case AtomicOrdering::Release:                v = 3; break; 
-    case AtomicOrdering::AcquireRelease:         v = 4; break; 
-    case AtomicOrdering::SequentiallyConsistent: v = 5; break; 
-  } 
-  return IRB->getInt32(v); 
-} 
- 
-// If a memset intrinsic gets inlined by the code gen, we will miss races on it. 
-// So, we either need to ensure the intrinsic is not inlined, or instrument it. 
-// We do not instrument memset/memmove/memcpy intrinsics (too complicated), 
-// instead we simply replace them with regular function calls, which are then 
-// intercepted by the run-time. 
-// Since tsan is running after everyone else, the calls should not be 
-// replaced back with intrinsics. If that becomes wrong at some point, 
-// we will need to call e.g. __tsan_memset to avoid the intrinsics. 
-bool ThreadSanitizer::instrumentMemIntrinsic(Instruction *I) { 
-  IRBuilder<> IRB(I); 
-  if (MemSetInst *M = dyn_cast<MemSetInst>(I)) { 
-    IRB.CreateCall( 
-        MemsetFn, 
-        {IRB.CreatePointerCast(M->getArgOperand(0), IRB.getInt8PtrTy()), 
-         IRB.CreateIntCast(M->getArgOperand(1), IRB.getInt32Ty(), false), 
-         IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false)}); 
-    I->eraseFromParent(); 
-  } else if (MemTransferInst *M = dyn_cast<MemTransferInst>(I)) { 
-    IRB.CreateCall( 
-        isa<MemCpyInst>(M) ? MemcpyFn : MemmoveFn, 
-        {IRB.CreatePointerCast(M->getArgOperand(0), IRB.getInt8PtrTy()), 
-         IRB.CreatePointerCast(M->getArgOperand(1), IRB.getInt8PtrTy()), 
-         IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false)}); 
-    I->eraseFromParent(); 
-  } 
-  return false; 
-} 
- 
-// Both llvm and ThreadSanitizer atomic operations are based on C++11/C1x 
-// standards.  For background see C++11 standard.  A slightly older, publicly 
-// available draft of the standard (not entirely up-to-date, but close enough 
-// for casual browsing) is available here: 
-// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2011/n3242.pdf 
-// The following page contains more background information: 
-// http://www.hpl.hp.com/personal/Hans_Boehm/c++mm/ 
- 
-bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) { 
-  IRBuilder<> IRB(I); 
-  if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 
-    Value *Addr = LI->getPointerOperand(); 
-    int Idx = getMemoryAccessFuncIndex(Addr, DL); 
-    if (Idx < 0) 
-      return false; 
-    const unsigned ByteSize = 1U << Idx; 
-    const unsigned BitSize = ByteSize * 8; 
-    Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize); 
-    Type *PtrTy = Ty->getPointerTo(); 
-    Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy), 
-                     createOrdering(&IRB, LI->getOrdering())}; 
-    Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType(); 
-    Value *C = IRB.CreateCall(TsanAtomicLoad[Idx], Args); 
-    Value *Cast = IRB.CreateBitOrPointerCast(C, OrigTy); 
-    I->replaceAllUsesWith(Cast); 
-  } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { 
-    Value *Addr = SI->getPointerOperand(); 
-    int Idx = getMemoryAccessFuncIndex(Addr, DL); 
-    if (Idx < 0) 
-      return false; 
-    const unsigned ByteSize = 1U << Idx; 
-    const unsigned BitSize = ByteSize * 8; 
-    Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize); 
-    Type *PtrTy = Ty->getPointerTo(); 
-    Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy), 
-                     IRB.CreateBitOrPointerCast(SI->getValueOperand(), Ty), 
-                     createOrdering(&IRB, SI->getOrdering())}; 
-    CallInst *C = CallInst::Create(TsanAtomicStore[Idx], Args); 
-    ReplaceInstWithInst(I, C); 
-  } else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I)) { 
-    Value *Addr = RMWI->getPointerOperand(); 
-    int Idx = getMemoryAccessFuncIndex(Addr, DL); 
-    if (Idx < 0) 
-      return false; 
-    FunctionCallee F = TsanAtomicRMW[RMWI->getOperation()][Idx]; 
-    if (!F) 
-      return false; 
-    const unsigned ByteSize = 1U << Idx; 
-    const unsigned BitSize = ByteSize * 8; 
-    Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize); 
-    Type *PtrTy = Ty->getPointerTo(); 
-    Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy), 
-                     IRB.CreateIntCast(RMWI->getValOperand(), Ty, false), 
-                     createOrdering(&IRB, RMWI->getOrdering())}; 
-    CallInst *C = CallInst::Create(F, Args); 
-    ReplaceInstWithInst(I, C); 
-  } else if (AtomicCmpXchgInst *CASI = dyn_cast<AtomicCmpXchgInst>(I)) { 
-    Value *Addr = CASI->getPointerOperand(); 
-    int Idx = getMemoryAccessFuncIndex(Addr, DL); 
-    if (Idx < 0) 
-      return false; 
-    const unsigned ByteSize = 1U << Idx; 
-    const unsigned BitSize = ByteSize * 8; 
-    Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize); 
-    Type *PtrTy = Ty->getPointerTo(); 
-    Value *CmpOperand = 
-      IRB.CreateBitOrPointerCast(CASI->getCompareOperand(), Ty); 
-    Value *NewOperand = 
-      IRB.CreateBitOrPointerCast(CASI->getNewValOperand(), Ty); 
-    Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy), 
-                     CmpOperand, 
-                     NewOperand, 
-                     createOrdering(&IRB, CASI->getSuccessOrdering()), 
-                     createOrdering(&IRB, CASI->getFailureOrdering())}; 
-    CallInst *C = IRB.CreateCall(TsanAtomicCAS[Idx], Args); 
-    Value *Success = IRB.CreateICmpEQ(C, CmpOperand); 
-    Value *OldVal = C; 
-    Type *OrigOldValTy = CASI->getNewValOperand()->getType(); 
-    if (Ty != OrigOldValTy) { 
-      // The value is a pointer, so we need to cast the return value. 
-      OldVal = IRB.CreateIntToPtr(C, OrigOldValTy); 
-    } 
- 
-    Value *Res = 
-      IRB.CreateInsertValue(UndefValue::get(CASI->getType()), OldVal, 0); 
-    Res = IRB.CreateInsertValue(Res, Success, 1); 
- 
-    I->replaceAllUsesWith(Res); 
-    I->eraseFromParent(); 
-  } else if (FenceInst *FI = dyn_cast<FenceInst>(I)) { 
-    Value *Args[] = {createOrdering(&IRB, FI->getOrdering())}; 
-    FunctionCallee F = FI->getSyncScopeID() == SyncScope::SingleThread 
-                           ? TsanAtomicSignalFence 
-                           : TsanAtomicThreadFence; 
-    CallInst *C = CallInst::Create(F, Args); 
-    ReplaceInstWithInst(I, C); 
-  } 
-  return true; 
-} 
- 
-int ThreadSanitizer::getMemoryAccessFuncIndex(Value *Addr, 
-                                              const DataLayout &DL) { 
-  Type *OrigPtrTy = Addr->getType(); 
-  Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType(); 
-  assert(OrigTy->isSized()); 
-  uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy); 
-  if (TypeSize != 8  && TypeSize != 16 && 
-      TypeSize != 32 && TypeSize != 64 && TypeSize != 128) { 
-    NumAccessesWithBadSize++; 
-    // Ignore all unusual sizes. 
-    return -1; 
-  } 
-  size_t Idx = countTrailingZeros(TypeSize / 8); 
-  assert(Idx < kNumberOfAccessSizes); 
-  return Idx; 
-} 
+  return true;
+}
+
+static ConstantInt *createOrdering(IRBuilder<> *IRB, AtomicOrdering ord) {
+  uint32_t v = 0;
+  switch (ord) {
+    case AtomicOrdering::NotAtomic:
+      llvm_unreachable("unexpected atomic ordering!");
+    case AtomicOrdering::Unordered:              LLVM_FALLTHROUGH;
+    case AtomicOrdering::Monotonic:              v = 0; break;
+    // Not specified yet:
+    // case AtomicOrdering::Consume:                v = 1; break;
+    case AtomicOrdering::Acquire:                v = 2; break;
+    case AtomicOrdering::Release:                v = 3; break;
+    case AtomicOrdering::AcquireRelease:         v = 4; break;
+    case AtomicOrdering::SequentiallyConsistent: v = 5; break;
+  }
+  return IRB->getInt32(v);
+}
+
+// If a memset intrinsic gets inlined by the code gen, we will miss races on it.
+// So, we either need to ensure the intrinsic is not inlined, or instrument it.
+// We do not instrument memset/memmove/memcpy intrinsics (too complicated),
+// instead we simply replace them with regular function calls, which are then
+// intercepted by the run-time.
+// Since tsan is running after everyone else, the calls should not be
+// replaced back with intrinsics. If that becomes wrong at some point,
+// we will need to call e.g. __tsan_memset to avoid the intrinsics.
+bool ThreadSanitizer::instrumentMemIntrinsic(Instruction *I) {
+  IRBuilder<> IRB(I);
+  if (MemSetInst *M = dyn_cast<MemSetInst>(I)) {
+    IRB.CreateCall(
+        MemsetFn,
+        {IRB.CreatePointerCast(M->getArgOperand(0), IRB.getInt8PtrTy()),
+         IRB.CreateIntCast(M->getArgOperand(1), IRB.getInt32Ty(), false),
+         IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false)});
+    I->eraseFromParent();
+  } else if (MemTransferInst *M = dyn_cast<MemTransferInst>(I)) {
+    IRB.CreateCall(
+        isa<MemCpyInst>(M) ? MemcpyFn : MemmoveFn,
+        {IRB.CreatePointerCast(M->getArgOperand(0), IRB.getInt8PtrTy()),
+         IRB.CreatePointerCast(M->getArgOperand(1), IRB.getInt8PtrTy()),
+         IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false)});
+    I->eraseFromParent();
+  }
+  return false;
+}
+
+// Both llvm and ThreadSanitizer atomic operations are based on C++11/C1x
+// standards.  For background see C++11 standard.  A slightly older, publicly
+// available draft of the standard (not entirely up-to-date, but close enough
+// for casual browsing) is available here:
+// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2011/n3242.pdf
+// The following page contains more background information:
+// http://www.hpl.hp.com/personal/Hans_Boehm/c++mm/
+
+bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) {
+  IRBuilder<> IRB(I);
+  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+    Value *Addr = LI->getPointerOperand();
+    int Idx = getMemoryAccessFuncIndex(Addr, DL);
+    if (Idx < 0)
+      return false;
+    const unsigned ByteSize = 1U << Idx;
+    const unsigned BitSize = ByteSize * 8;
+    Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize);
+    Type *PtrTy = Ty->getPointerTo();
+    Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
+                     createOrdering(&IRB, LI->getOrdering())};
+    Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
+    Value *C = IRB.CreateCall(TsanAtomicLoad[Idx], Args);
+    Value *Cast = IRB.CreateBitOrPointerCast(C, OrigTy);
+    I->replaceAllUsesWith(Cast);
+  } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+    Value *Addr = SI->getPointerOperand();
+    int Idx = getMemoryAccessFuncIndex(Addr, DL);
+    if (Idx < 0)
+      return false;
+    const unsigned ByteSize = 1U << Idx;
+    const unsigned BitSize = ByteSize * 8;
+    Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize);
+    Type *PtrTy = Ty->getPointerTo();
+    Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
+                     IRB.CreateBitOrPointerCast(SI->getValueOperand(), Ty),
+                     createOrdering(&IRB, SI->getOrdering())};
+    CallInst *C = CallInst::Create(TsanAtomicStore[Idx], Args);
+    ReplaceInstWithInst(I, C);
+  } else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I)) {
+    Value *Addr = RMWI->getPointerOperand();
+    int Idx = getMemoryAccessFuncIndex(Addr, DL);
+    if (Idx < 0)
+      return false;
+    FunctionCallee F = TsanAtomicRMW[RMWI->getOperation()][Idx];
+    if (!F)
+      return false;
+    const unsigned ByteSize = 1U << Idx;
+    const unsigned BitSize = ByteSize * 8;
+    Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize);
+    Type *PtrTy = Ty->getPointerTo();
+    Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
+                     IRB.CreateIntCast(RMWI->getValOperand(), Ty, false),
+                     createOrdering(&IRB, RMWI->getOrdering())};
+    CallInst *C = CallInst::Create(F, Args);
+    ReplaceInstWithInst(I, C);
+  } else if (AtomicCmpXchgInst *CASI = dyn_cast<AtomicCmpXchgInst>(I)) {
+    Value *Addr = CASI->getPointerOperand();
+    int Idx = getMemoryAccessFuncIndex(Addr, DL);
+    if (Idx < 0)
+      return false;
+    const unsigned ByteSize = 1U << Idx;
+    const unsigned BitSize = ByteSize * 8;
+    Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize);
+    Type *PtrTy = Ty->getPointerTo();
+    Value *CmpOperand =
+      IRB.CreateBitOrPointerCast(CASI->getCompareOperand(), Ty);
+    Value *NewOperand =
+      IRB.CreateBitOrPointerCast(CASI->getNewValOperand(), Ty);
+    Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
+                     CmpOperand,
+                     NewOperand,
+                     createOrdering(&IRB, CASI->getSuccessOrdering()),
+                     createOrdering(&IRB, CASI->getFailureOrdering())};
+    CallInst *C = IRB.CreateCall(TsanAtomicCAS[Idx], Args);
+    Value *Success = IRB.CreateICmpEQ(C, CmpOperand);
+    Value *OldVal = C;
+    Type *OrigOldValTy = CASI->getNewValOperand()->getType();
+    if (Ty != OrigOldValTy) {
+      // The value is a pointer, so we need to cast the return value.
+      OldVal = IRB.CreateIntToPtr(C, OrigOldValTy);
+    }
+
+    Value *Res =
+      IRB.CreateInsertValue(UndefValue::get(CASI->getType()), OldVal, 0);
+    Res = IRB.CreateInsertValue(Res, Success, 1);
+
+    I->replaceAllUsesWith(Res);
+    I->eraseFromParent();
+  } else if (FenceInst *FI = dyn_cast<FenceInst>(I)) {
+    Value *Args[] = {createOrdering(&IRB, FI->getOrdering())};
+    FunctionCallee F = FI->getSyncScopeID() == SyncScope::SingleThread
+                           ? TsanAtomicSignalFence
+                           : TsanAtomicThreadFence;
+    CallInst *C = CallInst::Create(F, Args);
+    ReplaceInstWithInst(I, C);
+  }
+  return true;
+}
+
+int ThreadSanitizer::getMemoryAccessFuncIndex(Value *Addr,
+                                              const DataLayout &DL) {
+  Type *OrigPtrTy = Addr->getType();
+  Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType();
+  assert(OrigTy->isSized());
+  uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy);
+  if (TypeSize != 8  && TypeSize != 16 &&
+      TypeSize != 32 && TypeSize != 64 && TypeSize != 128) {
+    NumAccessesWithBadSize++;
+    // Ignore all unusual sizes.
+    return -1;
+  }
+  size_t Idx = countTrailingZeros(TypeSize / 8);
+  assert(Idx < kNumberOfAccessSizes);
+  return Idx;
+}
diff --git a/contrib/libs/llvm12/lib/Transforms/Instrumentation/ValueProfileCollector.cpp b/contrib/libs/llvm12/lib/Transforms/Instrumentation/ValueProfileCollector.cpp
index d7d10fb5d5..fb6216bb21 100644
--- a/contrib/libs/llvm12/lib/Transforms/Instrumentation/ValueProfileCollector.cpp
+++ b/contrib/libs/llvm12/lib/Transforms/Instrumentation/ValueProfileCollector.cpp
@@ -1,80 +1,80 @@
-//===- ValueProfileCollector.cpp - determine what to value profile --------===// 
-// 
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 
-// 
-//===----------------------------------------------------------------------===// 
-// 
-// The implementation of the ValueProfileCollector via ValueProfileCollectorImpl 
-// 
-//===----------------------------------------------------------------------===// 
- 
-#include "ValueProfilePlugins.inc" 
+//===- ValueProfileCollector.cpp - determine what to value profile --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The implementation of the ValueProfileCollector via ValueProfileCollectorImpl
+//
+//===----------------------------------------------------------------------===//
+
+#include "ValueProfilePlugins.inc"
 #include "llvm/IR/Function.h"
-#include "llvm/IR/InstIterator.h" 
-#include "llvm/IR/IntrinsicInst.h" 
-#include "llvm/InitializePasses.h" 
-#include <cassert> 
- 
-using namespace llvm; 
- 
-namespace { 
- 
-/// A plugin-based class that takes an arbitrary number of Plugin types. 
-/// Each plugin type must satisfy the following API: 
-///  1) the constructor must take a `Function &f`. Typically, the plugin would 
-///     scan the function looking for candidates. 
-///  2) contain a member function with the following signature and name: 
-///        void run(std::vector<CandidateInfo> &Candidates); 
-///    such that the plugin would append its result into the vector parameter. 
-/// 
-/// Plugins are defined in ValueProfilePlugins.inc 
-template <class... Ts> class PluginChain; 
- 
-/// The type PluginChainFinal is the final chain of plugins that will be used by 
-/// ValueProfileCollectorImpl. 
-using PluginChainFinal = PluginChain<VP_PLUGIN_LIST>; 
- 
-template <> class PluginChain<> { 
-public: 
-  PluginChain(Function &F, TargetLibraryInfo &TLI) {} 
-  void get(InstrProfValueKind K, std::vector<CandidateInfo> &Candidates) {} 
-}; 
- 
-template <class PluginT, class... Ts> 
-class PluginChain<PluginT, Ts...> : public PluginChain<Ts...> { 
-  PluginT Plugin; 
-  using Base = PluginChain<Ts...>; 
- 
-public: 
-  PluginChain(Function &F, TargetLibraryInfo &TLI) 
-      : PluginChain<Ts...>(F, TLI), Plugin(F, TLI) {} 
- 
-  void get(InstrProfValueKind K, std::vector<CandidateInfo> &Candidates) { 
-    if (K == PluginT::Kind) 
-      Plugin.run(Candidates); 
-    Base::get(K, Candidates); 
-  } 
-}; 
- 
-} // end anonymous namespace 
- 
-/// ValueProfileCollectorImpl inherits the API of PluginChainFinal. 
-class ValueProfileCollector::ValueProfileCollectorImpl : public PluginChainFinal { 
-public: 
-  using PluginChainFinal::PluginChainFinal; 
-}; 
- 
-ValueProfileCollector::ValueProfileCollector(Function &F, 
-                                             TargetLibraryInfo &TLI) 
-    : PImpl(new ValueProfileCollectorImpl(F, TLI)) {} 
- 
-ValueProfileCollector::~ValueProfileCollector() = default; 
- 
-std::vector<CandidateInfo> 
-ValueProfileCollector::get(InstrProfValueKind Kind) const { 
-  std::vector<CandidateInfo> Result; 
-  PImpl->get(Kind, Result); 
-  return Result; 
-} 
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/InitializePasses.h"
+#include <cassert>
+
+using namespace llvm;
+
+namespace {
+
+/// A plugin-based class that takes an arbitrary number of Plugin types.
+/// Each plugin type must satisfy the following API:
+///  1) the constructor must take a `Function &f`. Typically, the plugin would
+///     scan the function looking for candidates.
+///  2) contain a member function with the following signature and name:
+///        void run(std::vector<CandidateInfo> &Candidates);
+///    such that the plugin would append its result into the vector parameter.
+///
+/// Plugins are defined in ValueProfilePlugins.inc
+template <class... Ts> class PluginChain;
+
+/// The type PluginChainFinal is the final chain of plugins that will be used by
+/// ValueProfileCollectorImpl.
+using PluginChainFinal = PluginChain<VP_PLUGIN_LIST>;
+
+template <> class PluginChain<> {
+public:
+  PluginChain(Function &F, TargetLibraryInfo &TLI) {}
+  void get(InstrProfValueKind K, std::vector<CandidateInfo> &Candidates) {}
+};
+
+template <class PluginT, class... Ts>
+class PluginChain<PluginT, Ts...> : public PluginChain<Ts...> {
+  PluginT Plugin;
+  using Base = PluginChain<Ts...>;
+
+public:
+  PluginChain(Function &F, TargetLibraryInfo &TLI)
+      : PluginChain<Ts...>(F, TLI), Plugin(F, TLI) {}
+
+  void get(InstrProfValueKind K, std::vector<CandidateInfo> &Candidates) {
+    if (K == PluginT::Kind)
+      Plugin.run(Candidates);
+    Base::get(K, Candidates);
+  }
+};
+
+} // end anonymous namespace
+
+/// ValueProfileCollectorImpl inherits the API of PluginChainFinal.
+class ValueProfileCollector::ValueProfileCollectorImpl : public PluginChainFinal {
+public:
+  using PluginChainFinal::PluginChainFinal;
+};
+
+ValueProfileCollector::ValueProfileCollector(Function &F,
+                                             TargetLibraryInfo &TLI)
+    : PImpl(new ValueProfileCollectorImpl(F, TLI)) {}
+
+ValueProfileCollector::~ValueProfileCollector() = default;
+
+std::vector<CandidateInfo>
+ValueProfileCollector::get(InstrProfValueKind Kind) const {
+  std::vector<CandidateInfo> Result;
+  PImpl->get(Kind, Result);
+  return Result;
+}
diff --git a/contrib/libs/llvm12/lib/Transforms/Instrumentation/ValueProfileCollector.h b/contrib/libs/llvm12/lib/Transforms/Instrumentation/ValueProfileCollector.h
index 40f5006007..584a60ab45 100644
--- a/contrib/libs/llvm12/lib/Transforms/Instrumentation/ValueProfileCollector.h
+++ b/contrib/libs/llvm12/lib/Transforms/Instrumentation/ValueProfileCollector.h
@@ -1,83 +1,83 @@
-//===- ValueProfileCollector.h - determine what to value profile ----------===// 
-// 
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 
-// 
-//===----------------------------------------------------------------------===// 
-// 
-// This file contains a utility class, ValueProfileCollector, that is used to 
-// determine what kind of llvm::Value's are worth value-profiling, at which 
-// point in the program, and which instruction holds the Value Profile metadata. 
-// Currently, the only users of this utility is the PGOInstrumentation[Gen|Use] 
-// passes. 
-//===----------------------------------------------------------------------===// 
- 
-#ifndef LLVM_ANALYSIS_PROFILE_GEN_ANALYSIS_H 
-#define LLVM_ANALYSIS_PROFILE_GEN_ANALYSIS_H 
- 
-#include "llvm/Analysis/TargetLibraryInfo.h" 
-#include "llvm/ProfileData/InstrProf.h" 
+//===- ValueProfileCollector.h - determine what to value profile ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a utility class, ValueProfileCollector, that is used to
+// determine what kind of llvm::Value's are worth value-profiling, at which
+// point in the program, and which instruction holds the Value Profile metadata.
+// Currently, the only users of this utility is the PGOInstrumentation[Gen|Use]
+// passes.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_PROFILE_GEN_ANALYSIS_H
+#define LLVM_ANALYSIS_PROFILE_GEN_ANALYSIS_H
+
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/ProfileData/InstrProf.h"
 #include <memory>
 #include <vector>
- 
-namespace llvm { 
- 
+
+namespace llvm {
+
 class Function;
 class Instruction;
 class Value;
 
-/// Utility analysis that determines what values are worth profiling. 
-/// The actual logic is inside the ValueProfileCollectorImpl, whose job is to 
-/// populate the Candidates vector. 
-/// 
-/// Value profiling an expression means to track the values that this expression 
-/// takes at runtime and the frequency of each value. 
-/// It is important to distinguish between two sets of value profiles for a 
-/// particular expression: 
-///  1) The set of values at the point of evaluation. 
-///  2) The set of values at the point of use. 
-/// In some cases, the two sets are identical, but it's not unusual for the two 
-/// to differ. 
-/// 
-/// To elaborate more, consider this C code, and focus on the expression `nn`: 
-///  void foo(int nn, bool b) { 
-///    if (b)  memcpy(x, y, nn); 
-///  } 
-/// The point of evaluation can be as early as the start of the function, and 
-/// let's say the value profile for `nn` is: 
-///     total=100; (value,freq) set = {(8,10), (32,50)} 
-/// The point of use is right before we call memcpy, and since we execute the 
-/// memcpy conditionally, the value profile of `nn` can be: 
-///     total=15; (value,freq) set = {(8,10), (4,5)} 
-/// 
-/// For this reason, a plugin is responsible for computing the insertion point 
-/// for each value to be profiled. The `CandidateInfo` structure encapsulates 
-/// all the information needed for each value profile site. 
-class ValueProfileCollector { 
-public: 
-  struct CandidateInfo { 
-    Value *V;                   // The value to profile. 
-    Instruction *InsertPt;      // Insert the VP lib call before this instr. 
-    Instruction *AnnotatedInst; // Where metadata is attached. 
-  }; 
- 
-  ValueProfileCollector(Function &Fn, TargetLibraryInfo &TLI); 
-  ValueProfileCollector(ValueProfileCollector &&) = delete; 
-  ValueProfileCollector &operator=(ValueProfileCollector &&) = delete; 
- 
-  ValueProfileCollector(const ValueProfileCollector &) = delete; 
-  ValueProfileCollector &operator=(const ValueProfileCollector &) = delete; 
-  ~ValueProfileCollector(); 
- 
-  /// returns a list of value profiling candidates of the given kind 
-  std::vector<CandidateInfo> get(InstrProfValueKind Kind) const; 
- 
-private: 
-  class ValueProfileCollectorImpl; 
-  std::unique_ptr<ValueProfileCollectorImpl> PImpl; 
-}; 
- 
-} // namespace llvm 
- 
-#endif 
+/// Utility analysis that determines what values are worth profiling.
+/// The actual logic is inside the ValueProfileCollectorImpl, whose job is to
+/// populate the Candidates vector.
+///
+/// Value profiling an expression means to track the values that this expression
+/// takes at runtime and the frequency of each value.
+/// It is important to distinguish between two sets of value profiles for a
+/// particular expression:
+///  1) The set of values at the point of evaluation.
+///  2) The set of values at the point of use.
+/// In some cases, the two sets are identical, but it's not unusual for the two
+/// to differ.
+///
+/// To elaborate more, consider this C code, and focus on the expression `nn`:
+///  void foo(int nn, bool b) {
+///    if (b)  memcpy(x, y, nn);
+///  }
+/// The point of evaluation can be as early as the start of the function, and
+/// let's say the value profile for `nn` is:
+///     total=100; (value,freq) set = {(8,10), (32,50)}
+/// The point of use is right before we call memcpy, and since we execute the
+/// memcpy conditionally, the value profile of `nn` can be:
+///     total=15; (value,freq) set = {(8,10), (4,5)}
+///
+/// For this reason, a plugin is responsible for computing the insertion point
+/// for each value to be profiled. The `CandidateInfo` structure encapsulates
+/// all the information needed for each value profile site.
+class ValueProfileCollector {
+public:
+  struct CandidateInfo {
+    Value *V;                   // The value to profile.
+    Instruction *InsertPt;      // Insert the VP lib call before this instr.
+    Instruction *AnnotatedInst; // Where metadata is attached.
+  };
+
+  ValueProfileCollector(Function &Fn, TargetLibraryInfo &TLI);
+  ValueProfileCollector(ValueProfileCollector &&) = delete;
+  ValueProfileCollector &operator=(ValueProfileCollector &&) = delete;
+
+  ValueProfileCollector(const ValueProfileCollector &) = delete;
+  ValueProfileCollector &operator=(const ValueProfileCollector &) = delete;
+  ~ValueProfileCollector();
+
+  /// returns a list of value profiling candidates of the given kind
+  std::vector<CandidateInfo> get(InstrProfValueKind Kind) const;
+
+private:
+  class ValueProfileCollectorImpl;
+  std::unique_ptr<ValueProfileCollectorImpl> PImpl;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/libs/llvm12/lib/Transforms/Instrumentation/ValueProfilePlugins.inc b/contrib/libs/llvm12/lib/Transforms/Instrumentation/ValueProfilePlugins.inc
index 0277494895..8d0cf5843e 100644
--- a/contrib/libs/llvm12/lib/Transforms/Instrumentation/ValueProfilePlugins.inc
+++ b/contrib/libs/llvm12/lib/Transforms/Instrumentation/ValueProfilePlugins.inc
@@ -1,97 +1,97 @@
-//=== ValueProfilePlugins.inc - set of plugins used by ValueProfileCollector =// 
-// 
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 
-// 
-//===----------------------------------------------------------------------===// 
-// 
-// This file contains a set of plugin classes used in ValueProfileCollectorImpl. 
-// Each plugin is responsible for collecting Value Profiling candidates for a 
-// particular optimization. 
-// Each plugin must satisfy the interface described in ValueProfileCollector.cpp 
-// 
-//===----------------------------------------------------------------------===// 
- 
-#include "ValueProfileCollector.h" 
-#include "llvm/Analysis/IndirectCallVisitor.h" 
-#include "llvm/IR/InstVisitor.h" 
- 
-using namespace llvm; 
-using CandidateInfo = ValueProfileCollector::CandidateInfo; 
- 
-extern cl::opt<bool> MemOPOptMemcmpBcmp; 
- 
-///--------------------------- MemIntrinsicPlugin ------------------------------ 
-class MemIntrinsicPlugin : public InstVisitor<MemIntrinsicPlugin> { 
-  Function &F; 
-  TargetLibraryInfo &TLI; 
-  std::vector<CandidateInfo> *Candidates; 
- 
-public: 
-  static constexpr InstrProfValueKind Kind = IPVK_MemOPSize; 
- 
-  MemIntrinsicPlugin(Function &Fn, TargetLibraryInfo &TLI) 
-      : F(Fn), TLI(TLI), Candidates(nullptr) {} 
- 
-  void run(std::vector<CandidateInfo> &Cs) { 
-    Candidates = &Cs; 
-    visit(F); 
-    Candidates = nullptr; 
-  } 
-  void visitMemIntrinsic(MemIntrinsic &MI) { 
-    Value *Length = MI.getLength(); 
-    // Not instrument constant length calls. 
-    if (dyn_cast<ConstantInt>(Length)) 
-      return; 
- 
-    Instruction *InsertPt = &MI; 
-    Instruction *AnnotatedInst = &MI; 
-    Candidates->emplace_back(CandidateInfo{Length, InsertPt, AnnotatedInst}); 
-  } 
-  void visitCallInst(CallInst &CI) { 
-    if (!MemOPOptMemcmpBcmp) 
-      return; 
-    auto *F = CI.getCalledFunction(); 
-    if (!F) 
-      return; 
-    LibFunc Func; 
-    if (TLI.getLibFunc(CI, Func) && 
-        (Func == LibFunc_memcmp || Func == LibFunc_bcmp)) { 
-      Value *Length = CI.getArgOperand(2); 
-      // Not instrument constant length calls. 
-      if (dyn_cast<ConstantInt>(Length)) 
-        return; 
-      Instruction *InsertPt = &CI; 
-      Instruction *AnnotatedInst = &CI; 
-      Candidates->emplace_back(CandidateInfo{Length, InsertPt, AnnotatedInst}); 
-    } 
-  } 
-}; 
- 
-///------------------------ IndirectCallPromotionPlugin ------------------------ 
-class IndirectCallPromotionPlugin { 
-  Function &F; 
- 
-public: 
-  static constexpr InstrProfValueKind Kind = IPVK_IndirectCallTarget; 
- 
-  IndirectCallPromotionPlugin(Function &Fn, TargetLibraryInfo &TLI) : F(Fn) {} 
- 
-  void run(std::vector<CandidateInfo> &Candidates) { 
-    std::vector<CallBase *> Result = findIndirectCalls(F); 
-    for (Instruction *I : Result) { 
-      Value *Callee = cast<CallBase>(I)->getCalledOperand(); 
-      Instruction *InsertPt = I; 
-      Instruction *AnnotatedInst = I; 
-      Candidates.emplace_back(CandidateInfo{Callee, InsertPt, AnnotatedInst}); 
-    } 
-  } 
-}; 
- 
-///----------------------- Registration of the plugins ------------------------- 
-/// For now, registering a plugin with the ValueProfileCollector is done by 
-/// adding the plugin type to the VP_PLUGIN_LIST macro. 
-#define VP_PLUGIN_LIST           \ 
-    MemIntrinsicPlugin,          \ 
-    IndirectCallPromotionPlugin 
+//=== ValueProfilePlugins.inc - set of plugins used by ValueProfileCollector =//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a set of plugin classes used in ValueProfileCollectorImpl.
+// Each plugin is responsible for collecting Value Profiling candidates for a
+// particular optimization.
+// Each plugin must satisfy the interface described in ValueProfileCollector.cpp
+//
+//===----------------------------------------------------------------------===//
+
+#include "ValueProfileCollector.h"
+#include "llvm/Analysis/IndirectCallVisitor.h"
+#include "llvm/IR/InstVisitor.h"
+
+using namespace llvm;
+using CandidateInfo = ValueProfileCollector::CandidateInfo;
+
+extern cl::opt<bool> MemOPOptMemcmpBcmp;
+
+///--------------------------- MemIntrinsicPlugin ------------------------------
+class MemIntrinsicPlugin : public InstVisitor<MemIntrinsicPlugin> {
+  Function &F;
+  TargetLibraryInfo &TLI;
+  std::vector<CandidateInfo> *Candidates;
+
+public:
+  static constexpr InstrProfValueKind Kind = IPVK_MemOPSize;
+
+  MemIntrinsicPlugin(Function &Fn, TargetLibraryInfo &TLI)
+      : F(Fn), TLI(TLI), Candidates(nullptr) {}
+
+  void run(std::vector<CandidateInfo> &Cs) {
+    Candidates = &Cs;
+    visit(F);
+    Candidates = nullptr;
+  }
+  void visitMemIntrinsic(MemIntrinsic &MI) {
+    Value *Length = MI.getLength();
+    // Not instrument constant length calls.
+    if (dyn_cast<ConstantInt>(Length))
+      return;
+
+    Instruction *InsertPt = &MI;
+    Instruction *AnnotatedInst = &MI;
+    Candidates->emplace_back(CandidateInfo{Length, InsertPt, AnnotatedInst});
+  }
+  void visitCallInst(CallInst &CI) {
+    if (!MemOPOptMemcmpBcmp)
+      return;
+    auto *F = CI.getCalledFunction();
+    if (!F)
+      return;
+    LibFunc Func;
+    if (TLI.getLibFunc(CI, Func) &&
+        (Func == LibFunc_memcmp || Func == LibFunc_bcmp)) {
+      Value *Length = CI.getArgOperand(2);
+      // Not instrument constant length calls.
+      if (dyn_cast<ConstantInt>(Length))
+        return;
+      Instruction *InsertPt = &CI;
+      Instruction *AnnotatedInst = &CI;
+      Candidates->emplace_back(CandidateInfo{Length, InsertPt, AnnotatedInst});
+    }
+  }
+};
+
+///------------------------ IndirectCallPromotionPlugin ------------------------
+class IndirectCallPromotionPlugin {
+  Function &F;
+
+public:
+  static constexpr InstrProfValueKind Kind = IPVK_IndirectCallTarget;
+
+  IndirectCallPromotionPlugin(Function &Fn, TargetLibraryInfo &TLI) : F(Fn) {}
+
+  void run(std::vector<CandidateInfo> &Candidates) {
+    std::vector<CallBase *> Result = findIndirectCalls(F);
+    for (Instruction *I : Result) {
+      Value *Callee = cast<CallBase>(I)->getCalledOperand();
+      Instruction *InsertPt = I;
+      Instruction *AnnotatedInst = I;
+      Candidates.emplace_back(CandidateInfo{Callee, InsertPt, AnnotatedInst});
+    }
+  }
+};
+
+///----------------------- Registration of the plugins -------------------------
+/// For now, registering a plugin with the ValueProfileCollector is done by
+/// adding the plugin type to the VP_PLUGIN_LIST macro.
+#define VP_PLUGIN_LIST           \
+    MemIntrinsicPlugin,          \
+    IndirectCallPromotionPlugin
diff --git a/contrib/libs/llvm12/lib/Transforms/Instrumentation/ya.make b/contrib/libs/llvm12/lib/Transforms/Instrumentation/ya.make
index 10b7425404..39dab1eb7d 100644
--- a/contrib/libs/llvm12/lib/Transforms/Instrumentation/ya.make
+++ b/contrib/libs/llvm12/lib/Transforms/Instrumentation/ya.make
@@ -1,12 +1,12 @@
-# Generated by devtools/yamaker. 
- 
-LIBRARY() 
- 
+# Generated by devtools/yamaker.
+
+LIBRARY()
+
 OWNER(
     orivej
     g:cpp-contrib
 )
- 
+
 LICENSE(
     Apache-2.0 WITH LLVM-exception AND
     NCSA
@@ -14,7 +14,7 @@ LICENSE(
 
 LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
 
-PEERDIR( 
+PEERDIR(
     contrib/libs/llvm12
     contrib/libs/llvm12/include
     contrib/libs/llvm12/lib/Analysis
@@ -23,36 +23,36 @@ PEERDIR(
     contrib/libs/llvm12/lib/ProfileData
     contrib/libs/llvm12/lib/Support
     contrib/libs/llvm12/lib/Transforms/Utils
-) 
- 
+)
+
 ADDINCL(
     contrib/libs/llvm12/lib/Transforms/Instrumentation
 )
- 
-NO_COMPILER_WARNINGS() 
- 
-NO_UTIL() 
- 
-SRCS( 
-    AddressSanitizer.cpp 
-    BoundsChecking.cpp 
-    CGProfile.cpp 
-    ControlHeightReduction.cpp 
-    DataFlowSanitizer.cpp 
-    GCOVProfiling.cpp 
-    HWAddressSanitizer.cpp 
-    IndirectCallPromotion.cpp 
-    InstrOrderFile.cpp 
-    InstrProfiling.cpp 
-    Instrumentation.cpp 
+
+NO_COMPILER_WARNINGS()
+
+NO_UTIL()
+
+SRCS(
+    AddressSanitizer.cpp
+    BoundsChecking.cpp
+    CGProfile.cpp
+    ControlHeightReduction.cpp
+    DataFlowSanitizer.cpp
+    GCOVProfiling.cpp
+    HWAddressSanitizer.cpp
+    IndirectCallPromotion.cpp
+    InstrOrderFile.cpp
+    InstrProfiling.cpp
+    Instrumentation.cpp
     MemProfiler.cpp
-    MemorySanitizer.cpp 
-    PGOInstrumentation.cpp 
-    PGOMemOPSizeOpt.cpp 
-    PoisonChecking.cpp 
-    SanitizerCoverage.cpp 
-    ThreadSanitizer.cpp 
-    ValueProfileCollector.cpp 
-) 
- 
-END() 
+    MemorySanitizer.cpp
+    PGOInstrumentation.cpp
+    PGOMemOPSizeOpt.cpp
+    PoisonChecking.cpp
+    SanitizerCoverage.cpp
+    ThreadSanitizer.cpp
+    ValueProfileCollector.cpp
+)
+
+END()
author	orivej <orivej@yandex-team.ru>	2022-02-10 16:45:01 +0300
committer	Daniil Cherednik <dcherednik@yandex-team.ru>	2022-02-10 16:45:01 +0300
commit	2d37894b1b037cf24231090eda8589bbb44fb6fc (patch)
tree	be835aa92c6248212e705f25388ebafcf84bc7a1 /contrib/libs/llvm12/lib/Transforms/Instrumentation
parent	718c552901d703c502ccbefdfc3c9028d608b947 (diff)
download	ydb-2d37894b1b037cf24231090eda8589bbb44fb6fc.tar.gz