| author | Dimitry Andric <dim@FreeBSD.org> | 2020-06-24 20:22:44 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2020-06-24 20:22:44 +0000 |
| commit | 483b61a50e7423b063fc26985325f594560b3f7e (patch) | |
| tree | 5bb205026b61f3dd88d63f43d0b790d518acefec | |
| parent | 8055b7e383f74dbc58c8085a0f0c45f4c61f8231 (diff) | |
Vendor import of llvm-project branch release/10.x
llvmorg-10.0.0-129-gd24d5c8e308.
Notes:
svn path=/vendor/llvm-project/release-10.x/; revision=362593
svn path=/vendor/llvm-project/llvmorg-10.0.0-129-gd24d5c8e308/; revision=362594; tag=vendor/llvm-project/llvmorg-10.0.0-129-gd24d5c8e308
49 files changed, 2180 insertions, 406 deletions
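Among the fixes folded into this import, the llvm/lib/LTO/LTO.cpp hunk below makes llvm::computeLTOCacheKey deterministic by sorting the export-list GUIDs and the import-list module entries before they are hashed, so the ThinLTO cache key no longer depends on hash-map iteration order. What follows is only a minimal standalone sketch of that idea in plain C++; the container types, the cacheKey/hashUpdate names, and the toy FNV-1a mix are illustrative and are not the LLVM code itself.

```cpp
#include <algorithm>
#include <cstdint>
#include <string>
#include <unordered_map>
#include <vector>

// Toy stand-in for the real hasher; only the ordering argument matters here.
static void hashUpdate(uint64_t &H, uint64_t V) {
  H = (H ^ V) * 1099511628211ULL; // FNV-1a-style mix, purely illustrative
}

// Unordered containers iterate in an unspecified order, so feeding their
// elements straight into a hasher can yield a different key from run to run
// even for identical inputs. Copying the elements out and sorting them first
// makes the key stable, which is what the LTO.cpp change does for the export
// GUIDs and the imported-module entries.
uint64_t cacheKey(
    const std::vector<uint64_t> &ExportGUIDs,
    const std::unordered_map<std::string, uint64_t> &ImportModHashes) {
  uint64_t Key = 1469598103934665603ULL; // FNV offset basis (illustrative seed)

  std::vector<uint64_t> SortedGUIDs(ExportGUIDs);
  std::sort(SortedGUIDs.begin(), SortedGUIDs.end());
  for (uint64_t GUID : SortedGUIDs)
    hashUpdate(Key, GUID);

  std::vector<std::string> Modules;
  Modules.reserve(ImportModHashes.size());
  for (const auto &KV : ImportModHashes)
    Modules.push_back(KV.first);
  std::sort(Modules.begin(), Modules.end());
  for (const std::string &M : Modules)
    hashUpdate(Key, ImportModHashes.at(M));

  return Key;
}
```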
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 0a60873443fc..391c895a453b 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2267,6 +2267,14 @@ def mspeculative_load_hardening : Flag<["-"], "mspeculative-load-hardening">, Group<m_Group>, Flags<[CoreOption,CC1Option]>; def mno_speculative_load_hardening : Flag<["-"], "mno-speculative-load-hardening">, Group<m_Group>, Flags<[CoreOption]>; +def mlvi_hardening : Flag<["-"], "mlvi-hardening">, Group<m_Group>, Flags<[CoreOption,DriverOption]>, + HelpText<"Enable all mitigations for Load Value Injection (LVI)">; +def mno_lvi_hardening : Flag<["-"], "mno-lvi-hardening">, Group<m_Group>, Flags<[CoreOption,DriverOption]>, + HelpText<"Disable mitigations for Load Value Injection (LVI)">; +def mlvi_cfi : Flag<["-"], "mlvi-cfi">, Group<m_Group>, Flags<[CoreOption,DriverOption]>, + HelpText<"Enable only control-flow mitigations for Load Value Injection (LVI)">; +def mno_lvi_cfi : Flag<["-"], "mno-lvi-cfi">, Group<m_Group>, Flags<[CoreOption,DriverOption]>, + HelpText<"Disable control-flow mitigations for Load Value Injection (LVI)">; def mrelax : Flag<["-"], "mrelax">, Group<m_riscv_Features_Group>, HelpText<"Enable linker relaxation">; diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h index 270aa7ff9181..ef5c2264a0b0 100644 --- a/clang/lib/Basic/Targets/PPC.h +++ b/clang/lib/Basic/Targets/PPC.h @@ -276,11 +276,12 @@ public: break; case 'Q': // Memory operand that is an offset from a register (it is // usually better to use `m' or `es' in asm statements) + Info.setAllowsRegister(); + LLVM_FALLTHROUGH; case 'Z': // Memory operand that is an indexed or indirect from a // register (it is usually better to use `m' or `es' in // asm statements) Info.setAllowsMemory(); - Info.setAllowsRegister(); break; case 'R': // AIX TOC entry case 'a': // Address operand that is an indexed or indirect from a diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp index ac9a294ee3fa..60fd932fbe6f 100644 --- a/clang/lib/Driver/SanitizerArgs.cpp +++ b/clang/lib/Driver/SanitizerArgs.cpp @@ -454,8 +454,7 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC, << lastArgumentForMask(D, Args, Kinds & NeedsLTO) << "-flto"; } - if ((Kinds & SanitizerKind::ShadowCallStack) && - TC.getTriple().getArch() == llvm::Triple::aarch64 && + if ((Kinds & SanitizerKind::ShadowCallStack) && TC.getTriple().isAArch64() && !llvm::AArch64::isX18ReservedByDefault(TC.getTriple()) && !Args.hasArg(options::OPT_ffixed_x18)) { D.Diag(diag::err_drv_argument_only_allowed_with) diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index cab97b1a601a..18400d9def54 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -954,15 +954,12 @@ SanitizerMask ToolChain::getSupportedSanitizers() const { if (getTriple().getArch() == llvm::Triple::x86 || getTriple().getArch() == llvm::Triple::x86_64 || getTriple().getArch() == llvm::Triple::arm || - getTriple().getArch() == llvm::Triple::aarch64 || getTriple().getArch() == llvm::Triple::wasm32 || - getTriple().getArch() == llvm::Triple::wasm64) + getTriple().getArch() == llvm::Triple::wasm64 || getTriple().isAArch64()) Res |= SanitizerKind::CFIICall; - if (getTriple().getArch() == llvm::Triple::x86_64 || - getTriple().getArch() == llvm::Triple::aarch64) + if (getTriple().getArch() == llvm::Triple::x86_64 || getTriple().isAArch64()) Res |= SanitizerKind::ShadowCallStack; - 
if (getTriple().getArch() == llvm::Triple::aarch64 || - getTriple().getArch() == llvm::Triple::aarch64_be) + if (getTriple().isAArch64()) Res |= SanitizerKind::MemTag; return Res; } diff --git a/clang/lib/Driver/ToolChains/Arch/X86.cpp b/clang/lib/Driver/ToolChains/Arch/X86.cpp index d1e0c8253b79..d170b7ac3a77 100644 --- a/clang/lib/Driver/ToolChains/Arch/X86.cpp +++ b/clang/lib/Driver/ToolChains/Arch/X86.cpp @@ -146,6 +146,7 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple, // flags). This is a bit hacky but keeps existing usages working. We should // consider deprecating this and instead warn if the user requests external // retpoline thunks and *doesn't* request some form of retpolines. + auto SpectreOpt = clang::driver::options::ID::OPT_INVALID; if (Args.hasArgNoClaim(options::OPT_mretpoline, options::OPT_mno_retpoline, options::OPT_mspeculative_load_hardening, options::OPT_mno_speculative_load_hardening)) { @@ -153,12 +154,14 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple, false)) { Features.push_back("+retpoline-indirect-calls"); Features.push_back("+retpoline-indirect-branches"); + SpectreOpt = options::OPT_mretpoline; } else if (Args.hasFlag(options::OPT_mspeculative_load_hardening, options::OPT_mno_speculative_load_hardening, false)) { // On x86, speculative load hardening relies on at least using retpolines // for indirect calls. Features.push_back("+retpoline-indirect-calls"); + SpectreOpt = options::OPT_mspeculative_load_hardening; } } else if (Args.hasFlag(options::OPT_mretpoline_external_thunk, options::OPT_mno_retpoline_external_thunk, false)) { @@ -166,6 +169,26 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple, // eventually switch to an error here. 
Features.push_back("+retpoline-indirect-calls"); Features.push_back("+retpoline-indirect-branches"); + SpectreOpt = options::OPT_mretpoline_external_thunk; + } + + auto LVIOpt = clang::driver::options::ID::OPT_INVALID; + if (Args.hasFlag(options::OPT_mlvi_hardening, options::OPT_mno_lvi_hardening, + false)) { + Features.push_back("+lvi-load-hardening"); + Features.push_back("+lvi-cfi"); // load hardening implies CFI protection + LVIOpt = options::OPT_mlvi_hardening; + } else if (Args.hasFlag(options::OPT_mlvi_cfi, options::OPT_mno_lvi_cfi, + false)) { + Features.push_back("+lvi-cfi"); + LVIOpt = options::OPT_mlvi_cfi; + } + + if (SpectreOpt != clang::driver::options::ID::OPT_INVALID && + LVIOpt != clang::driver::options::ID::OPT_INVALID) { + D.Diag(diag::err_drv_argument_not_allowed_with) + << D.getOpts().getOptionName(SpectreOpt) + << D.getOpts().getOptionName(LVIOpt); } // Now add any that the user explicitly requested on the command line, diff --git a/llvm/lib/Target/Hexagon/RDFGraph.h b/llvm/include/llvm/CodeGen/RDFGraph.h index 585f43e116f9..585f43e116f9 100644 --- a/llvm/lib/Target/Hexagon/RDFGraph.h +++ b/llvm/include/llvm/CodeGen/RDFGraph.h diff --git a/llvm/lib/Target/Hexagon/RDFLiveness.h b/llvm/include/llvm/CodeGen/RDFLiveness.h index ea4890271726..ea4890271726 100644 --- a/llvm/lib/Target/Hexagon/RDFLiveness.h +++ b/llvm/include/llvm/CodeGen/RDFLiveness.h diff --git a/llvm/lib/Target/Hexagon/RDFRegisters.h b/llvm/include/llvm/CodeGen/RDFRegisters.h index 4afaf80e4659..4afaf80e4659 100644 --- a/llvm/lib/Target/Hexagon/RDFRegisters.h +++ b/llvm/include/llvm/CodeGen/RDFRegisters.h diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td index f87317445753..fc9fa2153aea 100644 --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -488,6 +488,9 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". def int_ppc_altivec_vmsumuhm : GCCBuiltin<"__builtin_altivec_vmsumuhm">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty], [IntrNoMem]>; + def int_ppc_altivec_vmsumudm : GCCBuiltin<"__builtin_altivec_vmsumudm">, + Intrinsic<[llvm_v1i128_ty], [llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v1i128_ty], [IntrNoMem]>; def int_ppc_altivec_vmsumuhs : GCCBuiltin<"__builtin_altivec_vmsumuhs">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty], [IntrNoMem]>; diff --git a/llvm/include/llvm/Support/ManagedStatic.h b/llvm/include/llvm/Support/ManagedStatic.h index bbd0d04ed040..f2b41422f131 100644 --- a/llvm/include/llvm/Support/ManagedStatic.h +++ b/llvm/include/llvm/Support/ManagedStatic.h @@ -40,8 +40,8 @@ template <typename T, size_t N> struct object_deleter<T[N]> { // constexpr, a dynamic initializer may be emitted depending on optimization // settings. For the affected versions of MSVC, use the old linker // initialization pattern of not providing a constructor and leaving the fields -// uninitialized. -#if !defined(_MSC_VER) || defined(__clang__) +// uninitialized. See http://llvm.org/PR41367 for details. 
+#if !defined(_MSC_VER) || (_MSC_VER >= 1925) || defined(__clang__) #define LLVM_USE_CONSTEXPR_CTOR #endif diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index 1700c6c4640d..46ad5a619770 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -959,6 +959,10 @@ def extloadi32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { let IsLoad = 1; let MemoryVT = i32; } +def extloadf16 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { + let IsLoad = 1; + let MemoryVT = f16; +} def extloadf32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { let IsLoad = 1; let MemoryVT = f32; @@ -1094,6 +1098,11 @@ def truncstorei32 : PatFrag<(ops node:$val, node:$ptr), let IsStore = 1; let MemoryVT = i32; } +def truncstoref16 : PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr)> { + let IsStore = 1; + let MemoryVT = f16; +} def truncstoref32 : PatFrag<(ops node:$val, node:$ptr), (truncstore node:$val, node:$ptr)> { let IsStore = 1; diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp index e852d663c6b4..e439c94a7325 100644 --- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -2059,12 +2059,13 @@ char BasicAAWrapperPass::ID = 0; void BasicAAWrapperPass::anchor() {} INITIALIZE_PASS_BEGIN(BasicAAWrapperPass, "basicaa", - "Basic Alias Analysis (stateless AA impl)", false, true) + "Basic Alias Analysis (stateless AA impl)", true, true) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(PhiValuesWrapperPass) INITIALIZE_PASS_END(BasicAAWrapperPass, "basicaa", - "Basic Alias Analysis (stateless AA impl)", false, true) + "Basic Alias Analysis (stateless AA impl)", true, true) FunctionPass *llvm::createBasicAAWrapperPass() { return new BasicAAWrapperPass(); diff --git a/llvm/lib/Target/Hexagon/RDFGraph.cpp b/llvm/lib/CodeGen/RDFGraph.cpp index 0cb35dc98819..437a6b030096 100644 --- a/llvm/lib/Target/Hexagon/RDFGraph.cpp +++ b/llvm/lib/CodeGen/RDFGraph.cpp @@ -8,8 +8,6 @@ // // Target-independent, SSA-based data flow graph for register data flow (RDF). 
// -#include "RDFGraph.h" -#include "RDFRegisters.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" @@ -20,6 +18,8 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RDFGraph.h" +#include "llvm/CodeGen/RDFRegisters.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -753,8 +753,10 @@ RegisterSet DataFlowGraph::getLandingPadLiveIns() const { const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering(); if (RegisterId R = TLI.getExceptionPointerRegister(PF)) LR.insert(RegisterRef(R)); - if (RegisterId R = TLI.getExceptionSelectorRegister(PF)) - LR.insert(RegisterRef(R)); + if (!isFuncletEHPersonality(classifyEHPersonality(PF))) { + if (RegisterId R = TLI.getExceptionSelectorRegister(PF)) + LR.insert(RegisterRef(R)); + } return LR; } diff --git a/llvm/lib/Target/Hexagon/RDFLiveness.cpp b/llvm/lib/CodeGen/RDFLiveness.cpp index e2c007c9d01a..0bcd27f8ea45 100644 --- a/llvm/lib/Target/Hexagon/RDFLiveness.cpp +++ b/llvm/lib/CodeGen/RDFLiveness.cpp @@ -22,9 +22,6 @@ // and Embedded Architectures and Compilers", 8 (4), // <10.1145/2086696.2086706>. <hal-00647369> // -#include "RDFLiveness.h" -#include "RDFGraph.h" -#include "RDFRegisters.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" @@ -33,6 +30,9 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/RDFLiveness.h" +#include "llvm/CodeGen/RDFGraph.h" +#include "llvm/CodeGen/RDFRegisters.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" diff --git a/llvm/lib/Target/Hexagon/RDFRegisters.cpp b/llvm/lib/CodeGen/RDFRegisters.cpp index b5675784e34b..bd8661816e71 100644 --- a/llvm/lib/Target/Hexagon/RDFRegisters.cpp +++ b/llvm/lib/CodeGen/RDFRegisters.cpp @@ -6,11 +6,11 @@ // //===----------------------------------------------------------------------===// -#include "RDFRegisters.h" #include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/RDFRegisters.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index 297b11de17a9..fa2f0777897b 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -147,8 +147,17 @@ void llvm::computeLTOCacheKey( // Include the hash for the current module auto ModHash = Index.getModuleHash(ModuleID); Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash))); + + std::vector<uint64_t> ExportsGUID; + ExportsGUID.reserve(ExportList.size()); for (const auto &VI : ExportList) { auto GUID = VI.getGUID(); + ExportsGUID.push_back(GUID); + } + + // Sort the export list elements GUIDs. + llvm::sort(ExportsGUID); + for (uint64_t GUID : ExportsGUID) { // The export list can impact the internalization, be conservative here Hasher.update(ArrayRef<uint8_t>((uint8_t *)&GUID, sizeof(GUID))); } @@ -156,12 +165,23 @@ void llvm::computeLTOCacheKey( // Include the hash for every module we import functions from. The set of // imported symbols for each module may affect code generation and is // sensitive to link order, so include that as well. 
- for (auto &Entry : ImportList) { - auto ModHash = Index.getModuleHash(Entry.first()); + using ImportMapIteratorTy = FunctionImporter::ImportMapTy::const_iterator; + std::vector<ImportMapIteratorTy> ImportModulesVector; + ImportModulesVector.reserve(ImportList.size()); + + for (ImportMapIteratorTy It = ImportList.begin(); It != ImportList.end(); + ++It) { + ImportModulesVector.push_back(It); + } + llvm::sort(ImportModulesVector, + [](const ImportMapIteratorTy &Lhs, const ImportMapIteratorTy &Rhs) + -> bool { return Lhs->getKey() < Rhs->getKey(); }); + for (const ImportMapIteratorTy &EntryIt : ImportModulesVector) { + auto ModHash = Index.getModuleHash(EntryIt->first()); Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash))); - AddUint64(Entry.second.size()); - for (auto &Fn : Entry.second) + AddUint64(EntryIt->second.size()); + for (auto &Fn : EntryIt->second) AddUint64(Fn); } diff --git a/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp b/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp index 6f5f58554d09..d407edfbd966 100644 --- a/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp +++ b/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp @@ -304,7 +304,7 @@ void BPFDAGToDAGISel::PreprocessLoad(SDNode *Node, LLVM_DEBUG(dbgs() << "Replacing load of size " << size << " with constant " << val << '\n'); - SDValue NVal = CurDAG->getConstant(val, DL, MVT::i64); + SDValue NVal = CurDAG->getConstant(val, DL, LD->getValueType(0)); // After replacement, the current node is dead, we need to // go backward one step to make iterator still work diff --git a/llvm/lib/Target/BPF/BTFDebug.cpp b/llvm/lib/Target/BPF/BTFDebug.cpp index a9fb04f20d1c..6daeb3b4b63b 100644 --- a/llvm/lib/Target/BPF/BTFDebug.cpp +++ b/llvm/lib/Target/BPF/BTFDebug.cpp @@ -600,6 +600,38 @@ void BTFDebug::visitTypeEntry(const DIType *Ty, uint32_t &TypeId, bool CheckPointer, bool SeenPointer) { if (!Ty || DIToIdMap.find(Ty) != DIToIdMap.end()) { TypeId = DIToIdMap[Ty]; + + // To handle the case like the following: + // struct t; + // typedef struct t _t; + // struct s1 { _t *c; }; + // int test1(struct s1 *arg) { ... } + // + // struct t { int a; int b; }; + // struct s2 { _t c; } + // int test2(struct s2 *arg) { ... } + // + // During traversing test1() argument, "_t" is recorded + // in DIToIdMap and a forward declaration fixup is created + // for "struct t" to avoid pointee type traversal. + // + // During traversing test2() argument, even if we see "_t" is + // already defined, we should keep moving to eventually + // bring in types for "struct t". Otherwise, the "struct s2" + // definition won't be correct. 
+ if (Ty && (!CheckPointer || !SeenPointer)) { + if (const auto *DTy = dyn_cast<DIDerivedType>(Ty)) { + unsigned Tag = DTy->getTag(); + if (Tag == dwarf::DW_TAG_typedef || Tag == dwarf::DW_TAG_const_type || + Tag == dwarf::DW_TAG_volatile_type || + Tag == dwarf::DW_TAG_restrict_type) { + uint32_t TmpTypeId; + visitTypeEntry(DTy->getBaseType(), TmpTypeId, CheckPointer, + SeenPointer); + } + } + } + return; } diff --git a/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp index 886034d9601a..f1fe51f5e54f 100644 --- a/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp +++ b/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp @@ -12,9 +12,6 @@ #include "HexagonInstrInfo.h" #include "HexagonSubtarget.h" #include "MCTargetDesc/HexagonBaseInfo.h" -#include "RDFGraph.h" -#include "RDFLiveness.h" -#include "RDFRegisters.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringRef.h" @@ -27,6 +24,9 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RDFGraph.h" +#include "llvm/CodeGen/RDFLiveness.h" +#include "llvm/CodeGen/RDFRegisters.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/InitializePasses.h" #include "llvm/MC/MCInstrDesc.h" diff --git a/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp b/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp index 517ad1c6ee7b..f26e23befde2 100644 --- a/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp +++ b/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp @@ -11,9 +11,6 @@ #include "MCTargetDesc/HexagonBaseInfo.h" #include "RDFCopy.h" #include "RDFDeadCode.h" -#include "RDFGraph.h" -#include "RDFLiveness.h" -#include "RDFRegisters.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" @@ -24,6 +21,9 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RDFGraph.h" +#include "llvm/CodeGen/RDFLiveness.h" +#include "llvm/CodeGen/RDFRegisters.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" diff --git a/llvm/lib/Target/Hexagon/RDFCopy.cpp b/llvm/lib/Target/Hexagon/RDFCopy.cpp index a9d39fd4b2dc..34d58f0a7a23 100644 --- a/llvm/lib/Target/Hexagon/RDFCopy.cpp +++ b/llvm/lib/Target/Hexagon/RDFCopy.cpp @@ -11,13 +11,13 @@ //===----------------------------------------------------------------------===// #include "RDFCopy.h" -#include "RDFGraph.h" -#include "RDFLiveness.h" -#include "RDFRegisters.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RDFGraph.h" +#include "llvm/CodeGen/RDFLiveness.h" +#include "llvm/CodeGen/RDFRegisters.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/MC/MCRegisterInfo.h" diff --git a/llvm/lib/Target/Hexagon/RDFCopy.h b/llvm/lib/Target/Hexagon/RDFCopy.h index 1450ab884849..99b18a75d8c2 100644 --- a/llvm/lib/Target/Hexagon/RDFCopy.h +++ b/llvm/lib/Target/Hexagon/RDFCopy.h @@ -9,9 +9,9 @@ #ifndef LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H #define LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H -#include "RDFGraph.h" -#include "RDFLiveness.h" -#include "RDFRegisters.h" +#include "llvm/CodeGen/RDFGraph.h" +#include "llvm/CodeGen/RDFLiveness.h" +#include "llvm/CodeGen/RDFRegisters.h" #include "llvm/CodeGen/MachineFunction.h" 
#include <map> #include <vector> diff --git a/llvm/lib/Target/Hexagon/RDFDeadCode.cpp b/llvm/lib/Target/Hexagon/RDFDeadCode.cpp index af86c7b1956b..5a98debd3c00 100644 --- a/llvm/lib/Target/Hexagon/RDFDeadCode.cpp +++ b/llvm/lib/Target/Hexagon/RDFDeadCode.cpp @@ -9,13 +9,13 @@ // RDF-based generic dead code elimination. #include "RDFDeadCode.h" -#include "RDFGraph.h" -#include "RDFLiveness.h" #include "llvm/ADT/SetVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RDFGraph.h" +#include "llvm/CodeGen/RDFLiveness.h" #include "llvm/Support/Debug.h" #include <queue> diff --git a/llvm/lib/Target/Hexagon/RDFDeadCode.h b/llvm/lib/Target/Hexagon/RDFDeadCode.h index 7f91977e1d6c..859c8161d355 100644 --- a/llvm/lib/Target/Hexagon/RDFDeadCode.h +++ b/llvm/lib/Target/Hexagon/RDFDeadCode.h @@ -23,8 +23,8 @@ #ifndef RDF_DEADCODE_H #define RDF_DEADCODE_H -#include "RDFGraph.h" -#include "RDFLiveness.h" +#include "llvm/CodeGen/RDFGraph.h" +#include "llvm/CodeGen/RDFLiveness.h" #include "llvm/ADT/SetVector.h" namespace llvm { diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td index 9b3d13989ee2..d7e3519d5539 100644 --- a/llvm/lib/Target/PowerPC/P9InstrResources.td +++ b/llvm/lib/Target/PowerPC/P9InstrResources.td @@ -373,6 +373,7 @@ def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C], VMSUMSHS, VMSUMUBM, VMSUMUHM, + VMSUMUDM, VMSUMUHS, VMULESB, VMULESH, diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 00f59bba52e8..ca1649fae258 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -167,6 +167,23 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand); } + if (Subtarget.isISA3_0()) { + setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal); + setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal); + setTruncStoreAction(MVT::f64, MVT::f16, Legal); + setTruncStoreAction(MVT::f32, MVT::f16, Legal); + } else { + // No extending loads from f16 or HW conversions back and forth. + setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); + setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); + setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); + setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); + setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); + setTruncStoreAction(MVT::f64, MVT::f16, Expand); + setTruncStoreAction(MVT::f32, MVT::f16, Expand); + } + setTruncStoreAction(MVT::f64, MVT::f32, Expand); // PowerPC has pre-inc load and store's. @@ -677,6 +694,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); } } + setOperationAction(ISD::SELECT_CC, MVT::v4i32, Expand); if (!Subtarget.hasP8Vector()) { setOperationAction(ISD::SMAX, MVT::v2i64, Expand); setOperationAction(ISD::SMIN, MVT::v2i64, Expand); @@ -10361,6 +10379,7 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { assert(Op.getOpcode() == ISD::FP_EXTEND && "Should only be called for ISD::FP_EXTEND"); + // FIXME: handle extends from half precision float vectors on P9. // We only want to custom lower an extend from v2f32 to v2f64. 
if (Op.getValueType() != MVT::v2f64 || Op.getOperand(0).getValueType() != MVT::v2f32) @@ -10574,6 +10593,11 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, case ISD::BITCAST: // Don't handle bitcast here. return; + case ISD::FP_EXTEND: + SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG); + if (Lowered) + Results.push_back(Lowered); + return; } } @@ -15255,7 +15279,8 @@ bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, if (!VT.isSimple()) return false; - if (VT.isFloatingPoint() && !Subtarget.allowsUnalignedFPAccess()) + if (VT.isFloatingPoint() && !VT.isVector() && + !Subtarget.allowsUnalignedFPAccess()) return false; if (VT.getSimpleVT().isVector()) { diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index e0c381827b87..2e1485373d19 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -637,7 +637,7 @@ namespace llvm { /// then the VPERM for the shuffle. All in all a very slow sequence. TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const override { - if (VT.getScalarSizeInBits() % 8 == 0) + if (VT.getVectorNumElements() != 1 && VT.getScalarSizeInBits() % 8 == 0) return TypeWidenVector; return TargetLoweringBase::getPreferredVectorAction(VT); } diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td index f94816a35f79..6e8635f2413c 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td @@ -1342,6 +1342,10 @@ def VSBOX : VXBX_Int_Ty<1480, "vsbox", int_ppc_altivec_crypto_vsbox, v2i64>; def HasP9Altivec : Predicate<"PPCSubTarget->hasP9Altivec()">; let Predicates = [HasP9Altivec] in { +// Vector Multiply-Sum +def VMSUMUDM : VA1a_Int_Ty3<35, "vmsumudm", int_ppc_altivec_vmsumudm, + v1i128, v2i64, v1i128>; + // i8 element comparisons. def VCMPNEB : VCMP < 7, "vcmpneb $vD, $vA, $vB" , v16i8>; def VCMPNEB_rec : VCMPo < 7, "vcmpneb. $vD, $vA, $vB" , v16i8>; diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 30906a32b00c..d7925befcd37 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -2631,6 +2631,10 @@ bool PPCInstrInfo::isADDIInstrEligibleForFolding(MachineInstr &ADDIMI, if (Opc != PPC::ADDI && Opc != PPC::ADDI8) return false; + // The operand may not necessarily be an immediate - it could be a relocation. + if (!ADDIMI.getOperand(2).isImm()) + return false; + Imm = ADDIMI.getOperand(2).getImm(); return true; diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index be6b30ffa08b..95e5ff6b130d 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -3343,6 +3343,23 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi16i64)), (v2i64 (XXPERMDIs (VEXTSH2Ds (LXSIHZX xoaddr:$src)), 0))>; + // Load/convert and convert/store patterns for f16. 
+ def : Pat<(f64 (extloadf16 xoaddr:$src)), + (f64 (XSCVHPDP (LXSIHZX xoaddr:$src)))>; + def : Pat<(truncstoref16 f64:$src, xoaddr:$dst), + (STXSIHX (XSCVDPHP $src), xoaddr:$dst)>; + def : Pat<(f32 (extloadf16 xoaddr:$src)), + (f32 (COPY_TO_REGCLASS (XSCVHPDP (LXSIHZX xoaddr:$src)), VSSRC))>; + def : Pat<(truncstoref16 f32:$src, xoaddr:$dst), + (STXSIHX (XSCVDPHP (COPY_TO_REGCLASS $src, VSFRC)), xoaddr:$dst)>; + def : Pat<(f64 (f16_to_fp i32:$A)), + (f64 (XSCVHPDP (MTVSRWZ $A)))>; + def : Pat<(f32 (f16_to_fp i32:$A)), + (f32 (COPY_TO_REGCLASS (XSCVHPDP (MTVSRWZ $A)), VSSRC))>; + def : Pat<(i32 (fp_to_f16 f32:$A)), + (i32 (MFVSRWZ (XSCVDPHP (COPY_TO_REGCLASS $A, VSFRC))))>; + def : Pat<(i32 (fp_to_f16 f64:$A)), (i32 (MFVSRWZ (XSCVDPHP $A)))>; + let Predicates = [IsBigEndian, HasP9Vector] in { // Scalar stores of i8 def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst), diff --git a/llvm/lib/Target/X86/ImmutableGraph.h b/llvm/lib/Target/X86/ImmutableGraph.h new file mode 100644 index 000000000000..5833017037a5 --- /dev/null +++ b/llvm/lib/Target/X86/ImmutableGraph.h @@ -0,0 +1,446 @@ +//==========-- ImmutableGraph.h - A fast DAG implementation ---------=========// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// Description: ImmutableGraph is a fast DAG implementation that cannot be +/// modified, except by creating a new ImmutableGraph. ImmutableGraph is +/// implemented as two arrays: one containing nodes, and one containing edges. +/// The advantages to this implementation are two-fold: +/// 1. Iteration and traversal operations benefit from cache locality. +/// 2. Operations on sets of nodes/edges are efficient, and representations of +/// those sets in memory are compact. For instance, a set of edges is +/// implemented as a bit vector, wherein each bit corresponds to one edge in +/// the edge array. This implies a lower bound of 64x spatial improvement +/// over, e.g., an llvm::DenseSet or llvm::SmallSet. It also means that +/// insert/erase/contains operations complete in negligible constant time: +/// insert and erase require one load and one store, and contains requires +/// just one load. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_X86_IMMUTABLEGRAPH_H +#define LLVM_LIB_TARGET_X86_IMMUTABLEGRAPH_H + +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <iterator> +#include <utility> +#include <vector> + +namespace llvm { + +template <typename NodeValueT, typename EdgeValueT> class ImmutableGraph { + using Traits = GraphTraits<ImmutableGraph<NodeValueT, EdgeValueT> *>; + template <typename> friend class ImmutableGraphBuilder; + +public: + using node_value_type = NodeValueT; + using edge_value_type = EdgeValueT; + using size_type = int; + class Node; + class Edge { + friend class ImmutableGraph; + template <typename> friend class ImmutableGraphBuilder; + + const Node *Dest; + edge_value_type Value; + + public: + const Node *getDest() const { return Dest; }; + const edge_value_type &getValue() const { return Value; } + }; + class Node { + friend class ImmutableGraph; + template <typename> friend class ImmutableGraphBuilder; + + const Edge *Edges; + node_value_type Value; + + public: + const node_value_type &getValue() const { return Value; } + + const Edge *edges_begin() const { return Edges; } + // Nodes are allocated sequentially. Edges for a node are stored together. + // The end of this Node's edges is the beginning of the next node's edges. + // An extra node was allocated to hold the end pointer for the last real + // node. + const Edge *edges_end() const { return (this + 1)->Edges; } + ArrayRef<Edge> edges() const { + return makeArrayRef(edges_begin(), edges_end()); + } + }; + +protected: + ImmutableGraph(std::unique_ptr<Node[]> Nodes, std::unique_ptr<Edge[]> Edges, + size_type NodesSize, size_type EdgesSize) + : Nodes(std::move(Nodes)), Edges(std::move(Edges)), NodesSize(NodesSize), + EdgesSize(EdgesSize) {} + ImmutableGraph(const ImmutableGraph &) = delete; + ImmutableGraph(ImmutableGraph &&) = delete; + ImmutableGraph &operator=(const ImmutableGraph &) = delete; + ImmutableGraph &operator=(ImmutableGraph &&) = delete; + +public: + ArrayRef<Node> nodes() const { return makeArrayRef(Nodes.get(), NodesSize); } + const Node *nodes_begin() const { return nodes().begin(); } + const Node *nodes_end() const { return nodes().end(); } + + ArrayRef<Edge> edges() const { return makeArrayRef(Edges.get(), EdgesSize); } + const Edge *edges_begin() const { return edges().begin(); } + const Edge *edges_end() const { return edges().end(); } + + size_type nodes_size() const { return NodesSize; } + size_type edges_size() const { return EdgesSize; } + + // Node N must belong to this ImmutableGraph. + size_type getNodeIndex(const Node &N) const { + return std::distance(nodes_begin(), &N); + } + // Edge E must belong to this ImmutableGraph. + size_type getEdgeIndex(const Edge &E) const { + return std::distance(edges_begin(), &E); + } + + // FIXME: Could NodeSet and EdgeSet be templated to share code? 
+ class NodeSet { + const ImmutableGraph &G; + BitVector V; + + public: + NodeSet(const ImmutableGraph &G, bool ContainsAll = false) + : G{G}, V{static_cast<unsigned>(G.nodes_size()), ContainsAll} {} + bool insert(const Node &N) { + size_type Idx = G.getNodeIndex(N); + bool AlreadyExists = V.test(Idx); + V.set(Idx); + return !AlreadyExists; + } + void erase(const Node &N) { + size_type Idx = G.getNodeIndex(N); + V.reset(Idx); + } + bool contains(const Node &N) const { + size_type Idx = G.getNodeIndex(N); + return V.test(Idx); + } + void clear() { V.reset(); } + size_type empty() const { return V.none(); } + /// Return the number of elements in the set + size_type count() const { return V.count(); } + /// Return the size of the set's domain + size_type size() const { return V.size(); } + /// Set union + NodeSet &operator|=(const NodeSet &RHS) { + assert(&this->G == &RHS.G); + V |= RHS.V; + return *this; + } + /// Set intersection + NodeSet &operator&=(const NodeSet &RHS) { + assert(&this->G == &RHS.G); + V &= RHS.V; + return *this; + } + /// Set disjoint union + NodeSet &operator^=(const NodeSet &RHS) { + assert(&this->G == &RHS.G); + V ^= RHS.V; + return *this; + } + + using index_iterator = typename BitVector::const_set_bits_iterator; + index_iterator index_begin() const { return V.set_bits_begin(); } + index_iterator index_end() const { return V.set_bits_end(); } + void set(size_type Idx) { V.set(Idx); } + void reset(size_type Idx) { V.reset(Idx); } + + class iterator { + const NodeSet &Set; + size_type Current; + + void advance() { + assert(Current != -1); + Current = Set.V.find_next(Current); + } + + public: + iterator(const NodeSet &Set, size_type Begin) + : Set{Set}, Current{Begin} {} + iterator operator++(int) { + iterator Tmp = *this; + advance(); + return Tmp; + } + iterator &operator++() { + advance(); + return *this; + } + Node *operator*() const { + assert(Current != -1); + return Set.G.nodes_begin() + Current; + } + bool operator==(const iterator &other) const { + assert(&this->Set == &other.Set); + return this->Current == other.Current; + } + bool operator!=(const iterator &other) const { return !(*this == other); } + }; + + iterator begin() const { return iterator{*this, V.find_first()}; } + iterator end() const { return iterator{*this, -1}; } + }; + + class EdgeSet { + const ImmutableGraph &G; + BitVector V; + + public: + EdgeSet(const ImmutableGraph &G, bool ContainsAll = false) + : G{G}, V{static_cast<unsigned>(G.edges_size()), ContainsAll} {} + bool insert(const Edge &E) { + size_type Idx = G.getEdgeIndex(E); + bool AlreadyExists = V.test(Idx); + V.set(Idx); + return !AlreadyExists; + } + void erase(const Edge &E) { + size_type Idx = G.getEdgeIndex(E); + V.reset(Idx); + } + bool contains(const Edge &E) const { + size_type Idx = G.getEdgeIndex(E); + return V.test(Idx); + } + void clear() { V.reset(); } + bool empty() const { return V.none(); } + /// Return the number of elements in the set + size_type count() const { return V.count(); } + /// Return the size of the set's domain + size_type size() const { return V.size(); } + /// Set union + EdgeSet &operator|=(const EdgeSet &RHS) { + assert(&this->G == &RHS.G); + V |= RHS.V; + return *this; + } + /// Set intersection + EdgeSet &operator&=(const EdgeSet &RHS) { + assert(&this->G == &RHS.G); + V &= RHS.V; + return *this; + } + /// Set disjoint union + EdgeSet &operator^=(const EdgeSet &RHS) { + assert(&this->G == &RHS.G); + V ^= RHS.V; + return *this; + } + + using index_iterator = typename 
BitVector::const_set_bits_iterator; + index_iterator index_begin() const { return V.set_bits_begin(); } + index_iterator index_end() const { return V.set_bits_end(); } + void set(size_type Idx) { V.set(Idx); } + void reset(size_type Idx) { V.reset(Idx); } + + class iterator { + const EdgeSet &Set; + size_type Current; + + void advance() { + assert(Current != -1); + Current = Set.V.find_next(Current); + } + + public: + iterator(const EdgeSet &Set, size_type Begin) + : Set{Set}, Current{Begin} {} + iterator operator++(int) { + iterator Tmp = *this; + advance(); + return Tmp; + } + iterator &operator++() { + advance(); + return *this; + } + Edge *operator*() const { + assert(Current != -1); + return Set.G.edges_begin() + Current; + } + bool operator==(const iterator &other) const { + assert(&this->Set == &other.Set); + return this->Current == other.Current; + } + bool operator!=(const iterator &other) const { return !(*this == other); } + }; + + iterator begin() const { return iterator{*this, V.find_first()}; } + iterator end() const { return iterator{*this, -1}; } + }; + +private: + std::unique_ptr<Node[]> Nodes; + std::unique_ptr<Edge[]> Edges; + size_type NodesSize; + size_type EdgesSize; +}; + +template <typename GraphT> class ImmutableGraphBuilder { + using node_value_type = typename GraphT::node_value_type; + using edge_value_type = typename GraphT::edge_value_type; + static_assert( + std::is_base_of<ImmutableGraph<node_value_type, edge_value_type>, + GraphT>::value, + "Template argument to ImmutableGraphBuilder must derive from " + "ImmutableGraph<>"); + using size_type = typename GraphT::size_type; + using NodeSet = typename GraphT::NodeSet; + using Node = typename GraphT::Node; + using EdgeSet = typename GraphT::EdgeSet; + using Edge = typename GraphT::Edge; + using BuilderEdge = std::pair<edge_value_type, size_type>; + using EdgeList = std::vector<BuilderEdge>; + using BuilderVertex = std::pair<node_value_type, EdgeList>; + using VertexVec = std::vector<BuilderVertex>; + +public: + using BuilderNodeRef = size_type; + + BuilderNodeRef addVertex(const node_value_type &V) { + auto I = AdjList.emplace(AdjList.end(), V, EdgeList{}); + return std::distance(AdjList.begin(), I); + } + + void addEdge(const edge_value_type &E, BuilderNodeRef From, + BuilderNodeRef To) { + AdjList[From].second.emplace_back(E, To); + } + + bool empty() const { return AdjList.empty(); } + + template <typename... ArgT> std::unique_ptr<GraphT> get(ArgT &&... Args) { + size_type VertexSize = AdjList.size(), EdgeSize = 0; + for (const auto &V : AdjList) { + EdgeSize += V.second.size(); + } + auto VertexArray = + std::make_unique<Node[]>(VertexSize + 1 /* terminator node */); + auto EdgeArray = std::make_unique<Edge[]>(EdgeSize); + size_type VI = 0, EI = 0; + for (; VI < VertexSize; ++VI) { + VertexArray[VI].Value = std::move(AdjList[VI].first); + VertexArray[VI].Edges = &EdgeArray[EI]; + auto NumEdges = static_cast<size_type>(AdjList[VI].second.size()); + for (size_type VEI = 0; VEI < NumEdges; ++VEI, ++EI) { + auto &E = AdjList[VI].second[VEI]; + EdgeArray[EI].Value = std::move(E.first); + EdgeArray[EI].Dest = &VertexArray[E.second]; + } + } + assert(VI == VertexSize && EI == EdgeSize && "ImmutableGraph malformed"); + VertexArray[VI].Edges = &EdgeArray[EdgeSize]; // terminator node + return std::make_unique<GraphT>(std::move(VertexArray), + std::move(EdgeArray), VertexSize, EdgeSize, + std::forward<ArgT>(Args)...); + } + + template <typename... 
ArgT> + static std::unique_ptr<GraphT> trim(const GraphT &G, const NodeSet &TrimNodes, + const EdgeSet &TrimEdges, + ArgT &&... Args) { + size_type NewVertexSize = G.nodes_size() - TrimNodes.count(); + size_type NewEdgeSize = G.edges_size() - TrimEdges.count(); + auto NewVertexArray = + std::make_unique<Node[]>(NewVertexSize + 1 /* terminator node */); + auto NewEdgeArray = std::make_unique<Edge[]>(NewEdgeSize); + + // Walk the nodes and determine the new index for each node. + size_type NewNodeIndex = 0; + std::vector<size_type> RemappedNodeIndex(G.nodes_size()); + for (const Node &N : G.nodes()) { + if (TrimNodes.contains(N)) + continue; + RemappedNodeIndex[G.getNodeIndex(N)] = NewNodeIndex++; + } + assert(NewNodeIndex == NewVertexSize && + "Should have assigned NewVertexSize indices"); + + size_type VertexI = 0, EdgeI = 0; + for (const Node &N : G.nodes()) { + if (TrimNodes.contains(N)) + continue; + NewVertexArray[VertexI].Value = N.getValue(); + NewVertexArray[VertexI].Edges = &NewEdgeArray[EdgeI]; + for (const Edge &E : N.edges()) { + if (TrimEdges.contains(E)) + continue; + NewEdgeArray[EdgeI].Value = E.getValue(); + size_type DestIdx = G.getNodeIndex(*E.getDest()); + size_type NewIdx = RemappedNodeIndex[DestIdx]; + assert(NewIdx < NewVertexSize); + NewEdgeArray[EdgeI].Dest = &NewVertexArray[NewIdx]; + ++EdgeI; + } + ++VertexI; + } + assert(VertexI == NewVertexSize && EdgeI == NewEdgeSize && + "Gadget graph malformed"); + NewVertexArray[VertexI].Edges = &NewEdgeArray[NewEdgeSize]; // terminator + return std::make_unique<GraphT>(std::move(NewVertexArray), + std::move(NewEdgeArray), NewVertexSize, + NewEdgeSize, std::forward<ArgT>(Args)...); + } + +private: + VertexVec AdjList; +}; + +template <typename NodeValueT, typename EdgeValueT> +struct GraphTraits<ImmutableGraph<NodeValueT, EdgeValueT> *> { + using GraphT = ImmutableGraph<NodeValueT, EdgeValueT>; + using NodeRef = typename GraphT::Node const *; + using EdgeRef = typename GraphT::Edge const &; + + static NodeRef edge_dest(EdgeRef E) { return E.getDest(); } + using ChildIteratorType = + mapped_iterator<typename GraphT::Edge const *, decltype(&edge_dest)>; + + static NodeRef getEntryNode(GraphT *G) { return G->nodes_begin(); } + static ChildIteratorType child_begin(NodeRef N) { + return {N->edges_begin(), &edge_dest}; + } + static ChildIteratorType child_end(NodeRef N) { + return {N->edges_end(), &edge_dest}; + } + + static NodeRef getNode(typename GraphT::Node const &N) { return NodeRef{&N}; } + using nodes_iterator = + mapped_iterator<typename GraphT::Node const *, decltype(&getNode)>; + static nodes_iterator nodes_begin(GraphT *G) { + return {G->nodes_begin(), &getNode}; + } + static nodes_iterator nodes_end(GraphT *G) { + return {G->nodes_end(), &getNode}; + } + + using ChildEdgeIteratorType = typename GraphT::Edge const *; + + static ChildEdgeIteratorType child_edge_begin(NodeRef N) { + return N->edges_begin(); + } + static ChildEdgeIteratorType child_edge_end(NodeRef N) { + return N->edges_end(); + } + static typename GraphT::size_type size(GraphT *G) { return G->nodes_size(); } +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_X86_IMMUTABLEGRAPH_H diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h index 0481a40d462a..a0ab5c3a5b3c 100644 --- a/llvm/lib/Target/X86/X86.h +++ b/llvm/lib/Target/X86/X86.h @@ -120,7 +120,7 @@ FunctionPass *createX86DomainReassignmentPass(); FunctionPass *createX86EvexToVexInsts(); /// This pass creates the thunks for the retpoline feature. 
-FunctionPass *createX86RetpolineThunksPass(); +FunctionPass *createX86IndirectThunksPass(); /// This pass ensures instructions featuring a memory operand /// have distinctive <LineNumber, Discriminator> (with respect to eachother) @@ -133,6 +133,9 @@ InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM, X86Subtarget &, X86RegisterBankInfo &); +FunctionPass *createX86LoadValueInjectionLoadHardeningPass(); +FunctionPass *createX86LoadValueInjectionLoadHardeningUnoptimizedPass(); +FunctionPass *createX86LoadValueInjectionRetHardeningPass(); FunctionPass *createX86SpeculativeLoadHardeningPass(); void initializeEvexToVexInstPassPass(PassRegistry &); @@ -148,6 +151,9 @@ void initializeX86DomainReassignmentPass(PassRegistry &); void initializeX86ExecutionDomainFixPass(PassRegistry &); void initializeX86ExpandPseudoPass(PassRegistry &); void initializeX86FlagsCopyLoweringPassPass(PassRegistry &); +void initializeX86LoadValueInjectionLoadHardeningUnoptimizedPassPass(PassRegistry &); +void initializeX86LoadValueInjectionLoadHardeningPassPass(PassRegistry &); +void initializeX86LoadValueInjectionRetHardeningPassPass(PassRegistry &); void initializeX86OptimizeLEAPassPass(PassRegistry &); void initializeX86SpeculativeLoadHardeningPassPass(PassRegistry &); diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index a2b11d55f650..bb8952f54e3a 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -426,6 +426,22 @@ def FeatureRetpolineExternalThunk "ourselves. Only has effect when combined with some other retpoline " "feature", [FeatureRetpolineIndirectCalls]>; +// Mitigate LVI attacks against indirect calls/branches and call returns +def FeatureLVIControlFlowIntegrity + : SubtargetFeature< + "lvi-cfi", "UseLVIControlFlowIntegrity", "true", + "Prevent indirect calls/branches from using a memory operand, and " + "precede all indirect calls/branches from a register with an " + "LFENCE instruction to serialize control flow. Also decompose RET " + "instructions into a POP+LFENCE+JMP sequence.">; + +// Mitigate LVI attacks against data loads +def FeatureLVILoadHardening + : SubtargetFeature< + "lvi-load-hardening", "UseLVILoadHardening", "true", + "Insert LFENCE instructions to prevent data speculatively injected " + "into loads from being used maliciously.">; + // Direct Move instructions. def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true", "Support movdiri instruction">; diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp index 1dbf40683564..a1d256ea872d 100644 --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -3202,8 +3202,8 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { (CalledFn && CalledFn->hasFnAttribute("no_caller_saved_registers"))) return false; - // Functions using retpoline for indirect calls need to use SDISel. - if (Subtarget->useRetpolineIndirectCalls()) + // Functions using thunks for indirect calls need to use SDISel. + if (Subtarget->useIndirectThunkCalls()) return false; // Handle only C, fastcc, and webkit_js calling conventions for now. 
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index 799c1f5d1285..1da20371caf5 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -765,10 +765,10 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF, bool InProlog) const { bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large; - // FIXME: Add retpoline support and remove this. - if (Is64Bit && IsLargeCodeModel && STI.useRetpolineIndirectCalls()) + // FIXME: Add indirect thunk support and remove this. + if (Is64Bit && IsLargeCodeModel && STI.useIndirectThunkCalls()) report_fatal_error("Emitting stack probe calls on 64-bit with the large " - "code model and retpoline not yet implemented."); + "code model and indirect thunks not yet implemented."); unsigned CallOp; if (Is64Bit) @@ -2493,9 +2493,9 @@ void X86FrameLowering::adjustForSegmentedStacks( // is laid out within 2^31 bytes of each function body, but this seems // to be sufficient for JIT. // FIXME: Add retpoline support and remove the error here.. - if (STI.useRetpolineIndirectCalls()) + if (STI.useIndirectThunkCalls()) report_fatal_error("Emitting morestack calls on 64-bit with the large " - "code model and retpoline not yet implemented."); + "code model and thunks not yet implemented."); BuildMI(allocMBB, DL, TII.get(X86::CALL64m)) .addReg(X86::RIP) .addImm(0) diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index bf33f399db28..88af0ebcfd0e 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -987,7 +987,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() { if (OptLevel != CodeGenOpt::None && // Only do this when the target can fold the load into the call or // jmp. - !Subtarget->useRetpolineIndirectCalls() && + !Subtarget->useIndirectThunkCalls() && ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) || (N->getOpcode() == X86ISD::TC_RETURN && (Subtarget->is64Bit() || diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 1523d56cc4e7..c8720d9ae3a6 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -30221,8 +30221,8 @@ bool X86TargetLowering::isVectorClearMaskLegal(ArrayRef<int> Mask, } bool X86TargetLowering::areJTsAllowed(const Function *Fn) const { - // If the subtarget is using retpolines, we need to not generate jump tables. - if (Subtarget.useRetpolineIndirectBranches()) + // If the subtarget is using thunks, we need to not generate jump tables. + if (Subtarget.useIndirectThunkBranches()) return false; // Otherwise, fallback on the generic logic. 
@@ -31345,22 +31345,22 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr &MI, return BB; } -static unsigned getOpcodeForRetpoline(unsigned RPOpc) { +static unsigned getOpcodeForIndirectThunk(unsigned RPOpc) { switch (RPOpc) { - case X86::RETPOLINE_CALL32: + case X86::INDIRECT_THUNK_CALL32: return X86::CALLpcrel32; - case X86::RETPOLINE_CALL64: + case X86::INDIRECT_THUNK_CALL64: return X86::CALL64pcrel32; - case X86::RETPOLINE_TCRETURN32: + case X86::INDIRECT_THUNK_TCRETURN32: return X86::TCRETURNdi; - case X86::RETPOLINE_TCRETURN64: + case X86::INDIRECT_THUNK_TCRETURN64: return X86::TCRETURNdi64; } - llvm_unreachable("not retpoline opcode"); + llvm_unreachable("not indirect thunk opcode"); } -static const char *getRetpolineSymbol(const X86Subtarget &Subtarget, - unsigned Reg) { +static const char *getIndirectThunkSymbol(const X86Subtarget &Subtarget, + unsigned Reg) { if (Subtarget.useRetpolineExternalThunk()) { // When using an external thunk for retpolines, we pick names that match the // names GCC happens to use as well. This helps simplify the implementation @@ -31392,39 +31392,48 @@ static const char *getRetpolineSymbol(const X86Subtarget &Subtarget, assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!"); return "__x86_indirect_thunk_r11"; } + llvm_unreachable("unexpected reg for external indirect thunk"); + } + + if (Subtarget.useRetpolineIndirectCalls() || + Subtarget.useRetpolineIndirectBranches()) { + // When targeting an internal COMDAT thunk use an LLVM-specific name. + switch (Reg) { + case X86::EAX: + assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); + return "__llvm_retpoline_eax"; + case X86::ECX: + assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); + return "__llvm_retpoline_ecx"; + case X86::EDX: + assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); + return "__llvm_retpoline_edx"; + case X86::EDI: + assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); + return "__llvm_retpoline_edi"; + case X86::R11: + assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!"); + return "__llvm_retpoline_r11"; + } llvm_unreachable("unexpected reg for retpoline"); } - // When targeting an internal COMDAT thunk use an LLVM-specific name. - switch (Reg) { - case X86::EAX: - assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); - return "__llvm_retpoline_eax"; - case X86::ECX: - assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); - return "__llvm_retpoline_ecx"; - case X86::EDX: - assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); - return "__llvm_retpoline_edx"; - case X86::EDI: - assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); - return "__llvm_retpoline_edi"; - case X86::R11: + if (Subtarget.useLVIControlFlowIntegrity()) { assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!"); - return "__llvm_retpoline_r11"; + return "__llvm_lvi_thunk_r11"; } - llvm_unreachable("unexpected reg for retpoline"); + llvm_unreachable("getIndirectThunkSymbol() invoked without thunk feature"); } MachineBasicBlock * -X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI, - MachineBasicBlock *BB) const { +X86TargetLowering::EmitLoweredIndirectThunk(MachineInstr &MI, + MachineBasicBlock *BB) const { // Copy the virtual register into the R11 physical register and // call the retpoline thunk. 
DebugLoc DL = MI.getDebugLoc(); const X86InstrInfo *TII = Subtarget.getInstrInfo(); Register CalleeVReg = MI.getOperand(0).getReg(); - unsigned Opc = getOpcodeForRetpoline(MI.getOpcode()); + unsigned Opc = getOpcodeForIndirectThunk(MI.getOpcode()); // Find an available scratch register to hold the callee. On 64-bit, we can // just use R11, but we scan for uses anyway to ensure we don't generate @@ -31458,7 +31467,7 @@ X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI, report_fatal_error("calling convention incompatible with retpoline, no " "available registers"); - const char *Symbol = getRetpolineSymbol(Subtarget, AvailableReg); + const char *Symbol = getIndirectThunkSymbol(Subtarget, AvailableReg); BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg) .addReg(CalleeVReg); @@ -32234,11 +32243,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, case X86::TLS_base_addr32: case X86::TLS_base_addr64: return EmitLoweredTLSAddr(MI, BB); - case X86::RETPOLINE_CALL32: - case X86::RETPOLINE_CALL64: - case X86::RETPOLINE_TCRETURN32: - case X86::RETPOLINE_TCRETURN64: - return EmitLoweredRetpoline(MI, BB); + case X86::INDIRECT_THUNK_CALL32: + case X86::INDIRECT_THUNK_CALL64: + case X86::INDIRECT_THUNK_TCRETURN32: + case X86::INDIRECT_THUNK_TCRETURN64: + return EmitLoweredIndirectThunk(MI, BB); case X86::CATCHRET: return EmitLoweredCatchRet(MI, BB); case X86::CATCHPAD: diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 3a17099da38f..830cdfc79c0a 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1482,8 +1482,8 @@ namespace llvm { MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI, MachineBasicBlock *BB) const; - MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI, - MachineBasicBlock *BB) const; + MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI, + MachineBasicBlock *BB) const; MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const; diff --git a/llvm/lib/Target/X86/X86IndirectThunks.cpp b/llvm/lib/Target/X86/X86IndirectThunks.cpp new file mode 100644 index 000000000000..36b9c3ccc959 --- /dev/null +++ b/llvm/lib/Target/X86/X86IndirectThunks.cpp @@ -0,0 +1,364 @@ +//==- X86IndirectThunks.cpp - Construct indirect call/jump thunks for x86 --=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// Pass that injects an MI thunk that is used to lower indirect calls in a way +/// that prevents speculation on some x86 processors and can be used to mitigate +/// security vulnerabilities due to targeted speculative execution and side +/// channels such as CVE-2017-5715. +/// +/// Currently supported thunks include: +/// - Retpoline -- A RET-implemented trampoline that lowers indirect calls +/// - LVI Thunk -- A CALL/JMP-implemented thunk that forces load serialization +/// before making an indirect call/jump +/// +/// Note that the reason that this is implemented as a MachineFunctionPass and +/// not a ModulePass is that ModulePasses at this point in the LLVM X86 pipeline +/// serialize all transformations, which can consume lots of memory. +/// +/// TODO(chandlerc): All of this code could use better comments and +/// documentation. 
+/// +//===----------------------------------------------------------------------===// + +#include "X86.h" +#include "X86InstrBuilder.h" +#include "X86Subtarget.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "x86-retpoline-thunks" + +static const char RetpolineNamePrefix[] = "__llvm_retpoline_"; +static const char R11RetpolineName[] = "__llvm_retpoline_r11"; +static const char EAXRetpolineName[] = "__llvm_retpoline_eax"; +static const char ECXRetpolineName[] = "__llvm_retpoline_ecx"; +static const char EDXRetpolineName[] = "__llvm_retpoline_edx"; +static const char EDIRetpolineName[] = "__llvm_retpoline_edi"; + +static const char LVIThunkNamePrefix[] = "__llvm_lvi_thunk_"; +static const char R11LVIThunkName[] = "__llvm_lvi_thunk_r11"; + +namespace { +template <typename Derived> class ThunkInserter { + Derived &getDerived() { return *static_cast<Derived *>(this); } + +protected: + bool InsertedThunks; + void doInitialization(Module &M) {} + void createThunkFunction(MachineModuleInfo &MMI, StringRef Name); + +public: + void init(Module &M) { + InsertedThunks = false; + getDerived().doInitialization(M); + } + // return `true` if `MMI` or `MF` was modified + bool run(MachineModuleInfo &MMI, MachineFunction &MF); +}; + +struct RetpolineThunkInserter : ThunkInserter<RetpolineThunkInserter> { + const char *getThunkPrefix() { return RetpolineNamePrefix; } + bool mayUseThunk(const MachineFunction &MF) { + const auto &STI = MF.getSubtarget<X86Subtarget>(); + return (STI.useRetpolineIndirectCalls() || + STI.useRetpolineIndirectBranches()) && + !STI.useRetpolineExternalThunk(); + } + void insertThunks(MachineModuleInfo &MMI); + void populateThunk(MachineFunction &MF); +}; + +struct LVIThunkInserter : ThunkInserter<LVIThunkInserter> { + const char *getThunkPrefix() { return LVIThunkNamePrefix; } + bool mayUseThunk(const MachineFunction &MF) { + return MF.getSubtarget<X86Subtarget>().useLVIControlFlowIntegrity(); + } + void insertThunks(MachineModuleInfo &MMI) { + createThunkFunction(MMI, R11LVIThunkName); + } + void populateThunk(MachineFunction &MF) { + // Grab the entry MBB and erase any other blocks. O0 codegen appears to + // generate two bbs for the entry block. + MachineBasicBlock *Entry = &MF.front(); + Entry->clear(); + while (MF.size() > 1) + MF.erase(std::next(MF.begin())); + + // This code mitigates LVI by replacing each indirect call/jump with a + // direct call/jump to a thunk that looks like: + // ``` + // lfence + // jmpq *%r11 + // ``` + // This ensures that if the value in register %r11 was loaded from memory, + // then the value in %r11 is (architecturally) correct prior to the jump. 
+ const TargetInstrInfo *TII = MF.getSubtarget<X86Subtarget>().getInstrInfo(); + BuildMI(&MF.front(), DebugLoc(), TII->get(X86::LFENCE)); + BuildMI(&MF.front(), DebugLoc(), TII->get(X86::JMP64r)).addReg(X86::R11); + MF.front().addLiveIn(X86::R11); + return; + } +}; + +class X86IndirectThunks : public MachineFunctionPass { +public: + static char ID; + + X86IndirectThunks() : MachineFunctionPass(ID) {} + + StringRef getPassName() const override { return "X86 Indirect Thunks"; } + + bool doInitialization(Module &M) override; + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + MachineFunctionPass::getAnalysisUsage(AU); + AU.addRequired<MachineModuleInfoWrapperPass>(); + AU.addPreserved<MachineModuleInfoWrapperPass>(); + } + +private: + std::tuple<RetpolineThunkInserter, LVIThunkInserter> TIs; + + // FIXME: When LLVM moves to C++17, these can become folds + template <typename... ThunkInserterT> + static void initTIs(Module &M, + std::tuple<ThunkInserterT...> &ThunkInserters) { + (void)std::initializer_list<int>{ + (std::get<ThunkInserterT>(ThunkInserters).init(M), 0)...}; + } + template <typename... ThunkInserterT> + static bool runTIs(MachineModuleInfo &MMI, MachineFunction &MF, + std::tuple<ThunkInserterT...> &ThunkInserters) { + bool Modified = false; + (void)std::initializer_list<int>{ + Modified |= std::get<ThunkInserterT>(ThunkInserters).run(MMI, MF)...}; + return Modified; + } +}; + +} // end anonymous namespace + +void RetpolineThunkInserter::insertThunks(MachineModuleInfo &MMI) { + if (MMI.getTarget().getTargetTriple().getArch() == Triple::x86_64) + createThunkFunction(MMI, R11RetpolineName); + else + for (StringRef Name : {EAXRetpolineName, ECXRetpolineName, EDXRetpolineName, + EDIRetpolineName}) + createThunkFunction(MMI, Name); +} + +void RetpolineThunkInserter::populateThunk(MachineFunction &MF) { + bool Is64Bit = MF.getTarget().getTargetTriple().getArch() == Triple::x86_64; + Register ThunkReg; + if (Is64Bit) { + assert(MF.getName() == "__llvm_retpoline_r11" && + "Should only have an r11 thunk on 64-bit targets"); + + // __llvm_retpoline_r11: + // callq .Lr11_call_target + // .Lr11_capture_spec: + // pause + // lfence + // jmp .Lr11_capture_spec + // .align 16 + // .Lr11_call_target: + // movq %r11, (%rsp) + // retq + ThunkReg = X86::R11; + } else { + // For 32-bit targets we need to emit a collection of thunks for various + // possible scratch registers as well as a fallback that uses EDI, which is + // normally callee saved. + // __llvm_retpoline_eax: + // calll .Leax_call_target + // .Leax_capture_spec: + // pause + // jmp .Leax_capture_spec + // .align 16 + // .Leax_call_target: + // movl %eax, (%esp) # Clobber return addr + // retl + // + // __llvm_retpoline_ecx: + // ... # Same setup + // movl %ecx, (%esp) + // retl + // + // __llvm_retpoline_edx: + // ... # Same setup + // movl %edx, (%esp) + // retl + // + // __llvm_retpoline_edi: + // ... # Same setup + // movl %edi, (%esp) + // retl + if (MF.getName() == EAXRetpolineName) + ThunkReg = X86::EAX; + else if (MF.getName() == ECXRetpolineName) + ThunkReg = X86::ECX; + else if (MF.getName() == EDXRetpolineName) + ThunkReg = X86::EDX; + else if (MF.getName() == EDIRetpolineName) + ThunkReg = X86::EDI; + else + llvm_unreachable("Invalid thunk name on x86-32!"); + } + + const TargetInstrInfo *TII = MF.getSubtarget<X86Subtarget>().getInstrInfo(); + // Grab the entry MBB and erase any other blocks. O0 codegen appears to + // generate two bbs for the entry block. 
+ MachineBasicBlock *Entry = &MF.front(); + Entry->clear(); + while (MF.size() > 1) + MF.erase(std::next(MF.begin())); + + MachineBasicBlock *CaptureSpec = + MF.CreateMachineBasicBlock(Entry->getBasicBlock()); + MachineBasicBlock *CallTarget = + MF.CreateMachineBasicBlock(Entry->getBasicBlock()); + MCSymbol *TargetSym = MF.getContext().createTempSymbol(); + MF.push_back(CaptureSpec); + MF.push_back(CallTarget); + + const unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32; + const unsigned RetOpc = Is64Bit ? X86::RETQ : X86::RETL; + + Entry->addLiveIn(ThunkReg); + BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addSym(TargetSym); + + // The MIR verifier thinks that the CALL in the entry block will fall through + // to CaptureSpec, so mark it as the successor. Technically, CaptureTarget is + // the successor, but the MIR verifier doesn't know how to cope with that. + Entry->addSuccessor(CaptureSpec); + + // In the capture loop for speculation, we want to stop the processor from + // speculating as fast as possible. On Intel processors, the PAUSE instruction + // will block speculation without consuming any execution resources. On AMD + // processors, the PAUSE instruction is (essentially) a nop, so we also use an + // LFENCE instruction which they have advised will stop speculation as well + // with minimal resource utilization. We still end the capture with a jump to + // form an infinite loop to fully guarantee that no matter what implementation + // of the x86 ISA, speculating this code path never escapes. + BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::PAUSE)); + BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::LFENCE)); + BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::JMP_1)).addMBB(CaptureSpec); + CaptureSpec->setHasAddressTaken(); + CaptureSpec->addSuccessor(CaptureSpec); + + CallTarget->addLiveIn(ThunkReg); + CallTarget->setHasAddressTaken(); + CallTarget->setAlignment(Align(16)); + + // Insert return address clobber + const unsigned MovOpc = Is64Bit ? X86::MOV64mr : X86::MOV32mr; + const Register SPReg = Is64Bit ? X86::RSP : X86::ESP; + addRegOffset(BuildMI(CallTarget, DebugLoc(), TII->get(MovOpc)), SPReg, false, + 0) + .addReg(ThunkReg); + + CallTarget->back().setPreInstrSymbol(MF, TargetSym); + BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc)); +} + +template <typename Derived> +void ThunkInserter<Derived>::createThunkFunction(MachineModuleInfo &MMI, + StringRef Name) { + assert(Name.startswith(getDerived().getThunkPrefix()) && + "Created a thunk with an unexpected prefix!"); + + Module &M = const_cast<Module &>(*MMI.getModule()); + LLVMContext &Ctx = M.getContext(); + auto Type = FunctionType::get(Type::getVoidTy(Ctx), false); + Function *F = + Function::Create(Type, GlobalValue::LinkOnceODRLinkage, Name, &M); + F->setVisibility(GlobalValue::HiddenVisibility); + F->setComdat(M.getOrInsertComdat(Name)); + + // Add Attributes so that we don't create a frame, unwind information, or + // inline. + AttrBuilder B; + B.addAttribute(llvm::Attribute::NoUnwind); + B.addAttribute(llvm::Attribute::Naked); + F->addAttributes(llvm::AttributeList::FunctionIndex, B); + + // Populate our function a bit so that we can verify. + BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F); + IRBuilder<> Builder(Entry); + + Builder.CreateRetVoid(); + + // MachineFunctions/MachineBasicBlocks aren't created automatically for the + // IR-level constructs we already made. Create them and insert them into the + // module. 
+ MachineFunction &MF = MMI.getOrCreateMachineFunction(*F); + MachineBasicBlock *EntryMBB = MF.CreateMachineBasicBlock(Entry); + + // Insert EntryMBB into MF. It's not in the module until we do this. + MF.insert(MF.end(), EntryMBB); + // Set MF properties. We never use vregs... + MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs); +} + +template <typename Derived> +bool ThunkInserter<Derived>::run(MachineModuleInfo &MMI, MachineFunction &MF) { + // If MF is not a thunk, check to see if we need to insert a thunk. + if (!MF.getName().startswith(getDerived().getThunkPrefix())) { + // If we've already inserted a thunk, nothing else to do. + if (InsertedThunks) + return false; + + // Only add a thunk if one of the functions has the corresponding feature + // enabled in its subtarget, and doesn't enable external thunks. + // FIXME: Conditionalize on indirect calls so we don't emit a thunk when + // nothing will end up calling it. + // FIXME: It's a little silly to look at every function just to enumerate + // the subtargets, but eventually we'll want to look at them for indirect + // calls, so maybe this is OK. + if (!getDerived().mayUseThunk(MF)) + return false; + + getDerived().insertThunks(MMI); + InsertedThunks = true; + return true; + } + + // If this *is* a thunk function, we need to populate it with the correct MI. + getDerived().populateThunk(MF); + return true; +} + +FunctionPass *llvm::createX86IndirectThunksPass() { + return new X86IndirectThunks(); +} + +char X86IndirectThunks::ID = 0; + +bool X86IndirectThunks::doInitialization(Module &M) { + initTIs(M, TIs); + return false; +} + +bool X86IndirectThunks::runOnMachineFunction(MachineFunction &MF) { + LLVM_DEBUG(dbgs() << getPassName() << '\n'); + auto &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); + return runTIs(MMI, MF, TIs); +} diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index 78d8dd3c0d03..1fdac104cb73 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -1213,14 +1213,14 @@ def X86tcret_6regs : PatFrag<(ops node:$ptr, node:$off), def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off), (TCRETURNri ptr_rc_tailcall:$dst, imm:$off)>, - Requires<[Not64BitMode, NotUseRetpolineIndirectCalls]>; + Requires<[Not64BitMode, NotUseIndirectThunkCalls]>; // FIXME: This is disabled for 32-bit PIC mode because the global base // register which is part of the address mode may be assigned a // callee-saved register. def : Pat<(X86tcret (load addr:$dst), imm:$off), (TCRETURNmi addr:$dst, imm:$off)>, - Requires<[Not64BitMode, IsNotPIC, NotUseRetpolineIndirectCalls]>; + Requires<[Not64BitMode, IsNotPIC, NotUseIndirectThunkCalls]>; def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off), (TCRETURNdi tglobaladdr:$dst, imm:$off)>, @@ -1232,21 +1232,21 @@ def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off), def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off), (TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>, - Requires<[In64BitMode, NotUseRetpolineIndirectCalls]>; + Requires<[In64BitMode, NotUseIndirectThunkCalls]>; // Don't fold loads into X86tcret requiring more than 6 regs. // There wouldn't be enough scratch registers for base+index. 
def : Pat<(X86tcret_6regs (load addr:$dst), imm:$off), (TCRETURNmi64 addr:$dst, imm:$off)>, - Requires<[In64BitMode, NotUseRetpolineIndirectCalls]>; + Requires<[In64BitMode, NotUseIndirectThunkCalls]>; def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off), - (RETPOLINE_TCRETURN64 ptr_rc_tailcall:$dst, imm:$off)>, - Requires<[In64BitMode, UseRetpolineIndirectCalls]>; + (INDIRECT_THUNK_TCRETURN64 ptr_rc_tailcall:$dst, imm:$off)>, + Requires<[In64BitMode, UseIndirectThunkCalls]>; def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off), - (RETPOLINE_TCRETURN32 ptr_rc_tailcall:$dst, imm:$off)>, - Requires<[Not64BitMode, UseRetpolineIndirectCalls]>; + (INDIRECT_THUNK_TCRETURN32 ptr_rc_tailcall:$dst, imm:$off)>, + Requires<[Not64BitMode, UseIndirectThunkCalls]>; def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off), (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>, diff --git a/llvm/lib/Target/X86/X86InstrControl.td b/llvm/lib/Target/X86/X86InstrControl.td index 32faeb1a86f2..1842dc19ec2e 100644 --- a/llvm/lib/Target/X86/X86InstrControl.td +++ b/llvm/lib/Target/X86/X86InstrControl.td @@ -237,13 +237,13 @@ let isCall = 1 in Sched<[WriteJumpLd]>; def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst), "call{l}\t{*}$dst", [(X86call GR32:$dst)]>, OpSize32, - Requires<[Not64BitMode,NotUseRetpolineIndirectCalls]>, + Requires<[Not64BitMode,NotUseIndirectThunkCalls]>, Sched<[WriteJump]>; def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst), "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))]>, OpSize32, Requires<[Not64BitMode,FavorMemIndirectCall, - NotUseRetpolineIndirectCalls]>, + NotUseIndirectThunkCalls]>, Sched<[WriteJumpLd]>; // Non-tracking calls for IBT, use with caution. @@ -334,11 +334,11 @@ let isCall = 1, Uses = [RSP, SSP], SchedRW = [WriteJump] in { Requires<[In64BitMode]>; def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst), "call{q}\t{*}$dst", [(X86call GR64:$dst)]>, - Requires<[In64BitMode,NotUseRetpolineIndirectCalls]>; + Requires<[In64BitMode,NotUseIndirectThunkCalls]>; def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst), "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>, Requires<[In64BitMode,FavorMemIndirectCall, - NotUseRetpolineIndirectCalls]>; + NotUseIndirectThunkCalls]>; // Non-tracking calls for IBT, use with caution. let isCodeGenOnly = 1 in { @@ -393,19 +393,19 @@ let isPseudo = 1, isCall = 1, isCodeGenOnly = 1, Uses = [RSP, SSP], usesCustomInserter = 1, SchedRW = [WriteJump] in { - def RETPOLINE_CALL32 : + def INDIRECT_THUNK_CALL32 : PseudoI<(outs), (ins GR32:$dst), [(X86call GR32:$dst)]>, - Requires<[Not64BitMode,UseRetpolineIndirectCalls]>; + Requires<[Not64BitMode,UseIndirectThunkCalls]>; - def RETPOLINE_CALL64 : + def INDIRECT_THUNK_CALL64 : PseudoI<(outs), (ins GR64:$dst), [(X86call GR64:$dst)]>, - Requires<[In64BitMode,UseRetpolineIndirectCalls]>; + Requires<[In64BitMode,UseIndirectThunkCalls]>; - // Retpoline variant of indirect tail calls. + // Indirect thunk variant of indirect tail calls. 
let isTerminator = 1, isReturn = 1, isBarrier = 1 in { - def RETPOLINE_TCRETURN64 : + def INDIRECT_THUNK_TCRETURN64 : PseudoI<(outs), (ins GR64:$dst, i32imm:$offset), []>; - def RETPOLINE_TCRETURN32 : + def INDIRECT_THUNK_TCRETURN32 : PseudoI<(outs), (ins GR32:$dst, i32imm:$offset), []>; } } diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index ca5425e8b89f..93f40c8ec996 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -996,8 +996,8 @@ def HasFastLZCNT : Predicate<"Subtarget->hasFastLZCNT()">; def HasFastSHLDRotate : Predicate<"Subtarget->hasFastSHLDRotate()">; def HasERMSB : Predicate<"Subtarget->hasERMSB()">; def HasMFence : Predicate<"Subtarget->hasMFence()">; -def UseRetpolineIndirectCalls : Predicate<"Subtarget->useRetpolineIndirectCalls()">; -def NotUseRetpolineIndirectCalls : Predicate<"!Subtarget->useRetpolineIndirectCalls()">; +def UseIndirectThunkCalls : Predicate<"Subtarget->useIndirectThunkCalls()">; +def NotUseIndirectThunkCalls : Predicate<"!Subtarget->useIndirectThunkCalls()">; //===----------------------------------------------------------------------===// // X86 Instruction Format Definitions. diff --git a/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp b/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp new file mode 100644 index 000000000000..35fc439998f9 --- /dev/null +++ b/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp @@ -0,0 +1,900 @@ +//==-- X86LoadValueInjectionLoadHardening.cpp - LVI load hardening for x86 --=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// Description: This pass finds Load Value Injection (LVI) gadgets consisting +/// of a load from memory (i.e., SOURCE), and any operation that may transmit +/// the value loaded from memory over a covert channel, or use the value loaded +/// from memory to determine a branch/call target (i.e., SINK). After finding +/// all such gadgets in a given function, the pass minimally inserts LFENCE +/// instructions in such a manner that the following property is satisfied: for +/// all SOURCE+SINK pairs, all paths in the CFG from SOURCE to SINK contain at +/// least one LFENCE instruction. The algorithm that implements this minimal +/// insertion is influenced by an academic paper that minimally inserts memory +/// fences for high-performance concurrent programs: +/// http://www.cs.ucr.edu/~lesani/companion/oopsla15/OOPSLA15.pdf +/// The algorithm implemented in this pass is as follows: +/// 1. Build a condensed CFG (i.e., a GadgetGraph) consisting only of the +/// following components: +/// - SOURCE instructions (also includes function arguments) +/// - SINK instructions +/// - Basic block entry points +/// - Basic block terminators +/// - LFENCE instructions +/// 2. Analyze the GadgetGraph to determine which SOURCE+SINK pairs (i.e., +/// gadgets) are already mitigated by existing LFENCEs. If all gadgets have been +/// mitigated, go to step 6. +/// 3. Use a heuristic or plugin to approximate minimal LFENCE insertion. +/// 4. Insert one LFENCE along each CFG edge that was cut in step 3. +/// 5. Go to step 2. +/// 6. If any LFENCEs were inserted, return `true` from runOnMachineFunction() +/// to tell LLVM that the function was modified. 
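Before the implementation, a hypothetical source-level illustration (not taken from the patch) of the SOURCE/SINK pairing and of the property the numbered algorithm above enforces on the generated machine code:
```
// Hypothetical example of an LVI gadget pair. The pass works on machine IR,
// but at the source level the pattern it targets looks like this:
using Callback = void (*)();

void dispatch(Callback *Table, long Index) {
  Callback F = Table[Index]; // SOURCE: value loaded from memory
  F();                       // SINK: the loaded value decides the call target
}
// Property enforced by the pass: every CFG path from the load of `F` to the
// indirect call carries at least one LFENCE, so a speculatively injected
// value in `F` cannot steer the call before it is architecturally resolved.
```
The unoptimized variant later in this file achieves the same property more bluntly by inserting an LFENCE after every load.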
+/// +//===----------------------------------------------------------------------===// + +#include "ImmutableGraph.h" +#include "X86.h" +#include "X86Subtarget.h" +#include "X86TargetMachine.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineDominanceFrontier.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RDFGraph.h" +#include "llvm/CodeGen/RDFLiveness.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/DOTGraphTraits.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/DynamicLibrary.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define PASS_KEY "x86-lvi-load" +#define DEBUG_TYPE PASS_KEY + +STATISTIC(NumFences, "Number of LFENCEs inserted for LVI mitigation"); +STATISTIC(NumFunctionsConsidered, "Number of functions analyzed"); +STATISTIC(NumFunctionsMitigated, "Number of functions for which mitigations " + "were deployed"); +STATISTIC(NumGadgets, "Number of LVI gadgets detected during analysis"); + +static cl::opt<std::string> OptimizePluginPath( + PASS_KEY "-opt-plugin", + cl::desc("Specify a plugin to optimize LFENCE insertion"), cl::Hidden); + +static cl::opt<bool> NoConditionalBranches( + PASS_KEY "-no-cbranch", + cl::desc("Don't treat conditional branches as disclosure gadgets. 
This " + "may improve performance, at the cost of security."), + cl::init(false), cl::Hidden); + +static cl::opt<bool> EmitDot( + PASS_KEY "-dot", + cl::desc( + "For each function, emit a dot graph depicting potential LVI gadgets"), + cl::init(false), cl::Hidden); + +static cl::opt<bool> EmitDotOnly( + PASS_KEY "-dot-only", + cl::desc("For each function, emit a dot graph depicting potential LVI " + "gadgets, and do not insert any fences"), + cl::init(false), cl::Hidden); + +static cl::opt<bool> EmitDotVerify( + PASS_KEY "-dot-verify", + cl::desc("For each function, emit a dot graph to stdout depicting " + "potential LVI gadgets, used for testing purposes only"), + cl::init(false), cl::Hidden); + +static llvm::sys::DynamicLibrary OptimizeDL; +typedef int (*OptimizeCutT)(unsigned int *nodes, unsigned int nodes_size, + unsigned int *edges, int *edge_values, + int *cut_edges /* out */, unsigned int edges_size); +static OptimizeCutT OptimizeCut = nullptr; + +namespace { + +struct MachineGadgetGraph : ImmutableGraph<MachineInstr *, int> { + static constexpr int GadgetEdgeSentinel = -1; + static constexpr MachineInstr *const ArgNodeSentinel = nullptr; + + using GraphT = ImmutableGraph<MachineInstr *, int>; + using Node = typename GraphT::Node; + using Edge = typename GraphT::Edge; + using size_type = typename GraphT::size_type; + MachineGadgetGraph(std::unique_ptr<Node[]> Nodes, + std::unique_ptr<Edge[]> Edges, size_type NodesSize, + size_type EdgesSize, int NumFences = 0, int NumGadgets = 0) + : GraphT(std::move(Nodes), std::move(Edges), NodesSize, EdgesSize), + NumFences(NumFences), NumGadgets(NumGadgets) {} + static inline bool isCFGEdge(const Edge &E) { + return E.getValue() != GadgetEdgeSentinel; + } + static inline bool isGadgetEdge(const Edge &E) { + return E.getValue() == GadgetEdgeSentinel; + } + int NumFences; + int NumGadgets; +}; + +class X86LoadValueInjectionLoadHardeningPass : public MachineFunctionPass { +public: + X86LoadValueInjectionLoadHardeningPass() : MachineFunctionPass(ID) {} + + StringRef getPassName() const override { + return "X86 Load Value Injection (LVI) Load Hardening"; + } + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnMachineFunction(MachineFunction &MF) override; + + static char ID; + +private: + using GraphBuilder = ImmutableGraphBuilder<MachineGadgetGraph>; + using EdgeSet = MachineGadgetGraph::EdgeSet; + using NodeSet = MachineGadgetGraph::NodeSet; + using Gadget = std::pair<MachineInstr *, MachineInstr *>; + + const X86Subtarget *STI; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + + std::unique_ptr<MachineGadgetGraph> + getGadgetGraph(MachineFunction &MF, const MachineLoopInfo &MLI, + const MachineDominatorTree &MDT, + const MachineDominanceFrontier &MDF) const; + int hardenLoadsWithPlugin(MachineFunction &MF, + std::unique_ptr<MachineGadgetGraph> Graph) const; + int hardenLoadsWithGreedyHeuristic( + MachineFunction &MF, std::unique_ptr<MachineGadgetGraph> Graph) const; + int elimMitigatedEdgesAndNodes(MachineGadgetGraph &G, + EdgeSet &ElimEdges /* in, out */, + NodeSet &ElimNodes /* in, out */) const; + std::unique_ptr<MachineGadgetGraph> + trimMitigatedEdges(std::unique_ptr<MachineGadgetGraph> Graph) const; + void findAndCutEdges(MachineGadgetGraph &G, + EdgeSet &CutEdges /* out */) const; + int insertFences(MachineFunction &MF, MachineGadgetGraph &G, + EdgeSet &CutEdges /* in, out */) const; + bool instrUsesRegToAccessMemory(const MachineInstr &I, unsigned Reg) const; + bool instrUsesRegToBranch(const MachineInstr 
&I, unsigned Reg) const; + inline bool isFence(const MachineInstr *MI) const { + return MI && (MI->getOpcode() == X86::LFENCE || + (STI->useLVIControlFlowIntegrity() && MI->isCall())); + } +}; + +} // end anonymous namespace + +namespace llvm { + +template <> +struct GraphTraits<MachineGadgetGraph *> + : GraphTraits<ImmutableGraph<MachineInstr *, int> *> {}; + +template <> +struct DOTGraphTraits<MachineGadgetGraph *> : DefaultDOTGraphTraits { + using GraphType = MachineGadgetGraph; + using Traits = llvm::GraphTraits<GraphType *>; + using NodeRef = typename Traits::NodeRef; + using EdgeRef = typename Traits::EdgeRef; + using ChildIteratorType = typename Traits::ChildIteratorType; + using ChildEdgeIteratorType = typename Traits::ChildEdgeIteratorType; + + DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {} + + std::string getNodeLabel(NodeRef Node, GraphType *) { + if (Node->getValue() == MachineGadgetGraph::ArgNodeSentinel) + return "ARGS"; + + std::string Str; + raw_string_ostream OS(Str); + OS << *Node->getValue(); + return OS.str(); + } + + static std::string getNodeAttributes(NodeRef Node, GraphType *) { + MachineInstr *MI = Node->getValue(); + if (MI == MachineGadgetGraph::ArgNodeSentinel) + return "color = blue"; + if (MI->getOpcode() == X86::LFENCE) + return "color = green"; + return ""; + } + + static std::string getEdgeAttributes(NodeRef, ChildIteratorType E, + GraphType *) { + int EdgeVal = (*E.getCurrent()).getValue(); + return EdgeVal >= 0 ? "label = " + std::to_string(EdgeVal) + : "color = red, style = \"dashed\""; + } +}; + +} // end namespace llvm + +constexpr MachineInstr *MachineGadgetGraph::ArgNodeSentinel; +constexpr int MachineGadgetGraph::GadgetEdgeSentinel; + +char X86LoadValueInjectionLoadHardeningPass::ID = 0; + +void X86LoadValueInjectionLoadHardeningPass::getAnalysisUsage( + AnalysisUsage &AU) const { + MachineFunctionPass::getAnalysisUsage(AU); + AU.addRequired<MachineLoopInfo>(); + AU.addRequired<MachineDominatorTree>(); + AU.addRequired<MachineDominanceFrontier>(); + AU.setPreservesCFG(); +} + +static void WriteGadgetGraph(raw_ostream &OS, MachineFunction &MF, + MachineGadgetGraph *G) { + WriteGraph(OS, G, /*ShortNames*/ false, + "Speculative gadgets for \"" + MF.getName() + "\" function"); +} + +bool X86LoadValueInjectionLoadHardeningPass::runOnMachineFunction( + MachineFunction &MF) { + LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName() + << " *****\n"); + STI = &MF.getSubtarget<X86Subtarget>(); + if (!STI->useLVILoadHardening()) + return false; + + // FIXME: support 32-bit + if (!STI->is64Bit()) + report_fatal_error("LVI load hardening is only supported on 64-bit", false); + + // Don't skip functions with the "optnone" attr but participate in opt-bisect. + const Function &F = MF.getFunction(); + if (!F.hasOptNone() && skipFunction(F)) + return false; + + ++NumFunctionsConsidered; + TII = STI->getInstrInfo(); + TRI = STI->getRegisterInfo(); + LLVM_DEBUG(dbgs() << "Building gadget graph...\n"); + const auto &MLI = getAnalysis<MachineLoopInfo>(); + const auto &MDT = getAnalysis<MachineDominatorTree>(); + const auto &MDF = getAnalysis<MachineDominanceFrontier>(); + std::unique_ptr<MachineGadgetGraph> Graph = getGadgetGraph(MF, MLI, MDT, MDF); + LLVM_DEBUG(dbgs() << "Building gadget graph... 
Done\n"); + if (Graph == nullptr) + return false; // didn't find any gadgets + + if (EmitDotVerify) { + WriteGadgetGraph(outs(), MF, Graph.get()); + return false; + } + + if (EmitDot || EmitDotOnly) { + LLVM_DEBUG(dbgs() << "Emitting gadget graph...\n"); + std::error_code FileError; + std::string FileName = "lvi."; + FileName += MF.getName(); + FileName += ".dot"; + raw_fd_ostream FileOut(FileName, FileError); + if (FileError) + errs() << FileError.message(); + WriteGadgetGraph(FileOut, MF, Graph.get()); + FileOut.close(); + LLVM_DEBUG(dbgs() << "Emitting gadget graph... Done\n"); + if (EmitDotOnly) + return false; + } + + int FencesInserted; + if (!OptimizePluginPath.empty()) { + if (!OptimizeDL.isValid()) { + std::string ErrorMsg; + OptimizeDL = llvm::sys::DynamicLibrary::getPermanentLibrary( + OptimizePluginPath.c_str(), &ErrorMsg); + if (!ErrorMsg.empty()) + report_fatal_error("Failed to load opt plugin: \"" + ErrorMsg + '\"'); + OptimizeCut = (OptimizeCutT)OptimizeDL.getAddressOfSymbol("optimize_cut"); + if (!OptimizeCut) + report_fatal_error("Invalid optimization plugin"); + } + FencesInserted = hardenLoadsWithPlugin(MF, std::move(Graph)); + } else { // Use the default greedy heuristic + FencesInserted = hardenLoadsWithGreedyHeuristic(MF, std::move(Graph)); + } + + if (FencesInserted > 0) + ++NumFunctionsMitigated; + NumFences += FencesInserted; + return (FencesInserted > 0); +} + +std::unique_ptr<MachineGadgetGraph> +X86LoadValueInjectionLoadHardeningPass::getGadgetGraph( + MachineFunction &MF, const MachineLoopInfo &MLI, + const MachineDominatorTree &MDT, + const MachineDominanceFrontier &MDF) const { + using namespace rdf; + + // Build the Register Dataflow Graph using the RDF framework + TargetOperandInfo TOI{*TII}; + DataFlowGraph DFG{MF, *TII, *TRI, MDT, MDF, TOI}; + DFG.build(); + Liveness L{MF.getRegInfo(), DFG}; + L.computePhiInfo(); + + GraphBuilder Builder; + using GraphIter = typename GraphBuilder::BuilderNodeRef; + DenseMap<MachineInstr *, GraphIter> NodeMap; + int FenceCount = 0, GadgetCount = 0; + auto MaybeAddNode = [&NodeMap, &Builder](MachineInstr *MI) { + auto Ref = NodeMap.find(MI); + if (Ref == NodeMap.end()) { + auto I = Builder.addVertex(MI); + NodeMap[MI] = I; + return std::pair<GraphIter, bool>{I, true}; + } + return std::pair<GraphIter, bool>{Ref->getSecond(), false}; + }; + + // The `Transmitters` map memoizes transmitters found for each def. If a def + // has not yet been analyzed, then it will not appear in the map. If a def + // has been analyzed and was determined not to have any transmitters, then + // its list of transmitters will be empty. 
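A minimal sketch of the memoization convention just described, using stand-in types (the actual declaration that follows keys an llvm::DenseMap on rdf::NodeId):
```
// Sketch of the Transmitters convention: an absent key means "not yet
// analyzed", while a present key with an empty vector means "analyzed and
// found to have no transmitters".
#include <unordered_map>
#include <vector>

using DefId = unsigned;
std::unordered_map<DefId, std::vector<DefId>> Transmitters;

bool alreadyAnalyzed(DefId Def) {
  return Transmitters.find(Def) != Transmitters.end();
}

bool analyzedWithNoTransmitters(DefId Def) {
  auto It = Transmitters.find(Def);
  return It != Transmitters.end() && It->second.empty();
}
```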
+ DenseMap<NodeId, std::vector<NodeId>> Transmitters; + + // Analyze all machine instructions to find gadgets and LFENCEs, adding + // each interesting value to `Nodes` + auto AnalyzeDef = [&](NodeAddr<DefNode *> SourceDef) { + SmallSet<NodeId, 8> UsesVisited, DefsVisited; + std::function<void(NodeAddr<DefNode *>)> AnalyzeDefUseChain = + [&](NodeAddr<DefNode *> Def) { + if (Transmitters.find(Def.Id) != Transmitters.end()) + return; // Already analyzed `Def` + + // Use RDF to find all the uses of `Def` + rdf::NodeSet Uses; + RegisterRef DefReg = DFG.getPRI().normalize(Def.Addr->getRegRef(DFG)); + for (auto UseID : L.getAllReachedUses(DefReg, Def)) { + auto Use = DFG.addr<UseNode *>(UseID); + if (Use.Addr->getFlags() & NodeAttrs::PhiRef) { // phi node + NodeAddr<PhiNode *> Phi = Use.Addr->getOwner(DFG); + for (auto I : L.getRealUses(Phi.Id)) { + if (DFG.getPRI().alias(RegisterRef(I.first), DefReg)) { + for (auto UA : I.second) + Uses.emplace(UA.first); + } + } + } else { // not a phi node + Uses.emplace(UseID); + } + } + + // For each use of `Def`, we want to know whether: + // (1) The use can leak the Def'ed value, + // (2) The use can further propagate the Def'ed value to more defs + for (auto UseID : Uses) { + if (!UsesVisited.insert(UseID).second) + continue; // Already visited this use of `Def` + + auto Use = DFG.addr<UseNode *>(UseID); + assert(!(Use.Addr->getFlags() & NodeAttrs::PhiRef)); + MachineOperand &UseMO = Use.Addr->getOp(); + MachineInstr &UseMI = *UseMO.getParent(); + assert(UseMO.isReg()); + + // We naively assume that an instruction propagates any loaded + // uses to all defs unless the instruction is a call, in which + // case all arguments will be treated as gadget sources during + // analysis of the callee function. + if (UseMI.isCall()) + continue; + + // Check whether this use can transmit (leak) its value. + if (instrUsesRegToAccessMemory(UseMI, UseMO.getReg()) || + (!NoConditionalBranches && + instrUsesRegToBranch(UseMI, UseMO.getReg()))) { + Transmitters[Def.Id].push_back(Use.Addr->getOwner(DFG).Id); + if (UseMI.mayLoad()) + continue; // Found a transmitting load -- no need to continue + // traversing its defs (i.e., this load will become + // a new gadget source anyways). + } + + // Check whether the use propagates to more defs. + NodeAddr<InstrNode *> Owner{Use.Addr->getOwner(DFG)}; + rdf::NodeList AnalyzedChildDefs; + for (auto &ChildDef : + Owner.Addr->members_if(DataFlowGraph::IsDef, DFG)) { + if (!DefsVisited.insert(ChildDef.Id).second) + continue; // Already visited this def + if (Def.Addr->getAttrs() & NodeAttrs::Dead) + continue; + if (Def.Id == ChildDef.Id) + continue; // `Def` uses itself (e.g., increment loop counter) + + AnalyzeDefUseChain(ChildDef); + + // `Def` inherits all of its child defs' transmitters. + for (auto TransmitterId : Transmitters[ChildDef.Id]) + Transmitters[Def.Id].push_back(TransmitterId); + } + } + + // Note that this statement adds `Def.Id` to the map if no + // transmitters were found for `Def`. + auto &DefTransmitters = Transmitters[Def.Id]; + + // Remove duplicate transmitters + llvm::sort(DefTransmitters); + DefTransmitters.erase( + std::unique(DefTransmitters.begin(), DefTransmitters.end()), + DefTransmitters.end()); + }; + + // Find all of the transmitters + AnalyzeDefUseChain(SourceDef); + auto &SourceDefTransmitters = Transmitters[SourceDef.Id]; + if (SourceDefTransmitters.empty()) + return; // No transmitters for `SourceDef` + + MachineInstr *Source = SourceDef.Addr->getFlags() & NodeAttrs::PhiRef + ? 
MachineGadgetGraph::ArgNodeSentinel + : SourceDef.Addr->getOp().getParent(); + auto GadgetSource = MaybeAddNode(Source); + // Each transmitter is a sink for `SourceDef`. + for (auto TransmitterId : SourceDefTransmitters) { + MachineInstr *Sink = DFG.addr<StmtNode *>(TransmitterId).Addr->getCode(); + auto GadgetSink = MaybeAddNode(Sink); + // Add the gadget edge to the graph. + Builder.addEdge(MachineGadgetGraph::GadgetEdgeSentinel, + GadgetSource.first, GadgetSink.first); + ++GadgetCount; + } + }; + + LLVM_DEBUG(dbgs() << "Analyzing def-use chains to find gadgets\n"); + // Analyze function arguments + NodeAddr<BlockNode *> EntryBlock = DFG.getFunc().Addr->getEntryBlock(DFG); + for (NodeAddr<PhiNode *> ArgPhi : + EntryBlock.Addr->members_if(DataFlowGraph::IsPhi, DFG)) { + NodeList Defs = ArgPhi.Addr->members_if(DataFlowGraph::IsDef, DFG); + llvm::for_each(Defs, AnalyzeDef); + } + // Analyze every instruction in MF + for (NodeAddr<BlockNode *> BA : DFG.getFunc().Addr->members(DFG)) { + for (NodeAddr<StmtNode *> SA : + BA.Addr->members_if(DataFlowGraph::IsCode<NodeAttrs::Stmt>, DFG)) { + MachineInstr *MI = SA.Addr->getCode(); + if (isFence(MI)) { + MaybeAddNode(MI); + ++FenceCount; + } else if (MI->mayLoad()) { + NodeList Defs = SA.Addr->members_if(DataFlowGraph::IsDef, DFG); + llvm::for_each(Defs, AnalyzeDef); + } + } + } + LLVM_DEBUG(dbgs() << "Found " << FenceCount << " fences\n"); + LLVM_DEBUG(dbgs() << "Found " << GadgetCount << " gadgets\n"); + if (GadgetCount == 0) + return nullptr; + NumGadgets += GadgetCount; + + // Traverse CFG to build the rest of the graph + SmallSet<MachineBasicBlock *, 8> BlocksVisited; + std::function<void(MachineBasicBlock *, GraphIter, unsigned)> TraverseCFG = + [&](MachineBasicBlock *MBB, GraphIter GI, unsigned ParentDepth) { + unsigned LoopDepth = MLI.getLoopDepth(MBB); + if (!MBB->empty()) { + // Always add the first instruction in each block + auto NI = MBB->begin(); + auto BeginBB = MaybeAddNode(&*NI); + Builder.addEdge(ParentDepth, GI, BeginBB.first); + if (!BlocksVisited.insert(MBB).second) + return; + + // Add any instructions within the block that are gadget components + GI = BeginBB.first; + while (++NI != MBB->end()) { + auto Ref = NodeMap.find(&*NI); + if (Ref != NodeMap.end()) { + Builder.addEdge(LoopDepth, GI, Ref->getSecond()); + GI = Ref->getSecond(); + } + } + + // Always add the terminator instruction, if one exists + auto T = MBB->getFirstTerminator(); + if (T != MBB->end()) { + auto EndBB = MaybeAddNode(&*T); + if (EndBB.second) + Builder.addEdge(LoopDepth, GI, EndBB.first); + GI = EndBB.first; + } + } + for (MachineBasicBlock *Succ : MBB->successors()) + TraverseCFG(Succ, GI, LoopDepth); + }; + // ArgNodeSentinel is a pseudo-instruction that represents MF args in the + // GadgetGraph + GraphIter ArgNode = MaybeAddNode(MachineGadgetGraph::ArgNodeSentinel).first; + TraverseCFG(&MF.front(), ArgNode, 0); + std::unique_ptr<MachineGadgetGraph> G{Builder.get(FenceCount, GadgetCount)}; + LLVM_DEBUG(dbgs() << "Found " << G->nodes_size() << " nodes\n"); + return G; +} + +// Returns the number of remaining gadget edges that could not be eliminated +int X86LoadValueInjectionLoadHardeningPass::elimMitigatedEdgesAndNodes( + MachineGadgetGraph &G, MachineGadgetGraph::EdgeSet &ElimEdges /* in, out */, + MachineGadgetGraph::NodeSet &ElimNodes /* in, out */) const { + if (G.NumFences > 0) { + // Eliminate fences and CFG edges that ingress and egress the fence, as + // they are trivially mitigated. 
+ for (const auto &E : G.edges()) { + const MachineGadgetGraph::Node *Dest = E.getDest(); + if (isFence(Dest->getValue())) { + ElimNodes.insert(*Dest); + ElimEdges.insert(E); + for (const auto &DE : Dest->edges()) + ElimEdges.insert(DE); + } + } + } + + // Find and eliminate gadget edges that have been mitigated. + int MitigatedGadgets = 0, RemainingGadgets = 0; + MachineGadgetGraph::NodeSet ReachableNodes{G}; + for (const auto &RootN : G.nodes()) { + if (llvm::none_of(RootN.edges(), MachineGadgetGraph::isGadgetEdge)) + continue; // skip this node if it isn't a gadget source + + // Find all of the nodes that are CFG-reachable from RootN using DFS + ReachableNodes.clear(); + std::function<void(const MachineGadgetGraph::Node *, bool)> + FindReachableNodes = + [&](const MachineGadgetGraph::Node *N, bool FirstNode) { + if (!FirstNode) + ReachableNodes.insert(*N); + for (const auto &E : N->edges()) { + const MachineGadgetGraph::Node *Dest = E.getDest(); + if (MachineGadgetGraph::isCFGEdge(E) && + !ElimEdges.contains(E) && !ReachableNodes.contains(*Dest)) + FindReachableNodes(Dest, false); + } + }; + FindReachableNodes(&RootN, true); + + // Any gadget whose sink is unreachable has been mitigated + for (const auto &E : RootN.edges()) { + if (MachineGadgetGraph::isGadgetEdge(E)) { + if (ReachableNodes.contains(*E.getDest())) { + // This gadget's sink is reachable + ++RemainingGadgets; + } else { // This gadget's sink is unreachable, and therefore mitigated + ++MitigatedGadgets; + ElimEdges.insert(E); + } + } + } + } + return RemainingGadgets; +} + +std::unique_ptr<MachineGadgetGraph> +X86LoadValueInjectionLoadHardeningPass::trimMitigatedEdges( + std::unique_ptr<MachineGadgetGraph> Graph) const { + MachineGadgetGraph::NodeSet ElimNodes{*Graph}; + MachineGadgetGraph::EdgeSet ElimEdges{*Graph}; + int RemainingGadgets = + elimMitigatedEdgesAndNodes(*Graph, ElimEdges, ElimNodes); + if (ElimEdges.empty() && ElimNodes.empty()) { + Graph->NumFences = 0; + Graph->NumGadgets = RemainingGadgets; + } else { + Graph = GraphBuilder::trim(*Graph, ElimNodes, ElimEdges, 0 /* NumFences */, + RemainingGadgets); + } + return Graph; +} + +int X86LoadValueInjectionLoadHardeningPass::hardenLoadsWithPlugin( + MachineFunction &MF, std::unique_ptr<MachineGadgetGraph> Graph) const { + int FencesInserted = 0; + + do { + LLVM_DEBUG(dbgs() << "Eliminating mitigated paths...\n"); + Graph = trimMitigatedEdges(std::move(Graph)); + LLVM_DEBUG(dbgs() << "Eliminating mitigated paths... Done\n"); + if (Graph->NumGadgets == 0) + break; + + LLVM_DEBUG(dbgs() << "Cutting edges...\n"); + EdgeSet CutEdges{*Graph}; + auto Nodes = std::make_unique<unsigned int[]>(Graph->nodes_size() + + 1 /* terminator node */); + auto Edges = std::make_unique<unsigned int[]>(Graph->edges_size()); + auto EdgeCuts = std::make_unique<int[]>(Graph->edges_size()); + auto EdgeValues = std::make_unique<int[]>(Graph->edges_size()); + for (const auto &N : Graph->nodes()) { + Nodes[Graph->getNodeIndex(N)] = Graph->getEdgeIndex(*N.edges_begin()); + } + Nodes[Graph->nodes_size()] = Graph->edges_size(); // terminator node + for (const auto &E : Graph->edges()) { + Edges[Graph->getEdgeIndex(E)] = Graph->getNodeIndex(*E.getDest()); + EdgeValues[Graph->getEdgeIndex(E)] = E.getValue(); + } + OptimizeCut(Nodes.get(), Graph->nodes_size(), Edges.get(), EdgeValues.get(), + EdgeCuts.get(), Graph->edges_size()); + for (int I = 0; I < Graph->edges_size(); ++I) + if (EdgeCuts[I]) + CutEdges.set(I); + LLVM_DEBUG(dbgs() << "Cutting edges... 
Done\n"); + LLVM_DEBUG(dbgs() << "Cut " << CutEdges.count() << " edges\n"); + + LLVM_DEBUG(dbgs() << "Inserting LFENCEs...\n"); + FencesInserted += insertFences(MF, *Graph, CutEdges); + LLVM_DEBUG(dbgs() << "Inserting LFENCEs... Done\n"); + LLVM_DEBUG(dbgs() << "Inserted " << FencesInserted << " fences\n"); + + Graph = GraphBuilder::trim(*Graph, MachineGadgetGraph::NodeSet{*Graph}, + CutEdges); + } while (true); + + return FencesInserted; +} + +int X86LoadValueInjectionLoadHardeningPass::hardenLoadsWithGreedyHeuristic( + MachineFunction &MF, std::unique_ptr<MachineGadgetGraph> Graph) const { + LLVM_DEBUG(dbgs() << "Eliminating mitigated paths...\n"); + Graph = trimMitigatedEdges(std::move(Graph)); + LLVM_DEBUG(dbgs() << "Eliminating mitigated paths... Done\n"); + if (Graph->NumGadgets == 0) + return 0; + + LLVM_DEBUG(dbgs() << "Cutting edges...\n"); + MachineGadgetGraph::NodeSet ElimNodes{*Graph}, GadgetSinks{*Graph}; + MachineGadgetGraph::EdgeSet ElimEdges{*Graph}, CutEdges{*Graph}; + auto IsCFGEdge = [&ElimEdges, &CutEdges](const MachineGadgetGraph::Edge &E) { + return !ElimEdges.contains(E) && !CutEdges.contains(E) && + MachineGadgetGraph::isCFGEdge(E); + }; + auto IsGadgetEdge = [&ElimEdges, + &CutEdges](const MachineGadgetGraph::Edge &E) { + return !ElimEdges.contains(E) && !CutEdges.contains(E) && + MachineGadgetGraph::isGadgetEdge(E); + }; + + // FIXME: this is O(E^2), we could probably do better. + do { + // Find the cheapest CFG edge that will eliminate a gadget (by being + // egress from a SOURCE node or ingress to a SINK node), and cut it. + const MachineGadgetGraph::Edge *CheapestSoFar = nullptr; + + // First, collect all gadget source and sink nodes. + MachineGadgetGraph::NodeSet GadgetSources{*Graph}, GadgetSinks{*Graph}; + for (const auto &N : Graph->nodes()) { + if (ElimNodes.contains(N)) + continue; + for (const auto &E : N.edges()) { + if (IsGadgetEdge(E)) { + GadgetSources.insert(N); + GadgetSinks.insert(*E.getDest()); + } + } + } + + // Next, look for the cheapest CFG edge which, when cut, is guaranteed to + // mitigate at least one gadget by either: + // (a) being egress from a gadget source, or + // (b) being ingress to a gadget sink. + for (const auto &N : Graph->nodes()) { + if (ElimNodes.contains(N)) + continue; + for (const auto &E : N.edges()) { + if (IsCFGEdge(E)) { + if (GadgetSources.contains(N) || GadgetSinks.contains(*E.getDest())) { + if (!CheapestSoFar || E.getValue() < CheapestSoFar->getValue()) + CheapestSoFar = &E; + } + } + } + } + + assert(CheapestSoFar && "Failed to cut an edge"); + CutEdges.insert(*CheapestSoFar); + ElimEdges.insert(*CheapestSoFar); + } while (elimMitigatedEdgesAndNodes(*Graph, ElimEdges, ElimNodes)); + LLVM_DEBUG(dbgs() << "Cutting edges... Done\n"); + LLVM_DEBUG(dbgs() << "Cut " << CutEdges.count() << " edges\n"); + + LLVM_DEBUG(dbgs() << "Inserting LFENCEs...\n"); + int FencesInserted = insertFences(MF, *Graph, CutEdges); + LLVM_DEBUG(dbgs() << "Inserting LFENCEs... 
Done\n"); + LLVM_DEBUG(dbgs() << "Inserted " << FencesInserted << " fences\n"); + + return FencesInserted; +} + +int X86LoadValueInjectionLoadHardeningPass::insertFences( + MachineFunction &MF, MachineGadgetGraph &G, + EdgeSet &CutEdges /* in, out */) const { + int FencesInserted = 0; + for (const auto &N : G.nodes()) { + for (const auto &E : N.edges()) { + if (CutEdges.contains(E)) { + MachineInstr *MI = N.getValue(), *Prev; + MachineBasicBlock *MBB; // Insert an LFENCE in this MBB + MachineBasicBlock::iterator InsertionPt; // ...at this point + if (MI == MachineGadgetGraph::ArgNodeSentinel) { + // insert LFENCE at beginning of entry block + MBB = &MF.front(); + InsertionPt = MBB->begin(); + Prev = nullptr; + } else if (MI->isBranch()) { // insert the LFENCE before the branch + MBB = MI->getParent(); + InsertionPt = MI; + Prev = MI->getPrevNode(); + // Remove all egress CFG edges from this branch because the inserted + // LFENCE prevents gadgets from crossing the branch. + for (const auto &E : N.edges()) { + if (MachineGadgetGraph::isCFGEdge(E)) + CutEdges.insert(E); + } + } else { // insert the LFENCE after the instruction + MBB = MI->getParent(); + InsertionPt = MI->getNextNode() ? MI->getNextNode() : MBB->end(); + Prev = InsertionPt == MBB->end() + ? (MBB->empty() ? nullptr : &MBB->back()) + : InsertionPt->getPrevNode(); + } + // Ensure this insertion is not redundant (two LFENCEs in sequence). + if ((InsertionPt == MBB->end() || !isFence(&*InsertionPt)) && + (!Prev || !isFence(Prev))) { + BuildMI(*MBB, InsertionPt, DebugLoc(), TII->get(X86::LFENCE)); + ++FencesInserted; + } + } + } + } + return FencesInserted; +} + +bool X86LoadValueInjectionLoadHardeningPass::instrUsesRegToAccessMemory( + const MachineInstr &MI, unsigned Reg) const { + if (!MI.mayLoadOrStore() || MI.getOpcode() == X86::MFENCE || + MI.getOpcode() == X86::SFENCE || MI.getOpcode() == X86::LFENCE) + return false; + + // FIXME: This does not handle pseudo loading instruction like TCRETURN* + const MCInstrDesc &Desc = MI.getDesc(); + int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags); + if (MemRefBeginIdx < 0) { + LLVM_DEBUG(dbgs() << "Warning: unable to obtain memory operand for loading " + "instruction:\n"; + MI.print(dbgs()); dbgs() << '\n';); + return false; + } + MemRefBeginIdx += X86II::getOperandBias(Desc); + + const MachineOperand &BaseMO = + MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg); + const MachineOperand &IndexMO = + MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg); + return (BaseMO.isReg() && BaseMO.getReg() != X86::NoRegister && + TRI->regsOverlap(BaseMO.getReg(), Reg)) || + (IndexMO.isReg() && IndexMO.getReg() != X86::NoRegister && + TRI->regsOverlap(IndexMO.getReg(), Reg)); +} + +bool X86LoadValueInjectionLoadHardeningPass::instrUsesRegToBranch( + const MachineInstr &MI, unsigned Reg) const { + if (!MI.isConditionalBranch()) + return false; + for (const MachineOperand &Use : MI.uses()) + if (Use.isReg() && Use.getReg() == Reg) + return true; + return false; +} + +INITIALIZE_PASS_BEGIN(X86LoadValueInjectionLoadHardeningPass, PASS_KEY, + "X86 LVI load hardening", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier) +INITIALIZE_PASS_END(X86LoadValueInjectionLoadHardeningPass, PASS_KEY, + "X86 LVI load hardening", false, false) + +FunctionPass *llvm::createX86LoadValueInjectionLoadHardeningPass() { + return new X86LoadValueInjectionLoadHardeningPass(); +} + +namespace { + +/// The 
`X86LoadValueInjectionLoadHardeningPass` above depends on expensive +/// analysis passes that add complexity to the pipeline. This complexity +/// can cause noticable overhead when no optimizations are enabled, i.e., -O0. +/// The purpose of `X86LoadValueInjectionLoadHardeningUnoptimizedPass` is to +/// provide the same security as the optimized pass, but without adding +/// unnecessary complexity to the LLVM pipeline. +/// +/// The behavior of this pass is simply to insert an LFENCE after every load +/// instruction. +class X86LoadValueInjectionLoadHardeningUnoptimizedPass + : public MachineFunctionPass { +public: + X86LoadValueInjectionLoadHardeningUnoptimizedPass() + : MachineFunctionPass(ID) {} + + StringRef getPassName() const override { + return "X86 Load Value Injection (LVI) Load Hardening (Unoptimized)"; + } + bool runOnMachineFunction(MachineFunction &MF) override; + static char ID; +}; + +} // end anonymous namespace + +char X86LoadValueInjectionLoadHardeningUnoptimizedPass::ID = 0; + +bool X86LoadValueInjectionLoadHardeningUnoptimizedPass::runOnMachineFunction( + MachineFunction &MF) { + LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName() + << " *****\n"); + const X86Subtarget *STI = &MF.getSubtarget<X86Subtarget>(); + if (!STI->useLVILoadHardening()) + return false; + + // FIXME: support 32-bit + if (!STI->is64Bit()) + report_fatal_error("LVI load hardening is only supported on 64-bit", false); + + // Don't skip functions with the "optnone" attr but participate in opt-bisect. + const Function &F = MF.getFunction(); + if (!F.hasOptNone() && skipFunction(F)) + return false; + + bool Modified = false; + ++NumFunctionsConsidered; + + const TargetInstrInfo *TII = STI->getInstrInfo(); + for (auto &MBB : MF) { + for (auto &MI : MBB) { + if (!MI.mayLoad() || MI.getOpcode() == X86::LFENCE || + MI.getOpcode() == X86::MFENCE) + continue; + + MachineBasicBlock::iterator InsertionPt = + MI.getNextNode() ? MI.getNextNode() : MBB.end(); + BuildMI(MBB, InsertionPt, DebugLoc(), TII->get(X86::LFENCE)); + ++NumFences; + Modified = true; + } + } + + if (Modified) + ++NumFunctionsMitigated; + + return Modified; +} + +INITIALIZE_PASS(X86LoadValueInjectionLoadHardeningUnoptimizedPass, PASS_KEY, + "X86 LVI load hardening", false, false) + +FunctionPass *llvm::createX86LoadValueInjectionLoadHardeningUnoptimizedPass() { + return new X86LoadValueInjectionLoadHardeningUnoptimizedPass(); +} diff --git a/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp b/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp new file mode 100644 index 000000000000..6e1134a25950 --- /dev/null +++ b/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp @@ -0,0 +1,143 @@ +//===-- X86LoadValueInjectionRetHardening.cpp - LVI RET hardening for x86 --==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// Description: Replaces every `ret` instruction with the sequence: +/// ``` +/// pop <scratch-reg> +/// lfence +/// jmp *<scratch-reg> +/// ``` +/// where `<scratch-reg>` is some available scratch register, according to the +/// calling convention of the function being mitigated. 
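Condensed into a standalone sketch (simplified register handling, not the pass's implementation later in this file): the scratch register is chosen by excluding the stack pointer, the instruction pointer, the return-value registers, and everything the calling convention marks callee-saved, then taking the first register left over.
```
// Sketch of the scratch-register policy; the real pass walks
// X86::GR64RegClass and asks TRI->getCalleeSavedRegs(&MF) for the function's
// actual calling convention.
#include <set>
#include <string>
#include <vector>

std::string pickScratchReg(const std::set<std::string> &CalleeSaved) {
  // Never clobber: stack pointer, instruction pointer, return registers.
  const std::set<std::string> Reserved = {"rsp", "rip", "rax", "rdx"};
  const std::vector<std::string> GR64 = {"rax", "rcx", "rdx", "rbx",
                                         "rsi", "rdi", "rbp", "rsp",
                                         "r8",  "r9",  "r10", "r11",
                                         "r12", "r13", "r14", "r15"};
  for (const std::string &Reg : GR64)
    if (!Reserved.count(Reg) && !CalleeSaved.count(Reg))
      return Reg; // first usable register wins
  // None free: the pass then keeps the RET and fences it instead
  // (an LFENCE plus a harmless read-modify-write of (%rsp)).
  return "";
}
```
With the usual SysV callee-saved set (rbx, rbp, r12 through r15), this simplified list yields rcx.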
+/// +//===----------------------------------------------------------------------===// + +#include "X86.h" +#include "X86InstrBuilder.h" +#include "X86Subtarget.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/Debug.h" +#include <bitset> + +using namespace llvm; + +#define PASS_KEY "x86-lvi-ret" +#define DEBUG_TYPE PASS_KEY + +STATISTIC(NumFences, "Number of LFENCEs inserted for LVI mitigation"); +STATISTIC(NumFunctionsConsidered, "Number of functions analyzed"); +STATISTIC(NumFunctionsMitigated, "Number of functions for which mitigations " + "were deployed"); + +namespace { + +class X86LoadValueInjectionRetHardeningPass : public MachineFunctionPass { +public: + X86LoadValueInjectionRetHardeningPass() : MachineFunctionPass(ID) {} + StringRef getPassName() const override { + return "X86 Load Value Injection (LVI) Ret-Hardening"; + } + bool runOnMachineFunction(MachineFunction &MF) override; + + static char ID; +}; + +} // end anonymous namespace + +char X86LoadValueInjectionRetHardeningPass::ID = 0; + +bool X86LoadValueInjectionRetHardeningPass::runOnMachineFunction( + MachineFunction &MF) { + LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName() + << " *****\n"); + const X86Subtarget *Subtarget = &MF.getSubtarget<X86Subtarget>(); + if (!Subtarget->useLVIControlFlowIntegrity() || !Subtarget->is64Bit()) + return false; // FIXME: support 32-bit + + // Don't skip functions with the "optnone" attr but participate in opt-bisect. + const Function &F = MF.getFunction(); + if (!F.hasOptNone() && skipFunction(F)) + return false; + + ++NumFunctionsConsidered; + const X86RegisterInfo *TRI = Subtarget->getRegisterInfo(); + const X86InstrInfo *TII = Subtarget->getInstrInfo(); + unsigned ClobberReg = X86::NoRegister; + std::bitset<X86::NUM_TARGET_REGS> UnclobberableGR64s; + UnclobberableGR64s.set(X86::RSP); // can't clobber stack pointer + UnclobberableGR64s.set(X86::RIP); // can't clobber instruction pointer + UnclobberableGR64s.set(X86::RAX); // used for function return + UnclobberableGR64s.set(X86::RDX); // used for function return + + // We can clobber any register allowed by the function's calling convention. + for (const MCPhysReg *PR = TRI->getCalleeSavedRegs(&MF); auto Reg = *PR; ++PR) + UnclobberableGR64s.set(Reg); + for (auto &Reg : X86::GR64RegClass) { + if (!UnclobberableGR64s.test(Reg)) { + ClobberReg = Reg; + break; + } + } + + if (ClobberReg != X86::NoRegister) { + LLVM_DEBUG(dbgs() << "Selected register " + << Subtarget->getRegisterInfo()->getRegAsmName(ClobberReg) + << " to clobber\n"); + } else { + LLVM_DEBUG(dbgs() << "Could not find a register to clobber\n"); + } + + bool Modified = false; + for (auto &MBB : MF) { + if (MBB.empty()) + continue; + + MachineInstr &MI = MBB.back(); + if (MI.getOpcode() != X86::RETQ) + continue; + + if (ClobberReg != X86::NoRegister) { + MBB.erase_instr(&MI); + BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::POP64r)) + .addReg(ClobberReg, RegState::Define) + .setMIFlag(MachineInstr::FrameDestroy); + BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::LFENCE)); + BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::JMP64r)) + .addReg(ClobberReg); + } else { + // In case there is no available scratch register, we can still read from + // RSP to assert that RSP points to a valid page. 
The write to RSP is + // also helpful because it verifies that the stack's write permissions + // are intact. + MachineInstr *Fence = BuildMI(MBB, MI, DebugLoc(), TII->get(X86::LFENCE)); + addRegOffset(BuildMI(MBB, Fence, DebugLoc(), TII->get(X86::SHL64mi)), + X86::RSP, false, 0) + .addImm(0) + ->addRegisterDead(X86::EFLAGS, TRI); + } + + ++NumFences; + Modified = true; + } + + if (Modified) + ++NumFunctionsMitigated; + return Modified; +} + +INITIALIZE_PASS(X86LoadValueInjectionRetHardeningPass, PASS_KEY, + "X86 LVI ret hardener", false, false) + +FunctionPass *llvm::createX86LoadValueInjectionRetHardeningPass() { + return new X86LoadValueInjectionRetHardeningPass(); +} diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp index 7f49c6e861d4..f5caaaae4d84 100644 --- a/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -1220,8 +1220,8 @@ void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI, break; case MachineOperand::MO_Register: // FIXME: Add retpoline support and remove this. - if (Subtarget->useRetpolineIndirectCalls()) - report_fatal_error("Lowering register statepoints with retpoline not " + if (Subtarget->useIndirectThunkCalls()) + report_fatal_error("Lowering register statepoints with thunks not " "yet implemented."); CallTargetMCOp = MCOperand::createReg(CallTarget.getReg()); CallOpcode = X86::CALL64r; @@ -1399,9 +1399,9 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI, EmitAndCountInstruction( MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp)); // FIXME: Add retpoline support and remove this. - if (Subtarget->useRetpolineIndirectCalls()) + if (Subtarget->useIndirectThunkCalls()) report_fatal_error( - "Lowering patchpoint with retpoline not yet implemented."); + "Lowering patchpoint with thunks not yet implemented."); EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg)); } diff --git a/llvm/lib/Target/X86/X86RetpolineThunks.cpp b/llvm/lib/Target/X86/X86RetpolineThunks.cpp deleted file mode 100644 index 9085d7f068ac..000000000000 --- a/llvm/lib/Target/X86/X86RetpolineThunks.cpp +++ /dev/null @@ -1,286 +0,0 @@ -//======- X86RetpolineThunks.cpp - Construct retpoline thunks for x86 --=====// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// Pass that injects an MI thunk implementing a "retpoline". This is -/// a RET-implemented trampoline that is used to lower indirect calls in a way -/// that prevents speculation on some x86 processors and can be used to mitigate -/// security vulnerabilities due to targeted speculative execution and side -/// channels such as CVE-2017-5715. -/// -/// TODO(chandlerc): All of this code could use better comments and -/// documentation. 
-/// -//===----------------------------------------------------------------------===// - -#include "X86.h" -#include "X86InstrBuilder.h" -#include "X86Subtarget.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/TargetPassConfig.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -#define DEBUG_TYPE "x86-retpoline-thunks" - -static const char ThunkNamePrefix[] = "__llvm_retpoline_"; -static const char R11ThunkName[] = "__llvm_retpoline_r11"; -static const char EAXThunkName[] = "__llvm_retpoline_eax"; -static const char ECXThunkName[] = "__llvm_retpoline_ecx"; -static const char EDXThunkName[] = "__llvm_retpoline_edx"; -static const char EDIThunkName[] = "__llvm_retpoline_edi"; - -namespace { -class X86RetpolineThunks : public MachineFunctionPass { -public: - static char ID; - - X86RetpolineThunks() : MachineFunctionPass(ID) {} - - StringRef getPassName() const override { return "X86 Retpoline Thunks"; } - - bool doInitialization(Module &M) override; - bool runOnMachineFunction(MachineFunction &F) override; - - void getAnalysisUsage(AnalysisUsage &AU) const override { - MachineFunctionPass::getAnalysisUsage(AU); - AU.addRequired<MachineModuleInfoWrapperPass>(); - AU.addPreserved<MachineModuleInfoWrapperPass>(); - } - -private: - MachineModuleInfo *MMI = nullptr; - const TargetMachine *TM = nullptr; - bool Is64Bit = false; - const X86Subtarget *STI = nullptr; - const X86InstrInfo *TII = nullptr; - - bool InsertedThunks = false; - - void createThunkFunction(Module &M, StringRef Name); - void insertRegReturnAddrClobber(MachineBasicBlock &MBB, unsigned Reg); - void populateThunk(MachineFunction &MF, unsigned Reg); -}; - -} // end anonymous namespace - -FunctionPass *llvm::createX86RetpolineThunksPass() { - return new X86RetpolineThunks(); -} - -char X86RetpolineThunks::ID = 0; - -bool X86RetpolineThunks::doInitialization(Module &M) { - InsertedThunks = false; - return false; -} - -bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) { - LLVM_DEBUG(dbgs() << getPassName() << '\n'); - - TM = &MF.getTarget();; - STI = &MF.getSubtarget<X86Subtarget>(); - TII = STI->getInstrInfo(); - Is64Bit = TM->getTargetTriple().getArch() == Triple::x86_64; - - MMI = &getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); - Module &M = const_cast<Module &>(*MMI->getModule()); - - // If this function is not a thunk, check to see if we need to insert - // a thunk. - if (!MF.getName().startswith(ThunkNamePrefix)) { - // If we've already inserted a thunk, nothing else to do. - if (InsertedThunks) - return false; - - // Only add a thunk if one of the functions has the retpoline feature - // enabled in its subtarget, and doesn't enable external thunks. - // FIXME: Conditionalize on indirect calls so we don't emit a thunk when - // nothing will end up calling it. - // FIXME: It's a little silly to look at every function just to enumerate - // the subtargets, but eventually we'll want to look at them for indirect - // calls, so maybe this is OK. - if ((!STI->useRetpolineIndirectCalls() && - !STI->useRetpolineIndirectBranches()) || - STI->useRetpolineExternalThunk()) - return false; - - // Otherwise, we need to insert the thunk. 
- // WARNING: This is not really a well behaving thing to do in a function
- // pass. We extract the module and insert a new function (and machine
- // function) directly into the module.
- if (Is64Bit)
- createThunkFunction(M, R11ThunkName);
- else
- for (StringRef Name :
- {EAXThunkName, ECXThunkName, EDXThunkName, EDIThunkName})
- createThunkFunction(M, Name);
- InsertedThunks = true;
- return true;
- }
-
- // If this *is* a thunk function, we need to populate it with the correct MI.
- if (Is64Bit) {
- assert(MF.getName() == "__llvm_retpoline_r11" &&
- "Should only have an r11 thunk on 64-bit targets");
-
- // __llvm_retpoline_r11:
- //   callq .Lr11_call_target
- // .Lr11_capture_spec:
- //   pause
- //   lfence
- //   jmp .Lr11_capture_spec
- //   .align 16
- // .Lr11_call_target:
- //   movq %r11, (%rsp)
- //   retq
- populateThunk(MF, X86::R11);
- } else {
- // For 32-bit targets we need to emit a collection of thunks for various
- // possible scratch registers as well as a fallback that uses EDI, which is
- // normally callee saved.
- // __llvm_retpoline_eax:
- //   calll .Leax_call_target
- // .Leax_capture_spec:
- //   pause
- //   jmp .Leax_capture_spec
- //   .align 16
- // .Leax_call_target:
- //   movl %eax, (%esp) # Clobber return addr
- //   retl
- //
- // __llvm_retpoline_ecx:
- //   ... # Same setup
- //   movl %ecx, (%esp)
- //   retl
- //
- // __llvm_retpoline_edx:
- //   ... # Same setup
- //   movl %edx, (%esp)
- //   retl
- //
- // __llvm_retpoline_edi:
- //   ... # Same setup
- //   movl %edi, (%esp)
- //   retl
- if (MF.getName() == EAXThunkName)
- populateThunk(MF, X86::EAX);
- else if (MF.getName() == ECXThunkName)
- populateThunk(MF, X86::ECX);
- else if (MF.getName() == EDXThunkName)
- populateThunk(MF, X86::EDX);
- else if (MF.getName() == EDIThunkName)
- populateThunk(MF, X86::EDI);
- else
- llvm_unreachable("Invalid thunk name on x86-32!");
- }
-
- return true;
-}
-
-void X86RetpolineThunks::createThunkFunction(Module &M, StringRef Name) {
- assert(Name.startswith(ThunkNamePrefix) &&
- "Created a thunk with an unexpected prefix!");
-
- LLVMContext &Ctx = M.getContext();
- auto Type = FunctionType::get(Type::getVoidTy(Ctx), false);
- Function *F =
- Function::Create(Type, GlobalValue::LinkOnceODRLinkage, Name, &M);
- F->setVisibility(GlobalValue::HiddenVisibility);
- F->setComdat(M.getOrInsertComdat(Name));
-
- // Add Attributes so that we don't create a frame, unwind information, or
- // inline.
- AttrBuilder B;
- B.addAttribute(llvm::Attribute::NoUnwind);
- B.addAttribute(llvm::Attribute::Naked);
- F->addAttributes(llvm::AttributeList::FunctionIndex, B);
-
- // Populate our function a bit so that we can verify.
- BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
- IRBuilder<> Builder(Entry);
-
- Builder.CreateRetVoid();
-
- // MachineFunctions/MachineBasicBlocks aren't created automatically for the
- // IR-level constructs we already made. Create them and insert them into the
- // module.
- MachineFunction &MF = MMI->getOrCreateMachineFunction(*F);
- MachineBasicBlock *EntryMBB = MF.CreateMachineBasicBlock(Entry);
-
- // Insert EntryMBB into MF. It's not in the module until we do this.
- MF.insert(MF.end(), EntryMBB);
-}
-
-void X86RetpolineThunks::insertRegReturnAddrClobber(MachineBasicBlock &MBB,
- unsigned Reg) {
- const unsigned MovOpc = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
- const unsigned SPReg = Is64Bit ? X86::RSP : X86::ESP;
- addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(MovOpc)), SPReg, false, 0)
- .addReg(Reg);
-}
-
-void X86RetpolineThunks::populateThunk(MachineFunction &MF,
- unsigned Reg) {
- // Set MF properties. We never use vregs...
- MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
-
- // Grab the entry MBB and erase any other blocks. O0 codegen appears to
- // generate two bbs for the entry block.
- MachineBasicBlock *Entry = &MF.front();
- Entry->clear();
- while (MF.size() > 1)
- MF.erase(std::next(MF.begin()));
-
- MachineBasicBlock *CaptureSpec = MF.CreateMachineBasicBlock(Entry->getBasicBlock());
- MachineBasicBlock *CallTarget = MF.CreateMachineBasicBlock(Entry->getBasicBlock());
- MCSymbol *TargetSym = MF.getContext().createTempSymbol();
- MF.push_back(CaptureSpec);
- MF.push_back(CallTarget);
-
- const unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
- const unsigned RetOpc = Is64Bit ? X86::RETQ : X86::RETL;
-
- Entry->addLiveIn(Reg);
- BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addSym(TargetSym);
-
- // The MIR verifier thinks that the CALL in the entry block will fall through
- // to CaptureSpec, so mark it as the successor. Technically, CaptureTarget is
- // the successor, but the MIR verifier doesn't know how to cope with that.
- Entry->addSuccessor(CaptureSpec);
-
- // In the capture loop for speculation, we want to stop the processor from
- // speculating as fast as possible. On Intel processors, the PAUSE instruction
- // will block speculation without consuming any execution resources. On AMD
- // processors, the PAUSE instruction is (essentially) a nop, so we also use an
- // LFENCE instruction which they have advised will stop speculation as well
- // with minimal resource utilization. We still end the capture with a jump to
- // form an infinite loop to fully guarantee that no matter what implementation
- // of the x86 ISA, speculating this code path never escapes.
- BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::PAUSE));
- BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::LFENCE));
- BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::JMP_1)).addMBB(CaptureSpec);
- CaptureSpec->setHasAddressTaken();
- CaptureSpec->addSuccessor(CaptureSpec);
-
- CallTarget->addLiveIn(Reg);
- CallTarget->setHasAddressTaken();
- CallTarget->setAlignment(Align(16));
- insertRegReturnAddrClobber(*CallTarget, Reg);
- CallTarget->back().setPreInstrSymbol(MF, TargetSym);
- BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc));
-}
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index f4e8d30328ca..af5153243c8b 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -421,6 +421,16 @@ protected:
/// than emitting one inside the compiler.
bool UseRetpolineExternalThunk = false;
+ /// Prevent generation of indirect call/branch instructions from memory,
+ /// and force all indirect call/branch instructions from a register to be
+ /// preceded by an LFENCE. Also decompose RET instructions into a
+ /// POP+LFENCE+JMP sequence.
+ bool UseLVIControlFlowIntegrity = false;
+
+ /// Insert LFENCE instructions to prevent data speculatively injected into
+ /// loads from being used maliciously.
+ bool UseLVILoadHardening = false;
+
/// Use software floating point for code generation.
bool UseSoftFloat = false;
@@ -707,8 +717,21 @@ public:
return UseRetpolineIndirectBranches;
}
bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; }
+
+ // These are generic getters that OR together all of the thunk types
+ // supported by the subtarget. Therefore useIndirectThunk*() will return true
+ // if any respective thunk feature is enabled.
+ bool useIndirectThunkCalls() const {
+ return useRetpolineIndirectCalls() || useLVIControlFlowIntegrity();
+ }
+ bool useIndirectThunkBranches() const {
+ return useRetpolineIndirectBranches() || useLVIControlFlowIntegrity();
+ }
+
bool preferMaskRegisters() const { return PreferMaskRegisters; }
bool useGLMDivSqrtCosts() const { return UseGLMDivSqrtCosts; }
+ bool useLVIControlFlowIntegrity() const { return UseLVIControlFlowIntegrity; }
+ bool useLVILoadHardening() const { return UseLVILoadHardening; }
unsigned getPreferVectorWidth() const { return PreferVectorWidth; }
unsigned getRequiredVectorWidth() const { return RequiredVectorWidth; }
@@ -853,10 +876,10 @@ public:
/// Return true if the subtarget allows calls to immediate address.
bool isLegalToCallImmediateAddr() const;
- /// If we are using retpolines, we need to expand indirectbr to avoid it
+ /// If we are using indirect thunks, we need to expand indirectbr to avoid it
/// lowering to an actual indirect jump.
bool enableIndirectBrExpand() const override {
- return useRetpolineIndirectBranches();
+ return useIndirectThunkBranches();
}
/// Enable the MachineScheduler pass for all X86 subtargets.
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index 7176e46f07b1..9f639ffa22ec 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -82,6 +82,8 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() {
initializeX86SpeculativeLoadHardeningPassPass(PR);
initializeX86FlagsCopyLoweringPassPass(PR);
initializeX86CondBrFoldingPassPass(PR);
+ initializeX86LoadValueInjectionLoadHardeningPassPass(PR);
+ initializeX86LoadValueInjectionRetHardeningPassPass(PR);
initializeX86OptimizeLEAPassPass(PR);
}
@@ -496,6 +498,10 @@ void X86PassConfig::addMachineSSAOptimization() {
void X86PassConfig::addPostRegAlloc() {
addPass(createX86FloatingPointStackifierPass());
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createX86LoadValueInjectionLoadHardeningPass());
+ else
+ addPass(createX86LoadValueInjectionLoadHardeningUnoptimizedPass());
}
void X86PassConfig::addPreSched2() { addPass(createX86ExpandPseudoPass()); }
@@ -525,7 +531,7 @@ void X86PassConfig::addPreEmitPass2() {
const Triple &TT = TM->getTargetTriple();
const MCAsmInfo *MAI = TM->getMCAsmInfo();
- addPass(createX86RetpolineThunksPass());
+ addPass(createX86IndirectThunksPass());
// Insert extra int3 instructions after trailing call instructions to avoid
// issues in the unwinder.
@@ -542,6 +548,7 @@ void X86PassConfig::addPreEmitPass2() {
// Identify valid longjmp targets for Windows Control Flow Guard.
if (TT.isOSWindows())
addPass(createCFGuardLongjmpPass());
+ addPass(createX86LoadValueInjectionRetHardeningPass());
}
std::unique_ptr<CSEConfigBase> X86PassConfig::getCSEConfig() const {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index ec976a971e3c..23561c25c50a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1768,7 +1768,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
Constant *C2;
// C-(C2-X) --> X+(C-C2)
- if (match(Op1, m_Sub(m_Constant(C2), m_Value(X))))
+ if (match(Op1, m_Sub(m_Constant(C2), m_Value(X))) && !isa<ConstantExpr>(C2))
return BinaryOperator::CreateAdd(X, ConstantExpr::getSub(C, C2));
// C-(X+C2) --> (C-C2)-X
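
A note on that last InstCombineAddSub.cpp hunk: the fold rewrites C-(C2-X) into X+(C-C2), which only pays off when C-C2 folds down to a plain constant. When C2 is a ConstantExpr, ConstantExpr::getSub(C, C2) generally just builds another constant expression, so the rewrite would churn without simplifying anything; the added !isa<ConstantExpr>(C2) guard restricts the fold to ordinary constants. The sketch below only illustrates the underlying integer identity in standalone C++; the values are arbitrary placeholders and none of it is LLVM code.

#include <cassert>
#include <cstdint>

// Identity behind the C-(C2-X) --> X+(C-C2) fold, checked with unsigned
// 64-bit arithmetic, which wraps modulo 2^64 like LLVM's plain i64 sub/add.
// C, C2 and X are arbitrary sample values, not taken from the diff.
int main() {
  uint64_t C = 100, C2 = 37, X = 5;
  assert(C - (C2 - X) == X + (C - C2));
  return 0;
}

The identity itself holds for any wrap-around values, so the new guard is about keeping the replacement constant foldable, not about the correctness of the algebra.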