Diffstat (limited to 'contrib/llvm/lib/CodeGen/GlobalISel')
18 files changed, 7363 insertions, 0 deletions
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp new file mode 100644 index 000000000000..07de31bec660 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -0,0 +1,183 @@ +//===-- lib/CodeGen/GlobalISel/CallLowering.cpp - Call lowering -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements some simple delegations needed for call lowering. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/CallLowering.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" + +using namespace llvm; + +bool CallLowering::lowerCall( + MachineIRBuilder &MIRBuilder, ImmutableCallSite CS, unsigned ResReg, + ArrayRef<unsigned> ArgRegs, std::function<unsigned()> GetCalleeReg) const { + auto &DL = CS.getParent()->getParent()->getParent()->getDataLayout(); + + // First step is to marshall all the function's parameters into the correct + // physregs and memory locations. Gather the sequence of argument types that + // we'll pass to the assigner function. + SmallVector<ArgInfo, 8> OrigArgs; + unsigned i = 0; + unsigned NumFixedArgs = CS.getFunctionType()->getNumParams(); + for (auto &Arg : CS.args()) { + ArgInfo OrigArg{ArgRegs[i], Arg->getType(), ISD::ArgFlagsTy{}, + i < NumFixedArgs}; + setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CS); + // We don't currently support swifterror or swiftself args. 
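+    // Both require extra physical-register plumbing that the generic
+    // lowering cannot express yet, so reject them here and let the caller
+    // fall back to SelectionDAG. As an illustration, for a call such as
+    //   %r = call i32 @callee(i32 %x)
+    // this loop produces a single fixed ArgInfo {ArgRegs[0], i32} whose
+    // flags (zeroext, byval, ...) come from the call-site attributes.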
+ if (OrigArg.Flags.isSwiftError() || OrigArg.Flags.isSwiftSelf()) + return false; + OrigArgs.push_back(OrigArg); + ++i; + } + + MachineOperand Callee = MachineOperand::CreateImm(0); + if (const Function *F = CS.getCalledFunction()) + Callee = MachineOperand::CreateGA(F, 0); + else + Callee = MachineOperand::CreateReg(GetCalleeReg(), false); + + ArgInfo OrigRet{ResReg, CS.getType(), ISD::ArgFlagsTy{}}; + if (!OrigRet.Ty->isVoidTy()) + setArgFlags(OrigRet, AttributeList::ReturnIndex, DL, CS); + + return lowerCall(MIRBuilder, CS.getCallingConv(), Callee, OrigRet, OrigArgs); +} + +template <typename FuncInfoTy> +void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx, + const DataLayout &DL, + const FuncInfoTy &FuncInfo) const { + const AttributeList &Attrs = FuncInfo.getAttributes(); + if (Attrs.hasAttribute(OpIdx, Attribute::ZExt)) + Arg.Flags.setZExt(); + if (Attrs.hasAttribute(OpIdx, Attribute::SExt)) + Arg.Flags.setSExt(); + if (Attrs.hasAttribute(OpIdx, Attribute::InReg)) + Arg.Flags.setInReg(); + if (Attrs.hasAttribute(OpIdx, Attribute::StructRet)) + Arg.Flags.setSRet(); + if (Attrs.hasAttribute(OpIdx, Attribute::SwiftSelf)) + Arg.Flags.setSwiftSelf(); + if (Attrs.hasAttribute(OpIdx, Attribute::SwiftError)) + Arg.Flags.setSwiftError(); + if (Attrs.hasAttribute(OpIdx, Attribute::ByVal)) + Arg.Flags.setByVal(); + if (Attrs.hasAttribute(OpIdx, Attribute::InAlloca)) + Arg.Flags.setInAlloca(); + + if (Arg.Flags.isByVal() || Arg.Flags.isInAlloca()) { + Type *ElementTy = cast<PointerType>(Arg.Ty)->getElementType(); + Arg.Flags.setByValSize(DL.getTypeAllocSize(ElementTy)); + // For ByVal, alignment should be passed from FE. BE will guess if + // this info is not there but there are cases it cannot get right. + unsigned FrameAlign; + if (FuncInfo.getParamAlignment(OpIdx - 2)) + FrameAlign = FuncInfo.getParamAlignment(OpIdx - 2); + else + FrameAlign = getTLI()->getByValTypeAlignment(ElementTy, DL); + Arg.Flags.setByValAlign(FrameAlign); + } + if (Attrs.hasAttribute(OpIdx, Attribute::Nest)) + Arg.Flags.setNest(); + Arg.Flags.setOrigAlign(DL.getABITypeAlignment(Arg.Ty)); +} + +template void +CallLowering::setArgFlags<Function>(CallLowering::ArgInfo &Arg, unsigned OpIdx, + const DataLayout &DL, + const Function &FuncInfo) const; + +template void +CallLowering::setArgFlags<CallInst>(CallLowering::ArgInfo &Arg, unsigned OpIdx, + const DataLayout &DL, + const CallInst &FuncInfo) const; + +bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder, + ArrayRef<ArgInfo> Args, + ValueHandler &Handler) const { + MachineFunction &MF = MIRBuilder.getMF(); + const Function &F = MF.getFunction(); + const DataLayout &DL = F.getParent()->getDataLayout(); + + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext()); + + unsigned NumArgs = Args.size(); + for (unsigned i = 0; i != NumArgs; ++i) { + MVT CurVT = MVT::getVT(Args[i].Ty); + if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i], CCInfo)) + return false; + } + + for (unsigned i = 0, e = Args.size(), j = 0; i != e; ++i, ++j) { + assert(j < ArgLocs.size() && "Skipped too many arg locs"); + + CCValAssign &VA = ArgLocs[j]; + assert(VA.getValNo() == i && "Location doesn't correspond to current arg"); + + if (VA.needsCustom()) { + j += Handler.assignCustomValue(Args[i], makeArrayRef(ArgLocs).slice(j)); + continue; + } + + if (VA.isRegLoc()) + Handler.assignValueToReg(Args[i].Reg, VA.getLocReg(), VA); + else if (VA.isMemLoc()) { + unsigned Size = VA.getValVT() == 
MVT::iPTR + ? DL.getPointerSize() + : alignTo(VA.getValVT().getSizeInBits(), 8) / 8; + unsigned Offset = VA.getLocMemOffset(); + MachinePointerInfo MPO; + unsigned StackAddr = Handler.getStackAddress(Size, Offset, MPO); + Handler.assignValueToAddress(Args[i].Reg, StackAddr, Size, MPO, VA); + } else { + // FIXME: Support byvals and other weirdness + return false; + } + } + return true; +} + +unsigned CallLowering::ValueHandler::extendRegister(unsigned ValReg, + CCValAssign &VA) { + LLT LocTy{VA.getLocVT()}; + switch (VA.getLocInfo()) { + default: break; + case CCValAssign::Full: + case CCValAssign::BCvt: + // FIXME: bitconverting between vector types may or may not be a + // nop in big-endian situations. + return ValReg; + case CCValAssign::AExt: { + assert(!VA.getLocVT().isVector() && "unexpected vector extend"); + auto MIB = MIRBuilder.buildAnyExt(LocTy, ValReg); + return MIB->getOperand(0).getReg(); + } + case CCValAssign::SExt: { + unsigned NewReg = MRI.createGenericVirtualRegister(LocTy); + MIRBuilder.buildSExt(NewReg, ValReg); + return NewReg; + } + case CCValAssign::ZExt: { + unsigned NewReg = MRI.createGenericVirtualRegister(LocTy); + MIRBuilder.buildZExt(NewReg, ValReg); + return NewReg; + } + } + llvm_unreachable("unable to extend register"); +} diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/Combiner.cpp new file mode 100644 index 000000000000..0bc5b87de150 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GlobalISel/Combiner.cpp @@ -0,0 +1,81 @@ +//===-- lib/CodeGen/GlobalISel/GICombiner.cpp -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file constains common code to combine machine functions at generic +// level. +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/Combiner.h" +#include "llvm/CodeGen/GlobalISel/CombinerInfo.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" +#include "llvm/CodeGen/GlobalISel/GISelWorkList.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "gi-combiner" + +using namespace llvm; + +Combiner::Combiner(CombinerInfo &Info, const TargetPassConfig *TPC) + : CInfo(Info), TPC(TPC) { + (void)this->TPC; // FIXME: Remove when used. +} + +bool Combiner::combineMachineInstrs(MachineFunction &MF) { + // If the ISel pipeline failed, do not bother running this pass. + // FIXME: Should this be here or in individual combiner passes. + if (MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedISel)) + return false; + + MRI = &MF.getRegInfo(); + Builder.setMF(MF); + + LLVM_DEBUG(dbgs() << "Generic MI Combiner for: " << MF.getName() << '\n'); + + MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr); + + bool MFChanged = false; + bool Changed; + + do { + // Collect all instructions. Do a post order traversal for basic blocks and + // insert with list bottom up, so while we pop_back_val, we'll traverse top + // down RPOT. 
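+    // Concretely: post_order(&MF) yields blocks bottom-up, and each block's
+    // instructions are inserted in reverse, so pop_back_val() in the loop
+    // below hands instructions back roughly top-down in RPO. For a diamond
+    // CFG (entry, then/else, exit) one possible order is:
+    //   inserted:  exit, else, then, entry   (each block back-to-front)
+    //   processed: entry, then, else, exit   (each block front-to-back)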
+ Changed = false; + GISelWorkList<512> WorkList; + for (MachineBasicBlock *MBB : post_order(&MF)) { + if (MBB->empty()) + continue; + for (auto MII = MBB->rbegin(), MIE = MBB->rend(); MII != MIE;) { + MachineInstr *CurMI = &*MII; + ++MII; + // Erase dead insts before even adding to the list. + if (isTriviallyDead(*CurMI, *MRI)) { + LLVM_DEBUG(dbgs() << *CurMI << "Is dead; erasing.\n"); + CurMI->eraseFromParentAndMarkDBGValuesForRemoval(); + continue; + } + WorkList.insert(CurMI); + } + } + // Main Loop. Process the instructions here. + while (!WorkList.empty()) { + MachineInstr *CurrInst = WorkList.pop_back_val(); + LLVM_DEBUG(dbgs() << "Try combining " << *CurrInst << "\n";); + Changed |= CInfo.combine(*CurrInst, Builder); + } + MFChanged |= Changed; + } while (Changed); + + return MFChanged; +} diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp new file mode 100644 index 000000000000..44e904a6391b --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -0,0 +1,41 @@ +//== ---lib/CodeGen/GlobalISel/GICombinerHelper.cpp --------------------- == // +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#include "llvm/CodeGen/GlobalISel/CombinerHelper.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +#define DEBUG_TYPE "gi-combine" + +using namespace llvm; + +CombinerHelper::CombinerHelper(MachineIRBuilder &B) : + Builder(B), MRI(Builder.getMF().getRegInfo()) {} + +bool CombinerHelper::tryCombineCopy(MachineInstr &MI) { + if (MI.getOpcode() != TargetOpcode::COPY) + return false; + unsigned DstReg = MI.getOperand(0).getReg(); + unsigned SrcReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + LLT SrcTy = MRI.getType(SrcReg); + // Simple Copy Propagation. + // a(sx) = COPY b(sx) -> Replace all uses of a with b. + if (DstTy.isValid() && SrcTy.isValid() && DstTy == SrcTy) { + MI.eraseFromParent(); + MRI.replaceRegWith(DstReg, SrcReg); + return true; + } + return false; +} + +bool CombinerHelper::tryCombine(MachineInstr &MI) { + return tryCombineCopy(MI); +} diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp new file mode 100644 index 000000000000..00c6a9d63158 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp @@ -0,0 +1,25 @@ +//===-- llvm/CodeGen/GlobalISel/GlobalIsel.cpp --- GlobalISel ----*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +// This file implements the common initialization routines for the +// GlobalISel library. 
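+// Targets are expected to call initializeGlobalISel() once (e.g. during
+// target or CodeGen initialization) so that the five passes below are known
+// to the PassRegistry before the pipeline is built.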
+//===----------------------------------------------------------------------===// + +#include "llvm/InitializePasses.h" +#include "llvm/PassRegistry.h" + +using namespace llvm; + +void llvm::initializeGlobalISel(PassRegistry &Registry) { + initializeIRTranslatorPass(Registry); + initializeLegalizerPass(Registry); + initializeLocalizerPass(Registry); + initializeRegBankSelectPass(Registry); + initializeInstructionSelectPass(Registry); +} diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp new file mode 100644 index 000000000000..bafb7a05536d --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -0,0 +1,1676 @@ +//===- llvm/CodeGen/GlobalISel/IRTranslator.cpp - IRTranslator ---*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the IRTranslator class. +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/IRTranslator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/GlobalISel/CallLowering.h" +#include "llvm/CodeGen/LowLevelType.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/StackProtector.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/MC/MCContext.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/LowLevelTypeImpl.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetMachine.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <iterator> +#include <string> +#include <utility> +#include <vector> + +#define DEBUG_TYPE "irtranslator" + +using namespace llvm; + +char IRTranslator::ID = 0; + +INITIALIZE_PASS_BEGIN(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI", + false, false) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_END(IRTranslator, DEBUG_TYPE, 
"IRTranslator LLVM IR -> MI", + false, false) + +static void reportTranslationError(MachineFunction &MF, + const TargetPassConfig &TPC, + OptimizationRemarkEmitter &ORE, + OptimizationRemarkMissed &R) { + MF.getProperties().set(MachineFunctionProperties::Property::FailedISel); + + // Print the function name explicitly if we don't have a debug location (which + // makes the diagnostic less useful) or if we're going to emit a raw error. + if (!R.getLocation().isValid() || TPC.isGlobalISelAbortEnabled()) + R << (" (in function: " + MF.getName() + ")").str(); + + if (TPC.isGlobalISelAbortEnabled()) + report_fatal_error(R.getMsg()); + else + ORE.emit(R); +} + +IRTranslator::IRTranslator() : MachineFunctionPass(ID) { + initializeIRTranslatorPass(*PassRegistry::getPassRegistry()); +} + +void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<StackProtector>(); + AU.addRequired<TargetPassConfig>(); + getSelectionDAGFallbackAnalysisUsage(AU); + MachineFunctionPass::getAnalysisUsage(AU); +} + +static void computeValueLLTs(const DataLayout &DL, Type &Ty, + SmallVectorImpl<LLT> &ValueTys, + SmallVectorImpl<uint64_t> *Offsets = nullptr, + uint64_t StartingOffset = 0) { + // Given a struct type, recursively traverse the elements. + if (StructType *STy = dyn_cast<StructType>(&Ty)) { + const StructLayout *SL = DL.getStructLayout(STy); + for (unsigned I = 0, E = STy->getNumElements(); I != E; ++I) + computeValueLLTs(DL, *STy->getElementType(I), ValueTys, Offsets, + StartingOffset + SL->getElementOffset(I)); + return; + } + // Given an array type, recursively traverse the elements. + if (ArrayType *ATy = dyn_cast<ArrayType>(&Ty)) { + Type *EltTy = ATy->getElementType(); + uint64_t EltSize = DL.getTypeAllocSize(EltTy); + for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) + computeValueLLTs(DL, *EltTy, ValueTys, Offsets, + StartingOffset + i * EltSize); + return; + } + // Interpret void as zero return values. + if (Ty.isVoidTy()) + return; + // Base case: we can get an LLT for this LLVM IR type. + ValueTys.push_back(getLLTForType(Ty, DL)); + if (Offsets != nullptr) + Offsets->push_back(StartingOffset * 8); +} + +IRTranslator::ValueToVRegInfo::VRegListT & +IRTranslator::allocateVRegs(const Value &Val) { + assert(!VMap.contains(Val) && "Value already allocated in VMap"); + auto *Regs = VMap.getVRegs(Val); + auto *Offsets = VMap.getOffsets(Val); + SmallVector<LLT, 4> SplitTys; + computeValueLLTs(*DL, *Val.getType(), SplitTys, + Offsets->empty() ? Offsets : nullptr); + for (unsigned i = 0; i < SplitTys.size(); ++i) + Regs->push_back(0); + return *Regs; +} + +ArrayRef<unsigned> IRTranslator::getOrCreateVRegs(const Value &Val) { + auto VRegsIt = VMap.findVRegs(Val); + if (VRegsIt != VMap.vregs_end()) + return *VRegsIt->second; + + if (Val.getType()->isVoidTy()) + return *VMap.getVRegs(Val); + + // Create entry for this type. + auto *VRegs = VMap.getVRegs(Val); + auto *Offsets = VMap.getOffsets(Val); + + assert(Val.getType()->isSized() && + "Don't know how to create an empty vreg"); + + SmallVector<LLT, 4> SplitTys; + computeValueLLTs(*DL, *Val.getType(), SplitTys, + Offsets->empty() ? 
Offsets : nullptr); + + if (!isa<Constant>(Val)) { + for (auto Ty : SplitTys) + VRegs->push_back(MRI->createGenericVirtualRegister(Ty)); + return *VRegs; + } + + if (Val.getType()->isAggregateType()) { + // UndefValue, ConstantAggregateZero + auto &C = cast<Constant>(Val); + unsigned Idx = 0; + while (auto Elt = C.getAggregateElement(Idx++)) { + auto EltRegs = getOrCreateVRegs(*Elt); + std::copy(EltRegs.begin(), EltRegs.end(), std::back_inserter(*VRegs)); + } + } else { + assert(SplitTys.size() == 1 && "unexpectedly split LLT"); + VRegs->push_back(MRI->createGenericVirtualRegister(SplitTys[0])); + bool Success = translate(cast<Constant>(Val), VRegs->front()); + if (!Success) { + OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure", + MF->getFunction().getSubprogram(), + &MF->getFunction().getEntryBlock()); + R << "unable to translate constant: " << ore::NV("Type", Val.getType()); + reportTranslationError(*MF, *TPC, *ORE, R); + return *VRegs; + } + } + + return *VRegs; +} + +int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) { + if (FrameIndices.find(&AI) != FrameIndices.end()) + return FrameIndices[&AI]; + + unsigned ElementSize = DL->getTypeStoreSize(AI.getAllocatedType()); + unsigned Size = + ElementSize * cast<ConstantInt>(AI.getArraySize())->getZExtValue(); + + // Always allocate at least one byte. + Size = std::max(Size, 1u); + + unsigned Alignment = AI.getAlignment(); + if (!Alignment) + Alignment = DL->getABITypeAlignment(AI.getAllocatedType()); + + int &FI = FrameIndices[&AI]; + FI = MF->getFrameInfo().CreateStackObject(Size, Alignment, false, &AI); + return FI; +} + +unsigned IRTranslator::getMemOpAlignment(const Instruction &I) { + unsigned Alignment = 0; + Type *ValTy = nullptr; + if (const StoreInst *SI = dyn_cast<StoreInst>(&I)) { + Alignment = SI->getAlignment(); + ValTy = SI->getValueOperand()->getType(); + } else if (const LoadInst *LI = dyn_cast<LoadInst>(&I)) { + Alignment = LI->getAlignment(); + ValTy = LI->getType(); + } else if (const AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(&I)) { + // TODO(PR27168): This instruction has no alignment attribute, but unlike + // the default alignment for load/store, the default here is to assume + // it has NATURAL alignment, not DataLayout-specified alignment. + const DataLayout &DL = AI->getModule()->getDataLayout(); + Alignment = DL.getTypeStoreSize(AI->getCompareOperand()->getType()); + ValTy = AI->getCompareOperand()->getType(); + } else if (const AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(&I)) { + // TODO(PR27168): This instruction has no alignment attribute, but unlike + // the default alignment for load/store, the default here is to assume + // it has NATURAL alignment, not DataLayout-specified alignment. + const DataLayout &DL = AI->getModule()->getDataLayout(); + Alignment = DL.getTypeStoreSize(AI->getValOperand()->getType()); + ValTy = AI->getType(); + } else { + OptimizationRemarkMissed R("gisel-irtranslator", "", &I); + R << "unable to translate memop: " << ore::NV("Opcode", &I); + reportTranslationError(*MF, *TPC, *ORE, R); + return 1; + } + + return Alignment ? 
Alignment : DL->getABITypeAlignment(ValTy); +} + +MachineBasicBlock &IRTranslator::getMBB(const BasicBlock &BB) { + MachineBasicBlock *&MBB = BBToMBB[&BB]; + assert(MBB && "BasicBlock was not encountered before"); + return *MBB; +} + +void IRTranslator::addMachineCFGPred(CFGEdge Edge, MachineBasicBlock *NewPred) { + assert(NewPred && "new predecessor must be a real MachineBasicBlock"); + MachinePreds[Edge].push_back(NewPred); +} + +bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U, + MachineIRBuilder &MIRBuilder) { + // FIXME: handle signed/unsigned wrapping flags. + + // Get or create a virtual register for each value. + // Unless the value is a Constant => loadimm cst? + // or inline constant each time? + // Creation of a virtual register needs to have a size. + unsigned Op0 = getOrCreateVReg(*U.getOperand(0)); + unsigned Op1 = getOrCreateVReg(*U.getOperand(1)); + unsigned Res = getOrCreateVReg(U); + MIRBuilder.buildInstr(Opcode).addDef(Res).addUse(Op0).addUse(Op1); + return true; +} + +bool IRTranslator::translateFSub(const User &U, MachineIRBuilder &MIRBuilder) { + // -0.0 - X --> G_FNEG + if (isa<Constant>(U.getOperand(0)) && + U.getOperand(0) == ConstantFP::getZeroValueForNegation(U.getType())) { + MIRBuilder.buildInstr(TargetOpcode::G_FNEG) + .addDef(getOrCreateVReg(U)) + .addUse(getOrCreateVReg(*U.getOperand(1))); + return true; + } + return translateBinaryOp(TargetOpcode::G_FSUB, U, MIRBuilder); +} + +bool IRTranslator::translateCompare(const User &U, + MachineIRBuilder &MIRBuilder) { + const CmpInst *CI = dyn_cast<CmpInst>(&U); + unsigned Op0 = getOrCreateVReg(*U.getOperand(0)); + unsigned Op1 = getOrCreateVReg(*U.getOperand(1)); + unsigned Res = getOrCreateVReg(U); + CmpInst::Predicate Pred = + CI ? CI->getPredicate() : static_cast<CmpInst::Predicate>( + cast<ConstantExpr>(U).getPredicate()); + if (CmpInst::isIntPredicate(Pred)) + MIRBuilder.buildICmp(Pred, Res, Op0, Op1); + else if (Pred == CmpInst::FCMP_FALSE) + MIRBuilder.buildCopy( + Res, getOrCreateVReg(*Constant::getNullValue(CI->getType()))); + else if (Pred == CmpInst::FCMP_TRUE) + MIRBuilder.buildCopy( + Res, getOrCreateVReg(*Constant::getAllOnesValue(CI->getType()))); + else + MIRBuilder.buildFCmp(Pred, Res, Op0, Op1); + + return true; +} + +bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) { + const ReturnInst &RI = cast<ReturnInst>(U); + const Value *Ret = RI.getReturnValue(); + if (Ret && DL->getTypeStoreSize(Ret->getType()) == 0) + Ret = nullptr; + // The target may mess up with the insertion point, but + // this is not important as a return is the last instruction + // of the block anyway. + + // FIXME: this interface should simplify when CallLowering gets adapted to + // multiple VRegs per Value. + unsigned VReg = Ret ? packRegs(*Ret, MIRBuilder) : 0; + return CLI->lowerReturn(MIRBuilder, Ret, VReg); +} + +bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) { + const BranchInst &BrInst = cast<BranchInst>(U); + unsigned Succ = 0; + if (!BrInst.isUnconditional()) { + // We want a G_BRCOND to the true BB followed by an unconditional branch. 
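+    // That is, IR such as
+    //   br i1 %c, label %t, label %f
+    // becomes:
+    //   G_BRCOND %c, %bb.t
+    //   G_BR %bb.f   ; omitted below when %bb.f is the layout successor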
+ unsigned Tst = getOrCreateVReg(*BrInst.getCondition()); + const BasicBlock &TrueTgt = *cast<BasicBlock>(BrInst.getSuccessor(Succ++)); + MachineBasicBlock &TrueBB = getMBB(TrueTgt); + MIRBuilder.buildBrCond(Tst, TrueBB); + } + + const BasicBlock &BrTgt = *cast<BasicBlock>(BrInst.getSuccessor(Succ)); + MachineBasicBlock &TgtBB = getMBB(BrTgt); + MachineBasicBlock &CurBB = MIRBuilder.getMBB(); + + // If the unconditional target is the layout successor, fallthrough. + if (!CurBB.isLayoutSuccessor(&TgtBB)) + MIRBuilder.buildBr(TgtBB); + + // Link successors. + for (const BasicBlock *Succ : BrInst.successors()) + CurBB.addSuccessor(&getMBB(*Succ)); + return true; +} + +bool IRTranslator::translateSwitch(const User &U, + MachineIRBuilder &MIRBuilder) { + // For now, just translate as a chain of conditional branches. + // FIXME: could we share most of the logic/code in + // SelectionDAGBuilder::visitSwitch between SelectionDAG and GlobalISel? + // At first sight, it seems most of the logic in there is independent of + // SelectionDAG-specifics and a lot of work went in to optimize switch + // lowering in there. + + const SwitchInst &SwInst = cast<SwitchInst>(U); + const unsigned SwCondValue = getOrCreateVReg(*SwInst.getCondition()); + const BasicBlock *OrigBB = SwInst.getParent(); + + LLT LLTi1 = getLLTForType(*Type::getInt1Ty(U.getContext()), *DL); + for (auto &CaseIt : SwInst.cases()) { + const unsigned CaseValueReg = getOrCreateVReg(*CaseIt.getCaseValue()); + const unsigned Tst = MRI->createGenericVirtualRegister(LLTi1); + MIRBuilder.buildICmp(CmpInst::ICMP_EQ, Tst, CaseValueReg, SwCondValue); + MachineBasicBlock &CurMBB = MIRBuilder.getMBB(); + const BasicBlock *TrueBB = CaseIt.getCaseSuccessor(); + MachineBasicBlock &TrueMBB = getMBB(*TrueBB); + + MIRBuilder.buildBrCond(Tst, TrueMBB); + CurMBB.addSuccessor(&TrueMBB); + addMachineCFGPred({OrigBB, TrueBB}, &CurMBB); + + MachineBasicBlock *FalseMBB = + MF->CreateMachineBasicBlock(SwInst.getParent()); + // Insert the comparison blocks one after the other. + MF->insert(std::next(CurMBB.getIterator()), FalseMBB); + MIRBuilder.buildBr(*FalseMBB); + CurMBB.addSuccessor(FalseMBB); + + MIRBuilder.setMBB(*FalseMBB); + } + // handle default case + const BasicBlock *DefaultBB = SwInst.getDefaultDest(); + MachineBasicBlock &DefaultMBB = getMBB(*DefaultBB); + MIRBuilder.buildBr(DefaultMBB); + MachineBasicBlock &CurMBB = MIRBuilder.getMBB(); + CurMBB.addSuccessor(&DefaultMBB); + addMachineCFGPred({OrigBB, DefaultBB}, &CurMBB); + + return true; +} + +bool IRTranslator::translateIndirectBr(const User &U, + MachineIRBuilder &MIRBuilder) { + const IndirectBrInst &BrInst = cast<IndirectBrInst>(U); + + const unsigned Tgt = getOrCreateVReg(*BrInst.getAddress()); + MIRBuilder.buildBrIndirect(Tgt); + + // Link successors. + MachineBasicBlock &CurBB = MIRBuilder.getMBB(); + for (const BasicBlock *Succ : BrInst.successors()) + CurBB.addSuccessor(&getMBB(*Succ)); + + return true; +} + +bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { + const LoadInst &LI = cast<LoadInst>(U); + + auto Flags = LI.isVolatile() ? 
MachineMemOperand::MOVolatile + : MachineMemOperand::MONone; + Flags |= MachineMemOperand::MOLoad; + + if (DL->getTypeStoreSize(LI.getType()) == 0) + return true; + + ArrayRef<unsigned> Regs = getOrCreateVRegs(LI); + ArrayRef<uint64_t> Offsets = *VMap.getOffsets(LI); + unsigned Base = getOrCreateVReg(*LI.getPointerOperand()); + + for (unsigned i = 0; i < Regs.size(); ++i) { + unsigned Addr = 0; + MIRBuilder.materializeGEP(Addr, Base, LLT::scalar(64), Offsets[i] / 8); + + MachinePointerInfo Ptr(LI.getPointerOperand(), Offsets[i] / 8); + unsigned BaseAlign = getMemOpAlignment(LI); + auto MMO = MF->getMachineMemOperand( + Ptr, Flags, (MRI->getType(Regs[i]).getSizeInBits() + 7) / 8, + MinAlign(BaseAlign, Offsets[i] / 8), AAMDNodes(), nullptr, + LI.getSyncScopeID(), LI.getOrdering()); + MIRBuilder.buildLoad(Regs[i], Addr, *MMO); + } + + return true; +} + +bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) { + const StoreInst &SI = cast<StoreInst>(U); + auto Flags = SI.isVolatile() ? MachineMemOperand::MOVolatile + : MachineMemOperand::MONone; + Flags |= MachineMemOperand::MOStore; + + if (DL->getTypeStoreSize(SI.getValueOperand()->getType()) == 0) + return true; + + ArrayRef<unsigned> Vals = getOrCreateVRegs(*SI.getValueOperand()); + ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*SI.getValueOperand()); + unsigned Base = getOrCreateVReg(*SI.getPointerOperand()); + + for (unsigned i = 0; i < Vals.size(); ++i) { + unsigned Addr = 0; + MIRBuilder.materializeGEP(Addr, Base, LLT::scalar(64), Offsets[i] / 8); + + MachinePointerInfo Ptr(SI.getPointerOperand(), Offsets[i] / 8); + unsigned BaseAlign = getMemOpAlignment(SI); + auto MMO = MF->getMachineMemOperand( + Ptr, Flags, (MRI->getType(Vals[i]).getSizeInBits() + 7) / 8, + MinAlign(BaseAlign, Offsets[i] / 8), AAMDNodes(), nullptr, + SI.getSyncScopeID(), SI.getOrdering()); + MIRBuilder.buildStore(Vals[i], Addr, *MMO); + } + return true; +} + +static uint64_t getOffsetFromIndices(const User &U, const DataLayout &DL) { + const Value *Src = U.getOperand(0); + Type *Int32Ty = Type::getInt32Ty(U.getContext()); + + // getIndexedOffsetInType is designed for GEPs, so the first index is the + // usual array element rather than looking into the actual aggregate. 
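+  // For example, 'extractvalue {i32, i64} %agg, 1' is looked up as indices
+  // [0, 1], i.e. the same byte offset as
+  // 'getelementptr {i32, i64}, {i32, i64}* %p, i32 0, i32 1', which the
+  // return below scales to bits.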
+ SmallVector<Value *, 1> Indices; + Indices.push_back(ConstantInt::get(Int32Ty, 0)); + + if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(&U)) { + for (auto Idx : EVI->indices()) + Indices.push_back(ConstantInt::get(Int32Ty, Idx)); + } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(&U)) { + for (auto Idx : IVI->indices()) + Indices.push_back(ConstantInt::get(Int32Ty, Idx)); + } else { + for (unsigned i = 1; i < U.getNumOperands(); ++i) + Indices.push_back(U.getOperand(i)); + } + + return 8 * static_cast<uint64_t>( + DL.getIndexedOffsetInType(Src->getType(), Indices)); +} + +bool IRTranslator::translateExtractValue(const User &U, + MachineIRBuilder &MIRBuilder) { + const Value *Src = U.getOperand(0); + uint64_t Offset = getOffsetFromIndices(U, *DL); + ArrayRef<unsigned> SrcRegs = getOrCreateVRegs(*Src); + ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*Src); + unsigned Idx = std::lower_bound(Offsets.begin(), Offsets.end(), Offset) - + Offsets.begin(); + auto &DstRegs = allocateVRegs(U); + + for (unsigned i = 0; i < DstRegs.size(); ++i) + DstRegs[i] = SrcRegs[Idx++]; + + return true; +} + +bool IRTranslator::translateInsertValue(const User &U, + MachineIRBuilder &MIRBuilder) { + const Value *Src = U.getOperand(0); + uint64_t Offset = getOffsetFromIndices(U, *DL); + auto &DstRegs = allocateVRegs(U); + ArrayRef<uint64_t> DstOffsets = *VMap.getOffsets(U); + ArrayRef<unsigned> SrcRegs = getOrCreateVRegs(*Src); + ArrayRef<unsigned> InsertedRegs = getOrCreateVRegs(*U.getOperand(1)); + auto InsertedIt = InsertedRegs.begin(); + + for (unsigned i = 0; i < DstRegs.size(); ++i) { + if (DstOffsets[i] >= Offset && InsertedIt != InsertedRegs.end()) + DstRegs[i] = *InsertedIt++; + else + DstRegs[i] = SrcRegs[i]; + } + + return true; +} + +bool IRTranslator::translateSelect(const User &U, + MachineIRBuilder &MIRBuilder) { + unsigned Tst = getOrCreateVReg(*U.getOperand(0)); + ArrayRef<unsigned> ResRegs = getOrCreateVRegs(U); + ArrayRef<unsigned> Op0Regs = getOrCreateVRegs(*U.getOperand(1)); + ArrayRef<unsigned> Op1Regs = getOrCreateVRegs(*U.getOperand(2)); + + for (unsigned i = 0; i < ResRegs.size(); ++i) + MIRBuilder.buildSelect(ResRegs[i], Tst, Op0Regs[i], Op1Regs[i]); + + return true; +} + +bool IRTranslator::translateBitCast(const User &U, + MachineIRBuilder &MIRBuilder) { + // If we're bitcasting to the source type, we can reuse the source vreg. + if (getLLTForType(*U.getOperand(0)->getType(), *DL) == + getLLTForType(*U.getType(), *DL)) { + unsigned SrcReg = getOrCreateVReg(*U.getOperand(0)); + auto &Regs = *VMap.getVRegs(U); + // If we already assigned a vreg for this bitcast, we can't change that. + // Emit a copy to satisfy the users we already emitted. + if (!Regs.empty()) + MIRBuilder.buildCopy(Regs[0], SrcReg); + else { + Regs.push_back(SrcReg); + VMap.getOffsets(U)->push_back(0); + } + return true; + } + return translateCast(TargetOpcode::G_BITCAST, U, MIRBuilder); +} + +bool IRTranslator::translateCast(unsigned Opcode, const User &U, + MachineIRBuilder &MIRBuilder) { + unsigned Op = getOrCreateVReg(*U.getOperand(0)); + unsigned Res = getOrCreateVReg(U); + MIRBuilder.buildInstr(Opcode).addDef(Res).addUse(Op); + return true; +} + +bool IRTranslator::translateGetElementPtr(const User &U, + MachineIRBuilder &MIRBuilder) { + // FIXME: support vector GEPs. 
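+  // Scalar GEPs are decomposed into explicit pointer arithmetic below:
+  // constant struct-field and array indices are folded into a running byte
+  // Offset, while each variable index contributes
+  //   base = G_GEP(base, G_MUL(elt_size, sext_or_trunc(idx))).
+  // e.g. 'getelementptr i32, i32* %p, i64 %i' becomes (sketch):
+  //   %off = G_MUL 4, %i
+  //   %res = G_GEP %p, %off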
+ if (U.getType()->isVectorTy()) + return false; + + Value &Op0 = *U.getOperand(0); + unsigned BaseReg = getOrCreateVReg(Op0); + Type *PtrIRTy = Op0.getType(); + LLT PtrTy = getLLTForType(*PtrIRTy, *DL); + Type *OffsetIRTy = DL->getIntPtrType(PtrIRTy); + LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL); + + int64_t Offset = 0; + for (gep_type_iterator GTI = gep_type_begin(&U), E = gep_type_end(&U); + GTI != E; ++GTI) { + const Value *Idx = GTI.getOperand(); + if (StructType *StTy = GTI.getStructTypeOrNull()) { + unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue(); + Offset += DL->getStructLayout(StTy)->getElementOffset(Field); + continue; + } else { + uint64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType()); + + // If this is a scalar constant or a splat vector of constants, + // handle it quickly. + if (const auto *CI = dyn_cast<ConstantInt>(Idx)) { + Offset += ElementSize * CI->getSExtValue(); + continue; + } + + if (Offset != 0) { + unsigned NewBaseReg = MRI->createGenericVirtualRegister(PtrTy); + unsigned OffsetReg = + getOrCreateVReg(*ConstantInt::get(OffsetIRTy, Offset)); + MIRBuilder.buildGEP(NewBaseReg, BaseReg, OffsetReg); + + BaseReg = NewBaseReg; + Offset = 0; + } + + unsigned IdxReg = getOrCreateVReg(*Idx); + if (MRI->getType(IdxReg) != OffsetTy) { + unsigned NewIdxReg = MRI->createGenericVirtualRegister(OffsetTy); + MIRBuilder.buildSExtOrTrunc(NewIdxReg, IdxReg); + IdxReg = NewIdxReg; + } + + // N = N + Idx * ElementSize; + // Avoid doing it for ElementSize of 1. + unsigned GepOffsetReg; + if (ElementSize != 1) { + unsigned ElementSizeReg = + getOrCreateVReg(*ConstantInt::get(OffsetIRTy, ElementSize)); + + GepOffsetReg = MRI->createGenericVirtualRegister(OffsetTy); + MIRBuilder.buildMul(GepOffsetReg, ElementSizeReg, IdxReg); + } else + GepOffsetReg = IdxReg; + + unsigned NewBaseReg = MRI->createGenericVirtualRegister(PtrTy); + MIRBuilder.buildGEP(NewBaseReg, BaseReg, GepOffsetReg); + BaseReg = NewBaseReg; + } + } + + if (Offset != 0) { + unsigned OffsetReg = getOrCreateVReg(*ConstantInt::get(OffsetIRTy, Offset)); + MIRBuilder.buildGEP(getOrCreateVReg(U), BaseReg, OffsetReg); + return true; + } + + MIRBuilder.buildCopy(getOrCreateVReg(U), BaseReg); + return true; +} + +bool IRTranslator::translateMemfunc(const CallInst &CI, + MachineIRBuilder &MIRBuilder, + unsigned ID) { + LLT SizeTy = getLLTForType(*CI.getArgOperand(2)->getType(), *DL); + Type *DstTy = CI.getArgOperand(0)->getType(); + if (cast<PointerType>(DstTy)->getAddressSpace() != 0 || + SizeTy.getSizeInBits() != DL->getPointerSizeInBits(0)) + return false; + + SmallVector<CallLowering::ArgInfo, 8> Args; + for (int i = 0; i < 3; ++i) { + const auto &Arg = CI.getArgOperand(i); + Args.emplace_back(getOrCreateVReg(*Arg), Arg->getType()); + } + + const char *Callee; + switch (ID) { + case Intrinsic::memmove: + case Intrinsic::memcpy: { + Type *SrcTy = CI.getArgOperand(1)->getType(); + if(cast<PointerType>(SrcTy)->getAddressSpace() != 0) + return false; + Callee = ID == Intrinsic::memcpy ? 
"memcpy" : "memmove"; + break; + } + case Intrinsic::memset: + Callee = "memset"; + break; + default: + return false; + } + + return CLI->lowerCall(MIRBuilder, CI.getCallingConv(), + MachineOperand::CreateES(Callee), + CallLowering::ArgInfo(0, CI.getType()), Args); +} + +void IRTranslator::getStackGuard(unsigned DstReg, + MachineIRBuilder &MIRBuilder) { + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + MRI->setRegClass(DstReg, TRI->getPointerRegClass(*MF)); + auto MIB = MIRBuilder.buildInstr(TargetOpcode::LOAD_STACK_GUARD); + MIB.addDef(DstReg); + + auto &TLI = *MF->getSubtarget().getTargetLowering(); + Value *Global = TLI.getSDagStackGuard(*MF->getFunction().getParent()); + if (!Global) + return; + + MachinePointerInfo MPInfo(Global); + MachineInstr::mmo_iterator MemRefs = MF->allocateMemRefsArray(1); + auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant | + MachineMemOperand::MODereferenceable; + *MemRefs = + MF->getMachineMemOperand(MPInfo, Flags, DL->getPointerSizeInBits() / 8, + DL->getPointerABIAlignment(0)); + MIB.setMemRefs(MemRefs, MemRefs + 1); +} + +bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op, + MachineIRBuilder &MIRBuilder) { + ArrayRef<unsigned> ResRegs = getOrCreateVRegs(CI); + auto MIB = MIRBuilder.buildInstr(Op) + .addDef(ResRegs[0]) + .addDef(ResRegs[1]) + .addUse(getOrCreateVReg(*CI.getOperand(0))) + .addUse(getOrCreateVReg(*CI.getOperand(1))); + + if (Op == TargetOpcode::G_UADDE || Op == TargetOpcode::G_USUBE) { + unsigned Zero = getOrCreateVReg( + *Constant::getNullValue(Type::getInt1Ty(CI.getContext()))); + MIB.addUse(Zero); + } + + return true; +} + +bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, + MachineIRBuilder &MIRBuilder) { + switch (ID) { + default: + break; + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + // Stack coloring is not enabled in O0 (which we care about now) so we can + // drop these. Make sure someone notices when we start compiling at higher + // opts though. + if (MF->getTarget().getOptLevel() != CodeGenOpt::None) + return false; + return true; + case Intrinsic::dbg_declare: { + const DbgDeclareInst &DI = cast<DbgDeclareInst>(CI); + assert(DI.getVariable() && "Missing variable"); + + const Value *Address = DI.getAddress(); + if (!Address || isa<UndefValue>(Address)) { + LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); + return true; + } + + assert(DI.getVariable()->isValidLocationForIntrinsic( + MIRBuilder.getDebugLoc()) && + "Expected inlined-at fields to agree"); + auto AI = dyn_cast<AllocaInst>(Address); + if (AI && AI->isStaticAlloca()) { + // Static allocas are tracked at the MF level, no need for DBG_VALUE + // instructions (in fact, they get ignored if they *do* exist). + MF->setVariableDbgInfo(DI.getVariable(), DI.getExpression(), + getOrCreateFrameIndex(*AI), DI.getDebugLoc()); + } else + MIRBuilder.buildDirectDbgValue(getOrCreateVReg(*Address), + DI.getVariable(), DI.getExpression()); + return true; + } + case Intrinsic::vaend: + // No target I know of cares about va_end. Certainly no in-tree target + // does. Simplest intrinsic ever! 
+ return true; + case Intrinsic::vastart: { + auto &TLI = *MF->getSubtarget().getTargetLowering(); + Value *Ptr = CI.getArgOperand(0); + unsigned ListSize = TLI.getVaListSizeInBits(*DL) / 8; + + MIRBuilder.buildInstr(TargetOpcode::G_VASTART) + .addUse(getOrCreateVReg(*Ptr)) + .addMemOperand(MF->getMachineMemOperand( + MachinePointerInfo(Ptr), MachineMemOperand::MOStore, ListSize, 0)); + return true; + } + case Intrinsic::dbg_value: { + // This form of DBG_VALUE is target-independent. + const DbgValueInst &DI = cast<DbgValueInst>(CI); + const Value *V = DI.getValue(); + assert(DI.getVariable()->isValidLocationForIntrinsic( + MIRBuilder.getDebugLoc()) && + "Expected inlined-at fields to agree"); + if (!V) { + // Currently the optimizer can produce this; insert an undef to + // help debugging. Probably the optimizer should not do this. + MIRBuilder.buildIndirectDbgValue(0, DI.getVariable(), DI.getExpression()); + } else if (const auto *CI = dyn_cast<Constant>(V)) { + MIRBuilder.buildConstDbgValue(*CI, DI.getVariable(), DI.getExpression()); + } else { + unsigned Reg = getOrCreateVReg(*V); + // FIXME: This does not handle register-indirect values at offset 0. The + // direct/indirect thing shouldn't really be handled by something as + // implicit as reg+noreg vs reg+imm in the first palce, but it seems + // pretty baked in right now. + MIRBuilder.buildDirectDbgValue(Reg, DI.getVariable(), DI.getExpression()); + } + return true; + } + case Intrinsic::uadd_with_overflow: + return translateOverflowIntrinsic(CI, TargetOpcode::G_UADDE, MIRBuilder); + case Intrinsic::sadd_with_overflow: + return translateOverflowIntrinsic(CI, TargetOpcode::G_SADDO, MIRBuilder); + case Intrinsic::usub_with_overflow: + return translateOverflowIntrinsic(CI, TargetOpcode::G_USUBE, MIRBuilder); + case Intrinsic::ssub_with_overflow: + return translateOverflowIntrinsic(CI, TargetOpcode::G_SSUBO, MIRBuilder); + case Intrinsic::umul_with_overflow: + return translateOverflowIntrinsic(CI, TargetOpcode::G_UMULO, MIRBuilder); + case Intrinsic::smul_with_overflow: + return translateOverflowIntrinsic(CI, TargetOpcode::G_SMULO, MIRBuilder); + case Intrinsic::pow: + MIRBuilder.buildInstr(TargetOpcode::G_FPOW) + .addDef(getOrCreateVReg(CI)) + .addUse(getOrCreateVReg(*CI.getArgOperand(0))) + .addUse(getOrCreateVReg(*CI.getArgOperand(1))); + return true; + case Intrinsic::exp: + MIRBuilder.buildInstr(TargetOpcode::G_FEXP) + .addDef(getOrCreateVReg(CI)) + .addUse(getOrCreateVReg(*CI.getArgOperand(0))); + return true; + case Intrinsic::exp2: + MIRBuilder.buildInstr(TargetOpcode::G_FEXP2) + .addDef(getOrCreateVReg(CI)) + .addUse(getOrCreateVReg(*CI.getArgOperand(0))); + return true; + case Intrinsic::log: + MIRBuilder.buildInstr(TargetOpcode::G_FLOG) + .addDef(getOrCreateVReg(CI)) + .addUse(getOrCreateVReg(*CI.getArgOperand(0))); + return true; + case Intrinsic::log2: + MIRBuilder.buildInstr(TargetOpcode::G_FLOG2) + .addDef(getOrCreateVReg(CI)) + .addUse(getOrCreateVReg(*CI.getArgOperand(0))); + return true; + case Intrinsic::fabs: + MIRBuilder.buildInstr(TargetOpcode::G_FABS) + .addDef(getOrCreateVReg(CI)) + .addUse(getOrCreateVReg(*CI.getArgOperand(0))); + return true; + case Intrinsic::fma: + MIRBuilder.buildInstr(TargetOpcode::G_FMA) + .addDef(getOrCreateVReg(CI)) + .addUse(getOrCreateVReg(*CI.getArgOperand(0))) + .addUse(getOrCreateVReg(*CI.getArgOperand(1))) + .addUse(getOrCreateVReg(*CI.getArgOperand(2))); + return true; + case Intrinsic::fmuladd: { + const TargetMachine &TM = MF->getTarget(); + const TargetLowering &TLI = 
*MF->getSubtarget().getTargetLowering(); + unsigned Dst = getOrCreateVReg(CI); + unsigned Op0 = getOrCreateVReg(*CI.getArgOperand(0)); + unsigned Op1 = getOrCreateVReg(*CI.getArgOperand(1)); + unsigned Op2 = getOrCreateVReg(*CI.getArgOperand(2)); + if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && + TLI.isFMAFasterThanFMulAndFAdd(TLI.getValueType(*DL, CI.getType()))) { + // TODO: Revisit this to see if we should move this part of the + // lowering to the combiner. + MIRBuilder.buildInstr(TargetOpcode::G_FMA, Dst, Op0, Op1, Op2); + } else { + LLT Ty = getLLTForType(*CI.getType(), *DL); + auto FMul = MIRBuilder.buildInstr(TargetOpcode::G_FMUL, Ty, Op0, Op1); + MIRBuilder.buildInstr(TargetOpcode::G_FADD, Dst, FMul, Op2); + } + return true; + } + case Intrinsic::memcpy: + case Intrinsic::memmove: + case Intrinsic::memset: + return translateMemfunc(CI, MIRBuilder, ID); + case Intrinsic::eh_typeid_for: { + GlobalValue *GV = ExtractTypeInfo(CI.getArgOperand(0)); + unsigned Reg = getOrCreateVReg(CI); + unsigned TypeID = MF->getTypeIDFor(GV); + MIRBuilder.buildConstant(Reg, TypeID); + return true; + } + case Intrinsic::objectsize: { + // If we don't know by now, we're never going to know. + const ConstantInt *Min = cast<ConstantInt>(CI.getArgOperand(1)); + + MIRBuilder.buildConstant(getOrCreateVReg(CI), Min->isZero() ? -1ULL : 0); + return true; + } + case Intrinsic::stackguard: + getStackGuard(getOrCreateVReg(CI), MIRBuilder); + return true; + case Intrinsic::stackprotector: { + LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL); + unsigned GuardVal = MRI->createGenericVirtualRegister(PtrTy); + getStackGuard(GuardVal, MIRBuilder); + + AllocaInst *Slot = cast<AllocaInst>(CI.getArgOperand(1)); + MIRBuilder.buildStore( + GuardVal, getOrCreateVReg(*Slot), + *MF->getMachineMemOperand( + MachinePointerInfo::getFixedStack(*MF, + getOrCreateFrameIndex(*Slot)), + MachineMemOperand::MOStore | MachineMemOperand::MOVolatile, + PtrTy.getSizeInBits() / 8, 8)); + return true; + } + } + return false; +} + +bool IRTranslator::translateInlineAsm(const CallInst &CI, + MachineIRBuilder &MIRBuilder) { + const InlineAsm &IA = cast<InlineAsm>(*CI.getCalledValue()); + if (!IA.getConstraintString().empty()) + return false; + + unsigned ExtraInfo = 0; + if (IA.hasSideEffects()) + ExtraInfo |= InlineAsm::Extra_HasSideEffects; + if (IA.getDialect() == InlineAsm::AD_Intel) + ExtraInfo |= InlineAsm::Extra_AsmDialect; + + MIRBuilder.buildInstr(TargetOpcode::INLINEASM) + .addExternalSymbol(IA.getAsmString().c_str()) + .addImm(ExtraInfo); + + return true; +} + +unsigned IRTranslator::packRegs(const Value &V, + MachineIRBuilder &MIRBuilder) { + ArrayRef<unsigned> Regs = getOrCreateVRegs(V); + ArrayRef<uint64_t> Offsets = *VMap.getOffsets(V); + LLT BigTy = getLLTForType(*V.getType(), *DL); + + if (Regs.size() == 1) + return Regs[0]; + + unsigned Dst = MRI->createGenericVirtualRegister(BigTy); + MIRBuilder.buildUndef(Dst); + for (unsigned i = 0; i < Regs.size(); ++i) { + unsigned NewDst = MRI->createGenericVirtualRegister(BigTy); + MIRBuilder.buildInsert(NewDst, Dst, Regs[i], Offsets[i]); + Dst = NewDst; + } + return Dst; +} + +void IRTranslator::unpackRegs(const Value &V, unsigned Src, + MachineIRBuilder &MIRBuilder) { + ArrayRef<unsigned> Regs = getOrCreateVRegs(V); + ArrayRef<uint64_t> Offsets = *VMap.getOffsets(V); + + for (unsigned i = 0; i < Regs.size(); ++i) + MIRBuilder.buildExtract(Regs[i], Src, Offsets[i]); +} + +bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { + const 
CallInst &CI = cast<CallInst>(U); + auto TII = MF->getTarget().getIntrinsicInfo(); + const Function *F = CI.getCalledFunction(); + + // FIXME: support Windows dllimport function calls. + if (F && F->hasDLLImportStorageClass()) + return false; + + if (CI.isInlineAsm()) + return translateInlineAsm(CI, MIRBuilder); + + Intrinsic::ID ID = Intrinsic::not_intrinsic; + if (F && F->isIntrinsic()) { + ID = F->getIntrinsicID(); + if (TII && ID == Intrinsic::not_intrinsic) + ID = static_cast<Intrinsic::ID>(TII->getIntrinsicID(F)); + } + + bool IsSplitType = valueIsSplit(CI); + if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic) { + unsigned Res = IsSplitType ? MRI->createGenericVirtualRegister( + getLLTForType(*CI.getType(), *DL)) + : getOrCreateVReg(CI); + + SmallVector<unsigned, 8> Args; + for (auto &Arg: CI.arg_operands()) + Args.push_back(packRegs(*Arg, MIRBuilder)); + + MF->getFrameInfo().setHasCalls(true); + bool Success = CLI->lowerCall(MIRBuilder, &CI, Res, Args, [&]() { + return getOrCreateVReg(*CI.getCalledValue()); + }); + + if (IsSplitType) + unpackRegs(CI, Res, MIRBuilder); + return Success; + } + + assert(ID != Intrinsic::not_intrinsic && "unknown intrinsic"); + + if (translateKnownIntrinsic(CI, ID, MIRBuilder)) + return true; + + unsigned Res = 0; + if (!CI.getType()->isVoidTy()) { + if (IsSplitType) + Res = + MRI->createGenericVirtualRegister(getLLTForType(*CI.getType(), *DL)); + else + Res = getOrCreateVReg(CI); + } + MachineInstrBuilder MIB = + MIRBuilder.buildIntrinsic(ID, Res, !CI.doesNotAccessMemory()); + + for (auto &Arg : CI.arg_operands()) { + // Some intrinsics take metadata parameters. Reject them. + if (isa<MetadataAsValue>(Arg)) + return false; + MIB.addUse(packRegs(*Arg, MIRBuilder)); + } + + if (IsSplitType) + unpackRegs(CI, Res, MIRBuilder); + + // Add a MachineMemOperand if it is a target mem intrinsic. + const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering(); + TargetLowering::IntrinsicInfo Info; + // TODO: Add a GlobalISel version of getTgtMemIntrinsic. + if (TLI.getTgtMemIntrinsic(Info, CI, *MF, ID)) { + uint64_t Size = Info.memVT.getStoreSize(); + MIB.addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Info.ptrVal), + Info.flags, Size, Info.align)); + } + + return true; +} + +bool IRTranslator::translateInvoke(const User &U, + MachineIRBuilder &MIRBuilder) { + const InvokeInst &I = cast<InvokeInst>(U); + MCContext &Context = MF->getContext(); + + const BasicBlock *ReturnBB = I.getSuccessor(0); + const BasicBlock *EHPadBB = I.getSuccessor(1); + + const Value *Callee = I.getCalledValue(); + const Function *Fn = dyn_cast<Function>(Callee); + if (isa<InlineAsm>(Callee)) + return false; + + // FIXME: support invoking patchpoint and statepoint intrinsics. + if (Fn && Fn->isIntrinsic()) + return false; + + // FIXME: support whatever these are. + if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) + return false; + + // FIXME: support Windows exception handling. + if (!isa<LandingPadInst>(EHPadBB->front())) + return false; + + // Emit the actual call, bracketed by EH_LABELs so that the MF knows about + // the region covered by the try. 
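+  // Sketch of the result for 'invoke ... to label %ret unwind label %lpad':
+  //   EH_LABEL <begin>
+  //   ...lowered call...
+  //   EH_LABEL <end>
+  //   G_BR %bb.ret
+  // with the (begin, end) range registered against %lpad via addInvoke().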
+ MCSymbol *BeginSymbol = Context.createTempSymbol(); + MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol); + + unsigned Res = + MRI->createGenericVirtualRegister(getLLTForType(*I.getType(), *DL)); + SmallVector<unsigned, 8> Args; + for (auto &Arg: I.arg_operands()) + Args.push_back(packRegs(*Arg, MIRBuilder)); + + if (!CLI->lowerCall(MIRBuilder, &I, Res, Args, + [&]() { return getOrCreateVReg(*I.getCalledValue()); })) + return false; + + unpackRegs(I, Res, MIRBuilder); + + MCSymbol *EndSymbol = Context.createTempSymbol(); + MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol); + + // FIXME: track probabilities. + MachineBasicBlock &EHPadMBB = getMBB(*EHPadBB), + &ReturnMBB = getMBB(*ReturnBB); + MF->addInvoke(&EHPadMBB, BeginSymbol, EndSymbol); + MIRBuilder.getMBB().addSuccessor(&ReturnMBB); + MIRBuilder.getMBB().addSuccessor(&EHPadMBB); + MIRBuilder.buildBr(ReturnMBB); + + return true; +} + +bool IRTranslator::translateLandingPad(const User &U, + MachineIRBuilder &MIRBuilder) { + const LandingPadInst &LP = cast<LandingPadInst>(U); + + MachineBasicBlock &MBB = MIRBuilder.getMBB(); + addLandingPadInfo(LP, MBB); + + MBB.setIsEHPad(); + + // If there aren't registers to copy the values into (e.g., during SjLj + // exceptions), then don't bother. + auto &TLI = *MF->getSubtarget().getTargetLowering(); + const Constant *PersonalityFn = MF->getFunction().getPersonalityFn(); + if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 && + TLI.getExceptionSelectorRegister(PersonalityFn) == 0) + return true; + + // If landingpad's return type is token type, we don't create DAG nodes + // for its exception pointer and selector value. The extraction of exception + // pointer or selector value from token type landingpads is not currently + // supported. + if (LP.getType()->isTokenTy()) + return true; + + // Add a label to mark the beginning of the landing pad. Deletion of the + // landing pad can thus be detected via the MachineModuleInfo. + MIRBuilder.buildInstr(TargetOpcode::EH_LABEL) + .addSym(MF->addLandingPad(&MBB)); + + LLT Ty = getLLTForType(*LP.getType(), *DL); + unsigned Undef = MRI->createGenericVirtualRegister(Ty); + MIRBuilder.buildUndef(Undef); + + SmallVector<LLT, 2> Tys; + for (Type *Ty : cast<StructType>(LP.getType())->elements()) + Tys.push_back(getLLTForType(*Ty, *DL)); + assert(Tys.size() == 2 && "Only two-valued landingpads are supported"); + + // Mark exception register as live in. + unsigned ExceptionReg = TLI.getExceptionPointerRegister(PersonalityFn); + if (!ExceptionReg) + return false; + + MBB.addLiveIn(ExceptionReg); + ArrayRef<unsigned> ResRegs = getOrCreateVRegs(LP); + MIRBuilder.buildCopy(ResRegs[0], ExceptionReg); + + unsigned SelectorReg = TLI.getExceptionSelectorRegister(PersonalityFn); + if (!SelectorReg) + return false; + + MBB.addLiveIn(SelectorReg); + unsigned PtrVReg = MRI->createGenericVirtualRegister(Tys[0]); + MIRBuilder.buildCopy(PtrVReg, SelectorReg); + MIRBuilder.buildCast(ResRegs[1], PtrVReg); + + return true; +} + +bool IRTranslator::translateAlloca(const User &U, + MachineIRBuilder &MIRBuilder) { + auto &AI = cast<AllocaInst>(U); + + if (AI.isSwiftError()) + return false; + + if (AI.isStaticAlloca()) { + unsigned Res = getOrCreateVReg(AI); + int FI = getOrCreateFrameIndex(AI); + MIRBuilder.buildFrameIndex(Res, FI); + return true; + } + + // FIXME: support stack probing for Windows. + if (MF->getTarget().getTargetTriple().isOSWindows()) + return false; + + // Now we're in the harder dynamic case. 
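+  // i.e. a variable-sized alloca. It expands into explicit stack-pointer
+  // arithmetic (sketch; TySize is negated so the G_GEP moves SP downward):
+  //   %size = G_MUL %numelts, -sizeof(T)
+  //   %sp'  = G_GEP %sp, %size
+  //   %sp'  = G_PTR_MASK %sp', log2(Align)  ; only when realignment is needed
+  // after which both the stack pointer and the alloca's vreg become %sp'.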
+ Type *Ty = AI.getAllocatedType(); + unsigned Align = + std::max((unsigned)DL->getPrefTypeAlignment(Ty), AI.getAlignment()); + + unsigned NumElts = getOrCreateVReg(*AI.getArraySize()); + + Type *IntPtrIRTy = DL->getIntPtrType(AI.getType()); + LLT IntPtrTy = getLLTForType(*IntPtrIRTy, *DL); + if (MRI->getType(NumElts) != IntPtrTy) { + unsigned ExtElts = MRI->createGenericVirtualRegister(IntPtrTy); + MIRBuilder.buildZExtOrTrunc(ExtElts, NumElts); + NumElts = ExtElts; + } + + unsigned AllocSize = MRI->createGenericVirtualRegister(IntPtrTy); + unsigned TySize = + getOrCreateVReg(*ConstantInt::get(IntPtrIRTy, -DL->getTypeAllocSize(Ty))); + MIRBuilder.buildMul(AllocSize, NumElts, TySize); + + LLT PtrTy = getLLTForType(*AI.getType(), *DL); + auto &TLI = *MF->getSubtarget().getTargetLowering(); + unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore(); + + unsigned SPTmp = MRI->createGenericVirtualRegister(PtrTy); + MIRBuilder.buildCopy(SPTmp, SPReg); + + unsigned AllocTmp = MRI->createGenericVirtualRegister(PtrTy); + MIRBuilder.buildGEP(AllocTmp, SPTmp, AllocSize); + + // Handle alignment. We have to realign if the allocation granule was smaller + // than stack alignment, or the specific alloca requires more than stack + // alignment. + unsigned StackAlign = + MF->getSubtarget().getFrameLowering()->getStackAlignment(); + Align = std::max(Align, StackAlign); + if (Align > StackAlign || DL->getTypeAllocSize(Ty) % StackAlign != 0) { + // Round the size of the allocation up to the stack alignment size + // by add SA-1 to the size. This doesn't overflow because we're computing + // an address inside an alloca. + unsigned AlignedAlloc = MRI->createGenericVirtualRegister(PtrTy); + MIRBuilder.buildPtrMask(AlignedAlloc, AllocTmp, Log2_32(Align)); + AllocTmp = AlignedAlloc; + } + + MIRBuilder.buildCopy(SPReg, AllocTmp); + MIRBuilder.buildCopy(getOrCreateVReg(AI), AllocTmp); + + MF->getFrameInfo().CreateVariableSizedObject(Align ? Align : 1, &AI); + assert(MF->getFrameInfo().hasVarSizedObjects()); + return true; +} + +bool IRTranslator::translateVAArg(const User &U, MachineIRBuilder &MIRBuilder) { + // FIXME: We may need more info about the type. Because of how LLT works, + // we're completely discarding the i64/double distinction here (amongst + // others). Fortunately the ABIs I know of where that matters don't use va_arg + // anyway but that's not guaranteed. + MIRBuilder.buildInstr(TargetOpcode::G_VAARG) + .addDef(getOrCreateVReg(U)) + .addUse(getOrCreateVReg(*U.getOperand(0))) + .addImm(DL->getABITypeAlignment(U.getType())); + return true; +} + +bool IRTranslator::translateInsertElement(const User &U, + MachineIRBuilder &MIRBuilder) { + // If it is a <1 x Ty> vector, use the scalar as it is + // not a legal vector type in LLT. + if (U.getType()->getVectorNumElements() == 1) { + unsigned Elt = getOrCreateVReg(*U.getOperand(1)); + auto &Regs = *VMap.getVRegs(U); + if (Regs.empty()) { + Regs.push_back(Elt); + VMap.getOffsets(U)->push_back(0); + } else { + MIRBuilder.buildCopy(Regs[0], Elt); + } + return true; + } + + unsigned Res = getOrCreateVReg(U); + unsigned Val = getOrCreateVReg(*U.getOperand(0)); + unsigned Elt = getOrCreateVReg(*U.getOperand(1)); + unsigned Idx = getOrCreateVReg(*U.getOperand(2)); + MIRBuilder.buildInsertVectorElement(Res, Val, Elt, Idx); + return true; +} + +bool IRTranslator::translateExtractElement(const User &U, + MachineIRBuilder &MIRBuilder) { + // If it is a <1 x Ty> vector, use the scalar as it is + // not a legal vector type in LLT. 
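+  // e.g. 'extractelement <1 x i32> %v, i32 0' simply reuses (or copies)
+  // the vreg that already holds %v's single scalar element.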
+ if (U.getOperand(0)->getType()->getVectorNumElements() == 1) { + unsigned Elt = getOrCreateVReg(*U.getOperand(0)); + auto &Regs = *VMap.getVRegs(U); + if (Regs.empty()) { + Regs.push_back(Elt); + VMap.getOffsets(U)->push_back(0); + } else { + MIRBuilder.buildCopy(Regs[0], Elt); + } + return true; + } + unsigned Res = getOrCreateVReg(U); + unsigned Val = getOrCreateVReg(*U.getOperand(0)); + unsigned Idx = getOrCreateVReg(*U.getOperand(1)); + MIRBuilder.buildExtractVectorElement(Res, Val, Idx); + return true; +} + +bool IRTranslator::translateShuffleVector(const User &U, + MachineIRBuilder &MIRBuilder) { + MIRBuilder.buildInstr(TargetOpcode::G_SHUFFLE_VECTOR) + .addDef(getOrCreateVReg(U)) + .addUse(getOrCreateVReg(*U.getOperand(0))) + .addUse(getOrCreateVReg(*U.getOperand(1))) + .addUse(getOrCreateVReg(*U.getOperand(2))); + return true; +} + +bool IRTranslator::translatePHI(const User &U, MachineIRBuilder &MIRBuilder) { + const PHINode &PI = cast<PHINode>(U); + + SmallVector<MachineInstr *, 4> Insts; + for (auto Reg : getOrCreateVRegs(PI)) { + auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_PHI, Reg); + Insts.push_back(MIB.getInstr()); + } + + PendingPHIs.emplace_back(&PI, std::move(Insts)); + return true; +} + +bool IRTranslator::translateAtomicCmpXchg(const User &U, + MachineIRBuilder &MIRBuilder) { + const AtomicCmpXchgInst &I = cast<AtomicCmpXchgInst>(U); + + if (I.isWeak()) + return false; + + auto Flags = I.isVolatile() ? MachineMemOperand::MOVolatile + : MachineMemOperand::MONone; + Flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore; + + Type *ResType = I.getType(); + Type *ValType = ResType->Type::getStructElementType(0); + + auto Res = getOrCreateVRegs(I); + unsigned OldValRes = Res[0]; + unsigned SuccessRes = Res[1]; + unsigned Addr = getOrCreateVReg(*I.getPointerOperand()); + unsigned Cmp = getOrCreateVReg(*I.getCompareOperand()); + unsigned NewVal = getOrCreateVReg(*I.getNewValOperand()); + + MIRBuilder.buildAtomicCmpXchgWithSuccess( + OldValRes, SuccessRes, Addr, Cmp, NewVal, + *MF->getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), + Flags, DL->getTypeStoreSize(ValType), + getMemOpAlignment(I), AAMDNodes(), nullptr, + I.getSyncScopeID(), I.getSuccessOrdering(), + I.getFailureOrdering())); + return true; +} + +bool IRTranslator::translateAtomicRMW(const User &U, + MachineIRBuilder &MIRBuilder) { + const AtomicRMWInst &I = cast<AtomicRMWInst>(U); + + auto Flags = I.isVolatile() ? 
MachineMemOperand::MOVolatile + : MachineMemOperand::MONone; + Flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore; + + Type *ResType = I.getType(); + + unsigned Res = getOrCreateVReg(I); + unsigned Addr = getOrCreateVReg(*I.getPointerOperand()); + unsigned Val = getOrCreateVReg(*I.getValOperand()); + + unsigned Opcode = 0; + switch (I.getOperation()) { + default: + llvm_unreachable("Unknown atomicrmw op"); + return false; + case AtomicRMWInst::Xchg: + Opcode = TargetOpcode::G_ATOMICRMW_XCHG; + break; + case AtomicRMWInst::Add: + Opcode = TargetOpcode::G_ATOMICRMW_ADD; + break; + case AtomicRMWInst::Sub: + Opcode = TargetOpcode::G_ATOMICRMW_SUB; + break; + case AtomicRMWInst::And: + Opcode = TargetOpcode::G_ATOMICRMW_AND; + break; + case AtomicRMWInst::Nand: + Opcode = TargetOpcode::G_ATOMICRMW_NAND; + break; + case AtomicRMWInst::Or: + Opcode = TargetOpcode::G_ATOMICRMW_OR; + break; + case AtomicRMWInst::Xor: + Opcode = TargetOpcode::G_ATOMICRMW_XOR; + break; + case AtomicRMWInst::Max: + Opcode = TargetOpcode::G_ATOMICRMW_MAX; + break; + case AtomicRMWInst::Min: + Opcode = TargetOpcode::G_ATOMICRMW_MIN; + break; + case AtomicRMWInst::UMax: + Opcode = TargetOpcode::G_ATOMICRMW_UMAX; + break; + case AtomicRMWInst::UMin: + Opcode = TargetOpcode::G_ATOMICRMW_UMIN; + break; + } + + MIRBuilder.buildAtomicRMW( + Opcode, Res, Addr, Val, + *MF->getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), + Flags, DL->getTypeStoreSize(ResType), + getMemOpAlignment(I), AAMDNodes(), nullptr, + I.getSyncScopeID(), I.getOrdering())); + return true; +} + +void IRTranslator::finishPendingPhis() { + for (auto &Phi : PendingPHIs) { + const PHINode *PI = Phi.first; + ArrayRef<MachineInstr *> ComponentPHIs = Phi.second; + + // All MachineBasicBlocks exist, add them to the PHI. We assume IRTranslator + // won't create extra control flow here, otherwise we need to find the + // dominating predecessor here (or perhaps force the weirder IRTranslators + // to provide a simple boundary). 
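+    // Sketch (vreg names hypothetical): an IR phi over a split aggregate,
+    //   %p = phi {i64, i64} [ %a, %bb1 ], [ %b, %bb2 ]
+    // was translated into one empty G_PHI per component vreg; the loop below
+    // appends a (value-vreg, predecessor-MBB) pair to each, giving:
+    //   %p0:_(s64) = G_PHI %a0(s64), %bb.1, %b0(s64), %bb.2
+    //   %p1:_(s64) = G_PHI %a1(s64), %bb.1, %b1(s64), %bb.2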
+ SmallSet<const BasicBlock *, 4> HandledPreds; + + for (unsigned i = 0; i < PI->getNumIncomingValues(); ++i) { + auto IRPred = PI->getIncomingBlock(i); + if (HandledPreds.count(IRPred)) + continue; + + HandledPreds.insert(IRPred); + ArrayRef<unsigned> ValRegs = getOrCreateVRegs(*PI->getIncomingValue(i)); + for (auto Pred : getMachinePredBBs({IRPred, PI->getParent()})) { + assert(Pred->isSuccessor(ComponentPHIs[0]->getParent()) && + "incorrect CFG at MachineBasicBlock level"); + for (unsigned j = 0; j < ValRegs.size(); ++j) { + MachineInstrBuilder MIB(*MF, ComponentPHIs[j]); + MIB.addUse(ValRegs[j]); + MIB.addMBB(Pred); + } + } + } + } +} + +bool IRTranslator::valueIsSplit(const Value &V, + SmallVectorImpl<uint64_t> *Offsets) { + SmallVector<LLT, 4> SplitTys; + computeValueLLTs(*DL, *V.getType(), SplitTys, Offsets); + return SplitTys.size() > 1; +} + +bool IRTranslator::translate(const Instruction &Inst) { + CurBuilder.setDebugLoc(Inst.getDebugLoc()); + switch(Inst.getOpcode()) { +#define HANDLE_INST(NUM, OPCODE, CLASS) \ + case Instruction::OPCODE: return translate##OPCODE(Inst, CurBuilder); +#include "llvm/IR/Instruction.def" + default: + return false; + } +} + +bool IRTranslator::translate(const Constant &C, unsigned Reg) { + if (auto CI = dyn_cast<ConstantInt>(&C)) + EntryBuilder.buildConstant(Reg, *CI); + else if (auto CF = dyn_cast<ConstantFP>(&C)) + EntryBuilder.buildFConstant(Reg, *CF); + else if (isa<UndefValue>(C)) + EntryBuilder.buildUndef(Reg); + else if (isa<ConstantPointerNull>(C)) { + // As we are trying to build a constant val of 0 into a pointer, + // insert a cast to make them correct with respect to types. + unsigned NullSize = DL->getTypeSizeInBits(C.getType()); + auto *ZeroTy = Type::getIntNTy(C.getContext(), NullSize); + auto *ZeroVal = ConstantInt::get(ZeroTy, 0); + unsigned ZeroReg = getOrCreateVReg(*ZeroVal); + EntryBuilder.buildCast(Reg, ZeroReg); + } else if (auto GV = dyn_cast<GlobalValue>(&C)) + EntryBuilder.buildGlobalValue(Reg, GV); + else if (auto CAZ = dyn_cast<ConstantAggregateZero>(&C)) { + if (!CAZ->getType()->isVectorTy()) + return false; + // Return the scalar if it is a <1 x Ty> vector. + if (CAZ->getNumElements() == 1) + return translate(*CAZ->getElementValue(0u), Reg); + std::vector<unsigned> Ops; + for (unsigned i = 0; i < CAZ->getNumElements(); ++i) { + Constant &Elt = *CAZ->getElementValue(i); + Ops.push_back(getOrCreateVReg(Elt)); + } + EntryBuilder.buildMerge(Reg, Ops); + } else if (auto CV = dyn_cast<ConstantDataVector>(&C)) { + // Return the scalar if it is a <1 x Ty> vector. 
+ if (CV->getNumElements() == 1) + return translate(*CV->getElementAsConstant(0), Reg); + std::vector<unsigned> Ops; + for (unsigned i = 0; i < CV->getNumElements(); ++i) { + Constant &Elt = *CV->getElementAsConstant(i); + Ops.push_back(getOrCreateVReg(Elt)); + } + EntryBuilder.buildMerge(Reg, Ops); + } else if (auto CE = dyn_cast<ConstantExpr>(&C)) { + switch(CE->getOpcode()) { +#define HANDLE_INST(NUM, OPCODE, CLASS) \ + case Instruction::OPCODE: return translate##OPCODE(*CE, EntryBuilder); +#include "llvm/IR/Instruction.def" + default: + return false; + } + } else if (auto CV = dyn_cast<ConstantVector>(&C)) { + if (CV->getNumOperands() == 1) + return translate(*CV->getOperand(0), Reg); + SmallVector<unsigned, 4> Ops; + for (unsigned i = 0; i < CV->getNumOperands(); ++i) { + Ops.push_back(getOrCreateVReg(*CV->getOperand(i))); + } + EntryBuilder.buildMerge(Reg, Ops); + } else + return false; + + return true; +} + +void IRTranslator::finalizeFunction() { + // Release the memory used by the different maps we + // needed during the translation. + PendingPHIs.clear(); + VMap.reset(); + FrameIndices.clear(); + MachinePreds.clear(); + // MachineIRBuilder::DebugLoc can outlive the DILocation it holds. Clear it + // to avoid accessing free’d memory (in runOnMachineFunction) and to avoid + // destroying it twice (in ~IRTranslator() and ~LLVMContext()) + EntryBuilder = MachineIRBuilder(); + CurBuilder = MachineIRBuilder(); +} + +bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { + MF = &CurMF; + const Function &F = MF->getFunction(); + if (F.empty()) + return false; + CLI = MF->getSubtarget().getCallLowering(); + CurBuilder.setMF(*MF); + EntryBuilder.setMF(*MF); + MRI = &MF->getRegInfo(); + DL = &F.getParent()->getDataLayout(); + TPC = &getAnalysis<TargetPassConfig>(); + ORE = llvm::make_unique<OptimizationRemarkEmitter>(&F); + + assert(PendingPHIs.empty() && "stale PHIs"); + + if (!DL->isLittleEndian()) { + // Currently we don't properly handle big endian code. + OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure", + F.getSubprogram(), &F.getEntryBlock()); + R << "unable to translate in big endian mode"; + reportTranslationError(*MF, *TPC, *ORE, R); + } + + // Release the per-function state when we return, whether we succeeded or not. + auto FinalizeOnReturn = make_scope_exit([this]() { finalizeFunction(); }); + + // Setup a separate basic-block for the arguments and constants + MachineBasicBlock *EntryBB = MF->CreateMachineBasicBlock(); + MF->push_back(EntryBB); + EntryBuilder.setMBB(*EntryBB); + + // Create all blocks, in IR order, to preserve the layout. + for (const BasicBlock &BB: F) { + auto *&MBB = BBToMBB[&BB]; + + MBB = MF->CreateMachineBasicBlock(&BB); + MF->push_back(MBB); + + if (BB.hasAddressTaken()) + MBB->setHasAddressTaken(); + } + + // Make our arguments/constants entry block fallthrough to the IR entry block. + EntryBB->addSuccessor(&getMBB(F.front())); + + // Lower the actual args into this basic block. + SmallVector<unsigned, 8> VRegArgs; + for (const Argument &Arg: F.args()) { + if (DL->getTypeStoreSize(Arg.getType()) == 0) + continue; // Don't handle zero sized types. + VRegArgs.push_back( + MRI->createGenericVirtualRegister(getLLTForType(*Arg.getType(), *DL))); + } + + // We don't currently support translating swifterror or swiftself functions. 
+ for (auto &Arg : F.args()) { + if (Arg.hasSwiftErrorAttr() || Arg.hasSwiftSelfAttr()) { + OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure", + F.getSubprogram(), &F.getEntryBlock()); + R << "unable to lower arguments due to swifterror/swiftself: " + << ore::NV("Prototype", F.getType()); + reportTranslationError(*MF, *TPC, *ORE, R); + return false; + } + } + + if (!CLI->lowerFormalArguments(EntryBuilder, F, VRegArgs)) { + OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure", + F.getSubprogram(), &F.getEntryBlock()); + R << "unable to lower arguments: " << ore::NV("Prototype", F.getType()); + reportTranslationError(*MF, *TPC, *ORE, R); + return false; + } + + auto ArgIt = F.arg_begin(); + for (auto &VArg : VRegArgs) { + // If the argument is an unsplit scalar then don't use unpackRegs to avoid + // creating redundant copies. + if (!valueIsSplit(*ArgIt, VMap.getOffsets(*ArgIt))) { + auto &VRegs = *VMap.getVRegs(cast<Value>(*ArgIt)); + assert(VRegs.empty() && "VRegs already populated?"); + VRegs.push_back(VArg); + } else { + unpackRegs(*ArgIt, VArg, EntryBuilder); + } + ArgIt++; + } + + // And translate the function! + for (const BasicBlock &BB : F) { + MachineBasicBlock &MBB = getMBB(BB); + // Set the insertion point of all the following translations to + // the end of this basic block. + CurBuilder.setMBB(MBB); + + for (const Instruction &Inst : BB) { + if (translate(Inst)) + continue; + + OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure", + Inst.getDebugLoc(), &BB); + R << "unable to translate instruction: " << ore::NV("Opcode", &Inst); + + if (ORE->allowExtraAnalysis("gisel-irtranslator")) { + std::string InstStrStorage; + raw_string_ostream InstStr(InstStrStorage); + InstStr << Inst; + + R << ": '" << InstStr.str() << "'"; + } + + reportTranslationError(*MF, *TPC, *ORE, R); + return false; + } + } + + finishPendingPhis(); + + // Merge the argument lowering and constants block with its single + // successor, the LLVM-IR entry block. We want the basic block to + // be maximal. + assert(EntryBB->succ_size() == 1 && + "Custom BB used for lowering should have only one successor"); + // Get the successor of the current entry block. + MachineBasicBlock &NewEntryBB = **EntryBB->succ_begin(); + assert(NewEntryBB.pred_size() == 1 && + "LLVM-IR entry block has a predecessor!?"); + // Move all the instruction from the current entry block to the + // new entry block. + NewEntryBB.splice(NewEntryBB.begin(), EntryBB, EntryBB->begin(), + EntryBB->end()); + + // Update the live-in information for the new entry block. + for (const MachineBasicBlock::RegisterMaskPair &LiveIn : EntryBB->liveins()) + NewEntryBB.addLiveIn(LiveIn); + NewEntryBB.sortUniqueLiveIns(); + + // Get rid of the now empty basic block. + EntryBB->removeSuccessor(&NewEntryBB); + MF->remove(EntryBB); + MF->DeleteMachineBasicBlock(EntryBB); + + assert(&MF->front() == &NewEntryBB && + "New entry wasn't next in the list of basic block!"); + + // Initialize stack protector information. 
+ StackProtector &SP = getAnalysis<StackProtector>(); + SP.copyToMachineFrameInfo(MF->getFrameInfo()); + + return false; +} diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp new file mode 100644 index 000000000000..c83c791327e4 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp @@ -0,0 +1,243 @@ +//===- llvm/CodeGen/GlobalISel/InstructionSelect.cpp - InstructionSelect ---==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the InstructionSelect class. +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/InstructionSelect.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/Twine.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/config.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/TargetRegistry.h" + +#define DEBUG_TYPE "instruction-select" + +using namespace llvm; + +#ifdef LLVM_GISEL_COV_PREFIX +static cl::opt<std::string> + CoveragePrefix("gisel-coverage-prefix", cl::init(LLVM_GISEL_COV_PREFIX), + cl::desc("Record GlobalISel rule coverage files of this " + "prefix if instrumentation was generated")); +#else +static const std::string CoveragePrefix = ""; +#endif + +char InstructionSelect::ID = 0; +INITIALIZE_PASS_BEGIN(InstructionSelect, DEBUG_TYPE, + "Select target instructions out of generic instructions", + false, false) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_END(InstructionSelect, DEBUG_TYPE, + "Select target instructions out of generic instructions", + false, false) + +InstructionSelect::InstructionSelect() : MachineFunctionPass(ID) { + initializeInstructionSelectPass(*PassRegistry::getPassRegistry()); +} + +void InstructionSelect::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetPassConfig>(); + getSelectionDAGFallbackAnalysisUsage(AU); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { + // If the ISel pipeline failed, do not bother running that pass. + if (MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedISel)) + return false; + + LLVM_DEBUG(dbgs() << "Selecting function: " << MF.getName() << '\n'); + + const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>(); + const InstructionSelector *ISel = MF.getSubtarget().getInstructionSelector(); + CodeGenCoverage CoverageInfo; + assert(ISel && "Cannot work without InstructionSelector"); + + // An optimization remark emitter. Used to report failures. + MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr); + + // FIXME: There are many other MF/MFI fields we need to initialize. 
+ + MachineRegisterInfo &MRI = MF.getRegInfo(); +#ifndef NDEBUG + // Check that our input is fully legal: we require the function to have the + // Legalized property, so it should be. + // FIXME: This should be in the MachineVerifier, as the RegBankSelected + // property check already is. + if (!DisableGISelLegalityCheck) + if (const MachineInstr *MI = machineFunctionIsIllegal(MF)) { + reportGISelFailure(MF, TPC, MORE, "gisel-select", + "instruction is not legal", *MI); + return false; + } +#endif + // FIXME: We could introduce new blocks and will need to fix the outer loop. + // Until then, keep track of the number of blocks to assert that we don't. + const size_t NumBlocks = MF.size(); + + for (MachineBasicBlock *MBB : post_order(&MF)) { + if (MBB->empty()) + continue; + + // Select instructions in reverse block order. We permit erasing so have + // to resort to manually iterating and recognizing the begin (rend) case. + bool ReachedBegin = false; + for (auto MII = std::prev(MBB->end()), Begin = MBB->begin(); + !ReachedBegin;) { +#ifndef NDEBUG + // Keep track of the insertion range for debug printing. + const auto AfterIt = std::next(MII); +#endif + // Select this instruction. + MachineInstr &MI = *MII; + + // And have our iterator point to the next instruction, if there is one. + if (MII == Begin) + ReachedBegin = true; + else + --MII; + + LLVM_DEBUG(dbgs() << "Selecting: \n " << MI); + + // We could have folded this instruction away already, making it dead. + // If so, erase it. + if (isTriviallyDead(MI, MRI)) { + LLVM_DEBUG(dbgs() << "Is dead; erasing.\n"); + MI.eraseFromParentAndMarkDBGValuesForRemoval(); + continue; + } + + if (!ISel->select(MI, CoverageInfo)) { + // FIXME: It would be nice to dump all inserted instructions. It's + // not obvious how, esp. considering select() can insert after MI. + reportGISelFailure(MF, TPC, MORE, "gisel-select", "cannot select", MI); + return false; + } + + // Dump the range of instructions that MI expanded into. + LLVM_DEBUG({ + auto InsertedBegin = ReachedBegin ? MBB->begin() : std::next(MII); + dbgs() << "Into:\n"; + for (auto &InsertedMI : make_range(InsertedBegin, AfterIt)) + dbgs() << " " << InsertedMI; + dbgs() << '\n'; + }); + } + } + + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + + for (MachineBasicBlock &MBB : MF) { + if (MBB.empty()) + continue; + + // Try to find redundant copies b/w vregs of the same register class. + bool ReachedBegin = false; + for (auto MII = std::prev(MBB.end()), Begin = MBB.begin(); !ReachedBegin;) { + // Select this instruction. + MachineInstr &MI = *MII; + + // And have our iterator point to the next instruction, if there is one. + if (MII == Begin) + ReachedBegin = true; + else + --MII; + if (MI.getOpcode() != TargetOpcode::COPY) + continue; + unsigned SrcReg = MI.getOperand(1).getReg(); + unsigned DstReg = MI.getOperand(0).getReg(); + if (TargetRegisterInfo::isVirtualRegister(SrcReg) && + TargetRegisterInfo::isVirtualRegister(DstReg)) { + auto SrcRC = MRI.getRegClass(SrcReg); + auto DstRC = MRI.getRegClass(DstReg); + if (SrcRC == DstRC) { + MRI.replaceRegWith(DstReg, SrcReg); + MI.eraseFromParentAndMarkDBGValuesForRemoval(); + } + } + } + } + + // Now that selection is complete, there are no more generic vregs. Verify + // that the size of the now-constrained vreg is unchanged and that it has a + // register class. 
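+  // For instance, if selection constrained the result of an s64 G_LOAD to a
+  // 32-bit register class, Ty.getSizeInBits() (64) would exceed
+  // TRI.getRegSizeInBits(*RC) (32) and the loop below reports a failure.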
+ for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { + unsigned VReg = TargetRegisterInfo::index2VirtReg(I); + + MachineInstr *MI = nullptr; + if (!MRI.def_empty(VReg)) + MI = &*MRI.def_instr_begin(VReg); + else if (!MRI.use_empty(VReg)) + MI = &*MRI.use_instr_begin(VReg); + if (!MI) + continue; + + const TargetRegisterClass *RC = MRI.getRegClassOrNull(VReg); + if (!RC) { + reportGISelFailure(MF, TPC, MORE, "gisel-select", + "VReg has no regclass after selection", *MI); + return false; + } + + const LLT Ty = MRI.getType(VReg); + if (Ty.isValid() && Ty.getSizeInBits() > TRI.getRegSizeInBits(*RC)) { + reportGISelFailure( + MF, TPC, MORE, "gisel-select", + "VReg's low-level type and register class have different sizes", *MI); + return false; + } + } + + if (MF.size() != NumBlocks) { + MachineOptimizationRemarkMissed R("gisel-select", "GISelFailure", + MF.getFunction().getSubprogram(), + /*MBB=*/nullptr); + R << "inserting blocks is not supported yet"; + reportGISelFailure(MF, TPC, MORE, R); + return false; + } + + auto &TLI = *MF.getSubtarget().getTargetLowering(); + TLI.finalizeLowering(MF); + + LLVM_DEBUG({ + dbgs() << "Rules covered by selecting function: " << MF.getName() << ":"; + for (auto RuleID : CoverageInfo.covered()) + dbgs() << " id" << RuleID; + dbgs() << "\n\n"; + }); + CoverageInfo.emit(CoveragePrefix, + MF.getSubtarget() + .getTargetLowering() + ->getTargetMachine() + .getTarget() + .getBackendName()); + + // If we successfully selected the function nothing is going to use the vreg + // types after us (otherwise MIRPrinter would need them). Make sure the types + // disappear. + MRI.clearVirtRegTypes(); + + // FIXME: Should we accurately track changes? + return true; +} diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp new file mode 100644 index 000000000000..5e77fcbb0ed9 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp @@ -0,0 +1,84 @@ +//===- llvm/CodeGen/GlobalISel/InstructionSelector.cpp --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file implements the InstructionSelector class. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> + +#define DEBUG_TYPE "instructionselector" + +using namespace llvm; + +InstructionSelector::MatcherState::MatcherState(unsigned MaxRenderers) + : Renderers(MaxRenderers), MIs() {} + +InstructionSelector::InstructionSelector() = default; + +bool InstructionSelector::constrainOperandRegToRegClass( + MachineInstr &I, unsigned OpIdx, const TargetRegisterClass &RC, + const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI) const { + MachineBasicBlock &MBB = *I.getParent(); + MachineFunction &MF = *MBB.getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + return + constrainRegToClass(MRI, TII, RBI, I, I.getOperand(OpIdx).getReg(), RC); +} + +bool InstructionSelector::isOperandImmEqual( + const MachineOperand &MO, int64_t Value, + const MachineRegisterInfo &MRI) const { + if (MO.isReg() && MO.getReg()) + if (auto VRegVal = getConstantVRegVal(MO.getReg(), MRI)) + return *VRegVal == Value; + return false; +} + +bool InstructionSelector::isBaseWithConstantOffset( + const MachineOperand &Root, const MachineRegisterInfo &MRI) const { + if (!Root.isReg()) + return false; + + MachineInstr *RootI = MRI.getVRegDef(Root.getReg()); + if (RootI->getOpcode() != TargetOpcode::G_GEP) + return false; + + MachineOperand &RHS = RootI->getOperand(2); + MachineInstr *RHSI = MRI.getVRegDef(RHS.getReg()); + if (RHSI->getOpcode() != TargetOpcode::G_CONSTANT) + return false; + + return true; +} + +bool InstructionSelector::isObviouslySafeToFold(MachineInstr &MI, + MachineInstr &IntoMI) const { + // Immediate neighbours are already folded. + if (MI.getParent() == IntoMI.getParent() && + std::next(MI.getIterator()) == IntoMI.getIterator()) + return true; + + return !MI.mayLoadOrStore() && !MI.hasUnmodeledSideEffects() && + MI.implicit_operands().begin() == MI.implicit_operands().end(); +} diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp new file mode 100644 index 000000000000..344f573a67f5 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp @@ -0,0 +1,101 @@ +//===- lib/CodeGen/GlobalISel/LegalizerPredicates.cpp - Predicates --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// A library of predicate factories to use for LegalityPredicate. 
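+//
+// Usage sketch (hypothetical target; assumes the LegalizeRuleSet API from
+// LegalizerInfo.h and the mutations from LegalizeMutations.cpp):
+//
+//   getActionDefinitionsBuilder(TargetOpcode::G_ADD)
+//       .legalIf(typeInSet(0, {LLT::scalar(32), LLT::scalar(64)}))
+//       .narrowScalarIf(widerThan(0, 64), changeTo(0, LLT::scalar(64)));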
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+
+using namespace llvm;
+
+LegalityPredicate LegalityPredicates::typeIs(unsigned TypeIdx, LLT Type) {
+  return
+      [=](const LegalityQuery &Query) { return Query.Types[TypeIdx] == Type; };
+}
+
+LegalityPredicate
+LegalityPredicates::typeInSet(unsigned TypeIdx,
+                              std::initializer_list<LLT> TypesInit) {
+  SmallVector<LLT, 4> Types = TypesInit;
+  return [=](const LegalityQuery &Query) {
+    return std::find(Types.begin(), Types.end(), Query.Types[TypeIdx]) != Types.end();
+  };
+}
+
+LegalityPredicate LegalityPredicates::typePairInSet(
+    unsigned TypeIdx0, unsigned TypeIdx1,
+    std::initializer_list<std::pair<LLT, LLT>> TypesInit) {
+  SmallVector<std::pair<LLT, LLT>, 4> Types = TypesInit;
+  return [=](const LegalityQuery &Query) {
+    std::pair<LLT, LLT> Match = {Query.Types[TypeIdx0], Query.Types[TypeIdx1]};
+    return std::find(Types.begin(), Types.end(), Match) != Types.end();
+  };
+}
+
+LegalityPredicate LegalityPredicates::typePairAndMemSizeInSet(
+    unsigned TypeIdx0, unsigned TypeIdx1, unsigned MMOIdx,
+    std::initializer_list<TypePairAndMemSize> TypesAndMemSizeInit) {
+  SmallVector<TypePairAndMemSize, 4> TypesAndMemSize = TypesAndMemSizeInit;
+  return [=](const LegalityQuery &Query) {
+    TypePairAndMemSize Match = {Query.Types[TypeIdx0], Query.Types[TypeIdx1],
+                                Query.MMODescrs[MMOIdx].Size};
+    return std::find(TypesAndMemSize.begin(), TypesAndMemSize.end(), Match) !=
+           TypesAndMemSize.end();
+  };
+}
+
+LegalityPredicate LegalityPredicates::isScalar(unsigned TypeIdx) {
+  return [=](const LegalityQuery &Query) {
+    return Query.Types[TypeIdx].isScalar();
+  };
+}
+
+LegalityPredicate LegalityPredicates::narrowerThan(unsigned TypeIdx,
+                                                   unsigned Size) {
+  return [=](const LegalityQuery &Query) {
+    const LLT &QueryTy = Query.Types[TypeIdx];
+    return QueryTy.isScalar() && QueryTy.getSizeInBits() < Size;
+  };
+}
+
+LegalityPredicate LegalityPredicates::widerThan(unsigned TypeIdx,
+                                                unsigned Size) {
+  return [=](const LegalityQuery &Query) {
+    const LLT &QueryTy = Query.Types[TypeIdx];
+    return QueryTy.isScalar() && QueryTy.getSizeInBits() > Size;
+  };
+}
+
+LegalityPredicate LegalityPredicates::sizeNotPow2(unsigned TypeIdx) {
+  return [=](const LegalityQuery &Query) {
+    const LLT &QueryTy = Query.Types[TypeIdx];
+    return QueryTy.isScalar() && !isPowerOf2_32(QueryTy.getSizeInBits());
+  };
+}
+
+LegalityPredicate LegalityPredicates::memSizeInBytesNotPow2(unsigned MMOIdx) {
+  return [=](const LegalityQuery &Query) {
+    return !isPowerOf2_32(Query.MMODescrs[MMOIdx].Size /* In Bytes */);
+  };
+}
+
+LegalityPredicate LegalityPredicates::numElementsNotPow2(unsigned TypeIdx) {
+  return [=](const LegalityQuery &Query) {
+    const LLT &QueryTy = Query.Types[TypeIdx];
+    return QueryTy.isVector() && !isPowerOf2_32(QueryTy.getNumElements());
+  };
+}
+
+LegalityPredicate LegalityPredicates::atomicOrderingAtLeastOrStrongerThan(
+    unsigned MMOIdx, AtomicOrdering Ordering) {
+  return [=](const LegalityQuery &Query) {
+    return isAtLeastOrStrongerThan(Query.MMODescrs[MMOIdx].Ordering, Ordering);
+  };
+}
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
new file mode 100644
index 000000000000..a29b32ecdc03
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
@@ -0,0 +1,51 @@
+//===- lib/CodeGen/GlobalISel/LegalizerMutations.cpp - Mutations ----------===//
+//
+// The LLVM Compiler
Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// A library of mutation factories to use for LegalityMutation. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" + +using namespace llvm; + +LegalizeMutation LegalizeMutations::changeTo(unsigned TypeIdx, LLT Ty) { + return + [=](const LegalityQuery &Query) { return std::make_pair(TypeIdx, Ty); }; +} + +LegalizeMutation LegalizeMutations::changeTo(unsigned TypeIdx, + unsigned FromTypeIdx) { + return [=](const LegalityQuery &Query) { + return std::make_pair(TypeIdx, Query.Types[FromTypeIdx]); + }; +} + +LegalizeMutation LegalizeMutations::widenScalarToNextPow2(unsigned TypeIdx, + unsigned Min) { + return [=](const LegalityQuery &Query) { + unsigned NewSizeInBits = + 1 << Log2_32_Ceil(Query.Types[TypeIdx].getSizeInBits()); + if (NewSizeInBits < Min) + NewSizeInBits = Min; + return std::make_pair(TypeIdx, LLT::scalar(NewSizeInBits)); + }; +} + +LegalizeMutation LegalizeMutations::moreElementsToNextPow2(unsigned TypeIdx, + unsigned Min) { + return [=](const LegalityQuery &Query) { + const LLT &VecTy = Query.Types[TypeIdx]; + unsigned NewNumElements = 1 << Log2_32_Ceil(VecTy.getNumElements()); + if (NewNumElements < Min) + NewNumElements = Min; + return std::make_pair( + TypeIdx, LLT::vector(NewNumElements, VecTy.getScalarSizeInBits())); + }; +} diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp new file mode 100644 index 000000000000..9a2aac998a84 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -0,0 +1,187 @@ +//===-- llvm/CodeGen/GlobalISel/Legalizer.cpp -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file This file implements the LegalizerHelper class to legalize individual +/// instructions and the LegalizePass wrapper pass for the primary +/// legalization. 
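+///
+/// In the GlobalISel pipeline this pass runs after IRTranslator and before
+/// RegBankSelect; it visits blocks in reverse post-order and legalizes each
+/// generic instruction bottom-up so newly dead operands can be erased as we
+/// go.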
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/Legalizer.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/CodeGen/GlobalISel/GISelWorkList.h" +#include "llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h" +#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Support/Debug.h" + +#include <iterator> + +#define DEBUG_TYPE "legalizer" + +using namespace llvm; + +char Legalizer::ID = 0; +INITIALIZE_PASS_BEGIN(Legalizer, DEBUG_TYPE, + "Legalize the Machine IR a function's Machine IR", false, + false) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_END(Legalizer, DEBUG_TYPE, + "Legalize the Machine IR a function's Machine IR", false, + false) + +Legalizer::Legalizer() : MachineFunctionPass(ID) { + initializeLegalizerPass(*PassRegistry::getPassRegistry()); +} + +void Legalizer::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetPassConfig>(); + getSelectionDAGFallbackAnalysisUsage(AU); + MachineFunctionPass::getAnalysisUsage(AU); +} + +void Legalizer::init(MachineFunction &MF) { +} + +static bool isArtifact(const MachineInstr &MI) { + switch (MI.getOpcode()) { + default: + return false; + case TargetOpcode::G_TRUNC: + case TargetOpcode::G_ZEXT: + case TargetOpcode::G_ANYEXT: + case TargetOpcode::G_SEXT: + case TargetOpcode::G_MERGE_VALUES: + case TargetOpcode::G_UNMERGE_VALUES: + return true; + } +} + +bool Legalizer::runOnMachineFunction(MachineFunction &MF) { + // If the ISel pipeline failed, do not bother running that pass. + if (MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedISel)) + return false; + LLVM_DEBUG(dbgs() << "Legalize Machine IR for: " << MF.getName() << '\n'); + init(MF); + const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>(); + MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr); + LegalizerHelper Helper(MF); + + const size_t NumBlocks = MF.size(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + // Populate Insts + GISelWorkList<256> InstList; + GISelWorkList<128> ArtifactList; + ReversePostOrderTraversal<MachineFunction *> RPOT(&MF); + // Perform legalization bottom up so we can DCE as we legalize. + // Traverse BB in RPOT and within each basic block, add insts top down, + // so when we pop_back_val in the legalization process, we traverse bottom-up. + for (auto *MBB : RPOT) { + if (MBB->empty()) + continue; + for (MachineInstr &MI : *MBB) { + // Only legalize pre-isel generic instructions: others don't have types + // and are assumed to be legal. + if (!isPreISelGenericOpcode(MI.getOpcode())) + continue; + if (isArtifact(MI)) + ArtifactList.insert(&MI); + else + InstList.insert(&MI); + } + } + Helper.MIRBuilder.recordInsertions([&](MachineInstr *MI) { + // Only legalize pre-isel generic instructions. + // Legalization process could generate Target specific pseudo + // instructions with generic types. Don't record them + if (isPreISelGenericOpcode(MI->getOpcode())) { + if (isArtifact(*MI)) + ArtifactList.insert(MI); + else + InstList.insert(MI); + } + LLVM_DEBUG(dbgs() << ".. .. 
New MI: " << *MI;); + }); + const LegalizerInfo &LInfo(Helper.getLegalizerInfo()); + LegalizationArtifactCombiner ArtCombiner(Helper.MIRBuilder, MF.getRegInfo(), LInfo); + auto RemoveDeadInstFromLists = [&InstList, + &ArtifactList](MachineInstr *DeadMI) { + InstList.remove(DeadMI); + ArtifactList.remove(DeadMI); + }; + bool Changed = false; + do { + while (!InstList.empty()) { + MachineInstr &MI = *InstList.pop_back_val(); + assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode"); + if (isTriviallyDead(MI, MRI)) { + LLVM_DEBUG(dbgs() << MI << "Is dead; erasing.\n"); + MI.eraseFromParentAndMarkDBGValuesForRemoval(); + continue; + } + + // Do the legalization for this instruction. + auto Res = Helper.legalizeInstrStep(MI); + // Error out if we couldn't legalize this instruction. We may want to + // fall back to DAG ISel instead in the future. + if (Res == LegalizerHelper::UnableToLegalize) { + Helper.MIRBuilder.stopRecordingInsertions(); + reportGISelFailure(MF, TPC, MORE, "gisel-legalize", + "unable to legalize instruction", MI); + return false; + } + Changed |= Res == LegalizerHelper::Legalized; + } + while (!ArtifactList.empty()) { + MachineInstr &MI = *ArtifactList.pop_back_val(); + assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode"); + if (isTriviallyDead(MI, MRI)) { + LLVM_DEBUG(dbgs() << MI << "Is dead; erasing.\n"); + RemoveDeadInstFromLists(&MI); + MI.eraseFromParentAndMarkDBGValuesForRemoval(); + continue; + } + SmallVector<MachineInstr *, 4> DeadInstructions; + if (ArtCombiner.tryCombineInstruction(MI, DeadInstructions)) { + for (auto *DeadMI : DeadInstructions) { + LLVM_DEBUG(dbgs() << ".. Erasing Dead Instruction " << *DeadMI); + RemoveDeadInstFromLists(DeadMI); + DeadMI->eraseFromParentAndMarkDBGValuesForRemoval(); + } + Changed = true; + continue; + } + // If this was not an artifact (that could be combined away), this might + // need special handling. Add it to InstList, so when it's processed + // there, it has to be legal or specially handled. + else + InstList.insert(&MI); + } + } while (!InstList.empty()); + + // For now don't support if new blocks are inserted - we would need to fix the + // outerloop for that. + if (MF.size() != NumBlocks) { + MachineOptimizationRemarkMissed R("gisel-legalize", "GISelFailure", + MF.getFunction().getSubprogram(), + /*MBB=*/nullptr); + R << "inserting blocks is not supported yet"; + reportGISelFailure(MF, TPC, MORE, R); + return false; + } + + return Changed; +} diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp new file mode 100644 index 000000000000..87086af121b7 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -0,0 +1,1026 @@ +//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file This file implements the LegalizerHelper class to legalize +/// individual instructions and the LegalizeMachineIR wrapper pass for the +/// primary legalization. 
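+///
+/// Typical driver usage, mirroring the Legalizer pass above (sketch; the
+/// worklist and failure handler are hypothetical):
+///   LegalizerHelper Helper(MF);
+///   while (MachineInstr *MI = Worklist.pop())
+///     if (Helper.legalizeInstrStep(*MI) == LegalizerHelper::UnableToLegalize)
+///       reportFailure(*MI); // e.g. fall back to SelectionDAG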
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" +#include "llvm/CodeGen/GlobalISel/CallLowering.h" +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + + +#define DEBUG_TYPE "legalizer" + +using namespace llvm; +using namespace LegalizeActions; + +LegalizerHelper::LegalizerHelper(MachineFunction &MF) + : MRI(MF.getRegInfo()), LI(*MF.getSubtarget().getLegalizerInfo()) { + MIRBuilder.setMF(MF); +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::legalizeInstrStep(MachineInstr &MI) { + LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs())); + + auto Step = LI.getAction(MI, MRI); + switch (Step.Action) { + case Legal: + LLVM_DEBUG(dbgs() << ".. Already legal\n"); + return AlreadyLegal; + case Libcall: + LLVM_DEBUG(dbgs() << ".. Convert to libcall\n"); + return libcall(MI); + case NarrowScalar: + LLVM_DEBUG(dbgs() << ".. Narrow scalar\n"); + return narrowScalar(MI, Step.TypeIdx, Step.NewType); + case WidenScalar: + LLVM_DEBUG(dbgs() << ".. Widen scalar\n"); + return widenScalar(MI, Step.TypeIdx, Step.NewType); + case Lower: + LLVM_DEBUG(dbgs() << ".. Lower\n"); + return lower(MI, Step.TypeIdx, Step.NewType); + case FewerElements: + LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n"); + return fewerElementsVector(MI, Step.TypeIdx, Step.NewType); + case Custom: + LLVM_DEBUG(dbgs() << ".. Custom legalization\n"); + return LI.legalizeCustom(MI, MRI, MIRBuilder) ? Legalized + : UnableToLegalize; + default: + LLVM_DEBUG(dbgs() << ".. Unable to legalize\n"); + return UnableToLegalize; + } +} + +void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts, + SmallVectorImpl<unsigned> &VRegs) { + for (int i = 0; i < NumParts; ++i) + VRegs.push_back(MRI.createGenericVirtualRegister(Ty)); + MIRBuilder.buildUnmerge(VRegs, Reg); +} + +static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { + switch (Opcode) { + case TargetOpcode::G_SDIV: + assert(Size == 32 && "Unsupported size"); + return RTLIB::SDIV_I32; + case TargetOpcode::G_UDIV: + assert(Size == 32 && "Unsupported size"); + return RTLIB::UDIV_I32; + case TargetOpcode::G_SREM: + assert(Size == 32 && "Unsupported size"); + return RTLIB::SREM_I32; + case TargetOpcode::G_UREM: + assert(Size == 32 && "Unsupported size"); + return RTLIB::UREM_I32; + case TargetOpcode::G_FADD: + assert((Size == 32 || Size == 64) && "Unsupported size"); + return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32; + case TargetOpcode::G_FSUB: + assert((Size == 32 || Size == 64) && "Unsupported size"); + return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32; + case TargetOpcode::G_FMUL: + assert((Size == 32 || Size == 64) && "Unsupported size"); + return Size == 64 ? RTLIB::MUL_F64 : RTLIB::MUL_F32; + case TargetOpcode::G_FDIV: + assert((Size == 32 || Size == 64) && "Unsupported size"); + return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32; + case TargetOpcode::G_FREM: + return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32; + case TargetOpcode::G_FPOW: + return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32; + case TargetOpcode::G_FMA: + assert((Size == 32 || Size == 64) && "Unsupported size"); + return Size == 64 ? 
RTLIB::FMA_F64 : RTLIB::FMA_F32; + } + llvm_unreachable("Unknown libcall function"); +} + +LegalizerHelper::LegalizeResult +llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, + const CallLowering::ArgInfo &Result, + ArrayRef<CallLowering::ArgInfo> Args) { + auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering(); + auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); + const char *Name = TLI.getLibcallName(Libcall); + + MIRBuilder.getMF().getFrameInfo().setHasCalls(true); + if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall), + MachineOperand::CreateES(Name), Result, Args)) + return LegalizerHelper::UnableToLegalize; + + return LegalizerHelper::Legalized; +} + +// Useful for libcalls where all operands have the same type. +static LegalizerHelper::LegalizeResult +simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, + Type *OpType) { + auto Libcall = getRTLibDesc(MI.getOpcode(), Size); + + SmallVector<CallLowering::ArgInfo, 3> Args; + for (unsigned i = 1; i < MI.getNumOperands(); i++) + Args.push_back({MI.getOperand(i).getReg(), OpType}); + return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType}, + Args); +} + +static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, + Type *FromType) { + auto ToMVT = MVT::getVT(ToType); + auto FromMVT = MVT::getVT(FromType); + + switch (Opcode) { + case TargetOpcode::G_FPEXT: + return RTLIB::getFPEXT(FromMVT, ToMVT); + case TargetOpcode::G_FPTRUNC: + return RTLIB::getFPROUND(FromMVT, ToMVT); + case TargetOpcode::G_FPTOSI: + return RTLIB::getFPTOSINT(FromMVT, ToMVT); + case TargetOpcode::G_FPTOUI: + return RTLIB::getFPTOUINT(FromMVT, ToMVT); + case TargetOpcode::G_SITOFP: + return RTLIB::getSINTTOFP(FromMVT, ToMVT); + case TargetOpcode::G_UITOFP: + return RTLIB::getUINTTOFP(FromMVT, ToMVT); + } + llvm_unreachable("Unsupported libcall function"); +} + +static LegalizerHelper::LegalizeResult +conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, + Type *FromType) { + RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType); + return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType}, + {{MI.getOperand(1).getReg(), FromType}}); +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::libcall(MachineInstr &MI) { + LLT LLTy = MRI.getType(MI.getOperand(0).getReg()); + unsigned Size = LLTy.getSizeInBits(); + auto &Ctx = MIRBuilder.getMF().getFunction().getContext(); + + MIRBuilder.setInstr(MI); + + switch (MI.getOpcode()) { + default: + return UnableToLegalize; + case TargetOpcode::G_SDIV: + case TargetOpcode::G_UDIV: + case TargetOpcode::G_SREM: + case TargetOpcode::G_UREM: { + Type *HLTy = Type::getInt32Ty(Ctx); + auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy); + if (Status != Legalized) + return Status; + break; + } + case TargetOpcode::G_FADD: + case TargetOpcode::G_FSUB: + case TargetOpcode::G_FMUL: + case TargetOpcode::G_FDIV: + case TargetOpcode::G_FMA: + case TargetOpcode::G_FPOW: + case TargetOpcode::G_FREM: { + Type *HLTy = Size == 64 ? 
Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx); + auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy); + if (Status != Legalized) + return Status; + break; + } + case TargetOpcode::G_FPEXT: { + // FIXME: Support other floating point types (half, fp128 etc) + unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); + unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); + if (ToSize != 64 || FromSize != 32) + return UnableToLegalize; + LegalizeResult Status = conversionLibcall( + MI, MIRBuilder, Type::getDoubleTy(Ctx), Type::getFloatTy(Ctx)); + if (Status != Legalized) + return Status; + break; + } + case TargetOpcode::G_FPTRUNC: { + // FIXME: Support other floating point types (half, fp128 etc) + unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); + unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); + if (ToSize != 32 || FromSize != 64) + return UnableToLegalize; + LegalizeResult Status = conversionLibcall( + MI, MIRBuilder, Type::getFloatTy(Ctx), Type::getDoubleTy(Ctx)); + if (Status != Legalized) + return Status; + break; + } + case TargetOpcode::G_FPTOSI: + case TargetOpcode::G_FPTOUI: { + // FIXME: Support other types + unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); + unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); + if (ToSize != 32 || (FromSize != 32 && FromSize != 64)) + return UnableToLegalize; + LegalizeResult Status = conversionLibcall( + MI, MIRBuilder, Type::getInt32Ty(Ctx), + FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx)); + if (Status != Legalized) + return Status; + break; + } + case TargetOpcode::G_SITOFP: + case TargetOpcode::G_UITOFP: { + // FIXME: Support other types + unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); + unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); + if (FromSize != 32 || (ToSize != 32 && ToSize != 64)) + return UnableToLegalize; + LegalizeResult Status = conversionLibcall( + MI, MIRBuilder, + ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx), + Type::getInt32Ty(Ctx)); + if (Status != Legalized) + return Status; + break; + } + } + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, + unsigned TypeIdx, + LLT NarrowTy) { + // FIXME: Don't know how to handle secondary types yet. + if (TypeIdx != 0 && MI.getOpcode() != TargetOpcode::G_EXTRACT) + return UnableToLegalize; + + MIRBuilder.setInstr(MI); + + uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); + uint64_t NarrowSize = NarrowTy.getSizeInBits(); + + switch (MI.getOpcode()) { + default: + return UnableToLegalize; + case TargetOpcode::G_IMPLICIT_DEF: { + // FIXME: add support for when SizeOp0 isn't an exact multiple of + // NarrowSize. + if (SizeOp0 % NarrowSize != 0) + return UnableToLegalize; + int NumParts = SizeOp0 / NarrowSize; + + SmallVector<unsigned, 2> DstRegs; + for (int i = 0; i < NumParts; ++i) + DstRegs.push_back( + MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg()); + MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_ADD: { + // FIXME: add support for when SizeOp0 isn't an exact multiple of + // NarrowSize. + if (SizeOp0 % NarrowSize != 0) + return UnableToLegalize; + // Expand in terms of carry-setting/consuming G_ADDE instructions. 
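+    // E.g. (sketch) narrowing %d:_(s64) = G_ADD %a, %b with NarrowTy = s32:
+    //   %a0:_(s32), %a1:_(s32) = G_UNMERGE_VALUES %a(s64) ; low, high
+    //   %b0:_(s32), %b1:_(s32) = G_UNMERGE_VALUES %b(s64)
+    //   %z:_(s1) = G_CONSTANT i1 0
+    //   %d0:_(s32), %c0:_(s1) = G_UADDE %a0, %b0, %z  ; produces carry-out
+    //   %d1:_(s32), %c1:_(s1) = G_UADDE %a1, %b1, %c0 ; consumes carry-in
+    //   %d:_(s64) = G_MERGE_VALUES %d0(s32), %d1(s32)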
+ int NumParts = SizeOp0 / NarrowTy.getSizeInBits(); + + SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs; + extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs); + extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs); + + unsigned CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1)); + MIRBuilder.buildConstant(CarryIn, 0); + + for (int i = 0; i < NumParts; ++i) { + unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); + unsigned CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1)); + + MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i], + Src2Regs[i], CarryIn); + + DstRegs.push_back(DstReg); + CarryIn = CarryOut; + } + unsigned DstReg = MI.getOperand(0).getReg(); + MIRBuilder.buildMerge(DstReg, DstRegs); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_EXTRACT: { + if (TypeIdx != 1) + return UnableToLegalize; + + int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); + // FIXME: add support for when SizeOp1 isn't an exact multiple of + // NarrowSize. + if (SizeOp1 % NarrowSize != 0) + return UnableToLegalize; + int NumParts = SizeOp1 / NarrowSize; + + SmallVector<unsigned, 2> SrcRegs, DstRegs; + SmallVector<uint64_t, 2> Indexes; + extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs); + + unsigned OpReg = MI.getOperand(0).getReg(); + uint64_t OpStart = MI.getOperand(2).getImm(); + uint64_t OpSize = MRI.getType(OpReg).getSizeInBits(); + for (int i = 0; i < NumParts; ++i) { + unsigned SrcStart = i * NarrowSize; + + if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) { + // No part of the extract uses this subregister, ignore it. + continue; + } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) { + // The entire subregister is extracted, forward the value. + DstRegs.push_back(SrcRegs[i]); + continue; + } + + // OpSegStart is where this destination segment would start in OpReg if it + // extended infinitely in both directions. + int64_t ExtractOffset; + uint64_t SegSize; + if (OpStart < SrcStart) { + ExtractOffset = 0; + SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart); + } else { + ExtractOffset = OpStart - SrcStart; + SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize); + } + + unsigned SegReg = SrcRegs[i]; + if (ExtractOffset != 0 || SegSize != NarrowSize) { + // A genuine extract is needed. + SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize)); + MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset); + } + + DstRegs.push_back(SegReg); + } + + MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_INSERT: { + // FIXME: add support for when SizeOp0 isn't an exact multiple of + // NarrowSize. + if (SizeOp0 % NarrowSize != 0) + return UnableToLegalize; + + int NumParts = SizeOp0 / NarrowSize; + + SmallVector<unsigned, 2> SrcRegs, DstRegs; + SmallVector<uint64_t, 2> Indexes; + extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs); + + unsigned OpReg = MI.getOperand(2).getReg(); + uint64_t OpStart = MI.getOperand(3).getImm(); + uint64_t OpSize = MRI.getType(OpReg).getSizeInBits(); + for (int i = 0; i < NumParts; ++i) { + unsigned DstStart = i * NarrowSize; + + if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) { + // No part of the insert affects this subregister, forward the original. 
+ DstRegs.push_back(SrcRegs[i]); + continue; + } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) { + // The entire subregister is defined by this insert, forward the new + // value. + DstRegs.push_back(OpReg); + continue; + } + + // OpSegStart is where this destination segment would start in OpReg if it + // extended infinitely in both directions. + int64_t ExtractOffset, InsertOffset; + uint64_t SegSize; + if (OpStart < DstStart) { + InsertOffset = 0; + ExtractOffset = DstStart - OpStart; + SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart); + } else { + InsertOffset = OpStart - DstStart; + ExtractOffset = 0; + SegSize = + std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart); + } + + unsigned SegReg = OpReg; + if (ExtractOffset != 0 || SegSize != OpSize) { + // A genuine extract is needed. + SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize)); + MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset); + } + + unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); + MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset); + DstRegs.push_back(DstReg); + } + + assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered"); + MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_LOAD: { + // FIXME: add support for when SizeOp0 isn't an exact multiple of + // NarrowSize. + if (SizeOp0 % NarrowSize != 0) + return UnableToLegalize; + + const auto &MMO = **MI.memoperands_begin(); + // This implementation doesn't work for atomics. Give up instead of doing + // something invalid. + if (MMO.getOrdering() != AtomicOrdering::NotAtomic || + MMO.getFailureOrdering() != AtomicOrdering::NotAtomic) + return UnableToLegalize; + + int NumParts = SizeOp0 / NarrowSize; + LLT OffsetTy = LLT::scalar( + MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits()); + + SmallVector<unsigned, 2> DstRegs; + for (int i = 0; i < NumParts; ++i) { + unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); + unsigned SrcReg = 0; + unsigned Adjustment = i * NarrowSize / 8; + + MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand( + MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(), + NarrowSize / 8, i == 0 ? MMO.getAlignment() : NarrowSize / 8, + MMO.getAAInfo(), MMO.getRanges(), MMO.getSyncScopeID(), + MMO.getOrdering(), MMO.getFailureOrdering()); + + MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy, + Adjustment); + + MIRBuilder.buildLoad(DstReg, SrcReg, *SplitMMO); + + DstRegs.push_back(DstReg); + } + unsigned DstReg = MI.getOperand(0).getReg(); + MIRBuilder.buildMerge(DstReg, DstRegs); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_STORE: { + // FIXME: add support for when SizeOp0 isn't an exact multiple of + // NarrowSize. + if (SizeOp0 % NarrowSize != 0) + return UnableToLegalize; + + const auto &MMO = **MI.memoperands_begin(); + // This implementation doesn't work for atomics. Give up instead of doing + // something invalid. 
+ if (MMO.getOrdering() != AtomicOrdering::NotAtomic || + MMO.getFailureOrdering() != AtomicOrdering::NotAtomic) + return UnableToLegalize; + + int NumParts = SizeOp0 / NarrowSize; + LLT OffsetTy = LLT::scalar( + MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits()); + + SmallVector<unsigned, 2> SrcRegs; + extractParts(MI.getOperand(0).getReg(), NarrowTy, NumParts, SrcRegs); + + for (int i = 0; i < NumParts; ++i) { + unsigned DstReg = 0; + unsigned Adjustment = i * NarrowSize / 8; + + MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand( + MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(), + NarrowSize / 8, i == 0 ? MMO.getAlignment() : NarrowSize / 8, + MMO.getAAInfo(), MMO.getRanges(), MMO.getSyncScopeID(), + MMO.getOrdering(), MMO.getFailureOrdering()); + + MIRBuilder.materializeGEP(DstReg, MI.getOperand(1).getReg(), OffsetTy, + Adjustment); + + MIRBuilder.buildStore(SrcRegs[i], DstReg, *SplitMMO); + } + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_CONSTANT: { + // FIXME: add support for when SizeOp0 isn't an exact multiple of + // NarrowSize. + if (SizeOp0 % NarrowSize != 0) + return UnableToLegalize; + int NumParts = SizeOp0 / NarrowSize; + const APInt &Cst = MI.getOperand(1).getCImm()->getValue(); + LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); + + SmallVector<unsigned, 2> DstRegs; + for (int i = 0; i < NumParts; ++i) { + unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); + ConstantInt *CI = + ConstantInt::get(Ctx, Cst.lshr(NarrowSize * i).trunc(NarrowSize)); + MIRBuilder.buildConstant(DstReg, *CI); + DstRegs.push_back(DstReg); + } + unsigned DstReg = MI.getOperand(0).getReg(); + MIRBuilder.buildMerge(DstReg, DstRegs); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_OR: { + // Legalize bitwise operation: + // A = BinOp<Ty> B, C + // into: + // B1, ..., BN = G_UNMERGE_VALUES B + // C1, ..., CN = G_UNMERGE_VALUES C + // A1 = BinOp<Ty/N> B1, C2 + // ... + // AN = BinOp<Ty/N> BN, CN + // A = G_MERGE_VALUES A1, ..., AN + + // FIXME: add support for when SizeOp0 isn't an exact multiple of + // NarrowSize. + if (SizeOp0 % NarrowSize != 0) + return UnableToLegalize; + int NumParts = SizeOp0 / NarrowSize; + + // List the registers where the destination will be scattered. + SmallVector<unsigned, 2> DstRegs; + // List the registers where the first argument will be split. + SmallVector<unsigned, 2> SrcsReg1; + // List the registers where the second argument will be split. + SmallVector<unsigned, 2> SrcsReg2; + // Create all the temporary registers. + for (int i = 0; i < NumParts; ++i) { + unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); + unsigned SrcReg1 = MRI.createGenericVirtualRegister(NarrowTy); + unsigned SrcReg2 = MRI.createGenericVirtualRegister(NarrowTy); + + DstRegs.push_back(DstReg); + SrcsReg1.push_back(SrcReg1); + SrcsReg2.push_back(SrcReg2); + } + // Explode the big arguments into smaller chunks. + MIRBuilder.buildUnmerge(SrcsReg1, MI.getOperand(1).getReg()); + MIRBuilder.buildUnmerge(SrcsReg2, MI.getOperand(2).getReg()); + + // Do the operation on each small part. + for (int i = 0; i < NumParts; ++i) + MIRBuilder.buildOr(DstRegs[i], SrcsReg1[i], SrcsReg2[i]); + + // Gather the destination registers into the final destination. 
+ unsigned DstReg = MI.getOperand(0).getReg(); + MIRBuilder.buildMerge(DstReg, DstRegs); + MI.eraseFromParent(); + return Legalized; + } + } +} + +void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy, + unsigned OpIdx, unsigned ExtOpcode) { + MachineOperand &MO = MI.getOperand(OpIdx); + auto ExtB = MIRBuilder.buildInstr(ExtOpcode, WideTy, MO.getReg()); + MO.setReg(ExtB->getOperand(0).getReg()); +} + +void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy, + unsigned OpIdx, unsigned TruncOpcode) { + MachineOperand &MO = MI.getOperand(OpIdx); + unsigned DstExt = MRI.createGenericVirtualRegister(WideTy); + MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); + MIRBuilder.buildInstr(TruncOpcode, MO.getReg(), DstExt); + MO.setReg(DstExt); +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { + MIRBuilder.setInstr(MI); + + switch (MI.getOpcode()) { + default: + return UnableToLegalize; + + case TargetOpcode::G_ADD: + case TargetOpcode::G_AND: + case TargetOpcode::G_MUL: + case TargetOpcode::G_OR: + case TargetOpcode::G_XOR: + case TargetOpcode::G_SUB: + // Perform operation at larger width (any extension is fine here, high bits + // don't affect the result) and then truncate the result back to the + // original type. + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); + widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT); + widenScalarDst(MI, WideTy); + MIRBuilder.recordInsertion(&MI); + return Legalized; + + case TargetOpcode::G_SHL: + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); + // The "number of bits to shift" operand must preserve its value as an + // unsigned integer: + widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); + widenScalarDst(MI, WideTy); + MIRBuilder.recordInsertion(&MI); + return Legalized; + + case TargetOpcode::G_SDIV: + case TargetOpcode::G_SREM: + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT); + widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT); + widenScalarDst(MI, WideTy); + MIRBuilder.recordInsertion(&MI); + return Legalized; + + case TargetOpcode::G_ASHR: + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT); + // The "number of bits to shift" operand must preserve its value as an + // unsigned integer: + widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); + widenScalarDst(MI, WideTy); + MIRBuilder.recordInsertion(&MI); + return Legalized; + + case TargetOpcode::G_UDIV: + case TargetOpcode::G_UREM: + case TargetOpcode::G_LSHR: + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT); + widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); + widenScalarDst(MI, WideTy); + MIRBuilder.recordInsertion(&MI); + return Legalized; + + case TargetOpcode::G_SELECT: + if (TypeIdx != 0) + return UnableToLegalize; + // Perform operation at larger width (any extension is fine here, high bits + // don't affect the result) and then truncate the result back to the + // original type. 
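+    // E.g. (an illustrative sketch, not from this patch) widening the s16
+    // data operands of
+    //   %r:_(s16) = G_SELECT %c(s1), %t(s16), %f(s16)
+    // to s32 yields
+    //   %t32:_(s32) = G_ANYEXT %t(s16)
+    //   %f32:_(s32) = G_ANYEXT %f(s16)
+    //   %r32:_(s32) = G_SELECT %c(s1), %t32, %f32
+    //   %r:_(s16) = G_TRUNC %r32(s32)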
+    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
+    widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
+    widenScalarDst(MI, WideTy);
+    MIRBuilder.recordInsertion(&MI);
+    return Legalized;
+
+  case TargetOpcode::G_FPTOSI:
+  case TargetOpcode::G_FPTOUI:
+    if (TypeIdx != 0)
+      return UnableToLegalize;
+    widenScalarDst(MI, WideTy);
+    MIRBuilder.recordInsertion(&MI);
+    return Legalized;
+
+  case TargetOpcode::G_SITOFP:
+    if (TypeIdx != 1)
+      return UnableToLegalize;
+    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
+    MIRBuilder.recordInsertion(&MI);
+    return Legalized;
+
+  case TargetOpcode::G_UITOFP:
+    if (TypeIdx != 1)
+      return UnableToLegalize;
+    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
+    MIRBuilder.recordInsertion(&MI);
+    return Legalized;
+
+  case TargetOpcode::G_INSERT:
+    if (TypeIdx != 0)
+      return UnableToLegalize;
+    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+    widenScalarDst(MI, WideTy);
+    MIRBuilder.recordInsertion(&MI);
+    return Legalized;
+
+  case TargetOpcode::G_LOAD:
+    // For some types like i24, we might try to widen to i32. To properly
+    // handle this we should be using a dedicated extending load; until then,
+    // avoid trying to legalize.
+    if (alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) !=
+        WideTy.getSizeInBits())
+      return UnableToLegalize;
+    LLVM_FALLTHROUGH;
+  case TargetOpcode::G_SEXTLOAD:
+  case TargetOpcode::G_ZEXTLOAD:
+    widenScalarDst(MI, WideTy);
+    MIRBuilder.recordInsertion(&MI);
+    return Legalized;
+
+  case TargetOpcode::G_STORE: {
+    if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(1) ||
+        WideTy != LLT::scalar(8))
+      return UnableToLegalize;
+
+    widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ZEXT);
+    MIRBuilder.recordInsertion(&MI);
+    return Legalized;
+  }
+  case TargetOpcode::G_CONSTANT: {
+    MachineOperand &SrcMO = MI.getOperand(1);
+    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
+    const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits());
+    SrcMO.setCImm(ConstantInt::get(Ctx, Val));
+
+    widenScalarDst(MI, WideTy);
+    MIRBuilder.recordInsertion(&MI);
+    return Legalized;
+  }
+  case TargetOpcode::G_FCONSTANT: {
+    MachineOperand &SrcMO = MI.getOperand(1);
+    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
+    APFloat Val = SrcMO.getFPImm()->getValueAPF();
+    bool LosesInfo;
+    switch (WideTy.getSizeInBits()) {
+    case 32:
+      Val.convert(APFloat::IEEEsingle(), APFloat::rmTowardZero, &LosesInfo);
+      break;
+    case 64:
+      Val.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &LosesInfo);
+      break;
+    default:
+      llvm_unreachable("Unhandled fp widen type");
+    }
+    SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
+
+    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
+    MIRBuilder.recordInsertion(&MI);
+    return Legalized;
+  }
+  case TargetOpcode::G_BRCOND:
+    widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
+    MIRBuilder.recordInsertion(&MI);
+    return Legalized;
+
+  case TargetOpcode::G_FCMP:
+    if (TypeIdx == 0)
+      widenScalarDst(MI, WideTy);
+    else {
+      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
+      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
+    }
+    MIRBuilder.recordInsertion(&MI);
+    return Legalized;
+
+  case TargetOpcode::G_ICMP:
+    if (TypeIdx == 0)
+      widenScalarDst(MI, WideTy);
+    else {
+      unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
+                               MI.getOperand(1).getPredicate()))
+                               ?
TargetOpcode::G_SEXT + : TargetOpcode::G_ZEXT; + widenScalarSrc(MI, WideTy, 2, ExtOpcode); + widenScalarSrc(MI, WideTy, 3, ExtOpcode); + } + MIRBuilder.recordInsertion(&MI); + return Legalized; + + case TargetOpcode::G_GEP: + assert(TypeIdx == 1 && "unable to legalize pointer of GEP"); + widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT); + MIRBuilder.recordInsertion(&MI); + return Legalized; + + case TargetOpcode::G_PHI: { + assert(TypeIdx == 0 && "Expecting only Idx 0"); + + for (unsigned I = 1; I < MI.getNumOperands(); I += 2) { + MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB(); + MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator()); + widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT); + } + + MachineBasicBlock &MBB = *MI.getParent(); + MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI()); + widenScalarDst(MI, WideTy); + MIRBuilder.recordInsertion(&MI); + return Legalized; + } + } +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { + using namespace TargetOpcode; + MIRBuilder.setInstr(MI); + + switch(MI.getOpcode()) { + default: + return UnableToLegalize; + case TargetOpcode::G_SREM: + case TargetOpcode::G_UREM: { + unsigned QuotReg = MRI.createGenericVirtualRegister(Ty); + MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV) + .addDef(QuotReg) + .addUse(MI.getOperand(1).getReg()) + .addUse(MI.getOperand(2).getReg()); + + unsigned ProdReg = MRI.createGenericVirtualRegister(Ty); + MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg()); + MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(), + ProdReg); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_SMULO: + case TargetOpcode::G_UMULO: { + // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the + // result. + unsigned Res = MI.getOperand(0).getReg(); + unsigned Overflow = MI.getOperand(1).getReg(); + unsigned LHS = MI.getOperand(2).getReg(); + unsigned RHS = MI.getOperand(3).getReg(); + + MIRBuilder.buildMul(Res, LHS, RHS); + + unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO + ? TargetOpcode::G_SMULH + : TargetOpcode::G_UMULH; + + unsigned HiPart = MRI.createGenericVirtualRegister(Ty); + MIRBuilder.buildInstr(Opcode) + .addDef(HiPart) + .addUse(LHS) + .addUse(RHS); + + unsigned Zero = MRI.createGenericVirtualRegister(Ty); + MIRBuilder.buildConstant(Zero, 0); + + // For *signed* multiply, overflow is detected by checking: + // (hi != (lo >> bitwidth-1)) + if (Opcode == TargetOpcode::G_SMULH) { + unsigned Shifted = MRI.createGenericVirtualRegister(Ty); + unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty); + MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1); + MIRBuilder.buildInstr(TargetOpcode::G_ASHR) + .addDef(Shifted) + .addUse(Res) + .addUse(ShiftAmt); + MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted); + } else { + MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero); + } + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_FNEG: { + // TODO: Handle vector types once we are able to + // represent them. 
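+    // For scalars, the expansion built below is, as an illustrative sketch
+    // (MIR not taken from this patch):
+    //   %res:_(s32) = G_FNEG %x
+    // =>
+    //   %zero:_(s32) = G_FCONSTANT float -0.000000e+00
+    //   %res:_(s32) = G_FSUB %zero, %x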
+ if (Ty.isVector()) + return UnableToLegalize; + unsigned Res = MI.getOperand(0).getReg(); + Type *ZeroTy; + LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); + switch (Ty.getSizeInBits()) { + case 16: + ZeroTy = Type::getHalfTy(Ctx); + break; + case 32: + ZeroTy = Type::getFloatTy(Ctx); + break; + case 64: + ZeroTy = Type::getDoubleTy(Ctx); + break; + case 128: + ZeroTy = Type::getFP128Ty(Ctx); + break; + default: + llvm_unreachable("unexpected floating-point type"); + } + ConstantFP &ZeroForNegation = + *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy)); + auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation); + MIRBuilder.buildInstr(TargetOpcode::G_FSUB) + .addDef(Res) + .addUse(Zero->getOperand(0).getReg()) + .addUse(MI.getOperand(1).getReg()); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_FSUB: { + // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)). + // First, check if G_FNEG is marked as Lower. If so, we may + // end up with an infinite loop as G_FSUB is used to legalize G_FNEG. + if (LI.getAction({G_FNEG, {Ty}}).Action == Lower) + return UnableToLegalize; + unsigned Res = MI.getOperand(0).getReg(); + unsigned LHS = MI.getOperand(1).getReg(); + unsigned RHS = MI.getOperand(2).getReg(); + unsigned Neg = MRI.createGenericVirtualRegister(Ty); + MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS); + MIRBuilder.buildInstr(TargetOpcode::G_FADD) + .addDef(Res) + .addUse(LHS) + .addUse(Neg); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: { + unsigned OldValRes = MI.getOperand(0).getReg(); + unsigned SuccessRes = MI.getOperand(1).getReg(); + unsigned Addr = MI.getOperand(2).getReg(); + unsigned CmpVal = MI.getOperand(3).getReg(); + unsigned NewVal = MI.getOperand(4).getReg(); + MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal, + **MI.memoperands_begin()); + MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_LOAD: + case TargetOpcode::G_SEXTLOAD: + case TargetOpcode::G_ZEXTLOAD: { + // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT + unsigned DstReg = MI.getOperand(0).getReg(); + unsigned PtrReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + auto &MMO = **MI.memoperands_begin(); + + if (DstTy.getSizeInBits() == MMO.getSize() /* in bytes */ * 8) { + // In the case of G_LOAD, this was a non-extending load already and we're + // about to lower to the same instruction. + if (MI.getOpcode() == TargetOpcode::G_LOAD) + return UnableToLegalize; + MIRBuilder.buildLoad(DstReg, PtrReg, MMO); + MI.eraseFromParent(); + return Legalized; + } + + if (DstTy.isScalar()) { + unsigned TmpReg = MRI.createGenericVirtualRegister( + LLT::scalar(MMO.getSize() /* in bytes */ * 8)); + MIRBuilder.buildLoad(TmpReg, PtrReg, MMO); + switch (MI.getOpcode()) { + default: + llvm_unreachable("Unexpected opcode"); + case TargetOpcode::G_LOAD: + MIRBuilder.buildAnyExt(DstReg, TmpReg); + break; + case TargetOpcode::G_SEXTLOAD: + MIRBuilder.buildSExt(DstReg, TmpReg); + break; + case TargetOpcode::G_ZEXTLOAD: + MIRBuilder.buildZExt(DstReg, TmpReg); + break; + } + MI.eraseFromParent(); + return Legalized; + } + + return UnableToLegalize; + } + } +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + // FIXME: Don't know how to handle secondary types yet. 
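+  // As a sketch of the G_ADD case below (illustrative, not from this patch),
+  // splitting a <4 x s32> G_ADD with NarrowTy = <2 x s32> gives:
+  //   %a0:_(<2 x s32>), %a1:_(<2 x s32>) = G_UNMERGE_VALUES %a(<4 x s32>)
+  //   %b0:_(<2 x s32>), %b1:_(<2 x s32>) = G_UNMERGE_VALUES %b(<4 x s32>)
+  //   %r0:_(<2 x s32>) = G_ADD %a0, %b0
+  //   %r1:_(<2 x s32>) = G_ADD %a1, %b1
+  //   %r:_(<4 x s32>) = G_MERGE_VALUES %r0, %r1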
+  if (TypeIdx != 0)
+    return UnableToLegalize;
+  switch (MI.getOpcode()) {
+  default:
+    return UnableToLegalize;
+  case TargetOpcode::G_ADD: {
+    unsigned NarrowSize = NarrowTy.getSizeInBits();
+    unsigned DstReg = MI.getOperand(0).getReg();
+    unsigned Size = MRI.getType(DstReg).getSizeInBits();
+    int NumParts = Size / NarrowSize;
+    // FIXME: Don't know how to handle the situation where the small vectors
+    // aren't all the same size yet.
+    if (Size % NarrowSize != 0)
+      return UnableToLegalize;
+
+    MIRBuilder.setInstr(MI);
+
+    SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
+    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
+    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
+
+    for (int i = 0; i < NumParts; ++i) {
+      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
+      MIRBuilder.buildAdd(DstReg, Src1Regs[i], Src2Regs[i]);
+      DstRegs.push_back(DstReg);
+    }
+
+    MIRBuilder.buildMerge(DstReg, DstRegs);
+    MI.eraseFromParent();
+    return Legalized;
+  }
+  }
+}
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
new file mode 100644
index 000000000000..ae061b64a38c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -0,0 +1,591 @@
+//===- lib/CodeGen/GlobalISel/LegalizerInfo.cpp - Legalizer ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implement an interface to specify and query how an illegal operation on a
+// given type should be expanded.
+//
+// Issues to be resolved:
+//   + Make it fast.
+//   + Support weird types like i3, <7 x i3>, ...
+//   + Operations with more than one type (ICMP, CMPXCHG, intrinsics, ...)
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LowLevelTypeImpl.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <map>
+
+using namespace llvm;
+using namespace LegalizeActions;
+
+#define DEBUG_TYPE "legalizer-info"
+
+cl::opt<bool> llvm::DisableGISelLegalityCheck(
+    "disable-gisel-legality-check",
+    cl::desc("Don't verify that MIR is fully legal between GlobalISel passes"),
+    cl::Hidden);
+
+raw_ostream &LegalityQuery::print(raw_ostream &OS) const {
+  OS << Opcode << ", Tys={";
+  for (const auto &Type : Types) {
+    OS << Type << ", ";
+  }
+  OS << "}, MMOs={";
+  for (const auto &MMODescr : MMODescrs) {
+    OS << MMODescr.Size << ", ";
+  }
+  OS << "}";
+
+  return OS;
+}
+
+LegalizeActionStep LegalizeRuleSet::apply(const LegalityQuery &Query) const {
+  LLVM_DEBUG(dbgs() << "Applying legalizer ruleset to: "; Query.print(dbgs());
+             dbgs() << "\n");
+  if (Rules.empty()) {
+    LLVM_DEBUG(dbgs() << ".. fallback to legacy rules (no rules defined)\n");
+    return {LegalizeAction::UseLegacyRules, 0, LLT{}};
+  }
+  for (const auto &Rule : Rules) {
+    if (Rule.match(Query)) {
+      LLVM_DEBUG(dbgs() << ".. 
match\n"); + std::pair<unsigned, LLT> Mutation = Rule.determineMutation(Query); + LLVM_DEBUG(dbgs() << ".. .. " << (unsigned)Rule.getAction() << ", " + << Mutation.first << ", " << Mutation.second << "\n"); + assert((Query.Types[Mutation.first] != Mutation.second || + Rule.getAction() == Lower || + Rule.getAction() == MoreElements || + Rule.getAction() == FewerElements) && + "Simple loop detected"); + return {Rule.getAction(), Mutation.first, Mutation.second}; + } else + LLVM_DEBUG(dbgs() << ".. no match\n"); + } + LLVM_DEBUG(dbgs() << ".. unsupported\n"); + return {LegalizeAction::Unsupported, 0, LLT{}}; +} + +bool LegalizeRuleSet::verifyTypeIdxsCoverage(unsigned NumTypeIdxs) const { +#ifndef NDEBUG + if (Rules.empty()) { + LLVM_DEBUG( + dbgs() << ".. type index coverage check SKIPPED: no rules defined\n"); + return true; + } + const int64_t FirstUncovered = TypeIdxsCovered.find_first_unset(); + if (FirstUncovered < 0) { + LLVM_DEBUG(dbgs() << ".. type index coverage check SKIPPED:" + " user-defined predicate detected\n"); + return true; + } + const bool AllCovered = (FirstUncovered >= NumTypeIdxs); + LLVM_DEBUG(dbgs() << ".. the first uncovered type index: " << FirstUncovered + << ", " << (AllCovered ? "OK" : "FAIL") << "\n"); + return AllCovered; +#else + return true; +#endif +} + +LegalizerInfo::LegalizerInfo() : TablesInitialized(false) { + // Set defaults. + // FIXME: these two (G_ANYEXT and G_TRUNC?) can be legalized to the + // fundamental load/store Jakob proposed. Once loads & stores are supported. + setScalarAction(TargetOpcode::G_ANYEXT, 1, {{1, Legal}}); + setScalarAction(TargetOpcode::G_ZEXT, 1, {{1, Legal}}); + setScalarAction(TargetOpcode::G_SEXT, 1, {{1, Legal}}); + setScalarAction(TargetOpcode::G_TRUNC, 0, {{1, Legal}}); + setScalarAction(TargetOpcode::G_TRUNC, 1, {{1, Legal}}); + + setScalarAction(TargetOpcode::G_INTRINSIC, 0, {{1, Legal}}); + setScalarAction(TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS, 0, {{1, Legal}}); + + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_IMPLICIT_DEF, 0, narrowToSmallerAndUnsupportedIfTooSmall); + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_ADD, 0, widenToLargerTypesAndNarrowToLargest); + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_OR, 0, widenToLargerTypesAndNarrowToLargest); + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_LOAD, 0, narrowToSmallerAndUnsupportedIfTooSmall); + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_STORE, 0, narrowToSmallerAndUnsupportedIfTooSmall); + + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_BRCOND, 0, widenToLargerTypesUnsupportedOtherwise); + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_INSERT, 0, narrowToSmallerAndUnsupportedIfTooSmall); + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_EXTRACT, 0, narrowToSmallerAndUnsupportedIfTooSmall); + setLegalizeScalarToDifferentSizeStrategy( + TargetOpcode::G_EXTRACT, 1, narrowToSmallerAndUnsupportedIfTooSmall); + setScalarAction(TargetOpcode::G_FNEG, 0, {{1, Lower}}); +} + +void LegalizerInfo::computeTables() { + assert(TablesInitialized == false); + + for (unsigned OpcodeIdx = 0; OpcodeIdx <= LastOp - FirstOp; ++OpcodeIdx) { + const unsigned Opcode = FirstOp + OpcodeIdx; + for (unsigned TypeIdx = 0; TypeIdx != SpecifiedActions[OpcodeIdx].size(); + ++TypeIdx) { + // 0. Collect information specified through the setAction API, i.e. + // for specific bit sizes. 
+      // For scalar types:
+      SizeAndActionsVec ScalarSpecifiedActions;
+      // For pointer types:
+      std::map<uint16_t, SizeAndActionsVec> AddressSpace2SpecifiedActions;
+      // For vector types:
+      std::map<uint16_t, SizeAndActionsVec> ElemSize2SpecifiedActions;
+      for (auto LLT2Action : SpecifiedActions[OpcodeIdx][TypeIdx]) {
+        const LLT Type = LLT2Action.first;
+        const LegalizeAction Action = LLT2Action.second;
+
+        auto SizeAction = std::make_pair(Type.getSizeInBits(), Action);
+        if (Type.isPointer())
+          AddressSpace2SpecifiedActions[Type.getAddressSpace()].push_back(
+              SizeAction);
+        else if (Type.isVector())
+          ElemSize2SpecifiedActions[Type.getElementType().getSizeInBits()]
+              .push_back(SizeAction);
+        else
+          ScalarSpecifiedActions.push_back(SizeAction);
+      }
+
+      // 1. Handle scalar types
+      {
+        // Decide how to handle bit sizes for which no explicit specification
+        // was given.
+        SizeChangeStrategy S = &unsupportedForDifferentSizes;
+        if (TypeIdx < ScalarSizeChangeStrategies[OpcodeIdx].size() &&
+            ScalarSizeChangeStrategies[OpcodeIdx][TypeIdx] != nullptr)
+          S = ScalarSizeChangeStrategies[OpcodeIdx][TypeIdx];
+        llvm::sort(ScalarSpecifiedActions.begin(),
+                   ScalarSpecifiedActions.end());
+        checkPartialSizeAndActionsVector(ScalarSpecifiedActions);
+        setScalarAction(Opcode, TypeIdx, S(ScalarSpecifiedActions));
+      }
+
+      // 2. Handle pointer types
+      for (auto PointerSpecifiedActions : AddressSpace2SpecifiedActions) {
+        llvm::sort(PointerSpecifiedActions.second.begin(),
+                   PointerSpecifiedActions.second.end());
+        checkPartialSizeAndActionsVector(PointerSpecifiedActions.second);
+        // For pointer types, we assume that there isn't a meaningful way
+        // to change the number of bits used in the pointer.
+        setPointerAction(
+            Opcode, TypeIdx, PointerSpecifiedActions.first,
+            unsupportedForDifferentSizes(PointerSpecifiedActions.second));
+      }
+
+      // 3. Handle vector types
+      SizeAndActionsVec ElementSizesSeen;
+      for (auto VectorSpecifiedActions : ElemSize2SpecifiedActions) {
+        llvm::sort(VectorSpecifiedActions.second.begin(),
+                   VectorSpecifiedActions.second.end());
+        const uint16_t ElementSize = VectorSpecifiedActions.first;
+        ElementSizesSeen.push_back({ElementSize, Legal});
+        checkPartialSizeAndActionsVector(VectorSpecifiedActions.second);
+        // For vector types, we assume that the best way to adapt the number of
+        // elements is to widen to the next larger number of elements for which
+        // the vector type is legal, unless there is no such type. In that
+        // case, legalize towards a vector type with a smaller number of
+        // elements.
+        SizeAndActionsVec NumElementsActions;
+        for (SizeAndAction BitsizeAndAction : VectorSpecifiedActions.second) {
+          assert(BitsizeAndAction.first % ElementSize == 0);
+          const uint16_t NumElements = BitsizeAndAction.first / ElementSize;
+          NumElementsActions.push_back({NumElements, BitsizeAndAction.second});
+        }
+        setVectorNumElementAction(
+            Opcode, TypeIdx, ElementSize,
+            moreToWiderTypesAndLessToWidest(NumElementsActions));
+      }
+      llvm::sort(ElementSizesSeen.begin(), ElementSizesSeen.end());
+      SizeChangeStrategy VectorElementSizeChangeStrategy =
+          &unsupportedForDifferentSizes;
+      if (TypeIdx < VectorElementSizeChangeStrategies[OpcodeIdx].size() &&
+          VectorElementSizeChangeStrategies[OpcodeIdx][TypeIdx] != nullptr)
+        VectorElementSizeChangeStrategy =
+            VectorElementSizeChangeStrategies[OpcodeIdx][TypeIdx];
+      setScalarInVectorAction(
+          Opcode, TypeIdx, VectorElementSizeChangeStrategy(ElementSizesSeen));
+    }
+  }
+
+  TablesInitialized = true;
+}
+
+// FIXME: inefficient implementation for now.
Without ComputeValueVTs we're +// probably going to need specialized lookup structures for various types before +// we have any hope of doing well with something like <13 x i3>. Even the common +// cases should do better than what we have now. +std::pair<LegalizeAction, LLT> +LegalizerInfo::getAspectAction(const InstrAspect &Aspect) const { + assert(TablesInitialized && "backend forgot to call computeTables"); + // These *have* to be implemented for now, they're the fundamental basis of + // how everything else is transformed. + if (Aspect.Type.isScalar() || Aspect.Type.isPointer()) + return findScalarLegalAction(Aspect); + assert(Aspect.Type.isVector()); + return findVectorLegalAction(Aspect); +} + +/// Helper function to get LLT for the given type index. +static LLT getTypeFromTypeIdx(const MachineInstr &MI, + const MachineRegisterInfo &MRI, unsigned OpIdx, + unsigned TypeIdx) { + assert(TypeIdx < MI.getNumOperands() && "Unexpected TypeIdx"); + // G_UNMERGE_VALUES has variable number of operands, but there is only + // one source type and one destination type as all destinations must be the + // same type. So, get the last operand if TypeIdx == 1. + if (MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && TypeIdx == 1) + return MRI.getType(MI.getOperand(MI.getNumOperands() - 1).getReg()); + return MRI.getType(MI.getOperand(OpIdx).getReg()); +} + +unsigned LegalizerInfo::getOpcodeIdxForOpcode(unsigned Opcode) const { + assert(Opcode >= FirstOp && Opcode <= LastOp && "Unsupported opcode"); + return Opcode - FirstOp; +} + +unsigned LegalizerInfo::getActionDefinitionsIdx(unsigned Opcode) const { + unsigned OpcodeIdx = getOpcodeIdxForOpcode(Opcode); + if (unsigned Alias = RulesForOpcode[OpcodeIdx].getAlias()) { + LLVM_DEBUG(dbgs() << ".. opcode " << Opcode << " is aliased to " << Alias + << "\n"); + OpcodeIdx = getOpcodeIdxForOpcode(Alias); + LLVM_DEBUG(dbgs() << ".. 
opcode " << Alias << " is aliased to " + << RulesForOpcode[OpcodeIdx].getAlias() << "\n"); + assert(RulesForOpcode[OpcodeIdx].getAlias() == 0 && "Cannot chain aliases"); + } + + return OpcodeIdx; +} + +const LegalizeRuleSet & +LegalizerInfo::getActionDefinitions(unsigned Opcode) const { + unsigned OpcodeIdx = getActionDefinitionsIdx(Opcode); + return RulesForOpcode[OpcodeIdx]; +} + +LegalizeRuleSet &LegalizerInfo::getActionDefinitionsBuilder(unsigned Opcode) { + unsigned OpcodeIdx = getActionDefinitionsIdx(Opcode); + auto &Result = RulesForOpcode[OpcodeIdx]; + assert(!Result.isAliasedByAnother() && "Modifying this opcode will modify aliases"); + return Result; +} + +LegalizeRuleSet &LegalizerInfo::getActionDefinitionsBuilder( + std::initializer_list<unsigned> Opcodes) { + unsigned Representative = *Opcodes.begin(); + + assert(Opcodes.begin() != Opcodes.end() && + Opcodes.begin() + 1 != Opcodes.end() && + "Initializer list must have at least two opcodes"); + + for (auto I = Opcodes.begin() + 1, E = Opcodes.end(); I != E; ++I) + aliasActionDefinitions(Representative, *I); + + auto &Return = getActionDefinitionsBuilder(Representative); + Return.setIsAliasedByAnother(); + return Return; +} + +void LegalizerInfo::aliasActionDefinitions(unsigned OpcodeTo, + unsigned OpcodeFrom) { + assert(OpcodeTo != OpcodeFrom && "Cannot alias to self"); + assert(OpcodeTo >= FirstOp && OpcodeTo <= LastOp && "Unsupported opcode"); + const unsigned OpcodeFromIdx = getOpcodeIdxForOpcode(OpcodeFrom); + RulesForOpcode[OpcodeFromIdx].aliasTo(OpcodeTo); +} + +LegalizeActionStep +LegalizerInfo::getAction(const LegalityQuery &Query) const { + LegalizeActionStep Step = getActionDefinitions(Query.Opcode).apply(Query); + if (Step.Action != LegalizeAction::UseLegacyRules) { + return Step; + } + + for (unsigned i = 0; i < Query.Types.size(); ++i) { + auto Action = getAspectAction({Query.Opcode, i, Query.Types[i]}); + if (Action.first != Legal) { + LLVM_DEBUG(dbgs() << ".. (legacy) Type " << i + << " Action=" << (unsigned)Action.first << ", " + << Action.second << "\n"); + return {Action.first, i, Action.second}; + } else + LLVM_DEBUG(dbgs() << ".. (legacy) Type " << i << " Legal\n"); + } + LLVM_DEBUG(dbgs() << ".. (legacy) Legal\n"); + return {Legal, 0, LLT{}}; +} + +LegalizeActionStep +LegalizerInfo::getAction(const MachineInstr &MI, + const MachineRegisterInfo &MRI) const { + SmallVector<LLT, 2> Types; + SmallBitVector SeenTypes(8); + const MCOperandInfo *OpInfo = MI.getDesc().OpInfo; + // FIXME: probably we'll need to cache the results here somehow? + for (unsigned i = 0; i < MI.getDesc().getNumOperands(); ++i) { + if (!OpInfo[i].isGenericType()) + continue; + + // We must only record actions once for each TypeIdx; otherwise we'd + // try to legalize operands multiple times down the line. 
+    unsigned TypeIdx = OpInfo[i].getGenericTypeIndex();
+    if (SeenTypes[TypeIdx])
+      continue;
+
+    SeenTypes.set(TypeIdx);
+
+    LLT Ty = getTypeFromTypeIdx(MI, MRI, i, TypeIdx);
+    Types.push_back(Ty);
+  }
+
+  SmallVector<LegalityQuery::MemDesc, 2> MemDescrs;
+  for (const auto &MMO : MI.memoperands())
+    MemDescrs.push_back(
+        {MMO->getSize() /* in bytes */ * 8, MMO->getOrdering()});
+
+  return getAction({MI.getOpcode(), Types, MemDescrs});
+}
+
+bool LegalizerInfo::isLegal(const MachineInstr &MI,
+                            const MachineRegisterInfo &MRI) const {
+  return getAction(MI, MRI).Action == Legal;
+}
+
+bool LegalizerInfo::legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
+                                   MachineIRBuilder &MIRBuilder) const {
+  return false;
+}
+
+LegalizerInfo::SizeAndActionsVec
+LegalizerInfo::increaseToLargerTypesAndDecreaseToLargest(
+    const SizeAndActionsVec &v, LegalizeAction IncreaseAction,
+    LegalizeAction DecreaseAction) {
+  SizeAndActionsVec result;
+  unsigned LargestSizeSoFar = 0;
+  if (v.size() >= 1 && v[0].first != 1)
+    result.push_back({1, IncreaseAction});
+  for (size_t i = 0; i < v.size(); ++i) {
+    result.push_back(v[i]);
+    LargestSizeSoFar = v[i].first;
+    if (i + 1 < v.size() && v[i + 1].first != v[i].first + 1) {
+      result.push_back({LargestSizeSoFar + 1, IncreaseAction});
+      LargestSizeSoFar = v[i].first + 1;
+    }
+  }
+  result.push_back({LargestSizeSoFar + 1, DecreaseAction});
+  return result;
+}
+
+LegalizerInfo::SizeAndActionsVec
+LegalizerInfo::decreaseToSmallerTypesAndIncreaseToSmallest(
+    const SizeAndActionsVec &v, LegalizeAction DecreaseAction,
+    LegalizeAction IncreaseAction) {
+  SizeAndActionsVec result;
+  if (v.size() == 0 || v[0].first != 1)
+    result.push_back({1, IncreaseAction});
+  for (size_t i = 0; i < v.size(); ++i) {
+    result.push_back(v[i]);
+    if (i + 1 == v.size() || v[i + 1].first != v[i].first + 1) {
+      result.push_back({v[i].first + 1, DecreaseAction});
+    }
+  }
+  return result;
+}
+
+LegalizerInfo::SizeAndAction
+LegalizerInfo::findAction(const SizeAndActionsVec &Vec, const uint32_t Size) {
+  assert(Size >= 1);
+  // Find the last element in Vec that has a bitsize equal to or smaller than
+  // the requested bit size.
+  // That is the element just before the first element that is bigger than Size.
+  auto VecIt = std::upper_bound(
+      Vec.begin(), Vec.end(), Size,
+      [](const uint32_t Size, const SizeAndAction lhs) -> bool {
+        return Size < lhs.first;
+      });
+  assert(VecIt != Vec.begin() && "Does Vec not start with size 1?");
+  --VecIt;
+  int VecIdx = VecIt - Vec.begin();
+
+  LegalizeAction Action = Vec[VecIdx].second;
+  switch (Action) {
+  case Legal:
+  case Lower:
+  case Libcall:
+  case Custom:
+    return {Size, Action};
+  case FewerElements:
+    // FIXME: is this special case still needed and correct?
+    // Special case for scalarization:
+    if (Vec == SizeAndActionsVec({{1, FewerElements}}))
+      return {1, FewerElements};
+    LLVM_FALLTHROUGH;
+  case NarrowScalar: {
+    // The following needs to be a loop, as, for now, we do allow stepping
+    // over "Unsupported" bit sizes before finding a legalizable bit size.
+    // E.g. (s8, WidenScalar), (s9, Unsupported), (s32, Legal). If Size==8,
+    // we need to iterate over s9, and then to s32 to return (s32, Legal).
+    // If we want to get rid of the below loop, we should have stronger asserts
+    // when building the SizeAndActionsVecs, probably not allowing
+    // "Unsupported" unless at the ends of the vector.
+    for (int i = VecIdx - 1; i >= 0; --i)
+      if (!needsLegalizingToDifferentSize(Vec[i].second) &&
+          Vec[i].second != Unsupported)
+        return {Vec[i].first, Action};
+    llvm_unreachable("no smaller legalizable size found");
+  }
+  case WidenScalar:
+  case MoreElements: {
+    // See above, the following needs to be a loop, at least for now.
+    for (std::size_t i = VecIdx + 1; i < Vec.size(); ++i)
+      if (!needsLegalizingToDifferentSize(Vec[i].second) &&
+          Vec[i].second != Unsupported)
+        return {Vec[i].first, Action};
+    llvm_unreachable("no larger legalizable size found");
+  }
+  case Unsupported:
+    return {Size, Unsupported};
+  case NotFound:
+  case UseLegacyRules:
+    llvm_unreachable("NotFound and UseLegacyRules must not reach findAction");
+  }
+  llvm_unreachable("Action has an unknown enum value");
+}
+
+std::pair<LegalizeAction, LLT>
+LegalizerInfo::findScalarLegalAction(const InstrAspect &Aspect) const {
+  assert(Aspect.Type.isScalar() || Aspect.Type.isPointer());
+  if (Aspect.Opcode < FirstOp || Aspect.Opcode > LastOp)
+    return {NotFound, LLT()};
+  const unsigned OpcodeIdx = getOpcodeIdxForOpcode(Aspect.Opcode);
+  if (Aspect.Type.isPointer() &&
+      AddrSpace2PointerActions[OpcodeIdx].find(Aspect.Type.getAddressSpace()) ==
+          AddrSpace2PointerActions[OpcodeIdx].end()) {
+    return {NotFound, LLT()};
+  }
+  const SmallVector<SizeAndActionsVec, 1> &Actions =
+      Aspect.Type.isPointer()
+          ? AddrSpace2PointerActions[OpcodeIdx]
+                .find(Aspect.Type.getAddressSpace())
+                ->second
+          : ScalarActions[OpcodeIdx];
+  if (Aspect.Idx >= Actions.size())
+    return {NotFound, LLT()};
+  const SizeAndActionsVec &Vec = Actions[Aspect.Idx];
+  // FIXME: speed up this search, e.g. by using a results cache for repeated
+  // queries?
+  auto SizeAndAction = findAction(Vec, Aspect.Type.getSizeInBits());
+  return {SizeAndAction.second,
+          Aspect.Type.isScalar() ? LLT::scalar(SizeAndAction.first)
+                                 : LLT::pointer(Aspect.Type.getAddressSpace(),
+                                                SizeAndAction.first)};
+}
+
+std::pair<LegalizeAction, LLT>
+LegalizerInfo::findVectorLegalAction(const InstrAspect &Aspect) const {
+  assert(Aspect.Type.isVector());
+  // First legalize the vector element size, then legalize the number of
+  // lanes in the vector.
+  if (Aspect.Opcode < FirstOp || Aspect.Opcode > LastOp)
+    return {NotFound, Aspect.Type};
+  const unsigned OpcodeIdx = getOpcodeIdxForOpcode(Aspect.Opcode);
+  const unsigned TypeIdx = Aspect.Idx;
+  if (TypeIdx >= ScalarInVectorActions[OpcodeIdx].size())
+    return {NotFound, Aspect.Type};
+  const SizeAndActionsVec &ElemSizeVec =
+      ScalarInVectorActions[OpcodeIdx][TypeIdx];
+
+  LLT IntermediateType;
+  auto ElementSizeAndAction =
+      findAction(ElemSizeVec, Aspect.Type.getScalarSizeInBits());
+  IntermediateType =
+      LLT::vector(Aspect.Type.getNumElements(), ElementSizeAndAction.first);
+  if (ElementSizeAndAction.second != Legal)
+    return {ElementSizeAndAction.second, IntermediateType};
+
+  auto i = NumElements2Actions[OpcodeIdx].find(
+      IntermediateType.getScalarSizeInBits());
+  if (i == NumElements2Actions[OpcodeIdx].end()) {
+    return {NotFound, IntermediateType};
+  }
+  const SizeAndActionsVec &NumElementsVec = (*i).second[TypeIdx];
+  auto NumElementsAndAction =
+      findAction(NumElementsVec, IntermediateType.getNumElements());
+  return {NumElementsAndAction.second,
+          LLT::vector(NumElementsAndAction.first,
+                      IntermediateType.getScalarSizeInBits())};
+}
+
+/// \pre Type indices of every opcode form a dense set starting from 0.
+void LegalizerInfo::verify(const MCInstrInfo &MII) const {
+#ifndef NDEBUG
+  std::vector<unsigned> FailedOpcodes;
+  for (unsigned Opcode = FirstOp; Opcode <= LastOp; ++Opcode) {
+    const MCInstrDesc &MCID = MII.get(Opcode);
+    const unsigned NumTypeIdxs = std::accumulate(
+        MCID.opInfo_begin(), MCID.opInfo_end(), 0U,
+        [](unsigned Acc, const MCOperandInfo &OpInfo) {
+          return OpInfo.isGenericType()
+                     ? std::max(OpInfo.getGenericTypeIndex() + 1U, Acc)
+                     : Acc;
+        });
+    LLVM_DEBUG(dbgs() << MII.getName(Opcode) << " (opcode " << Opcode
+                      << "): " << NumTypeIdxs << " type ind"
+                      << (NumTypeIdxs == 1 ? "ex" : "ices") << "\n");
+    const LegalizeRuleSet &RuleSet = getActionDefinitions(Opcode);
+    if (!RuleSet.verifyTypeIdxsCoverage(NumTypeIdxs))
+      FailedOpcodes.push_back(Opcode);
+  }
+  if (!FailedOpcodes.empty()) {
+    errs() << "The following opcodes have ill-defined legalization rules:";
+    for (unsigned Opcode : FailedOpcodes)
+      errs() << " " << MII.getName(Opcode);
+    errs() << "\n";
+
+    report_fatal_error("ill-defined LegalizerInfo"
+                       ", try -debug-only=legalizer-info for details");
+  }
+#endif
+}
+
+#ifndef NDEBUG
+// FIXME: This should be in the MachineVerifier, but it can't use the
+// LegalizerInfo as it's currently in the separate GlobalISel library.
+// Note that the RegBankSelected property, which is already checked in the
+// verifier, has the same layering problem, but we only use inline methods
+// so we end up not needing to link against the GlobalISel library.
+const MachineInstr *llvm::machineFunctionIsIllegal(const MachineFunction &MF) {
+  if (const LegalizerInfo *MLI = MF.getSubtarget().getLegalizerInfo()) {
+    const MachineRegisterInfo &MRI = MF.getRegInfo();
+    for (const MachineBasicBlock &MBB : MF)
+      for (const MachineInstr &MI : MBB)
+        if (isPreISelGenericOpcode(MI.getOpcode()) && !MLI->isLegal(MI, MRI))
+          return &MI;
+  }
+  return nullptr;
+}
+#endif
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
new file mode 100644
index 000000000000..52b340753a50
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
@@ -0,0 +1,129 @@
+//===- Localizer.cpp ---------------------- Localize some instrs -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the Localizer class.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/Localizer.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "localizer"
+
+using namespace llvm;
+
+char Localizer::ID = 0;
+INITIALIZE_PASS(Localizer, DEBUG_TYPE,
+                "Move/duplicate certain instructions close to their use", false,
+                false)
+
+Localizer::Localizer() : MachineFunctionPass(ID) {
+  initializeLocalizerPass(*PassRegistry::getPassRegistry());
+}
+
+void Localizer::init(MachineFunction &MF) { MRI = &MF.getRegInfo(); }
+
+bool Localizer::shouldLocalize(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  default:
+    return false;
+  // Constant-like instructions should be close to their users.
+  // We don't want long live-ranges for them.
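+  // For example (illustrative): a G_CONSTANT defined in the entry block but
+  // used only inside a loop body is cloned next to that use, so its
+  // live-range no longer spans the whole function.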
+  case TargetOpcode::G_CONSTANT:
+  case TargetOpcode::G_FCONSTANT:
+  case TargetOpcode::G_FRAME_INDEX:
+    return true;
+  }
+}
+
+void Localizer::getAnalysisUsage(AnalysisUsage &AU) const {
+  getSelectionDAGFallbackAnalysisUsage(AU);
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool Localizer::isLocalUse(MachineOperand &MOUse, const MachineInstr &Def,
+                           MachineBasicBlock *&InsertMBB) {
+  MachineInstr &MIUse = *MOUse.getParent();
+  InsertMBB = MIUse.getParent();
+  if (MIUse.isPHI())
+    InsertMBB = MIUse.getOperand(MIUse.getOperandNo(&MOUse) + 1).getMBB();
+  return InsertMBB == Def.getParent();
+}
+
+bool Localizer::runOnMachineFunction(MachineFunction &MF) {
+  // If the ISel pipeline failed, do not bother running this pass.
+  if (MF.getProperties().hasProperty(
+          MachineFunctionProperties::Property::FailedISel))
+    return false;
+
+  LLVM_DEBUG(dbgs() << "Localize instructions for: " << MF.getName() << '\n');
+
+  init(MF);
+
+  bool Changed = false;
+  // Keep track of the instructions we localized.
+  // We won't need to process them if we see them later in the CFG.
+  SmallPtrSet<MachineInstr *, 16> LocalizedInstrs;
+  DenseMap<std::pair<MachineBasicBlock *, unsigned>, unsigned> MBBWithLocalDef;
+  // TODO: Do bottom up traversal.
+  for (MachineBasicBlock &MBB : MF) {
+    for (MachineInstr &MI : MBB) {
+      if (LocalizedInstrs.count(&MI) || !shouldLocalize(MI))
+        continue;
+      LLVM_DEBUG(dbgs() << "Should localize: " << MI);
+      assert(MI.getDesc().getNumDefs() == 1 &&
+             "More than one definition not supported yet");
+      unsigned Reg = MI.getOperand(0).getReg();
+      // Check if all the users of MI are local.
+      // We are going to invalidate the list of use operands, so we
+      // can't use a range iterator.
+      for (auto MOIt = MRI->use_begin(Reg), MOItEnd = MRI->use_end();
+           MOIt != MOItEnd;) {
+        MachineOperand &MOUse = *MOIt++;
+        // Check if the use is already local.
+        MachineBasicBlock *InsertMBB;
+        LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent();
+                   dbgs() << "Checking use: " << MIUse
+                          << " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n');
+        if (isLocalUse(MOUse, MI, InsertMBB))
+          continue;
+        LLVM_DEBUG(dbgs() << "Fixing non-local use\n");
+        Changed = true;
+        auto MBBAndReg = std::make_pair(InsertMBB, Reg);
+        auto NewVRegIt = MBBWithLocalDef.find(MBBAndReg);
+        if (NewVRegIt == MBBWithLocalDef.end()) {
+          // Create the localized instruction.
+          MachineInstr *LocalizedMI = MF.CloneMachineInstr(&MI);
+          LocalizedInstrs.insert(LocalizedMI);
+          // Don't try to be smart for the insertion point.
+          // There is no guarantee that the first seen use is the first
+          // use in the block.
+          InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(InsertMBB->begin()),
+                            LocalizedMI);
+
+          // Set a new register for the definition.
+          unsigned NewReg =
+              MRI->createGenericVirtualRegister(MRI->getType(Reg));
+          MRI->setRegClassOrRegBank(NewReg, MRI->getRegClassOrRegBank(Reg));
+          LocalizedMI->getOperand(0).setReg(NewReg);
+          NewVRegIt =
+              MBBWithLocalDef.insert(std::make_pair(MBBAndReg, NewReg)).first;
+          LLVM_DEBUG(dbgs() << "Inserted: " << *LocalizedMI);
+        }
+        LLVM_DEBUG(dbgs() << "Update use with: " << printReg(NewVRegIt->second)
+                          << '\n');
+        // Update the user reg.
+ MOUse.setReg(NewVRegIt->second); + } + } + } + return Changed; +} diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp new file mode 100644 index 000000000000..9df931eb81b3 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -0,0 +1,832 @@ +//===-- llvm/CodeGen/GlobalISel/MachineIRBuilder.cpp - MIBuilder--*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the MachineIRBuidler class. +//===----------------------------------------------------------------------===// +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" + +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/DebugInfo.h" + +using namespace llvm; + +void MachineIRBuilderBase::setMF(MachineFunction &MF) { + State.MF = &MF; + State.MBB = nullptr; + State.MRI = &MF.getRegInfo(); + State.TII = MF.getSubtarget().getInstrInfo(); + State.DL = DebugLoc(); + State.II = MachineBasicBlock::iterator(); + State.InsertedInstr = nullptr; +} + +void MachineIRBuilderBase::setMBB(MachineBasicBlock &MBB) { + State.MBB = &MBB; + State.II = MBB.end(); + assert(&getMF() == MBB.getParent() && + "Basic block is in a different function"); +} + +void MachineIRBuilderBase::setInstr(MachineInstr &MI) { + assert(MI.getParent() && "Instruction is not part of a basic block"); + setMBB(*MI.getParent()); + State.II = MI.getIterator(); +} + +void MachineIRBuilderBase::setInsertPt(MachineBasicBlock &MBB, + MachineBasicBlock::iterator II) { + assert(MBB.getParent() == &getMF() && + "Basic block is in a different function"); + State.MBB = &MBB; + State.II = II; +} + +void MachineIRBuilderBase::recordInsertion(MachineInstr *InsertedInstr) const { + if (State.InsertedInstr) + State.InsertedInstr(InsertedInstr); +} + +void MachineIRBuilderBase::recordInsertions( + std::function<void(MachineInstr *)> Inserted) { + State.InsertedInstr = std::move(Inserted); +} + +void MachineIRBuilderBase::stopRecordingInsertions() { + State.InsertedInstr = nullptr; +} + +//------------------------------------------------------------------------------ +// Build instruction variants. 
+//------------------------------------------------------------------------------ + +MachineInstrBuilder MachineIRBuilderBase::buildInstr(unsigned Opcode) { + return insertInstr(buildInstrNoInsert(Opcode)); +} + +MachineInstrBuilder MachineIRBuilderBase::buildInstrNoInsert(unsigned Opcode) { + MachineInstrBuilder MIB = BuildMI(getMF(), getDL(), getTII().get(Opcode)); + return MIB; +} + +MachineInstrBuilder MachineIRBuilderBase::insertInstr(MachineInstrBuilder MIB) { + getMBB().insert(getInsertPt(), MIB); + recordInsertion(MIB); + return MIB; +} + +MachineInstrBuilder +MachineIRBuilderBase::buildDirectDbgValue(unsigned Reg, const MDNode *Variable, + const MDNode *Expr) { + assert(isa<DILocalVariable>(Variable) && "not a variable"); + assert(cast<DIExpression>(Expr)->isValid() && "not an expression"); + assert( + cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(getDL()) && + "Expected inlined-at fields to agree"); + return insertInstr(BuildMI(getMF(), getDL(), + getTII().get(TargetOpcode::DBG_VALUE), + /*IsIndirect*/ false, Reg, Variable, Expr)); +} + +MachineInstrBuilder MachineIRBuilderBase::buildIndirectDbgValue( + unsigned Reg, const MDNode *Variable, const MDNode *Expr) { + assert(isa<DILocalVariable>(Variable) && "not a variable"); + assert(cast<DIExpression>(Expr)->isValid() && "not an expression"); + assert( + cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(getDL()) && + "Expected inlined-at fields to agree"); + return insertInstr(BuildMI(getMF(), getDL(), + getTII().get(TargetOpcode::DBG_VALUE), + /*IsIndirect*/ true, Reg, Variable, Expr)); +} + +MachineInstrBuilder +MachineIRBuilderBase::buildFIDbgValue(int FI, const MDNode *Variable, + const MDNode *Expr) { + assert(isa<DILocalVariable>(Variable) && "not a variable"); + assert(cast<DIExpression>(Expr)->isValid() && "not an expression"); + assert( + cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(getDL()) && + "Expected inlined-at fields to agree"); + return buildInstr(TargetOpcode::DBG_VALUE) + .addFrameIndex(FI) + .addImm(0) + .addMetadata(Variable) + .addMetadata(Expr); +} + +MachineInstrBuilder MachineIRBuilderBase::buildConstDbgValue( + const Constant &C, const MDNode *Variable, const MDNode *Expr) { + assert(isa<DILocalVariable>(Variable) && "not a variable"); + assert(cast<DIExpression>(Expr)->isValid() && "not an expression"); + assert( + cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(getDL()) && + "Expected inlined-at fields to agree"); + auto MIB = buildInstr(TargetOpcode::DBG_VALUE); + if (auto *CI = dyn_cast<ConstantInt>(&C)) { + if (CI->getBitWidth() > 64) + MIB.addCImm(CI); + else + MIB.addImm(CI->getZExtValue()); + } else if (auto *CFP = dyn_cast<ConstantFP>(&C)) { + MIB.addFPImm(CFP); + } else { + // Insert %noreg if we didn't find a usable constant and had to drop it. 
+ MIB.addReg(0U); + } + + return MIB.addImm(0).addMetadata(Variable).addMetadata(Expr); +} + +MachineInstrBuilder MachineIRBuilderBase::buildFrameIndex(unsigned Res, + int Idx) { + assert(getMRI()->getType(Res).isPointer() && "invalid operand type"); + return buildInstr(TargetOpcode::G_FRAME_INDEX) + .addDef(Res) + .addFrameIndex(Idx); +} + +MachineInstrBuilder +MachineIRBuilderBase::buildGlobalValue(unsigned Res, const GlobalValue *GV) { + assert(getMRI()->getType(Res).isPointer() && "invalid operand type"); + assert(getMRI()->getType(Res).getAddressSpace() == + GV->getType()->getAddressSpace() && + "address space mismatch"); + + return buildInstr(TargetOpcode::G_GLOBAL_VALUE) + .addDef(Res) + .addGlobalAddress(GV); +} + +void MachineIRBuilderBase::validateBinaryOp(unsigned Res, unsigned Op0, + unsigned Op1) { + assert((getMRI()->getType(Res).isScalar() || + getMRI()->getType(Res).isVector()) && + "invalid operand type"); + assert(getMRI()->getType(Res) == getMRI()->getType(Op0) && + getMRI()->getType(Res) == getMRI()->getType(Op1) && "type mismatch"); +} + +MachineInstrBuilder MachineIRBuilderBase::buildGEP(unsigned Res, unsigned Op0, + unsigned Op1) { + assert(getMRI()->getType(Res).isPointer() && + getMRI()->getType(Res) == getMRI()->getType(Op0) && "type mismatch"); + assert(getMRI()->getType(Op1).isScalar() && "invalid offset type"); + + return buildInstr(TargetOpcode::G_GEP) + .addDef(Res) + .addUse(Op0) + .addUse(Op1); +} + +Optional<MachineInstrBuilder> +MachineIRBuilderBase::materializeGEP(unsigned &Res, unsigned Op0, + const LLT &ValueTy, uint64_t Value) { + assert(Res == 0 && "Res is a result argument"); + assert(ValueTy.isScalar() && "invalid offset type"); + + if (Value == 0) { + Res = Op0; + return None; + } + + Res = getMRI()->createGenericVirtualRegister(getMRI()->getType(Op0)); + unsigned TmpReg = getMRI()->createGenericVirtualRegister(ValueTy); + + buildConstant(TmpReg, Value); + return buildGEP(Res, Op0, TmpReg); +} + +MachineInstrBuilder MachineIRBuilderBase::buildPtrMask(unsigned Res, + unsigned Op0, + uint32_t NumBits) { + assert(getMRI()->getType(Res).isPointer() && + getMRI()->getType(Res) == getMRI()->getType(Op0) && "type mismatch"); + + return buildInstr(TargetOpcode::G_PTR_MASK) + .addDef(Res) + .addUse(Op0) + .addImm(NumBits); +} + +MachineInstrBuilder MachineIRBuilderBase::buildBr(MachineBasicBlock &Dest) { + return buildInstr(TargetOpcode::G_BR).addMBB(&Dest); +} + +MachineInstrBuilder MachineIRBuilderBase::buildBrIndirect(unsigned Tgt) { + assert(getMRI()->getType(Tgt).isPointer() && "invalid branch destination"); + return buildInstr(TargetOpcode::G_BRINDIRECT).addUse(Tgt); +} + +MachineInstrBuilder MachineIRBuilderBase::buildCopy(unsigned Res, unsigned Op) { + assert(getMRI()->getType(Res) == LLT() || getMRI()->getType(Op) == LLT() || + getMRI()->getType(Res) == getMRI()->getType(Op)); + return buildInstr(TargetOpcode::COPY).addDef(Res).addUse(Op); +} + +MachineInstrBuilder +MachineIRBuilderBase::buildConstant(unsigned Res, const ConstantInt &Val) { + LLT Ty = getMRI()->getType(Res); + + assert((Ty.isScalar() || Ty.isPointer()) && "invalid operand type"); + + const ConstantInt *NewVal = &Val; + if (Ty.getSizeInBits() != Val.getBitWidth()) + NewVal = ConstantInt::get(getMF().getFunction().getContext(), + Val.getValue().sextOrTrunc(Ty.getSizeInBits())); + + return buildInstr(TargetOpcode::G_CONSTANT).addDef(Res).addCImm(NewVal); +} + +MachineInstrBuilder MachineIRBuilderBase::buildConstant(unsigned Res, + int64_t Val) { + auto IntN = 
IntegerType::get(getMF().getFunction().getContext(), + getMRI()->getType(Res).getSizeInBits()); + ConstantInt *CI = ConstantInt::get(IntN, Val, true); + return buildConstant(Res, *CI); +} + +MachineInstrBuilder +MachineIRBuilderBase::buildFConstant(unsigned Res, const ConstantFP &Val) { + assert(getMRI()->getType(Res).isScalar() && "invalid operand type"); + + return buildInstr(TargetOpcode::G_FCONSTANT).addDef(Res).addFPImm(&Val); +} + +MachineInstrBuilder MachineIRBuilderBase::buildFConstant(unsigned Res, + double Val) { + LLT DstTy = getMRI()->getType(Res); + auto &Ctx = getMF().getFunction().getContext(); + auto *CFP = + ConstantFP::get(Ctx, getAPFloatFromSize(Val, DstTy.getSizeInBits())); + return buildFConstant(Res, *CFP); +} + +MachineInstrBuilder MachineIRBuilderBase::buildBrCond(unsigned Tst, + MachineBasicBlock &Dest) { + assert(getMRI()->getType(Tst).isScalar() && "invalid operand type"); + + return buildInstr(TargetOpcode::G_BRCOND).addUse(Tst).addMBB(&Dest); +} + +MachineInstrBuilder MachineIRBuilderBase::buildLoad(unsigned Res, unsigned Addr, + MachineMemOperand &MMO) { + return buildLoadInstr(TargetOpcode::G_LOAD, Res, Addr, MMO); +} + +MachineInstrBuilder +MachineIRBuilderBase::buildLoadInstr(unsigned Opcode, unsigned Res, + unsigned Addr, MachineMemOperand &MMO) { + assert(getMRI()->getType(Res).isValid() && "invalid operand type"); + assert(getMRI()->getType(Addr).isPointer() && "invalid operand type"); + + return buildInstr(Opcode) + .addDef(Res) + .addUse(Addr) + .addMemOperand(&MMO); +} + +MachineInstrBuilder MachineIRBuilderBase::buildStore(unsigned Val, + unsigned Addr, + MachineMemOperand &MMO) { + assert(getMRI()->getType(Val).isValid() && "invalid operand type"); + assert(getMRI()->getType(Addr).isPointer() && "invalid operand type"); + + return buildInstr(TargetOpcode::G_STORE) + .addUse(Val) + .addUse(Addr) + .addMemOperand(&MMO); +} + +MachineInstrBuilder MachineIRBuilderBase::buildUAdde(unsigned Res, + unsigned CarryOut, + unsigned Op0, unsigned Op1, + unsigned CarryIn) { + assert(getMRI()->getType(Res).isScalar() && "invalid operand type"); + assert(getMRI()->getType(Res) == getMRI()->getType(Op0) && + getMRI()->getType(Res) == getMRI()->getType(Op1) && "type mismatch"); + assert(getMRI()->getType(CarryOut).isScalar() && "invalid operand type"); + assert(getMRI()->getType(CarryOut) == getMRI()->getType(CarryIn) && + "type mismatch"); + + return buildInstr(TargetOpcode::G_UADDE) + .addDef(Res) + .addDef(CarryOut) + .addUse(Op0) + .addUse(Op1) + .addUse(CarryIn); +} + +MachineInstrBuilder MachineIRBuilderBase::buildAnyExt(unsigned Res, + unsigned Op) { + validateTruncExt(Res, Op, true); + return buildInstr(TargetOpcode::G_ANYEXT).addDef(Res).addUse(Op); +} + +MachineInstrBuilder MachineIRBuilderBase::buildSExt(unsigned Res, unsigned Op) { + validateTruncExt(Res, Op, true); + return buildInstr(TargetOpcode::G_SEXT).addDef(Res).addUse(Op); +} + +MachineInstrBuilder MachineIRBuilderBase::buildZExt(unsigned Res, unsigned Op) { + validateTruncExt(Res, Op, true); + return buildInstr(TargetOpcode::G_ZEXT).addDef(Res).addUse(Op); +} + +MachineInstrBuilder MachineIRBuilderBase::buildExtOrTrunc(unsigned ExtOpc, + unsigned Res, + unsigned Op) { + assert((TargetOpcode::G_ANYEXT == ExtOpc || TargetOpcode::G_ZEXT == ExtOpc || + TargetOpcode::G_SEXT == ExtOpc) && + "Expecting Extending Opc"); + assert(getMRI()->getType(Res).isScalar() || + getMRI()->getType(Res).isVector()); + assert(getMRI()->getType(Res).isScalar() == getMRI()->getType(Op).isScalar()); + + unsigned Opcode = 
TargetOpcode::COPY; + if (getMRI()->getType(Res).getSizeInBits() > + getMRI()->getType(Op).getSizeInBits()) + Opcode = ExtOpc; + else if (getMRI()->getType(Res).getSizeInBits() < + getMRI()->getType(Op).getSizeInBits()) + Opcode = TargetOpcode::G_TRUNC; + else + assert(getMRI()->getType(Res) == getMRI()->getType(Op)); + + return buildInstr(Opcode).addDef(Res).addUse(Op); +} + +MachineInstrBuilder MachineIRBuilderBase::buildSExtOrTrunc(unsigned Res, + unsigned Op) { + return buildExtOrTrunc(TargetOpcode::G_SEXT, Res, Op); +} + +MachineInstrBuilder MachineIRBuilderBase::buildZExtOrTrunc(unsigned Res, + unsigned Op) { + return buildExtOrTrunc(TargetOpcode::G_ZEXT, Res, Op); +} + +MachineInstrBuilder MachineIRBuilderBase::buildAnyExtOrTrunc(unsigned Res, + unsigned Op) { + return buildExtOrTrunc(TargetOpcode::G_ANYEXT, Res, Op); +} + +MachineInstrBuilder MachineIRBuilderBase::buildCast(unsigned Dst, + unsigned Src) { + LLT SrcTy = getMRI()->getType(Src); + LLT DstTy = getMRI()->getType(Dst); + if (SrcTy == DstTy) + return buildCopy(Dst, Src); + + unsigned Opcode; + if (SrcTy.isPointer() && DstTy.isScalar()) + Opcode = TargetOpcode::G_PTRTOINT; + else if (DstTy.isPointer() && SrcTy.isScalar()) + Opcode = TargetOpcode::G_INTTOPTR; + else { + assert(!SrcTy.isPointer() && !DstTy.isPointer() && "n G_ADDRCAST yet"); + Opcode = TargetOpcode::G_BITCAST; + } + + return buildInstr(Opcode).addDef(Dst).addUse(Src); +} + +MachineInstrBuilder +MachineIRBuilderBase::buildExtract(unsigned Res, unsigned Src, uint64_t Index) { +#ifndef NDEBUG + assert(getMRI()->getType(Src).isValid() && "invalid operand type"); + assert(getMRI()->getType(Res).isValid() && "invalid operand type"); + assert(Index + getMRI()->getType(Res).getSizeInBits() <= + getMRI()->getType(Src).getSizeInBits() && + "extracting off end of register"); +#endif + + if (getMRI()->getType(Res).getSizeInBits() == + getMRI()->getType(Src).getSizeInBits()) { + assert(Index == 0 && "insertion past the end of a register"); + return buildCast(Res, Src); + } + + return buildInstr(TargetOpcode::G_EXTRACT) + .addDef(Res) + .addUse(Src) + .addImm(Index); +} + +void MachineIRBuilderBase::buildSequence(unsigned Res, ArrayRef<unsigned> Ops, + ArrayRef<uint64_t> Indices) { +#ifndef NDEBUG + assert(Ops.size() == Indices.size() && "incompatible args"); + assert(!Ops.empty() && "invalid trivial sequence"); + assert(std::is_sorted(Indices.begin(), Indices.end()) && + "sequence offsets must be in ascending order"); + + assert(getMRI()->getType(Res).isValid() && "invalid operand type"); + for (auto Op : Ops) + assert(getMRI()->getType(Op).isValid() && "invalid operand type"); +#endif + + LLT ResTy = getMRI()->getType(Res); + LLT OpTy = getMRI()->getType(Ops[0]); + unsigned OpSize = OpTy.getSizeInBits(); + bool MaybeMerge = true; + for (unsigned i = 0; i < Ops.size(); ++i) { + if (getMRI()->getType(Ops[i]) != OpTy || Indices[i] != i * OpSize) { + MaybeMerge = false; + break; + } + } + + if (MaybeMerge && Ops.size() * OpSize == ResTy.getSizeInBits()) { + buildMerge(Res, Ops); + return; + } + + unsigned ResIn = getMRI()->createGenericVirtualRegister(ResTy); + buildUndef(ResIn); + + for (unsigned i = 0; i < Ops.size(); ++i) { + unsigned ResOut = i + 1 == Ops.size() + ? 
Res + : getMRI()->createGenericVirtualRegister(ResTy); + buildInsert(ResOut, ResIn, Ops[i], Indices[i]); + ResIn = ResOut; + } +} + +MachineInstrBuilder MachineIRBuilderBase::buildUndef(unsigned Res) { + return buildInstr(TargetOpcode::G_IMPLICIT_DEF).addDef(Res); +} + +MachineInstrBuilder MachineIRBuilderBase::buildMerge(unsigned Res, + ArrayRef<unsigned> Ops) { + +#ifndef NDEBUG + assert(!Ops.empty() && "invalid trivial sequence"); + LLT Ty = getMRI()->getType(Ops[0]); + for (auto Reg : Ops) + assert(getMRI()->getType(Reg) == Ty && "type mismatch in input list"); + assert(Ops.size() * getMRI()->getType(Ops[0]).getSizeInBits() == + getMRI()->getType(Res).getSizeInBits() && + "input operands do not cover output register"); +#endif + + if (Ops.size() == 1) + return buildCast(Res, Ops[0]); + + MachineInstrBuilder MIB = buildInstr(TargetOpcode::G_MERGE_VALUES); + MIB.addDef(Res); + for (unsigned i = 0; i < Ops.size(); ++i) + MIB.addUse(Ops[i]); + return MIB; +} + +MachineInstrBuilder MachineIRBuilderBase::buildUnmerge(ArrayRef<unsigned> Res, + unsigned Op) { + +#ifndef NDEBUG + assert(!Res.empty() && "invalid trivial sequence"); + LLT Ty = getMRI()->getType(Res[0]); + for (auto Reg : Res) + assert(getMRI()->getType(Reg) == Ty && "type mismatch in input list"); + assert(Res.size() * getMRI()->getType(Res[0]).getSizeInBits() == + getMRI()->getType(Op).getSizeInBits() && + "input operands do not cover output register"); +#endif + + MachineInstrBuilder MIB = buildInstr(TargetOpcode::G_UNMERGE_VALUES); + for (unsigned i = 0; i < Res.size(); ++i) + MIB.addDef(Res[i]); + MIB.addUse(Op); + return MIB; +} + +MachineInstrBuilder MachineIRBuilderBase::buildInsert(unsigned Res, + unsigned Src, unsigned Op, + unsigned Index) { + assert(Index + getMRI()->getType(Op).getSizeInBits() <= + getMRI()->getType(Res).getSizeInBits() && + "insertion past the end of a register"); + + if (getMRI()->getType(Res).getSizeInBits() == + getMRI()->getType(Op).getSizeInBits()) { + return buildCast(Res, Op); + } + + return buildInstr(TargetOpcode::G_INSERT) + .addDef(Res) + .addUse(Src) + .addUse(Op) + .addImm(Index); +} + +MachineInstrBuilder MachineIRBuilderBase::buildIntrinsic(Intrinsic::ID ID, + unsigned Res, + bool HasSideEffects) { + auto MIB = + buildInstr(HasSideEffects ? 
TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
+                               : TargetOpcode::G_INTRINSIC);
+  if (Res)
+    MIB.addDef(Res);
+  MIB.addIntrinsicID(ID);
+  return MIB;
+}
+
+MachineInstrBuilder MachineIRBuilderBase::buildTrunc(unsigned Res,
+                                                     unsigned Op) {
+  validateTruncExt(Res, Op, false);
+  return buildInstr(TargetOpcode::G_TRUNC).addDef(Res).addUse(Op);
+}
+
+MachineInstrBuilder MachineIRBuilderBase::buildFPTrunc(unsigned Res,
+                                                       unsigned Op) {
+  validateTruncExt(Res, Op, false);
+  return buildInstr(TargetOpcode::G_FPTRUNC).addDef(Res).addUse(Op);
+}
+
+MachineInstrBuilder MachineIRBuilderBase::buildICmp(CmpInst::Predicate Pred,
+                                                    unsigned Res, unsigned Op0,
+                                                    unsigned Op1) {
+#ifndef NDEBUG
+  assert(getMRI()->getType(Op0) == getMRI()->getType(Op1) && "type mismatch");
+  assert(CmpInst::isIntPredicate(Pred) && "invalid predicate");
+  if (getMRI()->getType(Op0).isScalar() || getMRI()->getType(Op0).isPointer())
+    assert(getMRI()->getType(Res).isScalar() && "type mismatch");
+  else
+    assert(getMRI()->getType(Res).isVector() &&
+           getMRI()->getType(Res).getNumElements() ==
+               getMRI()->getType(Op0).getNumElements() &&
+           "type mismatch");
+#endif
+
+  return buildInstr(TargetOpcode::G_ICMP)
+      .addDef(Res)
+      .addPredicate(Pred)
+      .addUse(Op0)
+      .addUse(Op1);
+}
+
+MachineInstrBuilder MachineIRBuilderBase::buildFCmp(CmpInst::Predicate Pred,
+                                                    unsigned Res, unsigned Op0,
+                                                    unsigned Op1) {
+#ifndef NDEBUG
+  assert((getMRI()->getType(Op0).isScalar() ||
+          getMRI()->getType(Op0).isVector()) &&
+         "invalid operand type");
+  assert(getMRI()->getType(Op0) == getMRI()->getType(Op1) && "type mismatch");
+  assert(CmpInst::isFPPredicate(Pred) && "invalid predicate");
+  if (getMRI()->getType(Op0).isScalar())
+    assert(getMRI()->getType(Res).isScalar() && "type mismatch");
+  else
+    assert(getMRI()->getType(Res).isVector() &&
+           getMRI()->getType(Res).getNumElements() ==
+               getMRI()->getType(Op0).getNumElements() &&
+           "type mismatch");
+#endif
+
+  return buildInstr(TargetOpcode::G_FCMP)
+      .addDef(Res)
+      .addPredicate(Pred)
+      .addUse(Op0)
+      .addUse(Op1);
+}
+
+MachineInstrBuilder MachineIRBuilderBase::buildSelect(unsigned Res,
+                                                      unsigned Tst,
+                                                      unsigned Op0,
+                                                      unsigned Op1) {
+#ifndef NDEBUG
+  LLT ResTy = getMRI()->getType(Res);
+  assert((ResTy.isScalar() || ResTy.isVector() || ResTy.isPointer()) &&
+         "invalid operand type");
+  assert(ResTy == getMRI()->getType(Op0) && ResTy == getMRI()->getType(Op1) &&
+         "type mismatch");
+  if (ResTy.isScalar() || ResTy.isPointer())
+    assert(getMRI()->getType(Tst).isScalar() && "type mismatch");
+  else
+    assert((getMRI()->getType(Tst).isScalar() ||
+            (getMRI()->getType(Tst).isVector() &&
+             getMRI()->getType(Tst).getNumElements() ==
+                 getMRI()->getType(Op0).getNumElements())) &&
+           "type mismatch");
+#endif
+
+  return buildInstr(TargetOpcode::G_SELECT)
+      .addDef(Res)
+      .addUse(Tst)
+      .addUse(Op0)
+      .addUse(Op1);
+}
+
+MachineInstrBuilder
+MachineIRBuilderBase::buildInsertVectorElement(unsigned Res, unsigned Val,
+                                               unsigned Elt, unsigned Idx) {
+#ifndef NDEBUG
+  LLT ResTy = getMRI()->getType(Res);
+  LLT ValTy = getMRI()->getType(Val);
+  LLT EltTy = getMRI()->getType(Elt);
+  LLT IdxTy = getMRI()->getType(Idx);
+  assert(ResTy.isVector() && ValTy.isVector() && "invalid operand type");
+  assert(IdxTy.isScalar() && "invalid operand type");
+  assert(ResTy.getNumElements() == ValTy.getNumElements() && "type mismatch");
+  assert(ResTy.getElementType() == EltTy && "type mismatch");
+#endif
+
+  return buildInstr(TargetOpcode::G_INSERT_VECTOR_ELT)
+      .addDef(Res)
+      .addUse(Val)
+      .addUse(Elt)
+      .addUse(Idx);
+}
+
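+// Editorial sketch of how the builders above compose. Everything here is
+// hypothetical: the register values (Addr, Bound), the LLT types, and the
+// MachineMemOperand are assumed to exist in the surrounding lowering code.
+// \code
+//   LLT S32 = LLT::scalar(32);
+//   unsigned Loaded = MRI.createGenericVirtualRegister(S32);
+//   MIRBuilder.buildLoad(Loaded, Addr, *MMO);          // Loaded = *Addr
+//   unsigned IsLess = MRI.createGenericVirtualRegister(LLT::scalar(1));
+//   MIRBuilder.buildICmp(CmpInst::ICMP_SLT, IsLess, Loaded, Bound);
+//   unsigned Clamped = MRI.createGenericVirtualRegister(S32);
+//   MIRBuilder.buildSelect(Clamped, IsLess, Loaded, Bound);
+//   MIRBuilder.buildStore(Clamped, Addr, *MMO);        // *Addr = Clamped
+// \endcode
+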
+MachineInstrBuilder +MachineIRBuilderBase::buildExtractVectorElement(unsigned Res, unsigned Val, + unsigned Idx) { +#ifndef NDEBUG + LLT ResTy = getMRI()->getType(Res); + LLT ValTy = getMRI()->getType(Val); + LLT IdxTy = getMRI()->getType(Idx); + assert(ValTy.isVector() && "invalid operand type"); + assert((ResTy.isScalar() || ResTy.isPointer()) && "invalid operand type"); + assert(IdxTy.isScalar() && "invalid operand type"); + assert(ValTy.getElementType() == ResTy && "type mismatch"); +#endif + + return buildInstr(TargetOpcode::G_EXTRACT_VECTOR_ELT) + .addDef(Res) + .addUse(Val) + .addUse(Idx); +} + +MachineInstrBuilder MachineIRBuilderBase::buildAtomicCmpXchgWithSuccess( + unsigned OldValRes, unsigned SuccessRes, unsigned Addr, unsigned CmpVal, + unsigned NewVal, MachineMemOperand &MMO) { +#ifndef NDEBUG + LLT OldValResTy = getMRI()->getType(OldValRes); + LLT SuccessResTy = getMRI()->getType(SuccessRes); + LLT AddrTy = getMRI()->getType(Addr); + LLT CmpValTy = getMRI()->getType(CmpVal); + LLT NewValTy = getMRI()->getType(NewVal); + assert(OldValResTy.isScalar() && "invalid operand type"); + assert(SuccessResTy.isScalar() && "invalid operand type"); + assert(AddrTy.isPointer() && "invalid operand type"); + assert(CmpValTy.isValid() && "invalid operand type"); + assert(NewValTy.isValid() && "invalid operand type"); + assert(OldValResTy == CmpValTy && "type mismatch"); + assert(OldValResTy == NewValTy && "type mismatch"); +#endif + + return buildInstr(TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) + .addDef(OldValRes) + .addDef(SuccessRes) + .addUse(Addr) + .addUse(CmpVal) + .addUse(NewVal) + .addMemOperand(&MMO); +} + +MachineInstrBuilder +MachineIRBuilderBase::buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr, + unsigned CmpVal, unsigned NewVal, + MachineMemOperand &MMO) { +#ifndef NDEBUG + LLT OldValResTy = getMRI()->getType(OldValRes); + LLT AddrTy = getMRI()->getType(Addr); + LLT CmpValTy = getMRI()->getType(CmpVal); + LLT NewValTy = getMRI()->getType(NewVal); + assert(OldValResTy.isScalar() && "invalid operand type"); + assert(AddrTy.isPointer() && "invalid operand type"); + assert(CmpValTy.isValid() && "invalid operand type"); + assert(NewValTy.isValid() && "invalid operand type"); + assert(OldValResTy == CmpValTy && "type mismatch"); + assert(OldValResTy == NewValTy && "type mismatch"); +#endif + + return buildInstr(TargetOpcode::G_ATOMIC_CMPXCHG) + .addDef(OldValRes) + .addUse(Addr) + .addUse(CmpVal) + .addUse(NewVal) + .addMemOperand(&MMO); +} + +MachineInstrBuilder +MachineIRBuilderBase::buildAtomicRMW(unsigned Opcode, unsigned OldValRes, + unsigned Addr, unsigned Val, + MachineMemOperand &MMO) { +#ifndef NDEBUG + LLT OldValResTy = getMRI()->getType(OldValRes); + LLT AddrTy = getMRI()->getType(Addr); + LLT ValTy = getMRI()->getType(Val); + assert(OldValResTy.isScalar() && "invalid operand type"); + assert(AddrTy.isPointer() && "invalid operand type"); + assert(ValTy.isValid() && "invalid operand type"); + assert(OldValResTy == ValTy && "type mismatch"); +#endif + + return buildInstr(Opcode) + .addDef(OldValRes) + .addUse(Addr) + .addUse(Val) + .addMemOperand(&MMO); +} + +MachineInstrBuilder +MachineIRBuilderBase::buildAtomicRMWXchg(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO) { + return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_XCHG, OldValRes, Addr, Val, + MMO); +} +MachineInstrBuilder +MachineIRBuilderBase::buildAtomicRMWAdd(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO) { + return 
buildAtomicRMW(TargetOpcode::G_ATOMICRMW_ADD, OldValRes, Addr, Val, + MMO); +} +MachineInstrBuilder +MachineIRBuilderBase::buildAtomicRMWSub(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO) { + return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_SUB, OldValRes, Addr, Val, + MMO); +} +MachineInstrBuilder +MachineIRBuilderBase::buildAtomicRMWAnd(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO) { + return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_AND, OldValRes, Addr, Val, + MMO); +} +MachineInstrBuilder +MachineIRBuilderBase::buildAtomicRMWNand(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO) { + return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_NAND, OldValRes, Addr, Val, + MMO); +} +MachineInstrBuilder +MachineIRBuilderBase::buildAtomicRMWOr(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO) { + return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_OR, OldValRes, Addr, Val, + MMO); +} +MachineInstrBuilder +MachineIRBuilderBase::buildAtomicRMWXor(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO) { + return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_XOR, OldValRes, Addr, Val, + MMO); +} +MachineInstrBuilder +MachineIRBuilderBase::buildAtomicRMWMax(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO) { + return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_MAX, OldValRes, Addr, Val, + MMO); +} +MachineInstrBuilder +MachineIRBuilderBase::buildAtomicRMWMin(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO) { + return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_MIN, OldValRes, Addr, Val, + MMO); +} +MachineInstrBuilder +MachineIRBuilderBase::buildAtomicRMWUmax(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO) { + return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_UMAX, OldValRes, Addr, Val, + MMO); +} +MachineInstrBuilder +MachineIRBuilderBase::buildAtomicRMWUmin(unsigned OldValRes, unsigned Addr, + unsigned Val, MachineMemOperand &MMO) { + return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_UMIN, OldValRes, Addr, Val, + MMO); +} + +void MachineIRBuilderBase::validateTruncExt(unsigned Dst, unsigned Src, + bool IsExtend) { +#ifndef NDEBUG + LLT SrcTy = getMRI()->getType(Src); + LLT DstTy = getMRI()->getType(Dst); + + if (DstTy.isVector()) { + assert(SrcTy.isVector() && "mismatched cast between vector and non-vector"); + assert(SrcTy.getNumElements() == DstTy.getNumElements() && + "different number of elements in a trunc/ext"); + } else + assert(DstTy.isScalar() && SrcTy.isScalar() && "invalid extend/trunc"); + + if (IsExtend) + assert(DstTy.getSizeInBits() > SrcTy.getSizeInBits() && + "invalid narrowing extend"); + else + assert(DstTy.getSizeInBits() < SrcTy.getSizeInBits() && + "invalid widening trunc"); +#endif +} diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp new file mode 100644 index 000000000000..9e2d48d1dc42 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp @@ -0,0 +1,1001 @@ +//==- llvm/CodeGen/GlobalISel/RegBankSelect.cpp - RegBankSelect --*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the RegBankSelect class. 
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/BlockFrequency.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <utility>
+
+#define DEBUG_TYPE "regbankselect"
+
+using namespace llvm;
+
+static cl::opt<RegBankSelect::Mode> RegBankSelectMode(
+    cl::desc("Mode of the RegBankSelect pass"), cl::Hidden, cl::Optional,
+    cl::values(clEnumValN(RegBankSelect::Mode::Fast, "regbankselect-fast",
+                          "Run the Fast mode (default mapping)"),
+               clEnumValN(RegBankSelect::Mode::Greedy, "regbankselect-greedy",
+                          "Use the Greedy mode (best local mapping)")));
+
+char RegBankSelect::ID = 0;
+
+INITIALIZE_PASS_BEGIN(RegBankSelect, DEBUG_TYPE,
+                      "Assign register bank of generic virtual registers",
+                      false, false);
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,
+                    "Assign register bank of generic virtual registers", false,
+                    false)
+
+RegBankSelect::RegBankSelect(Mode RunningMode)
+    : MachineFunctionPass(ID), OptMode(RunningMode) {
+  initializeRegBankSelectPass(*PassRegistry::getPassRegistry());
+  if (RegBankSelectMode.getNumOccurrences() != 0) {
+    OptMode = RegBankSelectMode;
+    if (RegBankSelectMode != RunningMode)
+      LLVM_DEBUG(dbgs() << "RegBankSelect mode overridden by command line\n");
+  }
+}
+
+void RegBankSelect::init(MachineFunction &MF) {
+  RBI = MF.getSubtarget().getRegBankInfo();
+  assert(RBI && "Cannot work without RegisterBankInfo");
+  MRI = &MF.getRegInfo();
+  TRI = MF.getSubtarget().getRegisterInfo();
+  TPC = &getAnalysis<TargetPassConfig>();
+  if (OptMode != Mode::Fast) {
+    MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+    MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+  } else {
+    MBFI = nullptr;
+    MBPI = nullptr;
+  }
+  MIRBuilder.setMF(MF);
+  MORE = llvm::make_unique<MachineOptimizationRemarkEmitter>(MF, MBFI);
+}
+
+void RegBankSelect::getAnalysisUsage(AnalysisUsage &AU) const {
+  if (OptMode != Mode::Fast) {
+    // We could preserve the information from these two analyses but
+    // the APIs do not allow us to do so yet.
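+    // (Greedy mode weighs each repairing placement by block frequency and
+    // edge probability -- see computeMapping -- which is why both analyses
+    // are required here, while Fast mode never queries them.)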
+    AU.addRequired<MachineBlockFrequencyInfo>();
+    AU.addRequired<MachineBranchProbabilityInfo>();
+  }
+  AU.addRequired<TargetPassConfig>();
+  getSelectionDAGFallbackAnalysisUsage(AU);
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool RegBankSelect::assignmentMatch(
+    unsigned Reg, const RegisterBankInfo::ValueMapping &ValMapping,
+    bool &OnlyAssign) const {
+  // By default we assume we will have to repair something.
+  OnlyAssign = false;
+  // Each part of a break down needs to end up in a different register.
+  // In other words, Reg's assignment does not match.
+  if (ValMapping.NumBreakDowns > 1)
+    return false;
+
+  const RegisterBank *CurRegBank = RBI->getRegBank(Reg, *MRI, *TRI);
+  const RegisterBank *DesiredRegBrank = ValMapping.BreakDown[0].RegBank;
+  // Reg is free of assignment, so a simple assignment will make the
+  // register bank match.
+  OnlyAssign = CurRegBank == nullptr;
+  LLVM_DEBUG(dbgs() << "Does assignment already match: ";
+             if (CurRegBank) dbgs() << *CurRegBank; else dbgs() << "none";
+             dbgs() << " against ";
+             assert(DesiredRegBrank && "The mapping must be valid");
+             dbgs() << *DesiredRegBrank << '\n';);
+  return CurRegBank == DesiredRegBrank;
+}
+
+bool RegBankSelect::repairReg(
+    MachineOperand &MO, const RegisterBankInfo::ValueMapping &ValMapping,
+    RegBankSelect::RepairingPlacement &RepairPt,
+    const iterator_range<SmallVectorImpl<unsigned>::const_iterator> &NewVRegs) {
+  if (ValMapping.NumBreakDowns != 1 && !TPC->isGlobalISelAbortEnabled())
+    return false;
+  assert(ValMapping.NumBreakDowns == 1 && "Not yet implemented");
+  // An empty range of new registers means no repairing.
+  assert(NewVRegs.begin() != NewVRegs.end() && "We should not have to repair");
+
+  // Assume we are repairing a use and thus, the original reg will be
+  // the source of the repairing.
+  unsigned Src = MO.getReg();
+  unsigned Dst = *NewVRegs.begin();
+
+  // If we repair a definition, swap the source and destination for
+  // the repairing.
+  if (MO.isDef())
+    std::swap(Src, Dst);
+
+  assert((RepairPt.getNumInsertPoints() == 1 ||
+          TargetRegisterInfo::isPhysicalRegister(Dst)) &&
+         "We are about to create several defs for Dst");
+
+  // Build the instruction used to repair, then clone it at the right
+  // places. Avoiding buildCopy bypasses the check that Src and Dst have the
+  // same types because the type is a placeholder when this function is called.
+  MachineInstr *MI =
+      MIRBuilder.buildInstrNoInsert(TargetOpcode::COPY).addDef(Dst).addUse(Src);
+  LLVM_DEBUG(dbgs() << "Copy: " << printReg(Src) << " to: " << printReg(Dst)
+                    << '\n');
+  // TODO:
+  // Check if MI is legal. If not, we need to legalize all the
+  // instructions we are going to insert.
+  std::unique_ptr<MachineInstr *[]> NewInstrs(
+      new MachineInstr *[RepairPt.getNumInsertPoints()]);
+  bool IsFirst = true;
+  unsigned Idx = 0;
+  for (const std::unique_ptr<InsertPoint> &InsertPt : RepairPt) {
+    MachineInstr *CurMI;
+    if (IsFirst)
+      CurMI = MI;
+    else
+      CurMI = MIRBuilder.getMF().CloneMachineInstr(MI);
+    InsertPt->insert(*CurMI);
+    NewInstrs[Idx++] = CurMI;
+    IsFirst = false;
+  }
+  // TODO:
+  // Legalize NewInstrs if need be.
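+  // At this point every entry of NewInstrs is the same repairing copy,
+  //   %Dst = COPY %Src
+  // cloned once per insertion point.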
+ return true; +} + +uint64_t RegBankSelect::getRepairCost( + const MachineOperand &MO, + const RegisterBankInfo::ValueMapping &ValMapping) const { + assert(MO.isReg() && "We should only repair register operand"); + assert(ValMapping.NumBreakDowns && "Nothing to map??"); + + bool IsSameNumOfValues = ValMapping.NumBreakDowns == 1; + const RegisterBank *CurRegBank = RBI->getRegBank(MO.getReg(), *MRI, *TRI); + // If MO does not have a register bank, we should have just been + // able to set one unless we have to break the value down. + assert((!IsSameNumOfValues || CurRegBank) && "We should not have to repair"); + // Def: Val <- NewDefs + // Same number of values: copy + // Different number: Val = build_sequence Defs1, Defs2, ... + // Use: NewSources <- Val. + // Same number of values: copy. + // Different number: Src1, Src2, ... = + // extract_value Val, Src1Begin, Src1Len, Src2Begin, Src2Len, ... + // We should remember that this value is available somewhere else to + // coalesce the value. + + if (IsSameNumOfValues) { + const RegisterBank *DesiredRegBrank = ValMapping.BreakDown[0].RegBank; + // If we repair a definition, swap the source and destination for + // the repairing. + if (MO.isDef()) + std::swap(CurRegBank, DesiredRegBrank); + // TODO: It may be possible to actually avoid the copy. + // If we repair something where the source is defined by a copy + // and the source of that copy is on the right bank, we can reuse + // it for free. + // E.g., + // RegToRepair<BankA> = copy AlternativeSrc<BankB> + // = op RegToRepair<BankA> + // We can simply propagate AlternativeSrc instead of copying RegToRepair + // into a new virtual register. + // We would also need to propagate this information in the + // repairing placement. + unsigned Cost = RBI->copyCost(*DesiredRegBrank, *CurRegBank, + RBI->getSizeInBits(MO.getReg(), *MRI, *TRI)); + // TODO: use a dedicated constant for ImpossibleCost. + if (Cost != std::numeric_limits<unsigned>::max()) + return Cost; + // Return the legalization cost of that repairing. + } + return std::numeric_limits<unsigned>::max(); +} + +const RegisterBankInfo::InstructionMapping &RegBankSelect::findBestMapping( + MachineInstr &MI, RegisterBankInfo::InstructionMappings &PossibleMappings, + SmallVectorImpl<RepairingPlacement> &RepairPts) { + assert(!PossibleMappings.empty() && + "Do not know how to map this instruction"); + + const RegisterBankInfo::InstructionMapping *BestMapping = nullptr; + MappingCost Cost = MappingCost::ImpossibleCost(); + SmallVector<RepairingPlacement, 4> LocalRepairPts; + for (const RegisterBankInfo::InstructionMapping *CurMapping : + PossibleMappings) { + MappingCost CurCost = + computeMapping(MI, *CurMapping, LocalRepairPts, &Cost); + if (CurCost < Cost) { + LLVM_DEBUG(dbgs() << "New best: " << CurCost << '\n'); + Cost = CurCost; + BestMapping = CurMapping; + RepairPts.clear(); + for (RepairingPlacement &RepairPt : LocalRepairPts) + RepairPts.emplace_back(std::move(RepairPt)); + } + } + if (!BestMapping && !TPC->isGlobalISelAbortEnabled()) { + // If none of the mapping worked that means they are all impossible. + // Thus, pick the first one and set an impossible repairing point. + // It will trigger the failed isel mode. 
+    BestMapping = *PossibleMappings.begin();
+    RepairPts.emplace_back(
+        RepairingPlacement(MI, 0, *TRI, *this, RepairingPlacement::Impossible));
+  } else
+    assert(BestMapping && "No suitable mapping for instruction");
+  return *BestMapping;
+}
+
+void RegBankSelect::tryAvoidingSplit(
+    RegBankSelect::RepairingPlacement &RepairPt, const MachineOperand &MO,
+    const RegisterBankInfo::ValueMapping &ValMapping) const {
+  const MachineInstr &MI = *MO.getParent();
+  assert(RepairPt.hasSplit() && "We should not have to adjust for split");
+  // Splitting should only occur for PHIs or between terminators,
+  // because we only do local repairing.
+  assert((MI.isPHI() || MI.isTerminator()) && "Why do we split?");
+
+  assert(&MI.getOperand(RepairPt.getOpIdx()) == &MO &&
+         "Repairing placement does not match operand");
+
+  // If we need splitting for phis, that means it is because we
+  // could not find an insertion point before the terminators of
+  // the predecessor block for this argument. In other words,
+  // the input value is defined by one of the terminators.
+  assert((!MI.isPHI() || !MO.isDef()) && "Need split for phi def?");
+
+  // We split to repair the use of a phi or a terminator.
+  if (!MO.isDef()) {
+    if (MI.isTerminator()) {
+      assert(&MI != &(*MI.getParent()->getFirstTerminator()) &&
+             "Need to split for the first terminator?!");
+    } else {
+      // For the PHI case, the split may not be actually required.
+      // In the copy case, a phi is already a copy on the incoming edge,
+      // therefore there is no need to split.
+      if (ValMapping.NumBreakDowns == 1)
+        // This is already a copy, there is nothing to do.
+        RepairPt.switchTo(RepairingPlacement::RepairingKind::Reassign);
+    }
+    return;
+  }
+
+  // At this point, we need to repair a definition of a terminator.
+
+  // Technically we need to fix the def of MI on all outgoing
+  // edges of MI to keep the repairing local. In other words, we
+  // will create several definitions of the same register. This
+  // does not work for SSA unless that definition is a physical
+  // register.
+  // However, there are other cases where we can get away with
+  // that while still keeping the repairing local.
+  assert(MI.isTerminator() && MO.isDef() &&
+         "This code is for the def of a terminator");
+
+  // Since we use RPO traversal, if we need to repair a definition
+  // this means this definition could be:
+  // 1. Used by PHIs (i.e., this VReg has been visited as part of the
+  //    uses of a phi.), or
+  // 2. Part of a target specific instruction (i.e., the target applied
+  //    some register class constraints when creating the instruction.)
+  // If the constraints come from #2, the target said that another mapping
+  // is supported so we may just drop them. Indeed, if we do not change
+  // the number of registers holding that value, the uses will get fixed
+  // when we get to them.
+  // Uses in PHIs may have already been processed though.
+  // If the constraints come from #1, then those are weak constraints and
+  // no actual uses may rely on them. However, the problem remains mainly
+  // the same as for #2. If the value stays in one register, we could
+  // just switch the register bank of the definition, but we would need to
+  // account for a repairing cost for each phi we silently change.
+  //
+  // In any case, if the value needs to be broken down into several
+  // registers, the repairing is not local anymore as we need to patch
+  // every use to rebuild the value in just one register.
+  //
+  // To summarize:
+  // - If the value is in a physical register, we can do the split and
+  //   fix locally.
+  // Otherwise if the value is in a virtual register:
+  // - If the value remains in one register, we do not have to split;
+  //   just switching the register bank would do, but we need to account
+  //   in the repairing cost for all the phis we changed.
+  // - If the value spans several registers, then we cannot do a local
+  //   repairing.
+
+  // Check if this is a physical or virtual register.
+  unsigned Reg = MO.getReg();
+  if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+    // We are going to split every outgoing edge.
+    // Check that this is possible.
+    // FIXME: The machine representation is currently broken
+    // since it allows several terminators in one basic block.
+    // Because of that we would technically need a way to get
+    // the targets of just one terminator to know which edges
+    // we have to split.
+    // Assert that we do not hit the ill-formed representation.
+
+    // If there are other terminators before that one, some of
+    // the outgoing edges may not be dominated by this definition.
+    assert(&MI == &(*MI.getParent()->getFirstTerminator()) &&
+           "Do not know which outgoing edges are relevant");
+    const MachineInstr *Next = MI.getNextNode();
+    assert((!Next || Next->isUnconditionalBranch()) &&
+           "Do not know where each terminator ends up");
+    if (Next)
+      // If the next terminator uses Reg, this means we have
+      // to split right after MI and thus we need a way to ask
+      // which outgoing edges are affected.
+      assert(!Next->readsRegister(Reg) && "Need to split between terminators");
+    // We will split all the edges and repair there.
+  } else {
+    // This is a virtual register defined by a terminator.
+    if (ValMapping.NumBreakDowns == 1) {
+      // There is nothing to repair, but we may actually lie on
+      // the repairing cost because of the PHIs already processed,
+      // as stated above.
+      // Though the code will be correct.
+      assert(false && "Repairing cost may not be accurate");
+    } else {
+      // We need to do non-local repairing. Basically, patch all
+      // the uses (i.e., phis) that we already processed.
+      // For now, just say this mapping is not possible.
+      RepairPt.switchTo(RepairingPlacement::RepairingKind::Impossible);
+    }
+  }
+}
+
+RegBankSelect::MappingCost RegBankSelect::computeMapping(
+    MachineInstr &MI, const RegisterBankInfo::InstructionMapping &InstrMapping,
+    SmallVectorImpl<RepairingPlacement> &RepairPts,
+    const RegBankSelect::MappingCost *BestCost) {
+  assert((MBFI || !BestCost) && "Cost comparison requires MBFI");
+
+  if (!InstrMapping.isValid())
+    return MappingCost::ImpossibleCost();
+
+  // If mapped with InstrMapping, MI will have the recorded cost.
+  MappingCost Cost(MBFI ? MBFI->getBlockFreq(MI.getParent()) : 1);
+  bool Saturated = Cost.addLocalCost(InstrMapping.getCost());
+  assert(!Saturated && "Possible mapping saturated the cost");
+  LLVM_DEBUG(dbgs() << "Evaluating mapping cost for: " << MI);
+  LLVM_DEBUG(dbgs() << "With: " << InstrMapping << '\n');
+  RepairPts.clear();
+  if (BestCost && Cost > *BestCost) {
+    LLVM_DEBUG(dbgs() << "Mapping is too expensive from the start\n");
+    return Cost;
+  }
+
+  // Moreover, to realize this mapping, the register bank of each operand must
+  // match this mapping. In other words, we may need to locally reassign the
+  // register banks. Account for that repairing cost as well.
+  // In this context, local means in the surrounding of MI.
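+  // For reference, the resulting cost reads as
+  //   LocalFreq * LocalCost + NonLocalCost
+  // (see MappingCost::print below): the instruction cost and any repairing
+  // next to MI are scaled by the frequency of MI's block, while repairing
+  // placed on split edges is accounted at its own insertion point frequency.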
+  for (unsigned OpIdx = 0, EndOpIdx = InstrMapping.getNumOperands();
+       OpIdx != EndOpIdx; ++OpIdx) {
+    const MachineOperand &MO = MI.getOperand(OpIdx);
+    if (!MO.isReg())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!Reg)
+      continue;
+    LLVM_DEBUG(dbgs() << "Opd" << OpIdx << '\n');
+    const RegisterBankInfo::ValueMapping &ValMapping =
+        InstrMapping.getOperandMapping(OpIdx);
+    // If Reg is already properly mapped, this is free.
+    bool Assign;
+    if (assignmentMatch(Reg, ValMapping, Assign)) {
+      LLVM_DEBUG(dbgs() << "=> is free (match).\n");
+      continue;
+    }
+    if (Assign) {
+      LLVM_DEBUG(dbgs() << "=> is free (simple assignment).\n");
+      RepairPts.emplace_back(RepairingPlacement(MI, OpIdx, *TRI, *this,
+                                                RepairingPlacement::Reassign));
+      continue;
+    }
+
+    // Find the insertion point for the repairing code.
+    RepairPts.emplace_back(
+        RepairingPlacement(MI, OpIdx, *TRI, *this, RepairingPlacement::Insert));
+    RepairingPlacement &RepairPt = RepairPts.back();
+
+    // If we need to split a basic block to materialize this insertion point,
+    // we may give a higher cost to this mapping.
+    // Nevertheless, we may get away with the split, so try that first.
+    if (RepairPt.hasSplit())
+      tryAvoidingSplit(RepairPt, MO, ValMapping);
+
+    // Check that the materialization of the repairing is possible.
+    if (!RepairPt.canMaterialize()) {
+      LLVM_DEBUG(dbgs() << "Mapping involves impossible repairing\n");
+      return MappingCost::ImpossibleCost();
+    }
+
+    // Account for the split cost and repair cost, unless the cost is
+    // already saturated or we do not care about the cost.
+    if (!BestCost || Saturated)
+      continue;
+
+    // To get accurate information we need MBFI and MBPI.
+    // Thus, if we end up here this information should be here.
+    assert(MBFI && MBPI && "Cost computation requires MBFI and MBPI");
+
+    // FIXME: We will have to rework the repairing cost model.
+    // The repairing cost depends on the register bank that MO has.
+    // However, when we break down the value into different values,
+    // MO may not have a register bank while still needing repairing.
+    // For the fast mode, we don't compute the cost so that is fine,
+    // but still for the repairing code, we will have to make a choice.
+    // For the greedy mode, we should choose greedily what is the best
+    // choice based on the next use of MO.
+
+    // Sum up the repairing cost of MO at each insertion point.
+    uint64_t RepairCost = getRepairCost(MO, ValMapping);
+
+    // This is an impossible-to-repair cost.
+    if (RepairCost == std::numeric_limits<unsigned>::max())
+      return MappingCost::ImpossibleCost();
+
+    // Bias used for splitting: 5%.
+    const uint64_t PercentageForBias = 5;
+    uint64_t Bias = (RepairCost * PercentageForBias + 99) / 100;
+    // We should not need more than a couple of instructions to repair
+    // an assignment. In other words, the computation should not
+    // overflow because the repairing cost is free of basic block
+    // frequency.
+    assert(((RepairCost < RepairCost * PercentageForBias) &&
+            (RepairCost * PercentageForBias <
+             RepairCost * PercentageForBias + 99)) &&
+           "Repairing involves more than a billion instructions?!");
+    for (const std::unique_ptr<InsertPoint> &InsertPt : RepairPt) {
+      assert(InsertPt->canMaterialize() && "We should not have made it here");
+      // We will apply the basic block frequency here, and those
+      // computations use uint64_t.
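+      // For instance, a RepairCost of 20 gives a Bias of
+      // (20 * 5 + 99) / 100 == 1, so a repairing placed on a split edge is
+      // charged 21 per unit of frequency instead of 20; splitting only wins
+      // when it is clearly profitable.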
+      if (!InsertPt->isSplit())
+        Saturated = Cost.addLocalCost(RepairCost);
+      else {
+        uint64_t CostForInsertPt = RepairCost;
+        // Again we shouldn't overflow here given that
+        // CostForInsertPt is frequency free at this point.
+        assert(CostForInsertPt + Bias > CostForInsertPt &&
+               "Repairing + split bias overflows");
+        CostForInsertPt += Bias;
+        uint64_t PtCost = InsertPt->frequency(*this) * CostForInsertPt;
+        // Check if we just overflowed.
+        if ((Saturated = PtCost < CostForInsertPt))
+          Cost.saturate();
+        else
+          Saturated = Cost.addNonLocalCost(PtCost);
+      }
+
+      // Stop looking into what it takes to repair; this is already
+      // too expensive.
+      if (BestCost && Cost > *BestCost) {
+        LLVM_DEBUG(dbgs() << "Mapping is too expensive, stop processing\n");
+        return Cost;
+      }
+
+      // No need to accumulate more cost information.
+      // We need to still gather the repairing information though.
+      if (Saturated)
+        break;
+    }
+  }
+  LLVM_DEBUG(dbgs() << "Total cost is: " << Cost << "\n");
+  return Cost;
+}
+
+bool RegBankSelect::applyMapping(
+    MachineInstr &MI, const RegisterBankInfo::InstructionMapping &InstrMapping,
+    SmallVectorImpl<RegBankSelect::RepairingPlacement> &RepairPts) {
+  // OpdMapper will hold all the information needed for the rewriting.
+  RegisterBankInfo::OperandsMapper OpdMapper(MI, InstrMapping, *MRI);
+
+  // First, place the repairing code.
+  for (RepairingPlacement &RepairPt : RepairPts) {
+    if (!RepairPt.canMaterialize() ||
+        RepairPt.getKind() == RepairingPlacement::Impossible)
+      return false;
+    assert(RepairPt.getKind() != RepairingPlacement::None &&
+           "This should not make its way into the list");
+    unsigned OpIdx = RepairPt.getOpIdx();
+    MachineOperand &MO = MI.getOperand(OpIdx);
+    const RegisterBankInfo::ValueMapping &ValMapping =
+        InstrMapping.getOperandMapping(OpIdx);
+    unsigned Reg = MO.getReg();
+
+    switch (RepairPt.getKind()) {
+    case RepairingPlacement::Reassign:
+      assert(ValMapping.NumBreakDowns == 1 &&
+             "Reassignment should only be for simple mapping");
+      MRI->setRegBank(Reg, *ValMapping.BreakDown[0].RegBank);
+      break;
+    case RepairingPlacement::Insert:
+      OpdMapper.createVRegs(OpIdx);
+      if (!repairReg(MO, ValMapping, RepairPt, OpdMapper.getVRegs(OpIdx)))
+        return false;
+      break;
+    default:
+      llvm_unreachable("Other kind should not happen");
+    }
+  }
+
+  // Second, rewrite the instruction.
+  LLVM_DEBUG(dbgs() << "Actual mapping of the operands: " << OpdMapper << '\n');
+  RBI->applyMapping(OpdMapper);
+
+  return true;
+}
+
+bool RegBankSelect::assignInstr(MachineInstr &MI) {
+  LLVM_DEBUG(dbgs() << "Assign: " << MI);
+  // Remember the repairing placement for all the operands.
+  SmallVector<RepairingPlacement, 4> RepairPts;
+
+  const RegisterBankInfo::InstructionMapping *BestMapping;
+  if (OptMode == RegBankSelect::Mode::Fast) {
+    BestMapping = &RBI->getInstrMapping(MI);
+    MappingCost DefaultCost = computeMapping(MI, *BestMapping, RepairPts);
+    (void)DefaultCost;
+    if (DefaultCost == MappingCost::ImpossibleCost())
+      return false;
+  } else {
+    RegisterBankInfo::InstructionMappings PossibleMappings =
+        RBI->getInstrPossibleMappings(MI);
+    if (PossibleMappings.empty())
+      return false;
+    BestMapping = &findBestMapping(MI, PossibleMappings, RepairPts);
+  }
+  // Make sure the mapping is valid for MI.
+  assert(BestMapping->verify(MI) && "Invalid instruction mapping");
+
+  LLVM_DEBUG(dbgs() << "Best Mapping: " << *BestMapping << '\n');
+
+  // After this call, MI may not be valid anymore.
+  // Do not use it.
+  return applyMapping(MI, *BestMapping, RepairPts);
+}
+
+bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
+  // If the ISel pipeline failed, do not bother running this pass.
+  if (MF.getProperties().hasProperty(
+          MachineFunctionProperties::Property::FailedISel))
+    return false;
+
+  LLVM_DEBUG(dbgs() << "Assign register banks for: " << MF.getName() << '\n');
+  const Function &F = MF.getFunction();
+  Mode SaveOptMode = OptMode;
+  if (F.hasFnAttribute(Attribute::OptimizeNone))
+    OptMode = Mode::Fast;
+  init(MF);
+
+#ifndef NDEBUG
+  // Check that our input is fully legal: we require the function to have the
+  // Legalized property, so it should be.
+  // FIXME: This should be in the MachineVerifier.
+  if (!DisableGISelLegalityCheck)
+    if (const MachineInstr *MI = machineFunctionIsIllegal(MF)) {
+      reportGISelFailure(MF, *TPC, *MORE, "gisel-regbankselect",
+                         "instruction is not legal", *MI);
+      return false;
+    }
+#endif
+
+  // Walk the function and assign register banks to all operands.
+  // Use an RPOT to make sure all registers are assigned before we choose
+  // the best mapping of the current instruction.
+  ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
+  for (MachineBasicBlock *MBB : RPOT) {
+    // Set a sensible insertion point so that subsequent calls to
+    // MIRBuilder insert instructions in the right block.
+    MIRBuilder.setMBB(*MBB);
+    for (MachineBasicBlock::iterator MII = MBB->begin(), End = MBB->end();
+         MII != End;) {
+      // MI might be invalidated by the assignment, so move the
+      // iterator beforehand.
+      MachineInstr &MI = *MII++;
+
+      // Ignore target-specific instructions: they should use proper regclasses.
+      if (isTargetSpecificOpcode(MI.getOpcode()))
+        continue;
+
+      if (!assignInstr(MI)) {
+        reportGISelFailure(MF, *TPC, *MORE, "gisel-regbankselect",
+                           "unable to map instruction", MI);
+        return false;
+      }
+    }
+  }
+  OptMode = SaveOptMode;
+  return false;
+}
+
+//------------------------------------------------------------------------------
+// Helper Classes Implementation
+//------------------------------------------------------------------------------
+RegBankSelect::RepairingPlacement::RepairingPlacement(
+    MachineInstr &MI, unsigned OpIdx, const TargetRegisterInfo &TRI, Pass &P,
+    RepairingPlacement::RepairingKind Kind)
+    // Default is, we are going to insert code to repair OpIdx.
+    : Kind(Kind), OpIdx(OpIdx),
+      CanMaterialize(Kind != RepairingKind::Impossible), P(P) {
+  const MachineOperand &MO = MI.getOperand(OpIdx);
+  assert(MO.isReg() && "Trying to repair a non-reg operand");
+
+  if (Kind != RepairingKind::Insert)
+    return;
+
+  // Repairing for definitions happens after MI, for uses before.
+  bool Before = !MO.isDef();
+
+  // Check if we are done with MI.
+  if (!MI.isPHI() && !MI.isTerminator()) {
+    addInsertPoint(MI, Before);
+    // We are done with the initialization.
+    return;
+  }
+
+  // Now, look for the special cases.
+  if (MI.isPHI()) {
+    // - PHIs must be the first instructions:
+    //   * Before, we have to split the related incoming edge.
+    //   * After, move the insertion point past the last phi.
+    if (!Before) {
+      MachineBasicBlock::iterator It = MI.getParent()->getFirstNonPHI();
+      if (It != MI.getParent()->end())
+        addInsertPoint(*It, /*Before*/ true);
+      else
+        addInsertPoint(*(--It), /*Before*/ false);
+      return;
+    }
+    // To repair a use of a phi, we may need to split the related edge.
+    MachineBasicBlock &Pred = *MI.getOperand(OpIdx + 1).getMBB();
+    // Check if we can move the insertion point prior to the
+    // terminators of the predecessor.
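+    // (PHI operands come in pairs: the value at OpIdx is followed by its
+    // incoming basic block at OpIdx + 1, which is how Pred is found above.)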
+    unsigned Reg = MO.getReg();
+    MachineBasicBlock::iterator It = Pred.getLastNonDebugInstr();
+    for (auto Begin = Pred.begin(); It != Begin && It->isTerminator(); --It)
+      if (It->modifiesRegister(Reg, &TRI)) {
+        // We cannot hoist the repairing code in the predecessor.
+        // Split the edge.
+        addInsertPoint(Pred, *MI.getParent());
+        return;
+      }
+    // At this point, we can insert in Pred.
+
+    // - If It is invalid, Pred is empty and we can insert in Pred
+    //   wherever we want.
+    // - If It is valid, It is the first non-terminator, insert after It.
+    if (It == Pred.end())
+      addInsertPoint(Pred, /*Beginning*/ false);
+    else
+      addInsertPoint(*It, /*Before*/ false);
+  } else {
+    // - Terminators must be the last instructions:
+    //   * Before, move the insert point before the first terminator.
+    //   * After, we have to split the outgoing edges.
+    unsigned Reg = MO.getReg();
+    if (Before) {
+      // Check whether Reg is defined by any terminator.
+      MachineBasicBlock::iterator It = MI;
+      for (auto Begin = MI.getParent()->begin();
+           --It != Begin && It->isTerminator();)
+        if (It->modifiesRegister(Reg, &TRI)) {
+          // Insert the repairing code right after the definition.
+          addInsertPoint(*It, /*Before*/ false);
+          return;
+        }
+      addInsertPoint(*It, /*Before*/ true);
+      return;
+    }
+    // Make sure Reg is not redefined by other terminators, otherwise
+    // we do not know how to split.
+    for (MachineBasicBlock::iterator It = MI, End = MI.getParent()->end();
+         ++It != End;)
+      // The machine verifier should reject this kind of code.
+      assert(!It->modifiesRegister(Reg, &TRI) && "Do not know where to split");
+    // Split each outgoing edge.
+    MachineBasicBlock &Src = *MI.getParent();
+    for (auto &Succ : Src.successors())
+      addInsertPoint(Src, Succ);
+  }
+}
+
+void RegBankSelect::RepairingPlacement::addInsertPoint(MachineInstr &MI,
+                                                       bool Before) {
+  addInsertPoint(*new InstrInsertPoint(MI, Before));
+}
+
+void RegBankSelect::RepairingPlacement::addInsertPoint(MachineBasicBlock &MBB,
+                                                       bool Beginning) {
+  addInsertPoint(*new MBBInsertPoint(MBB, Beginning));
+}
+
+void RegBankSelect::RepairingPlacement::addInsertPoint(MachineBasicBlock &Src,
+                                                       MachineBasicBlock &Dst) {
+  addInsertPoint(*new EdgeInsertPoint(Src, Dst, P));
+}
+
+void RegBankSelect::RepairingPlacement::addInsertPoint(
+    RegBankSelect::InsertPoint &Point) {
+  CanMaterialize &= Point.canMaterialize();
+  HasSplit |= Point.isSplit();
+  InsertPoints.emplace_back(&Point);
+}
+
+RegBankSelect::InstrInsertPoint::InstrInsertPoint(MachineInstr &Instr,
+                                                  bool Before)
+    : InsertPoint(), Instr(Instr), Before(Before) {
+  // Since we do not support splitting, we do not need to update
+  // liveness and such, so do not do anything with P.
+  assert((!Before || !Instr.isPHI()) &&
+         "Splitting before phis requires more points");
+  assert((!Before || !Instr.getNextNode() || !Instr.getNextNode()->isPHI()) &&
+         "Splitting between phis does not make sense");
+}
+
+void RegBankSelect::InstrInsertPoint::materialize() {
+  if (isSplit()) {
+    // Slice and return the beginning of the new block.
+    // If we need to split between the terminators, we theoretically
+    // need to know where the first and second set of terminators end
+    // to update the successors properly.
+    // Now, in practice, we should have a maximum of 2 branch
+    // instructions; one conditional and one unconditional. Therefore
+    // we know how to update the successor by looking at the target of
+    // the unconditional branch.
+    // If we end up splitting at some point, then we should update
+    // the liveness information and such. I.e., we would need to
+    // access P here.
+    // The machine verifier should actually make sure such cases
+    // cannot happen.
+    llvm_unreachable("Not yet implemented");
+  }
+  // Otherwise the insertion point is just the current or next
+  // instruction depending on Before. I.e., there is nothing to do
+  // here.
+}
+
+bool RegBankSelect::InstrInsertPoint::isSplit() const {
+  // If the insertion point is after a terminator, we need to split.
+  if (!Before)
+    return Instr.isTerminator();
+  // If we insert before an instruction that is after a terminator,
+  // we are still after a terminator.
+  return Instr.getPrevNode() && Instr.getPrevNode()->isTerminator();
+}
+
+uint64_t RegBankSelect::InstrInsertPoint::frequency(const Pass &P) const {
+  // Even if we need to split, because we insert between terminators,
+  // this split has actually the same frequency as the instruction.
+  const MachineBlockFrequencyInfo *MBFI =
+      P.getAnalysisIfAvailable<MachineBlockFrequencyInfo>();
+  if (!MBFI)
+    return 1;
+  return MBFI->getBlockFreq(Instr.getParent()).getFrequency();
+}
+
+uint64_t RegBankSelect::MBBInsertPoint::frequency(const Pass &P) const {
+  const MachineBlockFrequencyInfo *MBFI =
+      P.getAnalysisIfAvailable<MachineBlockFrequencyInfo>();
+  if (!MBFI)
+    return 1;
+  return MBFI->getBlockFreq(&MBB).getFrequency();
+}
+
+void RegBankSelect::EdgeInsertPoint::materialize() {
+  // If we end up repairing twice at the same place before materializing the
+  // insertion point, we may think we have to split an edge twice.
+  // We should have a factory for the insert point such that identical points
+  // are the same instance.
+  assert(Src.isSuccessor(DstOrSplit) && DstOrSplit->isPredecessor(&Src) &&
+         "This point has already been split");
+  MachineBasicBlock *NewBB = Src.SplitCriticalEdge(DstOrSplit, P);
+  assert(NewBB && "Invalid call to materialize");
+  // We reuse the destination block to hold the information of the new block.
+  DstOrSplit = NewBB;
+}
+
+uint64_t RegBankSelect::EdgeInsertPoint::frequency(const Pass &P) const {
+  const MachineBlockFrequencyInfo *MBFI =
+      P.getAnalysisIfAvailable<MachineBlockFrequencyInfo>();
+  if (!MBFI)
+    return 1;
+  if (WasMaterialized)
+    return MBFI->getBlockFreq(DstOrSplit).getFrequency();
+
+  const MachineBranchProbabilityInfo *MBPI =
+      P.getAnalysisIfAvailable<MachineBranchProbabilityInfo>();
+  if (!MBPI)
+    return 1;
+  // The basic block will be on the edge.
+  return (MBFI->getBlockFreq(&Src) * MBPI->getEdgeProbability(&Src, DstOrSplit))
+      .getFrequency();
+}
+
+bool RegBankSelect::EdgeInsertPoint::canMaterialize() const {
+  // If this is not a critical edge, we should not have used this insert
+  // point. Indeed, inserting directly in the successor or the predecessor
+  // would have done.
+  assert(Src.succ_size() > 1 && DstOrSplit->pred_size() > 1 &&
+         "Edge is not critical");
+  return Src.canSplitCriticalEdge(DstOrSplit);
+}
+
+RegBankSelect::MappingCost::MappingCost(const BlockFrequency &LocalFreq)
+    : LocalFreq(LocalFreq.getFrequency()) {}
+
+bool RegBankSelect::MappingCost::addLocalCost(uint64_t Cost) {
+  // Check if this overflows.
+  if (LocalCost + Cost < LocalCost) {
+    saturate();
+    return true;
+  }
+  LocalCost += Cost;
+  return isSaturated();
+}
+
+bool RegBankSelect::MappingCost::addNonLocalCost(uint64_t Cost) {
+  // Check if this overflows.
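+  // (For unsigned values, `a + b < a` holds exactly when the addition
+  // wrapped around, so this detects the overflow without a wider type.)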
+  if (NonLocalCost + Cost < NonLocalCost) {
+    saturate();
+    return true;
+  }
+  NonLocalCost += Cost;
+  return isSaturated();
+}
+
+bool RegBankSelect::MappingCost::isSaturated() const {
+  return LocalCost == UINT64_MAX - 1 && NonLocalCost == UINT64_MAX &&
+         LocalFreq == UINT64_MAX;
+}
+
+void RegBankSelect::MappingCost::saturate() {
+  *this = ImpossibleCost();
+  --LocalCost;
+}
+
+RegBankSelect::MappingCost RegBankSelect::MappingCost::ImpossibleCost() {
+  return MappingCost(UINT64_MAX, UINT64_MAX, UINT64_MAX);
+}
+
+bool RegBankSelect::MappingCost::operator<(const MappingCost &Cost) const {
+  // Sort out the easy cases.
+  if (*this == Cost)
+    return false;
+  // If one is impossible to realize, the other is cheaper, unless it is
+  // impossible as well.
+  if ((*this == ImpossibleCost()) || (Cost == ImpossibleCost()))
+    return (*this == ImpossibleCost()) < (Cost == ImpossibleCost());
+  // If one is saturated, the other is cheaper, unless it is saturated
+  // as well.
+  if (isSaturated() || Cost.isSaturated())
+    return isSaturated() < Cost.isSaturated();
+  // At this point we know both costs hold sensible values.
+
+  // If the two costs have different base frequencies, there is not much
+  // we can do but scale everything.
+  // However, if they have the same base frequency we can avoid making
+  // a complicated computation.
+  uint64_t ThisLocalAdjust;
+  uint64_t OtherLocalAdjust;
+  if (LLVM_LIKELY(LocalFreq == Cost.LocalFreq)) {
+    // At this point, we know the local costs are comparable.
+    // Handle the case that does not involve potential overflow first.
+    if (NonLocalCost == Cost.NonLocalCost)
+      // Since the non-local costs do not discriminate on the result,
+      // just compare the local costs.
+      return LocalCost < Cost.LocalCost;
+
+    // The base costs are comparable so we may only keep the relative
+    // value to increase our chances of avoiding overflows.
+    ThisLocalAdjust = 0;
+    OtherLocalAdjust = 0;
+    if (LocalCost < Cost.LocalCost)
+      OtherLocalAdjust = Cost.LocalCost - LocalCost;
+    else
+      ThisLocalAdjust = LocalCost - Cost.LocalCost;
+  } else {
+    ThisLocalAdjust = LocalCost;
+    OtherLocalAdjust = Cost.LocalCost;
+  }
+
+  // The non-local costs are comparable, just keep the relative value.
+  uint64_t ThisNonLocalAdjust = 0;
+  uint64_t OtherNonLocalAdjust = 0;
+  if (NonLocalCost < Cost.NonLocalCost)
+    OtherNonLocalAdjust = Cost.NonLocalCost - NonLocalCost;
+  else
+    ThisNonLocalAdjust = NonLocalCost - Cost.NonLocalCost;
+  // Scale everything to make them comparable.
+  uint64_t ThisScaledCost = ThisLocalAdjust * LocalFreq;
+  // Check for overflow on that operation.
+  bool ThisOverflows = ThisLocalAdjust && (ThisScaledCost < ThisLocalAdjust ||
+                                           ThisScaledCost < LocalFreq);
+  uint64_t OtherScaledCost = OtherLocalAdjust * Cost.LocalFreq;
+  // Check for overflow on the last operation.
+  bool OtherOverflows =
+      OtherLocalAdjust &&
+      (OtherScaledCost < OtherLocalAdjust || OtherScaledCost < Cost.LocalFreq);
+  // Add the non-local costs.
+  ThisOverflows |= ThisNonLocalAdjust &&
+                   ThisScaledCost + ThisNonLocalAdjust < ThisNonLocalAdjust;
+  ThisScaledCost += ThisNonLocalAdjust;
+  OtherOverflows |= OtherNonLocalAdjust &&
+                    OtherScaledCost + OtherNonLocalAdjust < OtherNonLocalAdjust;
+  OtherScaledCost += OtherNonLocalAdjust;
+  // If both overflow, we cannot compare without additional
+  // precision, e.g., APInt. Just give up on that case.
+  if (ThisOverflows && OtherOverflows)
+    return false;
+  // If one overflows but not the other, we can still compare.
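+  // The overflowing side is necessarily the more expensive one, so
+  // comparing the overflow flags gives the right answer.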
+  if (ThisOverflows || OtherOverflows)
+    return ThisOverflows < OtherOverflows;
+  // Otherwise, just compare the values.
+  return ThisScaledCost < OtherScaledCost;
+}
+
+bool RegBankSelect::MappingCost::operator==(const MappingCost &Cost) const {
+  return LocalCost == Cost.LocalCost && NonLocalCost == Cost.NonLocalCost &&
+         LocalFreq == Cost.LocalFreq;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void RegBankSelect::MappingCost::dump() const {
+  print(dbgs());
+  dbgs() << '\n';
+}
+#endif
+
+void RegBankSelect::MappingCost::print(raw_ostream &OS) const {
+  if (*this == ImpossibleCost()) {
+    OS << "impossible";
+    return;
+  }
+  if (isSaturated()) {
+    OS << "saturated";
+    return;
+  }
+  OS << LocalFreq << " * " << LocalCost << " + " << NonLocalCost;
+}
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp
new file mode 100644
index 000000000000..16f67a217ce1
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp
@@ -0,0 +1,114 @@
+//===- llvm/CodeGen/GlobalISel/RegisterBank.cpp - Register Bank --*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the RegisterBank class.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Config/llvm-config.h"
+
+#define DEBUG_TYPE "registerbank"
+
+using namespace llvm;
+
+const unsigned RegisterBank::InvalidID = UINT_MAX;
+
+RegisterBank::RegisterBank(
+    unsigned ID, const char *Name, unsigned Size,
+    const uint32_t *CoveredClasses, unsigned NumRegClasses)
+    : ID(ID), Name(Name), Size(Size) {
+  ContainedRegClasses.resize(NumRegClasses);
+  ContainedRegClasses.setBitsInMask(CoveredClasses);
+}
+
+bool RegisterBank::verify(const TargetRegisterInfo &TRI) const {
+  assert(isValid() && "Invalid register bank");
+  for (unsigned RCId = 0, End = TRI.getNumRegClasses(); RCId != End; ++RCId) {
+    const TargetRegisterClass &RC = *TRI.getRegClass(RCId);
+
+    if (!covers(RC))
+      continue;
+    // Verify that the register bank covers all the subclasses of the
+    // classes it covers.
+
+    // Use a different (slow in that case) method than
+    // RegisterBankInfo to find the subclasses of RC, to make sure
+    // both agree on the covers.
+    for (unsigned SubRCId = 0; SubRCId != End; ++SubRCId) {
+      const TargetRegisterClass &SubRC = *TRI.getRegClass(SubRCId);
+
+      if (!RC.hasSubClassEq(&SubRC))
+        continue;
+
+      // Verify that the Size of the register bank is big enough to cover
+      // all the register classes it covers.
+      assert(getSize() >= TRI.getRegSizeInBits(SubRC) &&
+             "Size is not big enough for all the subclasses!");
+      assert(covers(SubRC) && "Not all subclasses are covered");
+    }
+  }
+  return true;
+}
+
+bool RegisterBank::covers(const TargetRegisterClass &RC) const {
+  assert(isValid() && "RB hasn't been initialized yet");
+  return ContainedRegClasses.test(RC.getID());
+}
+
+bool RegisterBank::isValid() const {
+  return ID != InvalidID && Name != nullptr && Size != 0 &&
+         // A register bank that does not cover anything is useless.
+ !ContainedRegClasses.empty(); +} + +bool RegisterBank::operator==(const RegisterBank &OtherRB) const { + // There must be only one instance of a given register bank alive + // for the whole compilation. + // The RegisterBankInfo is supposed to enforce that. + assert((OtherRB.getID() != getID() || &OtherRB == this) && + "ID does not uniquely identify a RegisterBank"); + return &OtherRB == this; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void RegisterBank::dump(const TargetRegisterInfo *TRI) const { + print(dbgs(), /* IsForDebug */ true, TRI); +} +#endif + +void RegisterBank::print(raw_ostream &OS, bool IsForDebug, + const TargetRegisterInfo *TRI) const { + OS << getName(); + if (!IsForDebug) + return; + OS << "(ID:" << getID() << ", Size:" << getSize() << ")\n" + << "isValid:" << isValid() << '\n' + << "Number of Covered register classes: " << ContainedRegClasses.count() + << '\n'; + // Print all the subclasses if we can. + // This register classes may not be properly initialized yet. + if (!TRI || ContainedRegClasses.empty()) + return; + assert(ContainedRegClasses.size() == TRI->getNumRegClasses() && + "TRI does not match the initialization process?"); + bool IsFirst = true; + OS << "Covered register classes:\n"; + for (unsigned RCId = 0, End = TRI->getNumRegClasses(); RCId != End; ++RCId) { + const TargetRegisterClass &RC = *TRI->getRegClass(RCId); + + if (!covers(RC)) + continue; + + if (!IsFirst) + OS << ", "; + OS << TRI->getRegClassName(&RC); + IsFirst = false; + } +} diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp new file mode 100644 index 000000000000..dd15567ef1c1 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp @@ -0,0 +1,758 @@ +//===- llvm/CodeGen/GlobalISel/RegisterBankInfo.cpp --------------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the RegisterBankInfo class. +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/CodeGen/GlobalISel/RegisterBank.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#include <algorithm> // For std::max. 
+ +#define DEBUG_TYPE "registerbankinfo" + +using namespace llvm; + +STATISTIC(NumPartialMappingsCreated, + "Number of partial mappings dynamically created"); +STATISTIC(NumPartialMappingsAccessed, + "Number of partial mappings dynamically accessed"); +STATISTIC(NumValueMappingsCreated, + "Number of value mappings dynamically created"); +STATISTIC(NumValueMappingsAccessed, + "Number of value mappings dynamically accessed"); +STATISTIC(NumOperandsMappingsCreated, + "Number of operands mappings dynamically created"); +STATISTIC(NumOperandsMappingsAccessed, + "Number of operands mappings dynamically accessed"); +STATISTIC(NumInstructionMappingsCreated, + "Number of instruction mappings dynamically created"); +STATISTIC(NumInstructionMappingsAccessed, + "Number of instruction mappings dynamically accessed"); + +const unsigned RegisterBankInfo::DefaultMappingID = UINT_MAX; +const unsigned RegisterBankInfo::InvalidMappingID = UINT_MAX - 1; + +//------------------------------------------------------------------------------ +// RegisterBankInfo implementation. +//------------------------------------------------------------------------------ +RegisterBankInfo::RegisterBankInfo(RegisterBank **RegBanks, + unsigned NumRegBanks) + : RegBanks(RegBanks), NumRegBanks(NumRegBanks) { +#ifndef NDEBUG + for (unsigned Idx = 0, End = getNumRegBanks(); Idx != End; ++Idx) { + assert(RegBanks[Idx] != nullptr && "Invalid RegisterBank"); + assert(RegBanks[Idx]->isValid() && "RegisterBank should be valid"); + } +#endif // NDEBUG +} + +bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const { +#ifndef NDEBUG + for (unsigned Idx = 0, End = getNumRegBanks(); Idx != End; ++Idx) { + const RegisterBank &RegBank = getRegBank(Idx); + assert(Idx == RegBank.getID() && + "ID does not match the index in the array"); + LLVM_DEBUG(dbgs() << "Verify " << RegBank << '\n'); + assert(RegBank.verify(TRI) && "RegBank is invalid"); + } +#endif // NDEBUG + return true; +} + +const RegisterBank * +RegisterBankInfo::getRegBank(unsigned Reg, const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const { + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + return &getRegBankFromRegClass(getMinimalPhysRegClass(Reg, TRI)); + + assert(Reg && "NoRegister does not have a register bank"); + const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg); + if (auto *RB = RegClassOrBank.dyn_cast<const RegisterBank *>()) + return RB; + if (auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>()) + return &getRegBankFromRegClass(*RC); + return nullptr; +} + +const TargetRegisterClass & +RegisterBankInfo::getMinimalPhysRegClass(unsigned Reg, + const TargetRegisterInfo &TRI) const { + assert(TargetRegisterInfo::isPhysicalRegister(Reg) && + "Reg must be a physreg"); + const auto &RegRCIt = PhysRegMinimalRCs.find(Reg); + if (RegRCIt != PhysRegMinimalRCs.end()) + return *RegRCIt->second; + const TargetRegisterClass *PhysRC = TRI.getMinimalPhysRegClass(Reg); + PhysRegMinimalRCs[Reg] = PhysRC; + return *PhysRC; +} + +const RegisterBank *RegisterBankInfo::getRegBankFromConstraints( + const MachineInstr &MI, unsigned OpIdx, const TargetInstrInfo &TII, + const TargetRegisterInfo &TRI) const { + // The mapping of the registers may be available via the + // register class constraints. 
+  const TargetRegisterClass *RC = MI.getRegClassConstraint(OpIdx, &TII, &TRI);
+
+  if (!RC)
+    return nullptr;
+
+  const RegisterBank &RegBank = getRegBankFromRegClass(*RC);
+  // Sanity check that the target properly implemented getRegBankFromRegClass.
+  assert(RegBank.covers(*RC) &&
+         "The mapping of the register bank does not make sense");
+  return &RegBank;
+}
+
+const TargetRegisterClass *RegisterBankInfo::constrainGenericRegister(
+    unsigned Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI) {
+
+  // If the register already has a class, fallback to MRI::constrainRegClass.
+  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
+  if (RegClassOrBank.is<const TargetRegisterClass *>())
+    return MRI.constrainRegClass(Reg, &RC);
+
+  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
+  // Otherwise, all we can do is ensure the bank covers the class, and set it.
+  if (RB && !RB->covers(RC))
+    return nullptr;
+
+  // If nothing was set or the class is simply compatible, set it.
+  MRI.setRegClass(Reg, &RC);
+  return &RC;
+}
+
+/// Check whether or not \p MI should be treated like a copy
+/// for the mappings.
+/// Copy-like instructions are special for mapping because
+/// they don't have actual register constraints. Moreover,
+/// they sometimes have register classes assigned and we can
+/// just use that instead of failing to provide a generic mapping.
+static bool isCopyLike(const MachineInstr &MI) {
+  return MI.isCopy() || MI.isPHI() ||
+         MI.getOpcode() == TargetOpcode::REG_SEQUENCE;
+}
+
+const RegisterBankInfo::InstructionMapping &
+RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const {
+  // For copies we want to walk over the operands and try to find one
+  // that has a register bank since the instruction itself will not get
+  // us any constraint.
+  bool IsCopyLike = isCopyLike(MI);
+  // For copy-like instructions, only the mapping of the definition
+  // is important. The rest is not constrained.
+  unsigned NumOperandsForMapping = IsCopyLike ? 1 : MI.getNumOperands();
+
+  const MachineFunction &MF = *MI.getMF();
+  const TargetSubtargetInfo &STI = MF.getSubtarget();
+  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  // We may need to query the instruction encoding to guess the mapping.
+  const TargetInstrInfo &TII = *STI.getInstrInfo();
+
+  // Before doing anything complicated check if the mapping is not
+  // directly available.
+  bool CompleteMapping = true;
+
+  SmallVector<const ValueMapping *, 8> OperandsMapping(NumOperandsForMapping);
+  for (unsigned OpIdx = 0, EndIdx = MI.getNumOperands(); OpIdx != EndIdx;
+       ++OpIdx) {
+    const MachineOperand &MO = MI.getOperand(OpIdx);
+    if (!MO.isReg())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!Reg)
+      continue;
+    // The register bank of Reg is just a side effect of the current
+    // execution and in particular, there is no reason to believe this
+    // is the best default mapping for the current instruction. Keep
+    // it as an alternative register bank if we cannot figure out
+    // something.
+    const RegisterBank *AltRegBank = getRegBank(Reg, MRI, TRI);
+    // For copy-like instructions, we want to reuse the register bank
+    // that is already set on Reg, if any, since those instructions do
+    // not have any constraints.
+    const RegisterBank *CurRegBank = IsCopyLike ? AltRegBank : nullptr;
+    if (!CurRegBank) {
+      // If this is a target specific instruction, we can deduce
+      // the register bank from the encoding constraints.
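+      // For example, given a target instruction (with a made-up opcode) such
+      // as:
+      // \code
+      //   %2 = MYTGT_ADDrr %0, %1
+      // \endcode
+      // where the MCInstrDesc constrains every operand to GPR32, the lookup
+      // below resolves each operand to the bank covering GPR32.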
+      CurRegBank = getRegBankFromConstraints(MI, OpIdx, TII, TRI);
+      if (!CurRegBank) {
+        // All our attempts failed, give up.
+        CompleteMapping = false;
+
+        if (!IsCopyLike)
+          // MI does not carry enough information to guess the mapping.
+          return getInvalidInstructionMapping();
+        continue;
+      }
+    }
+    const ValueMapping *ValMapping =
+        &getValueMapping(0, getSizeInBits(Reg, MRI, TRI), *CurRegBank);
+    if (IsCopyLike) {
+      OperandsMapping[0] = ValMapping;
+      CompleteMapping = true;
+      break;
+    }
+    OperandsMapping[OpIdx] = ValMapping;
+  }
+
+  if (IsCopyLike && !CompleteMapping)
+    // No way to deduce the type from what we have.
+    return getInvalidInstructionMapping();
+
+  assert(CompleteMapping && "Setting an incomplete mapping");
+  return getInstructionMapping(
+      DefaultMappingID, /*Cost*/ 1,
+      /*OperandsMapping*/ getOperandsMapping(OperandsMapping),
+      NumOperandsForMapping);
+}
+
+/// Hashing function for PartialMapping.
+static hash_code hashPartialMapping(unsigned StartIdx, unsigned Length,
+                                    const RegisterBank *RegBank) {
+  return hash_combine(StartIdx, Length, RegBank ? RegBank->getID() : 0);
+}
+
+/// Overloaded version of hash_value for a PartialMapping.
+hash_code
+llvm::hash_value(const RegisterBankInfo::PartialMapping &PartMapping) {
+  return hashPartialMapping(PartMapping.StartIdx, PartMapping.Length,
+                            PartMapping.RegBank);
+}
+
+const RegisterBankInfo::PartialMapping &
+RegisterBankInfo::getPartialMapping(unsigned StartIdx, unsigned Length,
+                                    const RegisterBank &RegBank) const {
+  ++NumPartialMappingsAccessed;
+
+  hash_code Hash = hashPartialMapping(StartIdx, Length, &RegBank);
+  const auto &It = MapOfPartialMappings.find(Hash);
+  if (It != MapOfPartialMappings.end())
+    return *It->second;
+
+  ++NumPartialMappingsCreated;
+
+  auto &PartMapping = MapOfPartialMappings[Hash];
+  PartMapping = llvm::make_unique<PartialMapping>(StartIdx, Length, RegBank);
+  return *PartMapping;
+}
+
+const RegisterBankInfo::ValueMapping &
+RegisterBankInfo::getValueMapping(unsigned StartIdx, unsigned Length,
+                                  const RegisterBank &RegBank) const {
+  return getValueMapping(&getPartialMapping(StartIdx, Length, RegBank), 1);
+}
+
+static hash_code
+hashValueMapping(const RegisterBankInfo::PartialMapping *BreakDown,
+                 unsigned NumBreakDowns) {
+  if (LLVM_LIKELY(NumBreakDowns == 1))
+    return hash_value(*BreakDown);
+  // Reserve (rather than size-construct) the buffer: the hashes are appended
+  // with push_back below.
+  SmallVector<size_t, 8> Hashes;
+  Hashes.reserve(NumBreakDowns);
+  for (unsigned Idx = 0; Idx != NumBreakDowns; ++Idx)
+    Hashes.push_back(hash_value(BreakDown[Idx]));
+  return hash_combine_range(Hashes.begin(), Hashes.end());
+}
+
+const RegisterBankInfo::ValueMapping &
+RegisterBankInfo::getValueMapping(const PartialMapping *BreakDown,
+                                  unsigned NumBreakDowns) const {
+  ++NumValueMappingsAccessed;
+
+  hash_code Hash = hashValueMapping(BreakDown, NumBreakDowns);
+  const auto &It = MapOfValueMappings.find(Hash);
+  if (It != MapOfValueMappings.end())
+    return *It->second;
+
+  ++NumValueMappingsCreated;
+
+  auto &ValMapping = MapOfValueMappings[Hash];
+  ValMapping = llvm::make_unique<ValueMapping>(BreakDown, NumBreakDowns);
+  return *ValMapping;
+}
+
+template <typename Iterator>
+const RegisterBankInfo::ValueMapping *
+RegisterBankInfo::getOperandsMapping(Iterator Begin, Iterator End) const {
+
+  ++NumOperandsMappingsAccessed;
+
+  // The addresses of the value mapping are unique.
+  // Therefore, we can use them directly to hash the operand mapping.
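+  // Thanks to that uniquing, repeated queries with the same ValueMappings
+  // yield the same array. For instance, assuming DstMapping and SrcMapping
+  // were both obtained from getValueMapping:
+  // \code
+  //   const ValueMapping *A = getOperandsMapping({DstMapping, SrcMapping});
+  //   const ValueMapping *B = getOperandsMapping({DstMapping, SrcMapping});
+  //   assert(A == B && "Same key, same cached array");
+  // \endcode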
+  hash_code Hash = hash_combine_range(Begin, End);
+  auto &Res = MapOfOperandsMappings[Hash];
+  if (Res)
+    return Res.get();
+
+  ++NumOperandsMappingsCreated;
+
+  // Create the array of ValueMapping.
+  // Note: this array will not hash to this instance of operands
+  // mapping, because we use the pointer of the ValueMapping
+  // to hash and we expect them to uniquely identify an instance
+  // of value mapping.
+  Res = llvm::make_unique<ValueMapping[]>(std::distance(Begin, End));
+  unsigned Idx = 0;
+  for (Iterator It = Begin; It != End; ++It, ++Idx) {
+    const ValueMapping *ValMap = *It;
+    if (!ValMap)
+      continue;
+    Res[Idx] = *ValMap;
+  }
+  return Res.get();
+}
+
+const RegisterBankInfo::ValueMapping *RegisterBankInfo::getOperandsMapping(
+    const SmallVectorImpl<const RegisterBankInfo::ValueMapping *> &OpdsMapping)
+    const {
+  return getOperandsMapping(OpdsMapping.begin(), OpdsMapping.end());
+}
+
+const RegisterBankInfo::ValueMapping *RegisterBankInfo::getOperandsMapping(
+    std::initializer_list<const RegisterBankInfo::ValueMapping *> OpdsMapping)
+    const {
+  return getOperandsMapping(OpdsMapping.begin(), OpdsMapping.end());
+}
+
+static hash_code
+hashInstructionMapping(unsigned ID, unsigned Cost,
+                       const RegisterBankInfo::ValueMapping *OperandsMapping,
+                       unsigned NumOperands) {
+  return hash_combine(ID, Cost, OperandsMapping, NumOperands);
+}
+
+const RegisterBankInfo::InstructionMapping &
+RegisterBankInfo::getInstructionMappingImpl(
+    bool IsInvalid, unsigned ID, unsigned Cost,
+    const RegisterBankInfo::ValueMapping *OperandsMapping,
+    unsigned NumOperands) const {
+  assert(((IsInvalid && ID == InvalidMappingID && Cost == 0 &&
+           OperandsMapping == nullptr && NumOperands == 0) ||
+          !IsInvalid) &&
+         "Mismatched arguments for invalid input");
+  ++NumInstructionMappingsAccessed;
+
+  hash_code Hash =
+      hashInstructionMapping(ID, Cost, OperandsMapping, NumOperands);
+  const auto &It = MapOfInstructionMappings.find(Hash);
+  if (It != MapOfInstructionMappings.end())
+    return *It->second;
+
+  ++NumInstructionMappingsCreated;
+
+  auto &InstrMapping = MapOfInstructionMappings[Hash];
+  if (IsInvalid)
+    InstrMapping = llvm::make_unique<InstructionMapping>();
+  else
+    InstrMapping = llvm::make_unique<InstructionMapping>(
+        ID, Cost, OperandsMapping, NumOperands);
+  return *InstrMapping;
+}
+
+const RegisterBankInfo::InstructionMapping &
+RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
+  const RegisterBankInfo::InstructionMapping &Mapping = getInstrMappingImpl(MI);
+  if (Mapping.isValid())
+    return Mapping;
+  llvm_unreachable("The target must implement this");
+}
+
+RegisterBankInfo::InstructionMappings
+RegisterBankInfo::getInstrPossibleMappings(const MachineInstr &MI) const {
+  InstructionMappings PossibleMappings;
+  // Put the default mapping first.
+  PossibleMappings.push_back(&getInstrMapping(MI));
+  // Then the alternative mappings, if any.
+  InstructionMappings AltMappings = getInstrAlternativeMappings(MI);
+  for (const InstructionMapping *AltMapping : AltMappings)
+    PossibleMappings.push_back(AltMapping);
+#ifndef NDEBUG
+  for (const InstructionMapping *Mapping : PossibleMappings)
+    assert(Mapping->verify(MI) && "Mapping is invalid");
+#endif
+  return PossibleMappings;
+}
+
+RegisterBankInfo::InstructionMappings
+RegisterBankInfo::getInstrAlternativeMappings(const MachineInstr &MI) const {
+  // No alternative for MI.
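+  // Targets override this hook to advertise additional profitable placements.
+  // A hypothetical override exposing an FPR variant of a generic add could
+  // look like:
+  // \code
+  //   RegisterBankInfo::InstructionMappings
+  //   MyTargetRegisterBankInfo::getInstrAlternativeMappings(
+  //       const MachineInstr &MI) const {
+  //     InstructionMappings Mappings;
+  //     if (MI.getOpcode() == TargetOpcode::G_ADD)
+  //       Mappings.push_back(&getFPRAddMapping(MI)); // hypothetical helper
+  //     return Mappings;
+  //   }
+  // \endcode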
+  return InstructionMappings();
+}
+
+void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
+  MachineInstr &MI = OpdMapper.getMI();
+  MachineRegisterInfo &MRI = OpdMapper.getMRI();
+  LLVM_DEBUG(dbgs() << "Applying default-like mapping\n");
+  for (unsigned OpIdx = 0,
+                EndIdx = OpdMapper.getInstrMapping().getNumOperands();
+       OpIdx != EndIdx; ++OpIdx) {
+    LLVM_DEBUG(dbgs() << "OpIdx " << OpIdx);
+    MachineOperand &MO = MI.getOperand(OpIdx);
+    if (!MO.isReg()) {
+      LLVM_DEBUG(dbgs() << " is not a register, nothing to be done\n");
+      continue;
+    }
+    if (!MO.getReg()) {
+      LLVM_DEBUG(dbgs() << " is %noreg, nothing to be done\n");
+      continue;
+    }
+    assert(OpdMapper.getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns !=
+               0 &&
+           "Invalid mapping");
+    assert(OpdMapper.getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns ==
+               1 &&
+           "This mapping is too complex for this function");
+    iterator_range<SmallVectorImpl<unsigned>::const_iterator> NewRegs =
+        OpdMapper.getVRegs(OpIdx);
+    if (NewRegs.begin() == NewRegs.end()) {
+      LLVM_DEBUG(dbgs() << " has not been repaired, nothing to be done\n");
+      continue;
+    }
+    unsigned OrigReg = MO.getReg();
+    unsigned NewReg = *NewRegs.begin();
+    LLVM_DEBUG(dbgs() << " changed, replace " << printReg(OrigReg, nullptr));
+    MO.setReg(NewReg);
+    LLVM_DEBUG(dbgs() << " with " << printReg(NewReg, nullptr));
+
+    // The OperandsMapper creates plain scalars; we may have to fix that.
+    // Check if the types match and if not, fix that.
+    LLT OrigTy = MRI.getType(OrigReg);
+    LLT NewTy = MRI.getType(NewReg);
+    if (OrigTy != NewTy) {
+      // The default mapping is not supposed to change the size of
+      // the storage. However, right now we don't necessarily bump all
+      // the types to storage size. For instance, we can consider
+      // s16 G_AND legal whereas the storage size is going to be 32.
+      assert(OrigTy.getSizeInBits() <= NewTy.getSizeInBits() &&
+             "Types with different sizes cannot be handled by the default "
+             "mapping");
+      LLVM_DEBUG(dbgs() << "\nChange type of new opd from " << NewTy << " to "
+                        << OrigTy);
+      MRI.setType(NewReg, OrigTy);
+    }
+    LLVM_DEBUG(dbgs() << '\n');
+  }
+}
+
+unsigned RegisterBankInfo::getSizeInBits(unsigned Reg,
+                                         const MachineRegisterInfo &MRI,
+                                         const TargetRegisterInfo &TRI) const {
+  if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+    // The size is not directly available for physical registers.
+    // Instead, we need to access a register class that contains Reg and
+    // get the size of that register class.
+    // Because this is expensive, we'll cache the register class by calling
+    // getMinimalPhysRegClass, which memoizes its result in PhysRegMinimalRCs.
+    auto *RC = &getMinimalPhysRegClass(Reg, TRI);
+    assert(RC && "Expecting Register class");
+    return TRI.getRegSizeInBits(*RC);
+  }
+  return TRI.getRegSizeInBits(Reg, MRI);
+}
+
+//------------------------------------------------------------------------------
+// Helper classes implementation.
+//------------------------------------------------------------------------------
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void RegisterBankInfo::PartialMapping::dump() const {
+  print(dbgs());
+  dbgs() << '\n';
+}
+#endif
+
+bool RegisterBankInfo::PartialMapping::verify() const {
+  assert(RegBank && "Register bank not set");
+  assert(Length && "Empty mapping");
+  assert((StartIdx <= getHighBitIdx()) && "Overflow, switch to APInt?");
+  // Check if the minimum width fits into RegBank.
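+  // For instance, a mapping of 64 bits can only live in a bank that is at
+  // least 64 bits wide:
+  // \code
+  //   PartialMapping PM(/*StartIdx*/ 0, /*Length*/ 64, Bank32);
+  //   PM.verify(); // Fires the assertion below.
+  // \endcode
+  // (Bank32 stands for any 32-bit wide RegisterBank.)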
+ assert(RegBank->getSize() >= Length && "Register bank too small for Mask"); + return true; +} + +void RegisterBankInfo::PartialMapping::print(raw_ostream &OS) const { + OS << "[" << StartIdx << ", " << getHighBitIdx() << "], RegBank = "; + if (RegBank) + OS << *RegBank; + else + OS << "nullptr"; +} + +bool RegisterBankInfo::ValueMapping::verify(unsigned MeaningfulBitWidth) const { + assert(NumBreakDowns && "Value mapped nowhere?!"); + unsigned OrigValueBitWidth = 0; + for (const RegisterBankInfo::PartialMapping &PartMap : *this) { + // Check that each register bank is big enough to hold the partial value: + // this check is done by PartialMapping::verify + assert(PartMap.verify() && "Partial mapping is invalid"); + // The original value should completely be mapped. + // Thus the maximum accessed index + 1 is the size of the original value. + OrigValueBitWidth = + std::max(OrigValueBitWidth, PartMap.getHighBitIdx() + 1); + } + assert(OrigValueBitWidth >= MeaningfulBitWidth && + "Meaningful bits not covered by the mapping"); + APInt ValueMask(OrigValueBitWidth, 0); + for (const RegisterBankInfo::PartialMapping &PartMap : *this) { + // Check that the union of the partial mappings covers the whole value, + // without overlaps. + // The high bit is exclusive in the APInt API, thus getHighBitIdx + 1. + APInt PartMapMask = APInt::getBitsSet(OrigValueBitWidth, PartMap.StartIdx, + PartMap.getHighBitIdx() + 1); + ValueMask ^= PartMapMask; + assert((ValueMask & PartMapMask) == PartMapMask && + "Some partial mappings overlap"); + } + assert(ValueMask.isAllOnesValue() && "Value is not fully mapped"); + return true; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void RegisterBankInfo::ValueMapping::dump() const { + print(dbgs()); + dbgs() << '\n'; +} +#endif + +void RegisterBankInfo::ValueMapping::print(raw_ostream &OS) const { + OS << "#BreakDown: " << NumBreakDowns << " "; + bool IsFirst = true; + for (const PartialMapping &PartMap : *this) { + if (!IsFirst) + OS << ", "; + OS << '[' << PartMap << ']'; + IsFirst = false; + } +} + +bool RegisterBankInfo::InstructionMapping::verify( + const MachineInstr &MI) const { + // Check that all the register operands are properly mapped. + // Check the constructor invariant. + // For PHI, we only care about mapping the definition. + assert(NumOperands == (isCopyLike(MI) ? 1 : MI.getNumOperands()) && + "NumOperands must match, see constructor"); + assert(MI.getParent() && MI.getMF() && + "MI must be connected to a MachineFunction"); + const MachineFunction &MF = *MI.getMF(); + const RegisterBankInfo *RBI = MF.getSubtarget().getRegBankInfo(); + (void)RBI; + + for (unsigned Idx = 0; Idx < NumOperands; ++Idx) { + const MachineOperand &MO = MI.getOperand(Idx); + if (!MO.isReg()) { + assert(!getOperandMapping(Idx).isValid() && + "We should not care about non-reg mapping"); + continue; + } + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + assert(getOperandMapping(Idx).isValid() && + "We must have a mapping for reg operands"); + const RegisterBankInfo::ValueMapping &MOMapping = getOperandMapping(Idx); + (void)MOMapping; + // Register size in bits. + // This size must match what the mapping expects. 
+    assert(MOMapping.verify(RBI->getSizeInBits(
+               Reg, MF.getRegInfo(), *MF.getSubtarget().getRegisterInfo())) &&
+           "Value mapping is invalid");
+  }
+  return true;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void RegisterBankInfo::InstructionMapping::dump() const {
+  print(dbgs());
+  dbgs() << '\n';
+}
+#endif
+
+void RegisterBankInfo::InstructionMapping::print(raw_ostream &OS) const {
+  OS << "ID: " << getID() << " Cost: " << getCost() << " Mapping: ";
+
+  for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
+    const ValueMapping &ValMapping = getOperandMapping(OpIdx);
+    if (OpIdx)
+      OS << ", ";
+    OS << "{ Idx: " << OpIdx << " Map: " << ValMapping << '}';
+  }
+}
+
+const int RegisterBankInfo::OperandsMapper::DontKnowIdx = -1;
+
+RegisterBankInfo::OperandsMapper::OperandsMapper(
+    MachineInstr &MI, const InstructionMapping &InstrMapping,
+    MachineRegisterInfo &MRI)
+    : MRI(MRI), MI(MI), InstrMapping(InstrMapping) {
+  unsigned NumOpds = InstrMapping.getNumOperands();
+  OpToNewVRegIdx.resize(NumOpds, OperandsMapper::DontKnowIdx);
+  assert(InstrMapping.verify(MI) && "Invalid mapping for MI");
+}
+
+iterator_range<SmallVectorImpl<unsigned>::iterator>
+RegisterBankInfo::OperandsMapper::getVRegsMem(unsigned OpIdx) {
+  assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access");
+  unsigned NumPartialVal =
+      getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns;
+  int StartIdx = OpToNewVRegIdx[OpIdx];
+
+  if (StartIdx == OperandsMapper::DontKnowIdx) {
+    // This is the first time we try to access OpIdx.
+    // Create the cells that will hold all the partial values at the
+    // end of the list of NewVReg.
+    StartIdx = NewVRegs.size();
+    OpToNewVRegIdx[OpIdx] = StartIdx;
+    for (unsigned i = 0; i < NumPartialVal; ++i)
+      NewVRegs.push_back(0);
+  }
+  SmallVectorImpl<unsigned>::iterator End =
+      getNewVRegsEnd(StartIdx, NumPartialVal);
+
+  return make_range(&NewVRegs[StartIdx], End);
+}
+
+SmallVectorImpl<unsigned>::const_iterator
+RegisterBankInfo::OperandsMapper::getNewVRegsEnd(unsigned StartIdx,
+                                                 unsigned NumVal) const {
+  return const_cast<OperandsMapper *>(this)->getNewVRegsEnd(StartIdx, NumVal);
+}
+SmallVectorImpl<unsigned>::iterator
+RegisterBankInfo::OperandsMapper::getNewVRegsEnd(unsigned StartIdx,
+                                                 unsigned NumVal) {
+  assert(NewVRegs.size() >= StartIdx + NumVal &&
+         "NewVRegs too small to contain all the partial mappings");
+  return NewVRegs.size() <= StartIdx + NumVal ? NewVRegs.end()
+                                              : &NewVRegs[StartIdx + NumVal];
+}
+
+void RegisterBankInfo::OperandsMapper::createVRegs(unsigned OpIdx) {
+  assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access");
+  iterator_range<SmallVectorImpl<unsigned>::iterator> NewVRegsForOpIdx =
+      getVRegsMem(OpIdx);
+  const ValueMapping &ValMapping = getInstrMapping().getOperandMapping(OpIdx);
+  const PartialMapping *PartMap = ValMapping.begin();
+  for (unsigned &NewVReg : NewVRegsForOpIdx) {
+    assert(PartMap != ValMapping.end() && "Out-of-bound access");
+    assert(NewVReg == 0 && "Register has already been created");
+    // The new registers are always bound to scalars of the right size.
+    // The actual type has to be set when the target does the mapping
+    // of the instruction.
+    // The rationale is that this generic code cannot guess how the
+    // target plans to split the input type.
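+    // For instance, a 64-bit value broken down into two 32-bit partial
+    // mappings makes this loop perform the equivalent of:
+    // \code
+    //   unsigned Lo = MRI.createGenericVirtualRegister(LLT::scalar(32));
+    //   MRI.setRegBank(Lo, GPRRegBank); // GPRRegBank: hypothetical bank
+    //   unsigned Hi = MRI.createGenericVirtualRegister(LLT::scalar(32));
+    //   MRI.setRegBank(Hi, GPRRegBank);
+    // \endcode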
+ NewVReg = MRI.createGenericVirtualRegister(LLT::scalar(PartMap->Length)); + MRI.setRegBank(NewVReg, *PartMap->RegBank); + ++PartMap; + } +} + +void RegisterBankInfo::OperandsMapper::setVRegs(unsigned OpIdx, + unsigned PartialMapIdx, + unsigned NewVReg) { + assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access"); + assert(getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns > + PartialMapIdx && + "Out-of-bound access for partial mapping"); + // Make sure the memory is initialized for that operand. + (void)getVRegsMem(OpIdx); + assert(NewVRegs[OpToNewVRegIdx[OpIdx] + PartialMapIdx] == 0 && + "This value is already set"); + NewVRegs[OpToNewVRegIdx[OpIdx] + PartialMapIdx] = NewVReg; +} + +iterator_range<SmallVectorImpl<unsigned>::const_iterator> +RegisterBankInfo::OperandsMapper::getVRegs(unsigned OpIdx, + bool ForDebug) const { + (void)ForDebug; + assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access"); + int StartIdx = OpToNewVRegIdx[OpIdx]; + + if (StartIdx == OperandsMapper::DontKnowIdx) + return make_range(NewVRegs.end(), NewVRegs.end()); + + unsigned PartMapSize = + getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns; + SmallVectorImpl<unsigned>::const_iterator End = + getNewVRegsEnd(StartIdx, PartMapSize); + iterator_range<SmallVectorImpl<unsigned>::const_iterator> Res = + make_range(&NewVRegs[StartIdx], End); +#ifndef NDEBUG + for (unsigned VReg : Res) + assert((VReg || ForDebug) && "Some registers are uninitialized"); +#endif + return Res; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void RegisterBankInfo::OperandsMapper::dump() const { + print(dbgs(), true); + dbgs() << '\n'; +} +#endif + +void RegisterBankInfo::OperandsMapper::print(raw_ostream &OS, + bool ForDebug) const { + unsigned NumOpds = getInstrMapping().getNumOperands(); + if (ForDebug) { + OS << "Mapping for " << getMI() << "\nwith " << getInstrMapping() << '\n'; + // Print out the internal state of the index table. + OS << "Populated indices (CellNumber, IndexInNewVRegs): "; + bool IsFirst = true; + for (unsigned Idx = 0; Idx != NumOpds; ++Idx) { + if (OpToNewVRegIdx[Idx] != DontKnowIdx) { + if (!IsFirst) + OS << ", "; + OS << '(' << Idx << ", " << OpToNewVRegIdx[Idx] << ')'; + IsFirst = false; + } + } + OS << '\n'; + } else + OS << "Mapping ID: " << getInstrMapping().getID() << ' '; + + OS << "Operand Mapping: "; + // If we have a function, we can pretty print the name of the registers. + // Otherwise we will print the raw numbers. + const TargetRegisterInfo *TRI = + getMI().getParent() && getMI().getMF() + ? 
getMI().getMF()->getSubtarget().getRegisterInfo()
+          : nullptr;
+  bool IsFirst = true;
+  for (unsigned Idx = 0; Idx != NumOpds; ++Idx) {
+    if (OpToNewVRegIdx[Idx] == DontKnowIdx)
+      continue;
+    if (!IsFirst)
+      OS << ", ";
+    IsFirst = false;
+    OS << '(' << printReg(getMI().getOperand(Idx).getReg(), TRI) << ", [";
+    bool IsFirstNewVReg = true;
+    for (unsigned VReg : getVRegs(Idx)) {
+      if (!IsFirstNewVReg)
+        OS << ", ";
+      IsFirstNewVReg = false;
+      OS << printReg(VReg, TRI);
+    }
+    OS << "])";
+  }
+}
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp
new file mode 100644
index 000000000000..1a5f88743d5f
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -0,0 +1,240 @@
+//===- llvm/CodeGen/GlobalISel/Utils.cpp -------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file This file implements the utility functions used by the GlobalISel
+/// pipeline.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/IR/Constants.h"
+
+#define DEBUG_TYPE "globalisel-utils"
+
+using namespace llvm;
+
+unsigned llvm::constrainRegToClass(MachineRegisterInfo &MRI,
+                                   const TargetInstrInfo &TII,
+                                   const RegisterBankInfo &RBI,
+                                   MachineInstr &InsertPt, unsigned Reg,
+                                   const TargetRegisterClass &RegClass) {
+  if (!RBI.constrainGenericRegister(Reg, RegClass, MRI)) {
+    unsigned NewReg = MRI.createVirtualRegister(&RegClass);
+    BuildMI(*InsertPt.getParent(), InsertPt, InsertPt.getDebugLoc(),
+            TII.get(TargetOpcode::COPY), NewReg)
+        .addReg(Reg);
+    return NewReg;
+  }
+
+  return Reg;
+}
+
+unsigned llvm::constrainOperandRegClass(
+    const MachineFunction &MF, const TargetRegisterInfo &TRI,
+    MachineRegisterInfo &MRI, const TargetInstrInfo &TII,
+    const RegisterBankInfo &RBI, MachineInstr &InsertPt, const MCInstrDesc &II,
+    const MachineOperand &RegMO, unsigned OpIdx) {
+  unsigned Reg = RegMO.getReg();
+  // Assume physical registers are properly constrained.
+  assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
+         "PhysReg not implemented");
+
+  const TargetRegisterClass *RegClass = TII.getRegClass(II, OpIdx, &TRI, MF);
+  // Some of the target independent instructions, like COPY, may not impose any
+  // register class constraints on some of their operands: if it's a use, we
+  // can skip constraining as the instruction defining the register would
+  // constrain it.
+
+  // We can't constrain unallocatable register classes, because we can't create
+  // virtual registers for these classes, so we need to let targets handle this
+  // case.
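+  // For example, the generic COPY and PHI instructions fall into the !RegClass
+  // case below: their MCInstrDesc does not name a class for any operand, so a
+  // use operand is simply left for the instruction defining its register to
+  // constrain.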
+  if (RegClass && !RegClass->isAllocatable())
+    RegClass = TRI.getConstrainedRegClassForOperand(RegMO, MRI);
+
+  if (!RegClass) {
+    assert((!isTargetSpecificOpcode(II.getOpcode()) || RegMO.isUse()) &&
+           "Register class constraint is required unless either the "
+           "instruction is target independent or the operand is a use");
+    // FIXME: Just bailing out like this here may not be enough, unless we
+    // expect the users of this function to do the right thing for PHIs and
+    // COPY:
+    //   v1 = COPY v0
+    //   v2 = COPY v1
+    // v1 here may end up not being constrained at all. Please notice that to
+    // reproduce the issue we likely need a destination pattern of a selection
+    // rule producing such extra copies, not just an input GMIR with them as
+    // every existing target using selectImpl handles copies before calling it
+    // and they never reach this function.
+    return Reg;
+  }
+  return constrainRegToClass(MRI, TII, RBI, InsertPt, Reg, *RegClass);
+}
+
+bool llvm::constrainSelectedInstRegOperands(MachineInstr &I,
+                                            const TargetInstrInfo &TII,
+                                            const TargetRegisterInfo &TRI,
+                                            const RegisterBankInfo &RBI) {
+  assert(!isPreISelGenericOpcode(I.getOpcode()) &&
+         "A selected instruction is expected");
+  MachineBasicBlock &MBB = *I.getParent();
+  MachineFunction &MF = *MBB.getParent();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+
+  for (unsigned OpI = 0, OpE = I.getNumExplicitOperands(); OpI != OpE; ++OpI) {
+    MachineOperand &MO = I.getOperand(OpI);
+
+    // There's nothing to be done on non-register operands.
+    if (!MO.isReg())
+      continue;
+
+    LLVM_DEBUG(dbgs() << "Converting operand: " << MO << '\n');
+    assert(MO.isReg() && "Unsupported non-reg operand");
+
+    unsigned Reg = MO.getReg();
+    // Physical registers don't need to be constrained.
+    if (TRI.isPhysicalRegister(Reg))
+      continue;
+
+    // Register operands with a value of 0 (e.g. predicate operands) don't need
+    // to be constrained.
+    if (Reg == 0)
+      continue;
+
+    // If the operand is a vreg, we should constrain its regclass, and only
+    // insert COPYs if that's impossible.
+    // constrainOperandRegClass does that for us.
+    MO.setReg(constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, I.getDesc(),
+                                       MO, OpI));
+
+    // Tie uses to defs as indicated in MCInstrDesc if this hasn't already been
+    // done.
+    if (MO.isUse()) {
+      int DefIdx = I.getDesc().getOperandConstraint(OpI, MCOI::TIED_TO);
+      if (DefIdx != -1 && !I.isRegTiedToUseOperand(DefIdx))
+        I.tieOperands(DefIdx, OpI);
+    }
+  }
+  return true;
+}
+
+bool llvm::isTriviallyDead(const MachineInstr &MI,
+                           const MachineRegisterInfo &MRI) {
+  // If we can move an instruction, we can remove it. Otherwise, it has
+  // a side-effect of some sort.
+  bool SawStore = false;
+  if (!MI.isSafeToMove(/*AA=*/nullptr, SawStore))
+    return false;
+
+  // Instructions without side-effects are dead iff they only define dead vregs.
+  for (auto &MO : MI.operands()) {
+    if (!MO.isReg() || !MO.isDef())
+      continue;
+
+    unsigned Reg = MO.getReg();
+    if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+        !MRI.use_nodbg_empty(Reg))
+      return false;
+  }
+  return true;
+}
+
+void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC,
+                              MachineOptimizationRemarkEmitter &MORE,
+                              MachineOptimizationRemarkMissed &R) {
+  MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);
+
+  // Print the function name explicitly if we don't have a debug location (which
+  // makes the diagnostic less useful) or if we're going to emit a raw error.
+  if (!R.getLocation().isValid() || TPC.isGlobalISelAbortEnabled())
+    R << (" (in function: " + MF.getName() + ")").str();
+
+  if (TPC.isGlobalISelAbortEnabled())
+    report_fatal_error(R.getMsg());
+  else
+    MORE.emit(R);
+}
+
+void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC,
+                              MachineOptimizationRemarkEmitter &MORE,
+                              const char *PassName, StringRef Msg,
+                              const MachineInstr &MI) {
+  MachineOptimizationRemarkMissed R(PassName, "GISelFailure: ",
+                                    MI.getDebugLoc(), MI.getParent());
+  R << Msg;
+  // Printing MI is expensive; only do it if expensive remarks are enabled.
+  if (TPC.isGlobalISelAbortEnabled() || MORE.allowExtraAnalysis(PassName))
+    R << ": " << ore::MNV("Inst", MI);
+  reportGISelFailure(MF, TPC, MORE, R);
+}
+
+Optional<int64_t> llvm::getConstantVRegVal(unsigned VReg,
+                                           const MachineRegisterInfo &MRI) {
+  MachineInstr *MI = MRI.getVRegDef(VReg);
+  if (MI->getOpcode() != TargetOpcode::G_CONSTANT)
+    return None;
+
+  if (MI->getOperand(1).isImm())
+    return MI->getOperand(1).getImm();
+
+  if (MI->getOperand(1).isCImm() &&
+      MI->getOperand(1).getCImm()->getBitWidth() <= 64)
+    return MI->getOperand(1).getCImm()->getSExtValue();
+
+  return None;
+}
+
+const llvm::ConstantFP* llvm::getConstantFPVRegVal(unsigned VReg,
+                                                   const MachineRegisterInfo &MRI) {
+  MachineInstr *MI = MRI.getVRegDef(VReg);
+  if (TargetOpcode::G_FCONSTANT != MI->getOpcode())
+    return nullptr;
+  return MI->getOperand(1).getFPImm();
+}
+
+llvm::MachineInstr *llvm::getOpcodeDef(unsigned Opcode, unsigned Reg,
+                                       const MachineRegisterInfo &MRI) {
+  auto *DefMI = MRI.getVRegDef(Reg);
+  auto DstTy = MRI.getType(DefMI->getOperand(0).getReg());
+  if (!DstTy.isValid())
+    return nullptr;
+  while (DefMI->getOpcode() == TargetOpcode::COPY) {
+    unsigned SrcReg = DefMI->getOperand(1).getReg();
+    auto SrcTy = MRI.getType(SrcReg);
+    if (!SrcTy.isValid() || SrcTy != DstTy)
+      break;
+    DefMI = MRI.getVRegDef(SrcReg);
+  }
+  return DefMI->getOpcode() == Opcode ? DefMI : nullptr;
+}
+
+APFloat llvm::getAPFloatFromSize(double Val, unsigned Size) {
+  if (Size == 32)
+    return APFloat(float(Val));
+  if (Size == 64)
+    return APFloat(Val);
+  if (Size != 16)
+    llvm_unreachable("Unsupported FPConstant size");
+  bool Ignored;
+  APFloat APF(Val);
+  APF.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &Ignored);
+  return APF;
+}
+
+void llvm::getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU) {
+  AU.addPreserved<StackProtector>();
+}
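+
+// A usage sketch for the constant helpers above; the surrounding code (VReg,
+// MRI, and the consumer) is assumed, not defined in this file:
+// \code
+//   // Materialize 1.0 at the width of a 16-bit G_FCONSTANT:
+//   APFloat APF = getAPFloatFromSize(1.0, 16);
+//   // Fold a G_CONSTANT reaching VReg, when there is one:
+//   if (Optional<int64_t> Cst = getConstantVRegVal(VReg, MRI))
+//     useImmediate(*Cst); // hypothetical consumer
+// \endcode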