author    | Dimitry Andric <dim@FreeBSD.org> | 2017-01-06 20:24:06 +0000
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-01-06 20:24:06 +0000
commit    | 95ec533a1d8c450f6c6c5e84fe85423960e13382 (patch)
tree      | bfe77b0dccd50ed2f4b4e6299d4bc4eaafced6e7 /contrib/llvm/lib/Target
parent    | 2b532af82919b9141e7fd04becf354a0a7dfa813 (diff)
parent    | 7e7b6700743285c0af506ac6299ddf82ebd434b9 (diff)
Merge llvm, clang, lld and lldb trunk r291274, and resolve conflicts.
Notes:
svn path=/projects/clang400-import/; revision=311544
Diffstat (limited to 'contrib/llvm/lib/Target')
35 files changed, 1261 insertions, 1442 deletions
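Editor's note: the largest change in the diff below is a rewrite of AArch64CollectLOH.cpp. The old pass depended on MachineDominatorTree and ran a per-register reaching-definitions analysis (Gen/Kill/In/Out sets per basic block); the new pass makes a single backward walk over each basic block and keeps a small per-register state machine (the `LOHInfo` struct) for the 31 tracked general-purpose registers. The sketch below is a minimal, self-contained illustration of that backward-walk idea only; the `Inst`, `RegState`, and `collectChains` names are invented for the example and are not part of the LLVM API.

```cpp
#include <iostream>
#include <vector>

// Toy model of an instruction: the register it defines, the register it
// uses (-1 means "none"), and whether it starts an address chain the way
// ADRP does. Real MachineInstrs are far richer than this.
struct Inst {
  const char *Name;
  int DefReg;   // register index written, or -1
  int UseReg;   // register index read, or -1
  bool IsADRP;  // address-materializing def that can head a chain
};

// Per-register state, analogous in spirit to LOHInfo in the new pass:
// remember the single user seen so far while walking backwards, and give
// up once a second user (or a clobbering def) is seen.
struct RegState {
  const Inst *SingleUser = nullptr;
  bool MultipleUsers = false;
};

// Walk one basic block backwards. By the time a def is reached, all of its
// users inside the block have already been visited, so a single-use chain
// such as ADRP -> LDR can be reported immediately, with no global dataflow.
static void collectChains(const std::vector<Inst> &Block, unsigned NumRegs) {
  std::vector<RegState> State(NumRegs);
  for (auto It = Block.rbegin(); It != Block.rend(); ++It) {
    const Inst &I = *It;
    if (I.DefReg >= 0) {
      RegState &S = State[I.DefReg];
      if (I.IsADRP && S.SingleUser && !S.MultipleUsers)
        std::cout << "chain: " << I.Name << " -> " << S.SingleUser->Name << '\n';
      S = RegState{}; // a def clobbers whatever was being tracked
    }
    if (I.UseReg >= 0) {
      RegState &S = State[I.UseReg];
      if (S.SingleUser)
        S.MultipleUsers = true;
      else
        S.SingleUser = &I;
    }
  }
}

int main() {
  // x0 = ADRP sym; x1 = LDR [x0]  -- a single-user chain the walk detects.
  std::vector<Inst> BB = {
      {"ADRP x0, sym", 0, -1, true},
      {"LDR x1, [x0, :lo12:sym]", 1, 0, false},
  };
  collectChains(BB, 31);
}
```

Walking backwards means that when a definition is reached its user count within the block is already known, which is what lets the rewritten pass drop the reaching-definitions machinery and the dominator-tree dependency entirely.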
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp b/contrib/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp index 7666011f75b6..17aafa0c3d6e 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp @@ -110,72 +110,34 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; #define DEBUG_TYPE "aarch64-collect-loh" -static cl::opt<bool> -PreCollectRegister("aarch64-collect-loh-pre-collect-register", cl::Hidden, - cl::desc("Restrict analysis to registers invovled" - " in LOHs"), - cl::init(true)); - -static cl::opt<bool> -BasicBlockScopeOnly("aarch64-collect-loh-bb-only", cl::Hidden, - cl::desc("Restrict analysis at basic block scope"), - cl::init(true)); - STATISTIC(NumADRPSimpleCandidate, "Number of simplifiable ADRP dominate by another"); -#ifndef NDEBUG -STATISTIC(NumADRPComplexCandidate2, - "Number of simplifiable ADRP reachable by 2 defs"); -STATISTIC(NumADRPComplexCandidate3, - "Number of simplifiable ADRP reachable by 3 defs"); -STATISTIC(NumADRPComplexCandidateOther, - "Number of simplifiable ADRP reachable by 4 or more defs"); -STATISTIC(NumADDToSTRWithImm, - "Number of simplifiable STR with imm reachable by ADD"); -STATISTIC(NumLDRToSTRWithImm, - "Number of simplifiable STR with imm reachable by LDR"); STATISTIC(NumADDToSTR, "Number of simplifiable STR reachable by ADD"); STATISTIC(NumLDRToSTR, "Number of simplifiable STR reachable by LDR"); -STATISTIC(NumADDToLDRWithImm, - "Number of simplifiable LDR with imm reachable by ADD"); -STATISTIC(NumLDRToLDRWithImm, - "Number of simplifiable LDR with imm reachable by LDR"); STATISTIC(NumADDToLDR, "Number of simplifiable LDR reachable by ADD"); STATISTIC(NumLDRToLDR, "Number of simplifiable LDR reachable by LDR"); -#endif // NDEBUG STATISTIC(NumADRPToLDR, "Number of simplifiable LDR reachable by ADRP"); -#ifndef NDEBUG -STATISTIC(NumCplxLvl1, "Number of complex case of level 1"); -STATISTIC(NumTooCplxLvl1, "Number of too complex case of level 1"); -STATISTIC(NumCplxLvl2, "Number of complex case of level 2"); -STATISTIC(NumTooCplxLvl2, "Number of too complex case of level 2"); -#endif // NDEBUG STATISTIC(NumADRSimpleCandidate, "Number of simplifiable ADRP + ADD"); -STATISTIC(NumADRComplexCandidate, "Number of too complex ADRP + ADD"); #define AARCH64_COLLECT_LOH_NAME "AArch64 Collect Linker Optimization Hint (LOH)" namespace { + struct AArch64CollectLOH : public MachineFunctionPass { static char ID; - AArch64CollectLOH() : MachineFunctionPass(ID) { - initializeAArch64CollectLOHPass(*PassRegistry::getPassRegistry()); - } + AArch64CollectLOH() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override; @@ -187,351 +149,57 @@ struct AArch64CollectLOH : public MachineFunctionPass { StringRef getPassName() const override { return AARCH64_COLLECT_LOH_NAME; } void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); - 
AU.addRequired<MachineDominatorTree>(); + AU.setPreservesAll(); } - -private: }; -/// A set of MachineInstruction. -typedef SetVector<const MachineInstr *> SetOfMachineInstr; -/// Map a basic block to a set of instructions per register. -/// This is used to represent the exposed uses of a basic block -/// per register. -typedef MapVector<const MachineBasicBlock *, - std::unique_ptr<SetOfMachineInstr[]>> -BlockToSetOfInstrsPerColor; -/// Map a basic block to an instruction per register. -/// This is used to represent the live-out definitions of a basic block -/// per register. -typedef MapVector<const MachineBasicBlock *, - std::unique_ptr<const MachineInstr *[]>> -BlockToInstrPerColor; -/// Map an instruction to a set of instructions. Used to represent the -/// mapping def to reachable uses or use to definitions. -typedef MapVector<const MachineInstr *, SetOfMachineInstr> InstrToInstrs; -/// Map a basic block to a BitVector. -/// This is used to record the kill registers per basic block. -typedef MapVector<const MachineBasicBlock *, BitVector> BlockToRegSet; - -/// Map a register to a dense id. -typedef DenseMap<unsigned, unsigned> MapRegToId; -/// Map a dense id to a register. Used for debug purposes. -typedef SmallVector<unsigned, 32> MapIdToReg; -} // end anonymous namespace. - char AArch64CollectLOH::ID = 0; -INITIALIZE_PASS_BEGIN(AArch64CollectLOH, "aarch64-collect-loh", - AARCH64_COLLECT_LOH_NAME, false, false) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_END(AArch64CollectLOH, "aarch64-collect-loh", - AARCH64_COLLECT_LOH_NAME, false, false) - -/// Given a couple (MBB, reg) get the corresponding set of instruction from -/// the given "sets". -/// If this couple does not reference any set, an empty set is added to "sets" -/// for this couple and returned. -/// \param nbRegs is used internally allocate some memory. It must be consistent -/// with the way sets is used. -static SetOfMachineInstr &getSet(BlockToSetOfInstrsPerColor &sets, - const MachineBasicBlock &MBB, unsigned reg, - unsigned nbRegs) { - SetOfMachineInstr *result; - BlockToSetOfInstrsPerColor::iterator it = sets.find(&MBB); - if (it != sets.end()) - result = it->second.get(); - else - result = (sets[&MBB] = make_unique<SetOfMachineInstr[]>(nbRegs)).get(); - - return result[reg]; -} - -/// Given a couple (reg, MI) get the corresponding set of instructions from the -/// the given "sets". -/// This is used to get the uses record in sets of a definition identified by -/// MI and reg, i.e., MI defines reg. -/// If the couple does not reference anything, an empty set is added to -/// "sets[reg]". -/// \pre set[reg] is valid. -static SetOfMachineInstr &getUses(InstrToInstrs *sets, unsigned reg, - const MachineInstr &MI) { - return sets[reg][&MI]; -} - -/// Same as getUses but does not modify the input map: sets. -/// \return NULL if the couple (reg, MI) is not in sets. -static const SetOfMachineInstr *getUses(const InstrToInstrs *sets, unsigned reg, - const MachineInstr &MI) { - InstrToInstrs::const_iterator Res = sets[reg].find(&MI); - if (Res != sets[reg].end()) - return &(Res->second); - return nullptr; -} - -/// Initialize the reaching definition algorithm: -/// For each basic block BB in MF, record: -/// - its kill set. -/// - its reachable uses (uses that are exposed to BB's predecessors). -/// - its the generated definitions. -/// \param DummyOp if not NULL, specifies a Dummy Operation to be added to -/// the list of uses of exposed defintions. 
-/// \param ADRPMode specifies to only consider ADRP instructions for generated -/// definition. It also consider definitions of ADRP instructions as uses and -/// ignore other uses. The ADRPMode is used to collect the information for LHO -/// that involve ADRP operation only. -static void initReachingDef(const MachineFunction &MF, - InstrToInstrs *ColorOpToReachedUses, - BlockToInstrPerColor &Gen, BlockToRegSet &Kill, - BlockToSetOfInstrsPerColor &ReachableUses, - const MapRegToId &RegToId, - const MachineInstr *DummyOp, bool ADRPMode) { - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - unsigned NbReg = RegToId.size(); - - for (const MachineBasicBlock &MBB : MF) { - auto &BBGen = Gen[&MBB]; - BBGen = make_unique<const MachineInstr *[]>(NbReg); - std::fill(BBGen.get(), BBGen.get() + NbReg, nullptr); - - BitVector &BBKillSet = Kill[&MBB]; - BBKillSet.resize(NbReg); - for (const MachineInstr &MI : MBB) { - bool IsADRP = MI.getOpcode() == AArch64::ADRP; - - // Process uses first. - if (IsADRP || !ADRPMode) - for (const MachineOperand &MO : MI.operands()) { - // Treat ADRP def as use, as the goal of the analysis is to find - // ADRP defs reached by other ADRP defs. - if (!MO.isReg() || (!ADRPMode && !MO.isUse()) || - (ADRPMode && (!IsADRP || !MO.isDef()))) - continue; - unsigned CurReg = MO.getReg(); - MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg); - if (ItCurRegId == RegToId.end()) - continue; - CurReg = ItCurRegId->second; - - // if CurReg has not been defined, this use is reachable. - if (!BBGen[CurReg] && !BBKillSet.test(CurReg)) - getSet(ReachableUses, MBB, CurReg, NbReg).insert(&MI); - // current basic block definition for this color, if any, is in Gen. - if (BBGen[CurReg]) - getUses(ColorOpToReachedUses, CurReg, *BBGen[CurReg]).insert(&MI); - } - - // Process clobbers. - for (const MachineOperand &MO : MI.operands()) { - if (!MO.isRegMask()) - continue; - // Clobbers kill the related colors. - const uint32_t *PreservedRegs = MO.getRegMask(); - - // Set generated regs. - for (const auto &Entry : RegToId) { - unsigned Reg = Entry.second; - // Use the global register ID when querying APIs external to this - // pass. - if (MachineOperand::clobbersPhysReg(PreservedRegs, Entry.first)) { - // Do not register clobbered definition for no ADRP. - // This definition is not used anyway (otherwise register - // allocation is wrong). - BBGen[Reg] = ADRPMode ? &MI : nullptr; - BBKillSet.set(Reg); - } - } - } - - // Process register defs. - for (const MachineOperand &MO : MI.operands()) { - if (!MO.isReg() || !MO.isDef()) - continue; - unsigned CurReg = MO.getReg(); - MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg); - if (ItCurRegId == RegToId.end()) - continue; - - for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI) { - MapRegToId::const_iterator ItRegId = RegToId.find(*AI); - // If this alias has not been recorded, then it is not interesting - // for the current analysis. - // We can end up in this situation because of tuple registers. - // E.g., Let say we are interested in S1. When we register - // S1, we will also register its aliases and in particular - // the tuple Q1_Q2. - // Now, when we encounter Q1_Q2, we will look through its aliases - // and will find that S2 is not registered. 
- if (ItRegId == RegToId.end()) - continue; - - BBKillSet.set(ItRegId->second); - BBGen[ItRegId->second] = &MI; - } - BBGen[ItCurRegId->second] = &MI; - } - } - - // If we restrict our analysis to basic block scope, conservatively add a - // dummy - // use for each generated value. - if (!ADRPMode && DummyOp && !MBB.succ_empty()) - for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg) - if (BBGen[CurReg]) - getUses(ColorOpToReachedUses, CurReg, *BBGen[CurReg]).insert(DummyOp); - } -} - -/// Reaching def core algorithm: -/// while an Out has changed -/// for each bb -/// for each color -/// In[bb][color] = U Out[bb.predecessors][color] -/// insert reachableUses[bb][color] in each in[bb][color] -/// op.reachedUses -/// -/// Out[bb] = Gen[bb] U (In[bb] - Kill[bb]) -static void reachingDefAlgorithm(const MachineFunction &MF, - InstrToInstrs *ColorOpToReachedUses, - BlockToSetOfInstrsPerColor &In, - BlockToSetOfInstrsPerColor &Out, - BlockToInstrPerColor &Gen, BlockToRegSet &Kill, - BlockToSetOfInstrsPerColor &ReachableUses, - unsigned NbReg) { - bool HasChanged; - do { - HasChanged = false; - for (const MachineBasicBlock &MBB : MF) { - unsigned CurReg; - for (CurReg = 0; CurReg < NbReg; ++CurReg) { - SetOfMachineInstr &BBInSet = getSet(In, MBB, CurReg, NbReg); - SetOfMachineInstr &BBReachableUses = - getSet(ReachableUses, MBB, CurReg, NbReg); - SetOfMachineInstr &BBOutSet = getSet(Out, MBB, CurReg, NbReg); - unsigned Size = BBOutSet.size(); - // In[bb][color] = U Out[bb.predecessors][color] - for (const MachineBasicBlock *PredMBB : MBB.predecessors()) { - SetOfMachineInstr &PredOutSet = getSet(Out, *PredMBB, CurReg, NbReg); - BBInSet.insert(PredOutSet.begin(), PredOutSet.end()); - } - // insert reachableUses[bb][color] in each in[bb][color] op.reachedses - for (const MachineInstr *MI : BBInSet) { - SetOfMachineInstr &OpReachedUses = - getUses(ColorOpToReachedUses, CurReg, *MI); - OpReachedUses.insert(BBReachableUses.begin(), BBReachableUses.end()); - } - // Out[bb] = Gen[bb] U (In[bb] - Kill[bb]) - if (!Kill[&MBB].test(CurReg)) - BBOutSet.insert(BBInSet.begin(), BBInSet.end()); - if (Gen[&MBB][CurReg]) - BBOutSet.insert(Gen[&MBB][CurReg]); - HasChanged |= BBOutSet.size() != Size; - } - } - } while (HasChanged); -} - -/// Reaching definition algorithm. -/// \param MF function on which the algorithm will operate. -/// \param[out] ColorOpToReachedUses will contain the result of the reaching -/// def algorithm. -/// \param ADRPMode specify whether the reaching def algorithm should be tuned -/// for ADRP optimization. \see initReachingDef for more details. -/// \param DummyOp if not NULL, the algorithm will work at -/// basic block scope and will set for every exposed definition a use to -/// @p DummyOp. -/// \pre ColorOpToReachedUses is an array of at least number of registers of -/// InstrToInstrs. -static void reachingDef(const MachineFunction &MF, - InstrToInstrs *ColorOpToReachedUses, - const MapRegToId &RegToId, bool ADRPMode = false, - const MachineInstr *DummyOp = nullptr) { - // structures: - // For each basic block. - // Out: a set per color of definitions that reach the - // out boundary of this block. - // In: Same as Out but for in boundary. - // Gen: generated color in this block (one operation per color). - // Kill: register set of killed color in this block. - // ReachableUses: a set per color of uses (operation) reachable - // for "In" definitions. 
- BlockToSetOfInstrsPerColor Out, In, ReachableUses; - BlockToInstrPerColor Gen; - BlockToRegSet Kill; - - // Initialize Gen, kill and reachableUses. - initReachingDef(MF, ColorOpToReachedUses, Gen, Kill, ReachableUses, RegToId, - DummyOp, ADRPMode); - - // Algo. - if (!DummyOp) - reachingDefAlgorithm(MF, ColorOpToReachedUses, In, Out, Gen, Kill, - ReachableUses, RegToId.size()); -} +} // end anonymous namespace. -#ifndef NDEBUG -/// print the result of the reaching definition algorithm. -static void printReachingDef(const InstrToInstrs *ColorOpToReachedUses, - unsigned NbReg, const TargetRegisterInfo *TRI, - const MapIdToReg &IdToReg) { - unsigned CurReg; - for (CurReg = 0; CurReg < NbReg; ++CurReg) { - if (ColorOpToReachedUses[CurReg].empty()) - continue; - DEBUG(dbgs() << "*** Reg " << PrintReg(IdToReg[CurReg], TRI) << " ***\n"); +INITIALIZE_PASS(AArch64CollectLOH, "aarch64-collect-loh", + AARCH64_COLLECT_LOH_NAME, false, false) - for (const auto &DefsIt : ColorOpToReachedUses[CurReg]) { - DEBUG(dbgs() << "Def:\n"); - DEBUG(DefsIt.first->print(dbgs())); - DEBUG(dbgs() << "Reachable uses:\n"); - for (const MachineInstr *MI : DefsIt.second) { - DEBUG(MI->print(dbgs())); - } - } +static bool canAddBePartOfLOH(const MachineInstr &MI) { + // Check immediate to see if the immediate is an address. + switch (MI.getOperand(2).getType()) { + default: + return false; + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_JumpTableIndex: + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_BlockAddress: + return true; } } -#endif // NDEBUG /// Answer the following question: Can Def be one of the definition /// involved in a part of a LOH? -static bool canDefBePartOfLOH(const MachineInstr *Def) { - unsigned Opc = Def->getOpcode(); +static bool canDefBePartOfLOH(const MachineInstr &MI) { // Accept ADRP, ADDLow and LOADGot. - switch (Opc) { + switch (MI.getOpcode()) { default: return false; case AArch64::ADRP: return true; case AArch64::ADDXri: - // Check immediate to see if the immediate is an address. - switch (Def->getOperand(2).getType()) { - default: - return false; - case MachineOperand::MO_GlobalAddress: - case MachineOperand::MO_JumpTableIndex: - case MachineOperand::MO_ConstantPoolIndex: - case MachineOperand::MO_BlockAddress: - return true; - } + return canAddBePartOfLOH(MI); case AArch64::LDRXui: // Check immediate to see if the immediate is an address. - switch (Def->getOperand(2).getType()) { + switch (MI.getOperand(2).getType()) { default: return false; case MachineOperand::MO_GlobalAddress: - return true; + return MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT; } } - // Unreachable. - return false; } /// Check whether the given instruction can the end of a LOH chain involving a /// store. -static bool isCandidateStore(const MachineInstr *Instr) { - switch (Instr->getOpcode()) { +static bool isCandidateStore(const MachineInstr &MI, const MachineOperand &MO) { + switch (MI.getOpcode()) { default: return false; case AArch64::STRBBui: @@ -543,109 +211,19 @@ static bool isCandidateStore(const MachineInstr *Instr) { case AArch64::STRSui: case AArch64::STRDui: case AArch64::STRQui: + // We can only optimize the index operand. // In case we have str xA, [xA, #imm], this is two different uses // of xA and we cannot fold, otherwise the xA stored may be wrong, // even if #imm == 0. 
- if (Instr->getOperand(0).getReg() != Instr->getOperand(1).getReg()) - return true; - } - return false; -} - -/// Given the result of a reaching definition algorithm in ColorOpToReachedUses, -/// Build the Use to Defs information and filter out obvious non-LOH candidates. -/// In ADRPMode, non-LOH candidates are "uses" with non-ADRP definitions. -/// In non-ADRPMode, non-LOH candidates are "uses" with several definition, -/// i.e., no simple chain. -/// \param ADRPMode -- \see initReachingDef. -static void reachedUsesToDefs(InstrToInstrs &UseToReachingDefs, - const InstrToInstrs *ColorOpToReachedUses, - const MapRegToId &RegToId, - bool ADRPMode = false) { - - SetOfMachineInstr NotCandidate; - unsigned NbReg = RegToId.size(); - MapRegToId::const_iterator EndIt = RegToId.end(); - for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg) { - // If this color is never defined, continue. - if (ColorOpToReachedUses[CurReg].empty()) - continue; - - for (const auto &DefsIt : ColorOpToReachedUses[CurReg]) { - for (const MachineInstr *MI : DefsIt.second) { - const MachineInstr *Def = DefsIt.first; - MapRegToId::const_iterator It; - // if all the reaching defs are not adrp, this use will not be - // simplifiable. - if ((ADRPMode && Def->getOpcode() != AArch64::ADRP) || - (!ADRPMode && !canDefBePartOfLOH(Def)) || - (!ADRPMode && isCandidateStore(MI) && - // store are LOH candidate iff the end of the chain is used as - // base. - ((It = RegToId.find((MI)->getOperand(1).getReg())) == EndIt || - It->second != CurReg))) { - NotCandidate.insert(MI); - continue; - } - // Do not consider self reaching as a simplifiable case for ADRP. - if (!ADRPMode || MI != DefsIt.first) { - UseToReachingDefs[MI].insert(DefsIt.first); - // If UsesIt has several reaching definitions, it is not - // candidate for simplificaton in non-ADRPMode. - if (!ADRPMode && UseToReachingDefs[MI].size() > 1) - NotCandidate.insert(MI); - } - } - } - } - for (const MachineInstr *Elem : NotCandidate) { - DEBUG(dbgs() << "Too many reaching defs: " << *Elem << "\n"); - // It would have been better if we could just remove the entry - // from the map. Because of that, we have to filter the garbage - // (second.empty) in the subsequence analysis. - UseToReachingDefs[Elem].clear(); - } -} - -/// Based on the use to defs information (in ADRPMode), compute the -/// opportunities of LOH ADRP-related. -static void computeADRP(const InstrToInstrs &UseToDefs, - AArch64FunctionInfo &AArch64FI, - const MachineDominatorTree *MDT) { - DEBUG(dbgs() << "*** Compute LOH for ADRP\n"); - for (const auto &Entry : UseToDefs) { - unsigned Size = Entry.second.size(); - if (Size == 0) - continue; - if (Size == 1) { - const MachineInstr *L2 = *Entry.second.begin(); - const MachineInstr *L1 = Entry.first; - if (!MDT->dominates(L2, L1)) { - DEBUG(dbgs() << "Dominance check failed:\n" << *L2 << '\n' << *L1 - << '\n'); - continue; - } - DEBUG(dbgs() << "Record AdrpAdrp:\n" << *L2 << '\n' << *L1 << '\n'); - AArch64FI.addLOHDirective(MCLOH_AdrpAdrp, {L2, L1}); - ++NumADRPSimpleCandidate; - } -#ifndef NDEBUG - else if (Size == 2) - ++NumADRPComplexCandidate2; - else if (Size == 3) - ++NumADRPComplexCandidate3; - else - ++NumADRPComplexCandidateOther; -#endif - // if Size < 1, the use should have been removed from the candidates - assert(Size >= 1 && "No reaching defs for that use!"); + return MI.getOperandNo(&MO) == 1 && + MI.getOperand(0).getReg() != MI.getOperand(1).getReg(); } } /// Check whether the given instruction can be the end of a LOH chain /// involving a load. 
-static bool isCandidateLoad(const MachineInstr *Instr) { - switch (Instr->getOpcode()) { +static bool isCandidateLoad(const MachineInstr &MI) { + switch (MI.getOpcode()) { default: return false; case AArch64::LDRSBWui: @@ -660,17 +238,13 @@ static bool isCandidateLoad(const MachineInstr *Instr) { case AArch64::LDRSui: case AArch64::LDRDui: case AArch64::LDRQui: - if (Instr->getOperand(2).getTargetFlags() & AArch64II::MO_GOT) - return false; - return true; + return !(MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT); } - // Unreachable. - return false; } /// Check whether the given instruction can load a litteral. -static bool supportLoadFromLiteral(const MachineInstr *Instr) { - switch (Instr->getOpcode()) { +static bool supportLoadFromLiteral(const MachineInstr &MI) { + switch (MI.getOpcode()) { default: return false; case AArch64::LDRSWui: @@ -681,353 +255,233 @@ static bool supportLoadFromLiteral(const MachineInstr *Instr) { case AArch64::LDRQui: return true; } - // Unreachable. - return false; } -/// Check whether the given instruction is a LOH candidate. -/// \param UseToDefs is used to check that Instr is at the end of LOH supported -/// chain. -/// \pre UseToDefs contains only on def per use, i.e., obvious non candidate are -/// already been filtered out. -static bool isCandidate(const MachineInstr *Instr, - const InstrToInstrs &UseToDefs, - const MachineDominatorTree *MDT) { - if (!isCandidateLoad(Instr) && !isCandidateStore(Instr)) - return false; +/// Number of GPR registers traked by mapRegToGPRIndex() +static const unsigned N_GPR_REGS = 31; +/// Map register number to index from 0-30. +static int mapRegToGPRIndex(MCPhysReg Reg) { + static_assert(AArch64::X28 - AArch64::X0 + 3 == N_GPR_REGS, "Number of GPRs"); + static_assert(AArch64::W30 - AArch64::W0 + 1 == N_GPR_REGS, "Number of GPRs"); + if (AArch64::X0 <= Reg && Reg <= AArch64::X28) + return Reg - AArch64::X0; + if (AArch64::W0 <= Reg && Reg <= AArch64::W30) + return Reg - AArch64::W0; + // TableGen gives "FP" and "LR" an index not adjacent to X28 so we have to + // handle them as special cases. + if (Reg == AArch64::FP) + return 29; + if (Reg == AArch64::LR) + return 30; + return -1; +} - const MachineInstr *Def = *UseToDefs.find(Instr)->second.begin(); - if (Def->getOpcode() != AArch64::ADRP) { - // At this point, Def is ADDXri or LDRXui of the right type of - // symbol, because we filtered out the uses that were not defined - // by these kind of instructions (+ ADRP). +/// State tracked per register. +/// The main algorithm walks backwards over a basic block maintaining this +/// datastructure for each tracked general purpose register. +struct LOHInfo { + MCLOHType Type : 8; ///< "Best" type of LOH possible. + bool IsCandidate : 1; ///< Possible LOH candidate. + bool OneUser : 1; ///< Found exactly one user (yet). + bool MultiUsers : 1; ///< Found multiple users. + const MachineInstr *MI0; ///< First instruction involved in the LOH. + const MachineInstr *MI1; ///< Second instruction involved in the LOH + /// (if any). + const MachineInstr *LastADRP; ///< Last ADRP in same register. +}; - // Check if this forms a simple chain: each intermediate node must - // dominates the next one. - if (!MDT->dominates(Def, Instr)) - return false; - // Move one node up in the simple chain. - if (UseToDefs.find(Def) == - UseToDefs.end() - // The map may contain garbage we have to ignore. 
- || - UseToDefs.find(Def)->second.empty()) - return false; - Instr = Def; - Def = *UseToDefs.find(Def)->second.begin(); +/// Update state \p Info given \p MI uses the tracked register. +static void handleUse(const MachineInstr &MI, const MachineOperand &MO, + LOHInfo &Info) { + // We have multiple uses if we already found one before. + if (Info.MultiUsers || Info.OneUser) { + Info.IsCandidate = false; + Info.MultiUsers = true; + return; } - // Check if we reached the top of the simple chain: - // - top is ADRP. - // - check the simple chain property: each intermediate node must - // dominates the next one. - if (Def->getOpcode() == AArch64::ADRP) - return MDT->dominates(Def, Instr); - return false; -} - -static bool registerADRCandidate(const MachineInstr &Use, - const InstrToInstrs &UseToDefs, - const InstrToInstrs *DefsPerColorToUses, - AArch64FunctionInfo &AArch64FI, - SetOfMachineInstr *InvolvedInLOHs, - const MapRegToId &RegToId) { - // Look for opportunities to turn ADRP -> ADD or - // ADRP -> LDR GOTPAGEOFF into ADR. - // If ADRP has more than one use. Give up. - if (Use.getOpcode() != AArch64::ADDXri && - (Use.getOpcode() != AArch64::LDRXui || - !(Use.getOperand(2).getTargetFlags() & AArch64II::MO_GOT))) - return false; - InstrToInstrs::const_iterator It = UseToDefs.find(&Use); - // The map may contain garbage that we need to ignore. - if (It == UseToDefs.end() || It->second.empty()) - return false; - const MachineInstr &Def = **It->second.begin(); - if (Def.getOpcode() != AArch64::ADRP) - return false; - // Check the number of users of ADRP. - const SetOfMachineInstr *Users = - getUses(DefsPerColorToUses, - RegToId.find(Def.getOperand(0).getReg())->second, Def); - if (Users->size() > 1) { - ++NumADRComplexCandidate; - return false; + Info.OneUser = true; + + // Start new LOHInfo if applicable. + if (isCandidateLoad(MI)) { + Info.Type = MCLOH_AdrpLdr; + Info.IsCandidate = true; + Info.MI0 = &MI; + // Note that even this is AdrpLdr now, we can switch to a Ldr variant + // later. + } else if (isCandidateStore(MI, MO)) { + Info.Type = MCLOH_AdrpAddStr; + Info.IsCandidate = true; + Info.MI0 = &MI; + Info.MI1 = nullptr; + } else if (MI.getOpcode() == AArch64::ADDXri) { + Info.Type = MCLOH_AdrpAdd; + Info.IsCandidate = true; + Info.MI0 = &MI; + } else if (MI.getOpcode() == AArch64::LDRXui && + MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) { + Info.Type = MCLOH_AdrpLdrGot; + Info.IsCandidate = true; + Info.MI0 = &MI; } - ++NumADRSimpleCandidate; - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(&Def)) && - "ADRP already involved in LOH."); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(&Use)) && - "ADD already involved in LOH."); - DEBUG(dbgs() << "Record AdrpAdd\n" << Def << '\n' << Use << '\n'); - - AArch64FI.addLOHDirective( - Use.getOpcode() == AArch64::ADDXri ? MCLOH_AdrpAdd : MCLOH_AdrpLdrGot, - {&Def, &Use}); - return true; } -/// Based on the use to defs information (in non-ADRPMode), compute the -/// opportunities of LOH non-ADRP-related -static void computeOthers(const InstrToInstrs &UseToDefs, - const InstrToInstrs *DefsPerColorToUses, - AArch64FunctionInfo &AArch64FI, const MapRegToId &RegToId, - const MachineDominatorTree *MDT) { - SetOfMachineInstr *InvolvedInLOHs = nullptr; -#ifndef NDEBUG - SetOfMachineInstr InvolvedInLOHsStorage; - InvolvedInLOHs = &InvolvedInLOHsStorage; -#endif // NDEBUG - DEBUG(dbgs() << "*** Compute LOH for Others\n"); - // ADRP -> ADD/LDR -> LDR/STR pattern. 
- // Fall back to ADRP -> ADD pattern if we fail to catch the bigger pattern. +/// Update state \p Info given the tracked register is clobbered. +static void handleClobber(LOHInfo &Info) { + Info.IsCandidate = false; + Info.OneUser = false; + Info.MultiUsers = false; + Info.LastADRP = nullptr; +} - // FIXME: When the statistics are not important, - // This initial filtering loop can be merged into the next loop. - // Currently, we didn't do it to have the same code for both DEBUG and - // NDEBUG builds. Indeed, the iterator of the second loop would need - // to be changed. - SetOfMachineInstr PotentialCandidates; - SetOfMachineInstr PotentialADROpportunities; - for (auto &Use : UseToDefs) { - // If no definition is available, this is a non candidate. - if (Use.second.empty()) - continue; - // Keep only instructions that are load or store and at the end of - // a ADRP -> ADD/LDR/Nothing chain. - // We already filtered out the no-chain cases. - if (!isCandidate(Use.first, UseToDefs, MDT)) { - PotentialADROpportunities.insert(Use.first); - continue; +/// Update state \p Info given that \p MI is possibly the middle instruction +/// of an LOH involving 3 instructions. +static bool handleMiddleInst(const MachineInstr &MI, LOHInfo &DefInfo, + LOHInfo &OpInfo) { + if (!DefInfo.IsCandidate || (&DefInfo != &OpInfo && OpInfo.OneUser)) + return false; + // Copy LOHInfo for dest register to LOHInfo for source register. + if (&DefInfo != &OpInfo) { + OpInfo = DefInfo; + // Invalidate \p DefInfo because we track it in \p OpInfo now. + handleClobber(DefInfo); + } else + DefInfo.LastADRP = nullptr; + + // Advance state machine. + assert(OpInfo.IsCandidate && "Expect valid state"); + if (MI.getOpcode() == AArch64::ADDXri && canAddBePartOfLOH(MI)) { + if (OpInfo.Type == MCLOH_AdrpLdr) { + OpInfo.Type = MCLOH_AdrpAddLdr; + OpInfo.IsCandidate = true; + OpInfo.MI1 = &MI; + return true; + } else if (OpInfo.Type == MCLOH_AdrpAddStr && OpInfo.MI1 == nullptr) { + OpInfo.Type = MCLOH_AdrpAddStr; + OpInfo.IsCandidate = true; + OpInfo.MI1 = &MI; + return true; } - PotentialCandidates.insert(Use.first); - } - - // Make the following distinctions for statistics as the linker does - // know how to decode instructions: - // - ADD/LDR/Nothing make there different patterns. - // - LDR/STR make two different patterns. - // Hence, 6 - 1 base patterns. - // (because ADRP-> Nothing -> STR is not simplifiable) - - // The linker is only able to have a simple semantic, i.e., if pattern A - // do B. - // However, we want to see the opportunity we may miss if we were able to - // catch more complex cases. - - // PotentialCandidates are result of a chain ADRP -> ADD/LDR -> - // A potential candidate becomes a candidate, if its current immediate - // operand is zero and all nodes of the chain have respectively only one user -#ifndef NDEBUG - SetOfMachineInstr DefsOfPotentialCandidates; -#endif - for (const MachineInstr *Candidate : PotentialCandidates) { - // Get the definition of the candidate i.e., ADD or LDR. - const MachineInstr *Def = *UseToDefs.find(Candidate)->second.begin(); - // Record the elements of the chain. - const MachineInstr *L1 = Def; - const MachineInstr *L2 = nullptr; - unsigned ImmediateDefOpc = Def->getOpcode(); - if (Def->getOpcode() != AArch64::ADRP) { - // Check the number of users of this node. 
- const SetOfMachineInstr *Users = - getUses(DefsPerColorToUses, - RegToId.find(Def->getOperand(0).getReg())->second, *Def); - if (Users->size() > 1) { -#ifndef NDEBUG - // if all the uses of this def are in potential candidate, this is - // a complex candidate of level 2. - bool IsLevel2 = true; - for (const MachineInstr *MI : *Users) { - if (!PotentialCandidates.count(MI)) { - ++NumTooCplxLvl2; - IsLevel2 = false; - break; - } - } - if (IsLevel2) - ++NumCplxLvl2; -#endif // NDEBUG - PotentialADROpportunities.insert(Def); - continue; - } - L2 = Def; - Def = *UseToDefs.find(Def)->second.begin(); - L1 = Def; - } // else the element in the middle of the chain is nothing, thus - // Def already contains the first element of the chain. - - // Check the number of users of the first node in the chain, i.e., ADRP - const SetOfMachineInstr *Users = - getUses(DefsPerColorToUses, - RegToId.find(Def->getOperand(0).getReg())->second, *Def); - if (Users->size() > 1) { -#ifndef NDEBUG - // if all the uses of this def are in the defs of the potential candidate, - // this is a complex candidate of level 1 - if (DefsOfPotentialCandidates.empty()) { - // lazy init - DefsOfPotentialCandidates = PotentialCandidates; - for (const MachineInstr *Candidate : PotentialCandidates) { - if (!UseToDefs.find(Candidate)->second.empty()) - DefsOfPotentialCandidates.insert( - *UseToDefs.find(Candidate)->second.begin()); - } - } - bool Found = false; - for (auto &Use : *Users) { - if (!DefsOfPotentialCandidates.count(Use)) { - ++NumTooCplxLvl1; - Found = true; - break; - } - } - if (!Found) - ++NumCplxLvl1; -#endif // NDEBUG - continue; + } else { + assert(MI.getOpcode() == AArch64::LDRXui && "Expect LDRXui"); + assert((MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) && + "Expected GOT relocation"); + if (OpInfo.Type == MCLOH_AdrpAddStr && OpInfo.MI1 == nullptr) { + OpInfo.Type = MCLOH_AdrpLdrGotStr; + OpInfo.IsCandidate = true; + OpInfo.MI1 = &MI; + return true; + } else if (OpInfo.Type == MCLOH_AdrpLdr) { + OpInfo.Type = MCLOH_AdrpLdrGotLdr; + OpInfo.IsCandidate = true; + OpInfo.MI1 = &MI; + return true; } + } + return false; +} - bool IsL2Add = (ImmediateDefOpc == AArch64::ADDXri); - // If the chain is three instructions long and ldr is the second element, - // then this ldr must load form GOT, otherwise this is not a correct chain. - if (L2 && !IsL2Add && - !(L2->getOperand(2).getTargetFlags() & AArch64II::MO_GOT)) - continue; - SmallVector<const MachineInstr *, 3> Args; - MCLOHType Kind; - if (isCandidateLoad(Candidate)) { - if (!L2) { - // At this point, the candidate LOH indicates that the ldr instruction - // may use a direct access to the symbol. There is not such encoding - // for loads of byte and half. - if (!supportLoadFromLiteral(Candidate)) - continue; +/// Update state when seeing and ADRP instruction. +static void handleADRP(const MachineInstr &MI, AArch64FunctionInfo &AFI, + LOHInfo &Info) { + if (Info.LastADRP != nullptr) { + DEBUG(dbgs() << "Adding MCLOH_AdrpAdrp:\n" << '\t' << MI << '\t' + << *Info.LastADRP); + AFI.addLOHDirective(MCLOH_AdrpAdrp, {&MI, Info.LastADRP}); + ++NumADRPSimpleCandidate; + } - DEBUG(dbgs() << "Record AdrpLdr:\n" << *L1 << '\n' << *Candidate - << '\n'); - Kind = MCLOH_AdrpLdr; - Args.push_back(L1); - Args.push_back(Candidate); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) && - "L1 already involved in LOH."); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) && - "Candidate already involved in LOH."); + // Produce LOH directive if possible. 
+ if (Info.IsCandidate) { + switch (Info.Type) { + case MCLOH_AdrpAdd: + DEBUG(dbgs() << "Adding MCLOH_AdrpAdd:\n" << '\t' << MI << '\t' + << *Info.MI0); + AFI.addLOHDirective(MCLOH_AdrpAdd, {&MI, Info.MI0}); + ++NumADRSimpleCandidate; + break; + case MCLOH_AdrpLdr: + if (supportLoadFromLiteral(*Info.MI0)) { + DEBUG(dbgs() << "Adding MCLOH_AdrpLdr:\n" << '\t' << MI << '\t' + << *Info.MI0); + AFI.addLOHDirective(MCLOH_AdrpLdr, {&MI, Info.MI0}); ++NumADRPToLDR; - } else { - DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot") - << "Ldr:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate - << '\n'); - - Kind = IsL2Add ? MCLOH_AdrpAddLdr : MCLOH_AdrpLdrGotLdr; - Args.push_back(L1); - Args.push_back(L2); - Args.push_back(Candidate); - - PotentialADROpportunities.remove(L2); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) && - "L1 already involved in LOH."); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) && - "L2 already involved in LOH."); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) && - "Candidate already involved in LOH."); -#ifndef NDEBUG - // get the immediate of the load - if (Candidate->getOperand(2).getImm() == 0) - if (ImmediateDefOpc == AArch64::ADDXri) - ++NumADDToLDR; - else - ++NumLDRToLDR; - else if (ImmediateDefOpc == AArch64::ADDXri) - ++NumADDToLDRWithImm; - else - ++NumLDRToLDRWithImm; -#endif // NDEBUG } - } else { - if (ImmediateDefOpc == AArch64::ADRP) - continue; - else { - - DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot") - << "Str:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate - << '\n'); - - Kind = IsL2Add ? MCLOH_AdrpAddStr : MCLOH_AdrpLdrGotStr; - Args.push_back(L1); - Args.push_back(L2); - Args.push_back(Candidate); - - PotentialADROpportunities.remove(L2); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) && - "L1 already involved in LOH."); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) && - "L2 already involved in LOH."); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) && - "Candidate already involved in LOH."); -#ifndef NDEBUG - // get the immediate of the store - if (Candidate->getOperand(2).getImm() == 0) - if (ImmediateDefOpc == AArch64::ADDXri) - ++NumADDToSTR; - else - ++NumLDRToSTR; - else if (ImmediateDefOpc == AArch64::ADDXri) - ++NumADDToSTRWithImm; - else - ++NumLDRToSTRWithImm; -#endif // DEBUG + break; + case MCLOH_AdrpAddLdr: + DEBUG(dbgs() << "Adding MCLOH_AdrpAddLdr:\n" << '\t' << MI << '\t' + << *Info.MI1 << '\t' << *Info.MI0); + AFI.addLOHDirective(MCLOH_AdrpAddLdr, {&MI, Info.MI1, Info.MI0}); + ++NumADDToLDR; + break; + case MCLOH_AdrpAddStr: + if (Info.MI1 != nullptr) { + DEBUG(dbgs() << "Adding MCLOH_AdrpAddStr:\n" << '\t' << MI << '\t' + << *Info.MI1 << '\t' << *Info.MI0); + AFI.addLOHDirective(MCLOH_AdrpAddStr, {&MI, Info.MI1, Info.MI0}); + ++NumADDToSTR; } + break; + case MCLOH_AdrpLdrGotLdr: + DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGotLdr:\n" << '\t' << MI << '\t' + << *Info.MI1 << '\t' << *Info.MI0); + AFI.addLOHDirective(MCLOH_AdrpLdrGotLdr, {&MI, Info.MI1, Info.MI0}); + ++NumLDRToLDR; + break; + case MCLOH_AdrpLdrGotStr: + DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGotStr:\n" << '\t' << MI << '\t' + << *Info.MI1 << '\t' << *Info.MI0); + AFI.addLOHDirective(MCLOH_AdrpLdrGotStr, {&MI, Info.MI1, Info.MI0}); + ++NumLDRToSTR; + break; + case MCLOH_AdrpLdrGot: + DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGot:\n" << '\t' << MI << '\t' + << *Info.MI0); + AFI.addLOHDirective(MCLOH_AdrpLdrGot, {&MI, Info.MI0}); + break; + case MCLOH_AdrpAdrp: + 
llvm_unreachable("MCLOH_AdrpAdrp not used in state machine"); } - AArch64FI.addLOHDirective(Kind, Args); } - // Now, we grabbed all the big patterns, check ADR opportunities. - for (const MachineInstr *Candidate : PotentialADROpportunities) - registerADRCandidate(*Candidate, UseToDefs, DefsPerColorToUses, AArch64FI, - InvolvedInLOHs, RegToId); + handleClobber(Info); + Info.LastADRP = &MI; } -/// Look for every register defined by potential LOHs candidates. -/// Map these registers with dense id in @p RegToId and vice-versa in -/// @p IdToReg. @p IdToReg is populated only in DEBUG mode. -static void collectInvolvedReg(const MachineFunction &MF, MapRegToId &RegToId, - MapIdToReg &IdToReg, - const TargetRegisterInfo *TRI) { - unsigned CurRegId = 0; - if (!PreCollectRegister) { - unsigned NbReg = TRI->getNumRegs(); - for (; CurRegId < NbReg; ++CurRegId) { - RegToId[CurRegId] = CurRegId; - DEBUG(IdToReg.push_back(CurRegId)); - DEBUG(assert(IdToReg[CurRegId] == CurRegId && "Reg index mismatches")); - } +static void handleRegMaskClobber(const uint32_t *RegMask, MCPhysReg Reg, + LOHInfo *LOHInfos) { + if (!MachineOperand::clobbersPhysReg(RegMask, Reg)) return; - } - - DEBUG(dbgs() << "** Collect Involved Register\n"); - for (const auto &MBB : MF) { - for (const MachineInstr &MI : MBB) { - if (!canDefBePartOfLOH(&MI) && - !isCandidateLoad(&MI) && !isCandidateStore(&MI)) - continue; + int Idx = mapRegToGPRIndex(Reg); + if (Idx >= 0) + handleClobber(LOHInfos[Idx]); +} - // Process defs - for (MachineInstr::const_mop_iterator IO = MI.operands_begin(), - IOEnd = MI.operands_end(); - IO != IOEnd; ++IO) { - if (!IO->isReg() || !IO->isDef()) - continue; - unsigned CurReg = IO->getReg(); - for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI) - if (RegToId.find(*AI) == RegToId.end()) { - DEBUG(IdToReg.push_back(*AI); - assert(IdToReg[CurRegId] == *AI && - "Reg index mismatches insertion index.")); - RegToId[*AI] = CurRegId++; - DEBUG(dbgs() << "Register: " << PrintReg(*AI, TRI) << '\n'); - } - } +static void handleNormalInst(const MachineInstr &MI, LOHInfo *LOHInfos) { + // Handle defs and regmasks. + for (const MachineOperand &MO : MI.operands()) { + if (MO.isRegMask()) { + const uint32_t *RegMask = MO.getRegMask(); + for (MCPhysReg Reg : AArch64::GPR32RegClass) + handleRegMaskClobber(RegMask, Reg, LOHInfos); + for (MCPhysReg Reg : AArch64::GPR64RegClass) + handleRegMaskClobber(RegMask, Reg, LOHInfos); + continue; } + if (!MO.isReg() || !MO.isDef()) + continue; + int Idx = mapRegToGPRIndex(MO.getReg()); + if (Idx < 0) + continue; + handleClobber(LOHInfos[Idx]); + } + // Handle uses. 
+ for (const MachineOperand &MO : MI.uses()) { + if (!MO.isReg() || !MO.readsReg()) + continue; + int Idx = mapRegToGPRIndex(MO.getReg()); + if (Idx < 0) + continue; + handleUse(MI, MO, LOHInfos[Idx]); } } @@ -1035,74 +489,59 @@ bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(*MF.getFunction())) return false; - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - const MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>(); - - MapRegToId RegToId; - MapIdToReg IdToReg; - AArch64FunctionInfo *AArch64FI = MF.getInfo<AArch64FunctionInfo>(); - assert(AArch64FI && "No MachineFunctionInfo for this function!"); - - DEBUG(dbgs() << "Looking for LOH in " << MF.getName() << '\n'); + DEBUG(dbgs() << "********** AArch64 Collect LOH **********\n" + << "Looking in function " << MF.getName() << '\n'); - collectInvolvedReg(MF, RegToId, IdToReg, TRI); - if (RegToId.empty()) - return false; + LOHInfo LOHInfos[N_GPR_REGS]; + AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>(); + for (const MachineBasicBlock &MBB : MF) { + // Reset register tracking state. + memset(LOHInfos, 0, sizeof(LOHInfos)); + // Live-out registers are used. + for (const MachineBasicBlock *Succ : MBB.successors()) { + for (const auto &LI : Succ->liveins()) { + int RegIdx = mapRegToGPRIndex(LI.PhysReg); + if (RegIdx >= 0) + LOHInfos[RegIdx].OneUser = true; + } + } - MachineInstr *DummyOp = nullptr; - if (BasicBlockScopeOnly) { - const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); - // For local analysis, create a dummy operation to record uses that are not - // local. - DummyOp = MF.CreateMachineInstr(TII->get(AArch64::COPY), DebugLoc()); + // Walk the basic block backwards and update the per register state machine + // in the process. + for (const MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) { + unsigned Opcode = MI.getOpcode(); + switch (Opcode) { + case AArch64::ADDXri: + case AArch64::LDRXui: + if (canDefBePartOfLOH(MI)) { + const MachineOperand &Def = MI.getOperand(0); + const MachineOperand &Op = MI.getOperand(1); + assert(Def.isReg() && Def.isDef() && "Expected reg def"); + assert(Op.isReg() && Op.isUse() && "Expected reg use"); + int DefIdx = mapRegToGPRIndex(Def.getReg()); + int OpIdx = mapRegToGPRIndex(Op.getReg()); + if (DefIdx >= 0 && OpIdx >= 0 && + handleMiddleInst(MI, LOHInfos[DefIdx], LOHInfos[OpIdx])) + continue; + } + break; + case AArch64::ADRP: + const MachineOperand &Op0 = MI.getOperand(0); + int Idx = mapRegToGPRIndex(Op0.getReg()); + if (Idx >= 0) { + handleADRP(MI, AFI, LOHInfos[Idx]); + continue; + } + break; + } + handleNormalInst(MI, LOHInfos); + } } - unsigned NbReg = RegToId.size(); - bool Modified = false; - - // Start with ADRP. - InstrToInstrs *ColorOpToReachedUses = new InstrToInstrs[NbReg]; - - // Compute the reaching def in ADRP mode, meaning ADRP definitions - // are first considered as uses. - reachingDef(MF, ColorOpToReachedUses, RegToId, true, DummyOp); - DEBUG(dbgs() << "ADRP reaching defs\n"); - DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg)); - - // Translate the definition to uses map into a use to definitions map to ease - // statistic computation. - InstrToInstrs ADRPToReachingDefs; - reachedUsesToDefs(ADRPToReachingDefs, ColorOpToReachedUses, RegToId, true); - - // Compute LOH for ADRP. - computeADRP(ADRPToReachingDefs, *AArch64FI, MDT); - delete[] ColorOpToReachedUses; - - // Continue with general ADRP -> ADD/LDR -> LDR/STR pattern. 
- ColorOpToReachedUses = new InstrToInstrs[NbReg]; - - // first perform a regular reaching def analysis. - reachingDef(MF, ColorOpToReachedUses, RegToId, false, DummyOp); - DEBUG(dbgs() << "All reaching defs\n"); - DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg)); - - // Turn that into a use to defs to ease statistic computation. - InstrToInstrs UsesToReachingDefs; - reachedUsesToDefs(UsesToReachingDefs, ColorOpToReachedUses, RegToId, false); - - // Compute other than AdrpAdrp LOH. - computeOthers(UsesToReachingDefs, ColorOpToReachedUses, *AArch64FI, RegToId, - MDT); - delete[] ColorOpToReachedUses; - - if (BasicBlockScopeOnly) - MF.DeleteMachineInstr(DummyOp); - - return Modified; + // Return "no change": The pass only collects information. + return false; } -/// createAArch64CollectLOHPass - returns an instance of the Statistic for -/// linker optimization pass. FunctionPass *llvm::createAArch64CollectLOHPass() { return new AArch64CollectLOH(); } diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 4c98253878e4..74a01835171b 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -11,28 +11,79 @@ // //===----------------------------------------------------------------------===// -#include "AArch64ISelLowering.h" #include "AArch64CallingConvention.h" #include "AArch64MachineFunctionInfo.h" +#include "AArch64ISelLowering.h" #include "AArch64PerfectShuffle.h" +#include "AArch64RegisterInfo.h" #include "AArch64Subtarget.h" -#include "AArch64TargetMachine.h" -#include "AArch64TargetObjectFile.h" #include "MCTargetDesc/AArch64AddressingModes.h" +#include "Utils/AArch64BaseInfo.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/OperandTraits.h" #include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/Value.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include 
"llvm/Target/TargetCallingConv.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include <algorithm> +#include <bitset> +#include <cassert> +#include <cctype> +#include <cstdint> +#include <cstdlib> +#include <iterator> +#include <limits> +#include <tuple> +#include <utility> +#include <vector> + using namespace llvm; #define DEBUG_TYPE "aarch64-lower" @@ -59,7 +110,6 @@ static const MVT MVT_CC = MVT::i32; AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, const AArch64Subtarget &STI) : TargetLowering(TM), Subtarget(&STI) { - // AArch64 doesn't have comparisons which set GPRs or setcc instructions, so // we have to make something up. Arbitrarily, choose ZeroOrOne. setBooleanContents(ZeroOrOneBooleanContent); @@ -218,7 +268,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); - setOperationAction(ISD::CTPOP, MVT::i32, Custom); setOperationAction(ISD::CTPOP, MVT::i64, Custom); @@ -3632,6 +3681,7 @@ SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op, llvm_unreachable("Unexpected platform trying to use TLS"); } + SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); @@ -4549,7 +4599,6 @@ SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op, return DAG.getMergeValues(Ops, dl); } - /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two /// i64 values and take a 2 x i64 value to shift plus a shift amount. SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op, @@ -5074,10 +5123,11 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op, int WindowBase; int WindowScale; - bool operator ==(SDValue OtherVec) { return Vec == OtherVec; } ShuffleSourceInfo(SDValue Vec) - : Vec(Vec), MinElt(UINT_MAX), MaxElt(0), ShuffleVec(Vec), WindowBase(0), - WindowScale(1) {} + : Vec(Vec), MinElt(std::numeric_limits<unsigned>::max()), MaxElt(0), + ShuffleVec(Vec), WindowBase(0), WindowScale(1) {} + + bool operator ==(SDValue OtherVec) { return Vec == OtherVec; } }; // First gather all vectors used as an immediate source for this BUILD_VECTOR @@ -7028,7 +7078,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, return true; } case Intrinsic::aarch64_ldaxp: - case Intrinsic::aarch64_ldxp: { + case Intrinsic::aarch64_ldxp: Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i128; Info.ptrVal = I.getArgOperand(0); @@ -7038,9 +7088,8 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.readMem = true; Info.writeMem = false; return true; - } case Intrinsic::aarch64_stlxp: - case Intrinsic::aarch64_stxp: { + case Intrinsic::aarch64_stxp: Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i128; Info.ptrVal = I.getArgOperand(2); @@ -7050,7 +7099,6 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.readMem = false; Info.writeMem = true; return true; - } default: break; } @@ -8044,13 +8092,13 @@ static SDValue tryCombineToEXTR(SDNode *N, SDValue LHS; uint32_t ShiftLHS = 0; - bool LHSFromHi = 0; + bool LHSFromHi = false; if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi)) return SDValue(); SDValue RHS; uint32_t ShiftRHS = 0; - bool RHSFromHi = 0; + bool RHSFromHi = false; if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi)) return SDValue(); @@ 
-9732,52 +9780,51 @@ static bool isEquivalentMaskless(unsigned CC, unsigned width, switch(CC) { case AArch64CC::LE: - case AArch64CC::GT: { + case AArch64CC::GT: if ((AddConstant == 0) || (CompConstant == MaxUInt - 1 && AddConstant < 0) || (AddConstant >= 0 && CompConstant < 0) || (AddConstant <= 0 && CompConstant <= 0 && CompConstant < AddConstant)) return true; - } break; + break; case AArch64CC::LT: - case AArch64CC::GE: { + case AArch64CC::GE: if ((AddConstant == 0) || (AddConstant >= 0 && CompConstant <= 0) || (AddConstant <= 0 && CompConstant <= 0 && CompConstant <= AddConstant)) return true; - } break; + break; case AArch64CC::HI: - case AArch64CC::LS: { + case AArch64CC::LS: if ((AddConstant >= 0 && CompConstant < 0) || (AddConstant <= 0 && CompConstant >= -1 && CompConstant < AddConstant + MaxUInt)) return true; - } break; + break; case AArch64CC::PL: - case AArch64CC::MI: { + case AArch64CC::MI: if ((AddConstant == 0) || (AddConstant > 0 && CompConstant <= 0) || (AddConstant < 0 && CompConstant <= AddConstant)) return true; - } break; + break; case AArch64CC::LO: - case AArch64CC::HS: { + case AArch64CC::HS: if ((AddConstant >= 0 && CompConstant <= 0) || (AddConstant <= 0 && CompConstant >= 0 && CompConstant <= AddConstant + MaxUInt)) return true; - } break; + break; case AArch64CC::EQ: - case AArch64CC::NE: { + case AArch64CC::NE: if ((AddConstant > 0 && CompConstant < 0) || (AddConstant < 0 && CompConstant >= 0 && CompConstant < AddConstant + MaxUInt) || (AddConstant >= 0 && CompConstant >= 0 && CompConstant >= AddConstant) || (AddConstant <= 0 && CompConstant < 0 && CompConstant < AddConstant)) - return true; - } break; + break; case AArch64CC::VS: case AArch64CC::VC: case AArch64CC::AL: @@ -10501,7 +10548,7 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, if (ValTy->getPrimitiveSizeInBits() == 128) { Intrinsic::ID Int = IsAcquire ? Intrinsic::aarch64_ldaxp : Intrinsic::aarch64_ldxp; - Function *Ldxr = llvm::Intrinsic::getDeclaration(M, Int); + Function *Ldxr = Intrinsic::getDeclaration(M, Int); Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext())); Value *LoHi = Builder.CreateCall(Ldxr, Addr, "lohi"); @@ -10517,7 +10564,7 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, Type *Tys[] = { Addr->getType() }; Intrinsic::ID Int = IsAcquire ? 
Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr; - Function *Ldxr = llvm::Intrinsic::getDeclaration(M, Int, Tys); + Function *Ldxr = Intrinsic::getDeclaration(M, Int, Tys); return Builder.CreateTruncOrBitCast( Builder.CreateCall(Ldxr, Addr), @@ -10527,8 +10574,7 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, void AArch64TargetLowering::emitAtomicCmpXchgNoStoreLLBalance( IRBuilder<> &Builder) const { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); - Builder.CreateCall( - llvm::Intrinsic::getDeclaration(M, Intrinsic::aarch64_clrex)); + Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::aarch64_clrex)); } Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder, diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 626c934f236e..5c8acba26aab 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -14,16 +14,37 @@ #include "AArch64InstrInfo.h" #include "AArch64Subtarget.h" #include "MCTargetDesc/AArch64AddressingModes.h" +#include "Utils/AArch64BaseInfo.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/StackMaps.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TargetRegistry.h" -#include <algorithm> +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <cassert> +#include <cstdint> +#include <iterator> +#include <utility> using namespace llvm; @@ -529,19 +550,19 @@ void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB, default: llvm_unreachable("Unknown branch opcode in Cond"); case AArch64::CBZW: - Is64Bit = 0; + Is64Bit = false; CC = AArch64CC::EQ; break; case AArch64::CBZX: - Is64Bit = 1; + Is64Bit = true; CC = AArch64CC::EQ; break; case AArch64::CBNZW: - Is64Bit = 0; + Is64Bit = false; CC = AArch64CC::NE; break; case AArch64::CBNZX: - Is64Bit = 1; + Is64Bit = true; CC = AArch64CC::NE; break; } @@ -1044,7 +1065,7 @@ static unsigned sForm(MachineInstr &Instr) { case AArch64::SUBSWri: case AArch64::SUBSXrr: case AArch64::SUBSXri: - return Instr.getOpcode();; + return Instr.getOpcode(); case AArch64::ADDWrr: return AArch64::ADDSWrr; case AArch64::ADDWri: return AArch64::ADDSWri; @@ -1072,12 +1093,15 @@ static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) { } namespace { + struct UsedNZCV { - bool N; - bool Z; - bool C; - bool V; - UsedNZCV(): N(false), Z(false), C(false), V(false) {} + bool N = false; + bool Z = false; + bool C = false; + bool V = false; + + UsedNZCV() = default; + UsedNZCV& operator |=(const UsedNZCV& UsedFlags) { this->N |= 
UsedFlags.N; this->Z |= UsedFlags.Z; @@ -1086,6 +1110,7 @@ struct UsedNZCV { return *this; } }; + } // end anonymous namespace /// Find a condition code used by the instruction. @@ -1561,7 +1586,7 @@ bool AArch64InstrInfo::isScaledAddr(const MachineInstr &MI) const { /// Check all MachineMemOperands for a hint to suppress pairing. bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) const { - return any_of(MI.memoperands(), [](MachineMemOperand *MMO) { + return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) { return MMO->getFlags() & MOSuppressPair; }); } @@ -1994,7 +2019,7 @@ static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, void AArch64InstrInfo::copyPhysRegTuple( MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode, - llvm::ArrayRef<unsigned> Indices) const { + ArrayRef<unsigned> Indices) const { assert(Subtarget.hasNEON() && "Unexpected register copy without NEON"); const TargetRegisterInfo *TRI = &getRegisterInfo(); @@ -2583,7 +2608,7 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( // // <rdar://problem/11522048> // - if (MI.isCopy()) { + if (MI.isFullCopy()) { unsigned DstReg = MI.getOperand(0).getReg(); unsigned SrcReg = MI.getOperand(1).getReg(); if (SrcReg == AArch64::SP && @@ -2598,7 +2623,7 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( } } - // Handle the case where a copy is being spilled or refilled but the source + // Handle the case where a copy is being spilled or filled but the source // and destination register class don't match. For example: // // %vreg0<def> = COPY %XZR; GPR64common:%vreg0 @@ -2613,7 +2638,7 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( // // %vreg0<def> = COPY %vreg1; GPR64:%vreg0, FPR64:%vreg1 // - // will be refilled as + // will be filled as // // LDRDui %vreg0, fi<#0> // @@ -2622,9 +2647,11 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( // LDRXui %vregTemp, fi<#0> // %vreg0 = FMOV %vregTemp // - if (MI.isFullCopy() && Ops.size() == 1 && + if (MI.isCopy() && Ops.size() == 1 && // Make sure we're only folding the explicit COPY defs/uses. (Ops[0] == 0 || Ops[0] == 1)) { + bool IsSpill = Ops[0] == 0; + bool IsFill = !IsSpill; const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); const MachineRegisterInfo &MRI = MF.getRegInfo(); MachineBasicBlock &MBB = *MI.getParent(); @@ -2632,21 +2659,112 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( const MachineOperand &SrcMO = MI.getOperand(1); unsigned DstReg = DstMO.getReg(); unsigned SrcReg = SrcMO.getReg(); + // This is slightly expensive to compute for physical regs since + // getMinimalPhysRegClass is slow. auto getRegClass = [&](unsigned Reg) { return TargetRegisterInfo::isVirtualRegister(Reg) ? 
MRI.getRegClass(Reg) : TRI.getMinimalPhysRegClass(Reg); }; - const TargetRegisterClass &DstRC = *getRegClass(DstReg); - const TargetRegisterClass &SrcRC = *getRegClass(SrcReg); - if (DstRC.getSize() == SrcRC.getSize()) { - if (Ops[0] == 0) + + if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) { + assert(getRegClass(DstReg)->getSize() == getRegClass(SrcReg)->getSize() && + "Mismatched register size in non subreg COPY"); + if (IsSpill) storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex, - &SrcRC, &TRI); + getRegClass(SrcReg), &TRI); else - loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, &DstRC, &TRI); + loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, + getRegClass(DstReg), &TRI); return &*--InsertPt; } + + // Handle cases like spilling def of: + // + // %vreg0:sub_32<def,read-undef> = COPY %WZR; GPR64common:%vreg0 + // + // where the physical register source can be widened and stored to the full + // virtual reg destination stack slot, in this case producing: + // + // STRXui %XZR, <fi#0> + // + if (IsSpill && DstMO.isUndef() && + TargetRegisterInfo::isPhysicalRegister(SrcReg)) { + assert(SrcMO.getSubReg() == 0 && + "Unexpected subreg on physical register"); + const TargetRegisterClass *SpillRC; + unsigned SpillSubreg; + switch (DstMO.getSubReg()) { + default: + SpillRC = nullptr; + break; + case AArch64::sub_32: + case AArch64::ssub: + if (AArch64::GPR32RegClass.contains(SrcReg)) { + SpillRC = &AArch64::GPR64RegClass; + SpillSubreg = AArch64::sub_32; + } else if (AArch64::FPR32RegClass.contains(SrcReg)) { + SpillRC = &AArch64::FPR64RegClass; + SpillSubreg = AArch64::ssub; + } else + SpillRC = nullptr; + break; + case AArch64::dsub: + if (AArch64::FPR64RegClass.contains(SrcReg)) { + SpillRC = &AArch64::FPR128RegClass; + SpillSubreg = AArch64::dsub; + } else + SpillRC = nullptr; + break; + } + + if (SpillRC) + if (unsigned WidenedSrcReg = + TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) { + storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(), + FrameIndex, SpillRC, &TRI); + return &*--InsertPt; + } + } + + // Handle cases like filling use of: + // + // %vreg0:sub_32<def,read-undef> = COPY %vreg1; GPR64:%vreg0, GPR32:%vreg1 + // + // where we can load the full virtual reg source stack slot, into the subreg + // destination, in this case producing: + // + // LDRWui %vreg0:sub_32<def,read-undef>, <fi#0> + // + if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) { + const TargetRegisterClass *FillRC; + switch (DstMO.getSubReg()) { + default: + FillRC = nullptr; + break; + case AArch64::sub_32: + FillRC = &AArch64::GPR32RegClass; + break; + case AArch64::ssub: + FillRC = &AArch64::FPR32RegClass; + break; + case AArch64::dsub: + FillRC = &AArch64::FPR64RegClass; + break; + } + + if (FillRC) { + assert(getRegClass(SrcReg)->getSize() == FillRC->getSize() && + "Mismatched regclass size on folded subreg COPY"); + loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI); + MachineInstr &LoadMI = *--InsertPt; + MachineOperand &LoadDst = LoadMI.getOperand(0); + assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load"); + LoadDst.setSubReg(DstMO.getSubReg()); + LoadDst.setIsUndef(); + return &LoadMI; + } + } } // Cannot fold. 
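The fill handling just above keys the width of the reload off the subregister index on the COPY destination. A minimal sketch of that selection logic, using placeholder enums instead of LLVM's TargetRegisterClass pointers and AArch64 subregister indices (nothing here is the real API):

#include <optional>

// Placeholder types; the real code works with TargetRegisterClass pointers
// and AArch64 subregister indices.
enum class SubRegIdx { Sub32, SSub, DSub, Other };
enum class RegClass { GPR32, FPR32, FPR64 };

// Mirrors the FillRC switch above: choose the register class to load from the
// stack slot when the COPY writes only a subregister of an undef destination.
std::optional<RegClass> fillClassFor(SubRegIdx Idx) {
  switch (Idx) {
  case SubRegIdx::Sub32: return RegClass::GPR32; // 32-bit integer lane
  case SubRegIdx::SSub:  return RegClass::FPR32; // 32-bit FP lane
  case SubRegIdx::DSub:  return RegClass::FPR64; // 64-bit FP lane
  default:               return std::nullopt;    // leave the COPY unfolded
  }
}

The spill direction works the same way in reverse: a physical subregister source such as WZR is widened to its super-register (XZR, via getMatchingSuperReg) so the store covers the full stack slot.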
@@ -2936,7 +3054,7 @@ bool AArch64InstrInfo::useMachineCombiner() const { return true; } -// + // True when Opc sets flag static bool isCombineInstrSettingFlag(unsigned Opc) { switch (Opc) { @@ -2955,7 +3073,7 @@ static bool isCombineInstrSettingFlag(unsigned Opc) { } return false; } -// + // 32b Opcodes that can be combined with a MUL static bool isCombineInstrCandidate32(unsigned Opc) { switch (Opc) { @@ -2974,7 +3092,7 @@ static bool isCombineInstrCandidate32(unsigned Opc) { } return false; } -// + // 64b Opcodes that can be combined with a MUL static bool isCombineInstrCandidate64(unsigned Opc) { switch (Opc) { @@ -2993,7 +3111,7 @@ static bool isCombineInstrCandidate64(unsigned Opc) { } return false; } -// + // FP Opcodes that can be combined with a FMUL static bool isCombineInstrCandidateFP(const MachineInstr &Inst) { switch (Inst.getOpcode()) { @@ -3009,13 +3127,13 @@ static bool isCombineInstrCandidateFP(const MachineInstr &Inst) { case AArch64::FSUBv2f32: case AArch64::FSUBv2f64: case AArch64::FSUBv4f32: - TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options; - return (Options.UnsafeFPMath || - Options.AllowFPOpFusion == FPOpFusion::Fast); + TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options; + return (Options.UnsafeFPMath || + Options.AllowFPOpFusion == FPOpFusion::Fast); } return false; } -// + // Opcodes that can be combined with a MUL static bool isCombineInstrCandidate(unsigned Opc) { return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc)); @@ -3205,7 +3323,7 @@ static bool getFMAPatterns(MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns) { if (!isCombineInstrCandidateFP(Root)) - return 0; + return false; MachineBasicBlock &MBB = *Root.getParent(); bool Found = false; @@ -3971,8 +4089,6 @@ void AArch64InstrInfo::genAlternativeCodeSequence( // Record MUL and ADD/SUB for deletion DelInstrs.push_back(MUL); DelInstrs.push_back(&Root); - - return; } /// \brief Replace csincr-branch sequence by simple conditional branch @@ -4148,6 +4264,7 @@ AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { ArrayRef<std::pair<unsigned, const char *>> AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const { using namespace AArch64II; + static const std::pair<unsigned, const char *> TargetFlags[] = { {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"}, @@ -4162,6 +4279,7 @@ AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const { ArrayRef<std::pair<unsigned, const char *>> AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { using namespace AArch64II; + static const std::pair<unsigned, const char *> TargetFlags[] = { {MO_GOT, "aarch64-got"}, {MO_NC, "aarch64-nc"}, diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h index 90b2c0896872..5037866925d3 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -162,6 +162,10 @@ public: int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const override; + // This tells target independent code that it is okay to pass instructions + // with subreg operands to foldMemoryOperandImpl. 
+ bool isSubregFoldable() const override { return true; } + using TargetInstrInfo::foldMemoryOperandImpl; MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp index 20de07424c53..b51473524c72 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -1071,8 +1071,12 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const { return false; } - const AArch64CC::CondCode CC = changeICMPPredToAArch64CC( - (CmpInst::Predicate)I.getOperand(1).getPredicate()); + // CSINC increments the result by one when the condition code is false. + // Therefore, we have to invert the predicate to get an increment by 1 when + // the predicate is true. + const AArch64CC::CondCode invCC = + changeICMPPredToAArch64CC(CmpInst::getInversePredicate( + (CmpInst::Predicate)I.getOperand(1).getPredicate())); MachineInstr &CmpMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc)) .addDef(ZReg) @@ -1084,7 +1088,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const { .addDef(I.getOperand(0).getReg()) .addUse(AArch64::WZR) .addUse(AArch64::WZR) - .addImm(CC); + .addImm(invCC); constrainSelectedInstRegOperands(CmpMI, TII, TRI, RBI); constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI); diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.h b/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.h index 0d44e696ac20..2c6e5a912fb7 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.h +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.h @@ -17,6 +17,7 @@ #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" namespace llvm { + class AArch64InstrInfo; class AArch64RegisterBankInfo; class AArch64RegisterInfo; @@ -29,7 +30,7 @@ public: const AArch64Subtarget &STI, const AArch64RegisterBankInfo &RBI); - virtual bool select(MachineInstr &I) const override; + bool select(MachineInstr &I) const override; private: /// tblgen-erated 'select' implementation, used as the initial selector for @@ -43,5 +44,6 @@ private: const AArch64RegisterBankInfo &RBI; }; -} // End llvm namespace. -#endif +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_AARCH64_AARCH64INSTRUCTIONSELECTOR_H diff --git a/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h index ca2860afe13d..f0bffe544158 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -14,17 +14,18 @@ #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64MACHINEFUNCTIONINFO_H #define LLVM_LIB_TARGET_AARCH64_AARCH64MACHINEFUNCTIONINFO_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/MC/MCLinkerOptimizationHint.h" +#include <cassert> namespace llvm { /// AArch64FunctionInfo - This class is derived from MachineFunctionInfo and /// contains private AArch64-specific information for each MachineFunction. class AArch64FunctionInfo final : public MachineFunctionInfo { - /// Number of bytes of arguments this function has on the stack. If the callee /// is expected to restore the argument stack this should be a multiple of 16, /// all usable during a tail call. 
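The CSINC change in AArch64InstructionSelector.cpp above hinges on the instruction's semantics: CSINC writes the second source plus one when the condition is false, so materializing a compare result with CSINC WZR, WZR needs the inverse predicate. A small model of that behaviour, with hypothetical helper names rather than LLVM or target code:

#include <cstdint>

// CSINC Rd, Rn, Rm, cond  =>  Rd = cond ? Rn : Rm + 1
uint32_t csinc(bool cond, uint32_t n, uint32_t m) {
  return cond ? n : m + 1;
}

// To produce (a == b) as 0/1 via "CSINC Rd, WZR, WZR, cond", the increment
// path must be taken exactly when a == b holds, so the condition passed to
// the instruction is NE, the inverse of the predicate being materialized.
uint32_t materializeEq(uint32_t a, uint32_t b) {
  bool ne = (a != b);      // inverse of EQ
  return csinc(ne, 0, 0);  // 1 when a == b, 0 otherwise
}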
@@ -34,16 +35,16 @@ class AArch64FunctionInfo final : public MachineFunctionInfo { /// space to a function with 16-bytes then misalignment of this value would /// make a stack adjustment necessary, which could not be undone by the /// callee. - unsigned BytesInStackArgArea; + unsigned BytesInStackArgArea = 0; /// The number of bytes to restore to deallocate space for incoming /// arguments. Canonically 0 in the C calling convention, but non-zero when /// callee is expected to pop the args. - unsigned ArgumentStackToRestore; + unsigned ArgumentStackToRestore = 0; /// HasStackFrame - True if this function has a stack frame. Set by /// determineCalleeSaves(). - bool HasStackFrame; + bool HasStackFrame = false; /// \brief Amount of stack frame size, not including callee-saved registers. unsigned LocalStackSize; @@ -53,54 +54,44 @@ class AArch64FunctionInfo final : public MachineFunctionInfo { /// \brief Number of TLS accesses using the special (combinable) /// _TLS_MODULE_BASE_ symbol. - unsigned NumLocalDynamicTLSAccesses; + unsigned NumLocalDynamicTLSAccesses = 0; /// \brief FrameIndex for start of varargs area for arguments passed on the /// stack. - int VarArgsStackIndex; + int VarArgsStackIndex = 0; /// \brief FrameIndex for start of varargs area for arguments passed in /// general purpose registers. - int VarArgsGPRIndex; + int VarArgsGPRIndex = 0; /// \brief Size of the varargs area for arguments passed in general purpose /// registers. - unsigned VarArgsGPRSize; + unsigned VarArgsGPRSize = 0; /// \brief FrameIndex for start of varargs area for arguments passed in /// floating-point registers. - int VarArgsFPRIndex; + int VarArgsFPRIndex = 0; /// \brief Size of the varargs area for arguments passed in floating-point /// registers. - unsigned VarArgsFPRSize; + unsigned VarArgsFPRSize = 0; /// True if this function has a subset of CSRs that is handled explicitly via /// copies. - bool IsSplitCSR; + bool IsSplitCSR = false; /// True when the stack gets realigned dynamically because the size of stack /// frame is unknown at compile time. e.g., in case of VLAs. - bool StackRealigned; + bool StackRealigned = false; /// True when the callee-save stack area has unused gaps that may be used for /// other stack allocations. 
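The AArch64FunctionInfo hunk continuing below finishes converting the class to C++11 in-class default member initializers, which lets the default constructor become = default and the MachineFunction constructor drop its repeated initializer list. A stripped-down sketch of the pattern, with illustrative field names only:

// Before: every constructor repeats the initial values.
class FunctionInfoOld {
  unsigned BytesInStackArgArea;
  bool HasStackFrame;
public:
  FunctionInfoOld() : BytesInStackArgArea(0), HasStackFrame(false) {}
};

// After: the defaults live next to the members and the constructor is defaulted.
class FunctionInfoNew {
  unsigned BytesInStackArgArea = 0;
  bool HasStackFrame = false;
public:
  FunctionInfoNew() = default;
};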
- bool CalleeSaveStackHasFreeSpace; + bool CalleeSaveStackHasFreeSpace = false; public: - AArch64FunctionInfo() - : BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false), - NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0), - VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0), - IsSplitCSR(false), StackRealigned(false), - CalleeSaveStackHasFreeSpace(false) {} - - explicit AArch64FunctionInfo(MachineFunction &MF) - : BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false), - NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0), - VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0), - IsSplitCSR(false), StackRealigned(false), - CalleeSaveStackHasFreeSpace(false) { + AArch64FunctionInfo() = default; + + explicit AArch64FunctionInfo(MachineFunction &MF) { (void)MF; } @@ -193,6 +184,7 @@ private: MILOHContainer LOHContainerSet; SetOfInstructions LOHRelated; }; -} // End llvm namespace -#endif +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_AARCH64_AARCH64MACHINEFUNCTIONINFO_H diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index f58bbbd26132..03e01329e036 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -71,6 +71,7 @@ void AArch64Subtarget::initializeProperties() { break; case Falkor: MaxInterleaveFactor = 4; + VectorInsertExtractBaseCost = 2; break; case Kryo: MaxInterleaveFactor = 4; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index e4ef0d4bb8db..d2883941e2c4 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -15,24 +15,35 @@ #include "AArch64InstructionSelector.h" #include "AArch64LegalizerInfo.h" #include "AArch64RegisterBankInfo.h" +#include "AArch64Subtarget.h" #include "AArch64TargetMachine.h" #include "AArch64TargetObjectFile.h" #include "AArch64TargetTransformInfo.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/GlobalISel/GISelAccessor.h" #include "llvm/CodeGen/GlobalISel/IRTranslator.h" #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/CodeGen/GlobalISel/Legalizer.h" #include "llvm/CodeGen/GlobalISel/RegBankSelect.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/InitializePasses.h" +#include "llvm/MC/MCTargetOptions.h" +#include "llvm/Pass.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Scalar.h" +#include <memory> +#include <string> + using namespace llvm; static cl::opt<bool> EnableCCMP("aarch64-enable-ccmp", @@ -154,9 +165,9 @@ extern "C" void LLVMInitializeAArch64Target() { //===----------------------------------------------------------------------===// static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { if (TT.isOSBinFormatMachO()) - return 
make_unique<AArch64_MachoTargetObjectFile>(); + return llvm::make_unique<AArch64_MachoTargetObjectFile>(); - return make_unique<AArch64_ELFTargetObjectFile>(); + return llvm::make_unique<AArch64_ELFTargetObjectFile>(); } // Helper function to build a DataLayout string @@ -202,29 +213,35 @@ AArch64TargetMachine::AArch64TargetMachine( initAsmInfo(); } -AArch64TargetMachine::~AArch64TargetMachine() {} +AArch64TargetMachine::~AArch64TargetMachine() = default; #ifdef LLVM_BUILD_GLOBAL_ISEL namespace { + struct AArch64GISelActualAccessor : public GISelAccessor { std::unique_ptr<CallLowering> CallLoweringInfo; std::unique_ptr<InstructionSelector> InstSelector; std::unique_ptr<LegalizerInfo> Legalizer; std::unique_ptr<RegisterBankInfo> RegBankInfo; + const CallLowering *getCallLowering() const override { return CallLoweringInfo.get(); } + const InstructionSelector *getInstructionSelector() const override { return InstSelector.get(); } + const LegalizerInfo *getLegalizerInfo() const override { return Legalizer.get(); } + const RegisterBankInfo *getRegBankInfo() const override { return RegBankInfo.get(); } }; -} // End anonymous namespace. + +} // end anonymous namespace #endif const AArch64Subtarget * @@ -287,6 +304,7 @@ AArch64beTargetMachine::AArch64beTargetMachine( : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} namespace { + /// AArch64 Code Generator Pass Configuration Options. class AArch64PassConfig : public TargetPassConfig { public: @@ -324,7 +342,8 @@ public: void addPreSched2() override; void addPreEmitPass() override; }; -} // namespace + +} // end anonymous namespace TargetIRAnalysis AArch64TargetMachine::getTargetIRAnalysis() { return TargetIRAnalysis([this](const Function &F) { @@ -414,14 +433,17 @@ bool AArch64PassConfig::addIRTranslator() { addPass(new IRTranslator()); return false; } + bool AArch64PassConfig::addLegalizeMachineIR() { addPass(new Legalizer()); return false; } + bool AArch64PassConfig::addRegBankSelect() { addPass(new RegBankSelect()); return false; } + bool AArch64PassConfig::addGlobalInstructionSelect() { addPass(new InstructionSelect()); return false; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 88c98865bbc6..1a17691fc584 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -417,14 +417,17 @@ int AArch64TTIImpl::getArithmeticInstrCost( } } -int AArch64TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) { +int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE, + const SCEV *Ptr) { // Address computations in vectorized code with non-consecutive addresses will // likely result in more instructions compared to scalar code where the // computation can more often be merged into the index mode. The resulting // extra micro-ops can significantly decrease throughput. 
unsigned NumVectorInstToHideOverhead = 10; + int MaxMergeDistance = 64; - if (Ty->isVectorTy() && IsComplex) + if (Ty->isVectorTy() && SE && + !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1)) return NumVectorInstToHideOverhead; // In many cases the address computation is not merged into the instruction diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index 24642cb1698e..849fd3d9b44a 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -104,7 +104,7 @@ public: TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None); - int getAddressComputationCost(Type *Ty, bool IsComplex); + int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr); int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy); diff --git a/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index db84afacf30e..b86a283b40d4 100644 --- a/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -9,45 +9,62 @@ #include "MCTargetDesc/AArch64AddressingModes.h" #include "MCTargetDesc/AArch64MCExpr.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" #include "MCTargetDesc/AArch64TargetStreamer.h" #include "Utils/AArch64BaseInfo.h" +#include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCLinkerOptimizationHint.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCAsmParserExtension.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Support/Debug.h" +#include "llvm/MC/MCTargetOptions.h" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/SMLoc.h" #include "llvm/Support/TargetParser.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <cctype> +#include <cstdint> #include <cstdio> +#include <string> +#include <tuple> +#include <utility> +#include <vector> + using namespace llvm; namespace { -class AArch64Operand; - class AArch64AsmParser : public MCTargetAsmParser { private: StringRef Mnemonic; ///< Instruction mnemonic. // Map of register aliases registers via the .req directive. 
- StringMap<std::pair<bool, unsigned> > RegisterReqs; + StringMap<std::pair<bool, unsigned>> RegisterReqs; AArch64TargetStreamer &getTargetStreamer() { MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); @@ -118,6 +135,7 @@ public: #include "AArch64GenAsmMatcher.inc" }; bool IsILP32; + AArch64AsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser, const MCInstrInfo &MII, const MCTargetOptions &Options) : MCTargetAsmParser(Options, STI) { @@ -143,9 +161,6 @@ public: MCSymbolRefExpr::VariantKind &DarwinRefKind, int64_t &Addend); }; -} // end anonymous namespace - -namespace { /// AArch64Operand - Instances of this class represent a parsed AArch64 machine /// instruction. @@ -531,6 +546,7 @@ public: int64_t Val = MCE->getValue(); return (Val >= 0 && Val < 2); } + bool isImm0_7() const { if (!isImm()) return false; @@ -540,6 +556,7 @@ public: int64_t Val = MCE->getValue(); return (Val >= 0 && Val < 8); } + bool isImm1_8() const { if (!isImm()) return false; @@ -549,6 +566,7 @@ public: int64_t Val = MCE->getValue(); return (Val > 0 && Val < 9); } + bool isImm0_15() const { if (!isImm()) return false; @@ -558,6 +576,7 @@ public: int64_t Val = MCE->getValue(); return (Val >= 0 && Val < 16); } + bool isImm1_16() const { if (!isImm()) return false; @@ -567,6 +586,7 @@ public: int64_t Val = MCE->getValue(); return (Val > 0 && Val < 17); } + bool isImm0_31() const { if (!isImm()) return false; @@ -576,6 +596,7 @@ public: int64_t Val = MCE->getValue(); return (Val >= 0 && Val < 32); } + bool isImm1_31() const { if (!isImm()) return false; @@ -585,6 +606,7 @@ public: int64_t Val = MCE->getValue(); return (Val >= 1 && Val < 32); } + bool isImm1_32() const { if (!isImm()) return false; @@ -594,6 +616,7 @@ public: int64_t Val = MCE->getValue(); return (Val >= 1 && Val < 33); } + bool isImm0_63() const { if (!isImm()) return false; @@ -603,6 +626,7 @@ public: int64_t Val = MCE->getValue(); return (Val >= 0 && Val < 64); } + bool isImm1_63() const { if (!isImm()) return false; @@ -612,6 +636,7 @@ public: int64_t Val = MCE->getValue(); return (Val >= 1 && Val < 64); } + bool isImm1_64() const { if (!isImm()) return false; @@ -621,6 +646,7 @@ public: int64_t Val = MCE->getValue(); return (Val >= 1 && Val < 65); } + bool isImm0_127() const { if (!isImm()) return false; @@ -630,6 +656,7 @@ public: int64_t Val = MCE->getValue(); return (Val >= 0 && Val < 128); } + bool isImm0_255() const { if (!isImm()) return false; @@ -639,6 +666,7 @@ public: int64_t Val = MCE->getValue(); return (Val >= 0 && Val < 256); } + bool isImm0_65535() const { if (!isImm()) return false; @@ -648,6 +676,7 @@ public: int64_t Val = MCE->getValue(); return (Val >= 0 && Val < 65536); } + bool isImm32_63() const { if (!isImm()) return false; @@ -657,6 +686,7 @@ public: int64_t Val = MCE->getValue(); return (Val >= 32 && Val < 64); } + bool isLogicalImm32() const { if (!isImm()) return false; @@ -669,6 +699,7 @@ public: Val &= 0xFFFFFFFF; return AArch64_AM::isLogicalImmediate(Val, 32); } + bool isLogicalImm64() const { if (!isImm()) return false; @@ -677,6 +708,7 @@ public: return false; return AArch64_AM::isLogicalImmediate(MCE->getValue(), 64); } + bool isLogicalImm32Not() const { if (!isImm()) return false; @@ -686,6 +718,7 @@ public: int64_t Val = ~MCE->getValue() & 0xFFFFFFFF; return AArch64_AM::isLogicalImmediate(Val, 32); } + bool isLogicalImm64Not() const { if (!isImm()) return false; @@ -694,7 +727,9 @@ public: return false; return AArch64_AM::isLogicalImmediate(~MCE->getValue(), 64); } + bool isShiftedImm() 
const { return Kind == k_ShiftedImm; } + bool isAddSubImm() const { if (!isShiftedImm() && !isImm()) return false; @@ -737,6 +772,7 @@ public: // code deal with it. return true; } + bool isAddSubImmNeg() const { if (!isShiftedImm() && !isImm()) return false; @@ -756,7 +792,9 @@ public: const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr); return CE != nullptr && CE->getValue() < 0 && -CE->getValue() <= 0xfff; } + bool isCondCode() const { return Kind == k_CondCode; } + bool isSIMDImmType10() const { if (!isImm()) return false; @@ -765,6 +803,7 @@ public: return false; return AArch64_AM::isAdvSIMDModImmType10(MCE->getValue()); } + bool isBranchTarget26() const { if (!isImm()) return false; @@ -776,6 +815,7 @@ public: return false; return (Val >= -(0x2000000 << 2) && Val <= (0x1ffffff << 2)); } + bool isPCRelLabel19() const { if (!isImm()) return false; @@ -787,6 +827,7 @@ public: return false; return (Val >= -(0x40000 << 2) && Val <= (0x3ffff << 2)); } + bool isBranchTarget14() const { if (!isImm()) return false; @@ -891,40 +932,49 @@ public: bool isFPImm() const { return Kind == k_FPImm; } bool isBarrier() const { return Kind == k_Barrier; } bool isSysReg() const { return Kind == k_SysReg; } + bool isMRSSystemRegister() const { if (!isSysReg()) return false; return SysReg.MRSReg != -1U; } + bool isMSRSystemRegister() const { if (!isSysReg()) return false; return SysReg.MSRReg != -1U; } + bool isSystemPStateFieldWithImm0_1() const { if (!isSysReg()) return false; return (SysReg.PStateField == AArch64PState::PAN || SysReg.PStateField == AArch64PState::UAO); } + bool isSystemPStateFieldWithImm0_15() const { if (!isSysReg() || isSystemPStateFieldWithImm0_1()) return false; return SysReg.PStateField != -1U; } + bool isReg() const override { return Kind == k_Register && !Reg.isVector; } bool isVectorReg() const { return Kind == k_Register && Reg.isVector; } + bool isVectorRegLo() const { return Kind == k_Register && Reg.isVector && AArch64MCRegisterClasses[AArch64::FPR128_loRegClassID].contains( Reg.RegNum); } + bool isGPR32as64() const { return Kind == k_Register && !Reg.isVector && AArch64MCRegisterClasses[AArch64::GPR64RegClassID].contains(Reg.RegNum); } + bool isWSeqPair() const { return Kind == k_Register && !Reg.isVector && AArch64MCRegisterClasses[AArch64::WSeqPairsClassRegClassID].contains( Reg.RegNum); } + bool isXSeqPair() const { return Kind == k_Register && !Reg.isVector && AArch64MCRegisterClasses[AArch64::XSeqPairsClassRegClassID].contains( @@ -957,19 +1007,25 @@ public: bool isVectorIndex1() const { return Kind == k_VectorIndex && VectorIndex.Val == 1; } + bool isVectorIndexB() const { return Kind == k_VectorIndex && VectorIndex.Val < 16; } + bool isVectorIndexH() const { return Kind == k_VectorIndex && VectorIndex.Val < 8; } + bool isVectorIndexS() const { return Kind == k_VectorIndex && VectorIndex.Val < 4; } + bool isVectorIndexD() const { return Kind == k_VectorIndex && VectorIndex.Val < 2; } + bool isToken() const override { return Kind == k_Token; } + bool isTokenEqual(StringRef Str) const { return Kind == k_Token && getToken() == Str; } @@ -1006,6 +1062,7 @@ public: AArch64_AM::ShiftExtendType ET = getShiftExtendType(); return ET != AArch64_AM::UXTX && ET != AArch64_AM::SXTX; } + bool isExtendLSL64() const { if (!isExtend()) return false; @@ -1836,11 +1893,10 @@ void AArch64Operand::print(raw_ostream &OS) const { OS << "<prfop invalid #" << getPrefetch() << ">"; break; } - case k_PSBHint: { + case k_PSBHint: OS << getPSBHintName(); break; - } - case k_ShiftExtend: { + 
case k_ShiftExtend: OS << "<" << AArch64_AM::getShiftExtendName(getShiftExtendType()) << " #" << getShiftExtendAmount(); if (!hasShiftExtendAmount()) @@ -1848,7 +1904,6 @@ void AArch64Operand::print(raw_ostream &OS) const { OS << '>'; break; } - } } /// @name Auto-generated Match Functions @@ -2469,7 +2524,7 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc, Expr = MCConstantExpr::create(op2, getContext()); \ Operands.push_back( \ AArch64Operand::CreateImm(Expr, S, getLoc(), getContext())); \ - } while (0) + } while (false) if (Mnemonic == "ic") { if (!Op.compare_lower("ialluis")) { @@ -3979,7 +4034,6 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, } } - switch (MatchResult) { case Match_Success: { // Perform range checking and other semantic validations @@ -4550,7 +4604,6 @@ unsigned AArch64AsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, return Match_InvalidOperand; } - OperandMatchResultTy AArch64AsmParser::tryParseGPRSeqPair(OperandVector &Operands) { @@ -4601,7 +4654,7 @@ AArch64AsmParser::tryParseGPRSeqPair(OperandVector &Operands) { return MatchOperand_ParseFail; } - if (RI->getEncodingValue(SecondReg) != FirstEncoding + 1 || + if (RI->getEncodingValue(SecondReg) != FirstEncoding + 1 || (isXReg && !XRegClass.contains(SecondReg)) || (isWReg && !WRegClass.contains(SecondReg))) { Error(E,"expected second odd register of a " @@ -4610,7 +4663,7 @@ AArch64AsmParser::tryParseGPRSeqPair(OperandVector &Operands) { } unsigned Pair = 0; - if(isXReg) { + if (isXReg) { Pair = RI->getMatchingSuperReg(FirstReg, AArch64::sube64, &AArch64MCRegisterClasses[AArch64::XSeqPairsClassRegClassID]); } else { diff --git a/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.h b/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.h index 24e353cf4b96..bc2f7f181699 100644 --- a/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.h +++ b/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.h @@ -17,15 +17,12 @@ namespace llvm { -class MCInst; -class raw_ostream; - class AArch64Disassembler : public MCDisassembler { public: AArch64Disassembler(const MCSubtargetInfo &STI, MCContext &Ctx) : MCDisassembler(STI, Ctx) {} - ~AArch64Disassembler() {} + ~AArch64Disassembler() override = default; MCDisassembler::DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, @@ -33,6 +30,6 @@ public: raw_ostream &CStream) const override; }; -} // namespace llvm +} // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_AARCH64_DISASSEMBLER_AARCH64DISASSEMBLER_H diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp index a1edb3cef46a..c954c0eb2c6b 100644 --- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp +++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp @@ -17,25 +17,30 @@ #include "MCTargetDesc/AArch64MCTargetDesc.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCFixup.h" #include "llvm/MC/MCValue.h" +#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" +#include <cassert> +#include <cstdint> using namespace llvm; namespace { + class AArch64ELFObjectWriter : public MCELFObjectTargetWriter { public: AArch64ELFObjectWriter(uint8_t OSABI, bool IsLittleEndian, bool IsILP32); - ~AArch64ELFObjectWriter() override; + ~AArch64ELFObjectWriter() override = default; protected: 
unsigned getRelocType(MCContext &Ctx, const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const override; bool IsILP32; -private: }; -} + +} // end anonymous namespace AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI, bool IsLittleEndian, @@ -44,8 +49,6 @@ AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI, /*HasRelocationAddend*/ true), IsILP32(IsILP32) {} -AArch64ELFObjectWriter::~AArch64ELFObjectWriter() {} - #define R_CLS(rtype) \ IsILP32 ? ELF::R_AARCH64_P32_##rtype : ELF::R_AARCH64_##rtype #define BAD_ILP32_MOV(lp64rtype) "ILP32 absolute MOV relocation not "\ diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp index f7058cdf2373..62dfa59483eb 100644 --- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -15,15 +15,23 @@ #include "MCTargetDesc/AArch64FixupKinds.h" #include "MCTargetDesc/AArch64MCExpr.h" #include "Utils/AArch64BaseInfo.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCFixup.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Endian.h" #include "llvm/Support/EndianStream.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <cstdint> + using namespace llvm; #define DEBUG_TYPE "mccodeemitter" @@ -37,13 +45,12 @@ class AArch64MCCodeEmitter : public MCCodeEmitter { MCContext &Ctx; const MCInstrInfo &MCII; - AArch64MCCodeEmitter(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT - void operator=(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT public: AArch64MCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx) : Ctx(ctx), MCII(mcii) {} - - ~AArch64MCCodeEmitter() override {} + AArch64MCCodeEmitter(const AArch64MCCodeEmitter &) = delete; + void operator=(const AArch64MCCodeEmitter &) = delete; + ~AArch64MCCodeEmitter() override = default; // getBinaryCodeForInstr - TableGen'erated function for getting the // binary encoding for an instruction. @@ -181,12 +188,6 @@ private: } // end anonymous namespace -MCCodeEmitter *llvm::createAArch64MCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, - MCContext &Ctx) { - return new AArch64MCCodeEmitter(MCII, Ctx); -} - /// getMachineOpValue - Return binary encoding of operand. If the machine /// operand requires relocation, record the relocation and return zero. 
unsigned @@ -601,3 +602,9 @@ unsigned AArch64MCCodeEmitter::fixOneOperandFPComparison( #define ENABLE_INSTR_PREDICATE_VERIFIER #include "AArch64GenMCCodeEmitter.inc" + +MCCodeEmitter *llvm::createAArch64MCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + MCContext &Ctx) { + return new AArch64MCCodeEmitter(MCII, Ctx); +} diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp index 3e86a42d5be6..1b949b54590c 100644 --- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp +++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp @@ -13,6 +13,7 @@ #include "AArch64TargetStreamer.h" #include "llvm/MC/ConstantPools.h" + using namespace llvm; // @@ -21,7 +22,7 @@ using namespace llvm; AArch64TargetStreamer::AArch64TargetStreamer(MCStreamer &S) : MCTargetStreamer(S), ConstantPools(new AssemblerConstantPools()) {} -AArch64TargetStreamer::~AArch64TargetStreamer() {} +AArch64TargetStreamer::~AArch64TargetStreamer() = default; // The constant pool handling is shared by all AArch64TargetStreamer // implementations. diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index a8e6902c252b..4acd55eb6120 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -176,12 +176,14 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { SetupMachineFunction(MF); + const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>(); MCContext &Context = getObjFileLowering().getContext(); - MCSectionELF *ConfigSection = - Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0); - OutStreamer->SwitchSection(ConfigSection); + if (!STM.isAmdHsaOS()) { + MCSectionELF *ConfigSection = + Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0); + OutStreamer->SwitchSection(ConfigSection); + } - const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>(); SIProgramInfo KernelInfo; if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { getSIProgramInfo(KernelInfo, MF); diff --git a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 85cbadf0a570..5f651d4da5d2 100644 --- a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -269,7 +269,7 @@ unsigned encodeLgkmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Lgkmcnt) { unsigned encodeWaitcnt(IsaVersion Version, unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) { - unsigned Waitcnt = getWaitcntBitMask(Version);; + unsigned Waitcnt = getWaitcntBitMask(Version); Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt); Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt); Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt); diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 10e6297ef1ed..cc001b596785 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -338,14 +338,17 @@ int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) { return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy); } -int ARMTTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) { +int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE, + const SCEV *Ptr) { // 
Address computations in vectorized code with non-consecutive addresses will // likely result in more instructions compared to scalar code where the // computation can more often be merged into the index mode. The resulting // extra micro-ops can significantly decrease throughput. unsigned NumVectorInstToHideOverhead = 10; + int MaxMergeDistance = 64; - if (Ty->isVectorTy() && IsComplex) + if (Ty->isVectorTy() && SE && + !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1)) return NumVectorInstToHideOverhead; // In many cases the address computation is not merged into the instruction diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index d83228afb0ab..731a5adf3d73 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -104,7 +104,8 @@ public: int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); - int getAddressComputationCost(Type *Val, bool IsComplex); + int getAddressComputationCost(Type *Val, ScalarEvolution *SE, + const SCEV *Ptr); int getFPOpCost(Type *Ty); diff --git a/contrib/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp b/contrib/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp index 903f92a04431..57ead973b56e 100644 --- a/contrib/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp +++ b/contrib/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp @@ -8,23 +8,41 @@ //===----------------------------------------------------------------------===// #include "Lanai.h" +#include "LanaiAluCode.h" +#include "LanaiCondCode.h" #include "MCTargetDesc/LanaiMCExpr.h" -#include "MCTargetDesc/LanaiMCTargetDesc.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/SMLoc.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <memory> namespace llvm { + +// Auto-generated by TableGen +static unsigned MatchRegisterName(StringRef Name); + namespace { + struct LanaiOperand; class LanaiAsmParser : public MCTargetAsmParser { @@ -80,9 +98,6 @@ private: const MCSubtargetInfo &SubtargetInfo; }; -// Auto-generated by TableGen -static unsigned MatchRegisterName(llvm::StringRef Name); - // LanaiOperand - Instances of this class represented a parsed machine // instruction struct LanaiOperand : public MCParsedAsmOperand { @@ -627,6 +642,8 @@ public: } }; +} // end anonymous namespace + bool LanaiAsmParser::ParseDirective(AsmToken /*DirectiveId*/) { return true; } bool LanaiAsmParser::MatchAndEmitInstruction(SMLoc IdLoc, unsigned &Opcode, @@ -680,11 +697,11 @@ std::unique_ptr<LanaiOperand> LanaiAsmParser::parseRegister() { if (Lexer.getKind() == AsmToken::Identifier) { RegNum = MatchRegisterName(Lexer.getTok().getIdentifier()); if (RegNum == 0) - return 0; + return nullptr; Parser.Lex(); // Eat identifier token return LanaiOperand::createReg(RegNum, Start, End); } - return 
0; + return nullptr; } bool LanaiAsmParser::ParseRegister(unsigned &RegNum, SMLoc &StartLoc, @@ -701,15 +718,15 @@ bool LanaiAsmParser::ParseRegister(unsigned &RegNum, SMLoc &StartLoc, std::unique_ptr<LanaiOperand> LanaiAsmParser::parseIdentifier() { SMLoc Start = Parser.getTok().getLoc(); SMLoc End = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); - const MCExpr *Res, *RHS = 0; + const MCExpr *Res, *RHS = nullptr; LanaiMCExpr::VariantKind Kind = LanaiMCExpr::VK_Lanai_None; if (Lexer.getKind() != AsmToken::Identifier) - return 0; + return nullptr; StringRef Identifier; if (Parser.parseIdentifier(Identifier)) - return 0; + return nullptr; // Check if identifier has a modifier if (Identifier.equals_lower("hi")) @@ -722,24 +739,24 @@ std::unique_ptr<LanaiOperand> LanaiAsmParser::parseIdentifier() { if (Kind != LanaiMCExpr::VK_Lanai_None) { if (Lexer.getKind() != AsmToken::LParen) { Error(Lexer.getLoc(), "Expected '('"); - return 0; + return nullptr; } Lexer.Lex(); // lex '(' // Parse identifier if (Parser.parseIdentifier(Identifier)) - return 0; + return nullptr; } // If addition parse the RHS. if (Lexer.getKind() == AsmToken::Plus && Parser.parseExpression(RHS)) - return 0; + return nullptr; // For variants parse the final ')' if (Kind != LanaiMCExpr::VK_Lanai_None) { if (Lexer.getKind() != AsmToken::RParen) { Error(Lexer.getLoc(), "Expected ')'"); - return 0; + return nullptr; } Lexer.Lex(); // lex ')' } @@ -771,7 +788,7 @@ std::unique_ptr<LanaiOperand> LanaiAsmParser::parseImmediate() { if (!Parser.parseExpression(ExprVal)) return LanaiOperand::createImm(ExprVal, Start, End); default: - return 0; + return nullptr; } } @@ -1204,10 +1221,9 @@ bool LanaiAsmParser::ParseInstruction(ParseInstructionInfo & /*Info*/, #define GET_REGISTER_MATCHER #define GET_MATCHER_IMPLEMENTATION #include "LanaiGenAsmMatcher.inc" -} // namespace extern "C" void LLVMInitializeLanaiAsmParser() { RegisterMCAsmParser<LanaiAsmParser> x(getTheLanaiTarget()); } -} // namespace llvm +} // end namespace llvm diff --git a/contrib/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.h b/contrib/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.h index a317cd88ad63..e0c19e8ea644 100644 --- a/contrib/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.h +++ b/contrib/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.h @@ -20,14 +20,11 @@ namespace llvm { -class MCInst; -class raw_ostream; - class LanaiDisassembler : public MCDisassembler { public: LanaiDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx); - ~LanaiDisassembler() override {} + ~LanaiDisassembler() override = default; // getInstruction - See MCDisassembler. 
MCDisassembler::DecodeStatus @@ -36,6 +33,6 @@ public: raw_ostream &CStream) const override; }; -} // namespace llvm +} // end namespace llvm #endif // LLVM_LIB_TARGET_LANAI_DISASSEMBLER_LANAIDISASSEMBLER_H diff --git a/contrib/llvm/lib/Target/Lanai/InstPrinter/LanaiInstPrinter.h b/contrib/llvm/lib/Target/Lanai/InstPrinter/LanaiInstPrinter.h index 1c9d186ad819..59904fbaa318 100644 --- a/contrib/llvm/lib/Target/Lanai/InstPrinter/LanaiInstPrinter.h +++ b/contrib/llvm/lib/Target/Lanai/InstPrinter/LanaiInstPrinter.h @@ -14,10 +14,10 @@ #ifndef LLVM_LIB_TARGET_LANAI_INSTPRINTER_LANAIINSTPRINTER_H #define LLVM_LIB_TARGET_LANAI_INSTPRINTER_LANAIINSTPRINTER_H +#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCInstPrinter.h" namespace llvm { -class MCOperand; class LanaiInstPrinter : public MCInstPrinter { public: @@ -28,14 +28,14 @@ public: void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, const MCSubtargetInfo &STI) override; void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O, - const char *Modifier = 0); + const char *Modifier = nullptr); void printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printMemRiOperand(const MCInst *MI, int OpNo, raw_ostream &O, - const char *Modifier = 0); + const char *Modifier = nullptr); void printMemRrOperand(const MCInst *MI, int OpNo, raw_ostream &O, - const char *Modifier = 0); + const char *Modifier = nullptr); void printMemSplsOperand(const MCInst *MI, int OpNo, raw_ostream &O, - const char *Modifier = 0); + const char *Modifier = nullptr); void printCCOperand(const MCInst *MI, int OpNo, raw_ostream &O); void printAluOperand(const MCInst *MI, int OpNo, raw_ostream &O); void printHi16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); @@ -60,6 +60,7 @@ private: bool printMemoryStoreIncrement(const MCInst *MI, raw_ostream &Ostream, StringRef Opcode, int AddOffset); }; -} // namespace llvm + +} // end namespace llvm #endif // LLVM_LIB_TARGET_LANAI_INSTPRINTER_LANAIINSTPRINTER_H diff --git a/contrib/llvm/lib/Target/Lanai/LanaiISelLowering.cpp b/contrib/llvm/lib/Target/Lanai/LanaiISelLowering.cpp index ae7870e07d42..d156294a0b0c 100644 --- a/contrib/llvm/lib/Target/Lanai/LanaiISelLowering.cpp +++ b/contrib/llvm/lib/Target/Lanai/LanaiISelLowering.cpp @@ -11,31 +11,46 @@ // //===----------------------------------------------------------------------===// -#include "LanaiISelLowering.h" - #include "Lanai.h" +#include "LanaiCondCode.h" +#include "LanaiISelLowering.h" #include "LanaiMachineFunctionInfo.h" #include "LanaiSubtarget.h" -#include "LanaiTargetMachine.h" #include "LanaiTargetObjectFile.h" +#include "MCTargetDesc/LanaiBaseInfo.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" -#include "llvm/IR/GlobalAlias.h" -#include 
"llvm/IR/GlobalVariable.h" -#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetCallingConv.h" +#include "llvm/Target/TargetMachine.h" +#include <cassert> +#include <cmath> +#include <cstdint> +#include <cstdlib> +#include <utility> #define DEBUG_TYPE "lanai-lower" @@ -195,6 +210,7 @@ SDValue LanaiTargetLowering::LowerOperation(SDValue Op, llvm_unreachable("unimplemented operand"); } } + //===----------------------------------------------------------------------===// // Lanai Inline Assembly Support //===----------------------------------------------------------------------===// @@ -244,7 +260,7 @@ LanaiTargetLowering::getSingleConstraintMatchWeight( Value *CallOperandVal = Info.CallOperandVal; // If we don't have a value, we can't do a match, // but allow it at the lowest weight. - if (CallOperandVal == NULL) + if (CallOperandVal == nullptr) return CW_Default; // Look at the constraint type. switch (*Constraint) { @@ -270,7 +286,7 @@ LanaiTargetLowering::getSingleConstraintMatchWeight( void LanaiTargetLowering::LowerAsmOperandForConstraint( SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, SelectionDAG &DAG) const { - SDValue Result(0, 0); + SDValue Result(nullptr, 0); // Only support length 1 constraints for now. if (Constraint.length() > 1) @@ -676,7 +692,7 @@ SDValue LanaiTargetLowering::LowerCCCCallTo( } else { assert(VA.isMemLoc()); - if (StackPtr.getNode() == 0) + if (StackPtr.getNode() == nullptr) StackPtr = DAG.getCopyFromReg(Chain, DL, Lanai::SP, getPointerTy(DAG.getDataLayout())); @@ -1120,7 +1136,7 @@ const char *LanaiTargetLowering::getTargetNodeName(unsigned Opcode) const { case LanaiISD::SMALL: return "LanaiISD::SMALL"; default: - return NULL; + return nullptr; } } diff --git a/contrib/llvm/lib/Target/Lanai/LanaiRegisterInfo.h b/contrib/llvm/lib/Target/Lanai/LanaiRegisterInfo.h index 8b84bbc460e8..c6e459076ebc 100644 --- a/contrib/llvm/lib/Target/Lanai/LanaiRegisterInfo.h +++ b/contrib/llvm/lib/Target/Lanai/LanaiRegisterInfo.h @@ -21,9 +21,6 @@ namespace llvm { -class TargetInstrInfo; -class Type; - struct LanaiRegisterInfo : public LanaiGenRegisterInfo { LanaiRegisterInfo(); @@ -32,7 +29,7 @@ struct LanaiRegisterInfo : public LanaiGenRegisterInfo { // Code Generation virtual methods. 
const uint16_t * - getCalleeSavedRegs(const MachineFunction *MF = 0) const override; + getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override; BitVector getReservedRegs(const MachineFunction &MF) const override; @@ -42,7 +39,7 @@ struct LanaiRegisterInfo : public LanaiGenRegisterInfo { void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, - RegScavenger *RS = NULL) const override; + RegScavenger *RS = nullptr) const override; bool canRealignStack(const MachineFunction &MF) const override; @@ -58,6 +55,6 @@ struct LanaiRegisterInfo : public LanaiGenRegisterInfo { int getDwarfRegNum(unsigned RegNum, bool IsEH) const; }; -} // namespace llvm +} // end namespace llvm #endif // LLVM_LIB_TARGET_LANAI_LANAIREGISTERINFO_H diff --git a/contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp b/contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp index e30d5e9a18eb..e02bba529bd5 100644 --- a/contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp +++ b/contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp @@ -9,20 +9,19 @@ #include "MCTargetDesc/LanaiBaseInfo.h" #include "MCTargetDesc/LanaiFixupKinds.h" -#include "MCTargetDesc/LanaiMCTargetDesc.h" #include "llvm/MC/MCELFObjectWriter.h" -#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" using namespace llvm; namespace { + class LanaiELFObjectWriter : public MCELFObjectTargetWriter { public: explicit LanaiELFObjectWriter(uint8_t OSABI); - ~LanaiELFObjectWriter() override; + ~LanaiELFObjectWriter() override = default; protected: unsigned getRelocType(MCContext &Ctx, const MCValue &Target, @@ -30,14 +29,13 @@ protected: bool needsRelocateWithSymbol(const MCSymbol &SD, unsigned Type) const override; }; -} // namespace + +} // end anonymous namespace LanaiELFObjectWriter::LanaiELFObjectWriter(uint8_t OSABI) : MCELFObjectTargetWriter(/*Is64Bit_=*/false, OSABI, ELF::EM_LANAI, /*HasRelocationAddend=*/true) {} -LanaiELFObjectWriter::~LanaiELFObjectWriter() {} - unsigned LanaiELFObjectWriter::getRelocType(MCContext & /*Ctx*/, const MCValue & /*Target*/, const MCFixup &Fixup, diff --git a/contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp b/contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp index ce68b7e24dba..f5b5335bb989 100644 --- a/contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp @@ -12,37 +12,38 @@ //===----------------------------------------------------------------------===// #include "Lanai.h" +#include "LanaiAluCode.h" #include "MCTargetDesc/LanaiBaseInfo.h" #include "MCTargetDesc/LanaiFixupKinds.h" #include "MCTargetDesc/LanaiMCExpr.h" -#include "MCTargetDesc/LanaiMCTargetDesc.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <cstdint> #define DEBUG_TYPE "mccodeemitter" STATISTIC(MCNumEmitted, "Number of MC instructions emitted"); namespace llvm { + namespace { -class LanaiMCCodeEmitter : public MCCodeEmitter { - LanaiMCCodeEmitter(const LanaiMCCodeEmitter &); // DO NOT 
IMPLEMENT - void operator=(const LanaiMCCodeEmitter &); // DO NOT IMPLEMENT - const MCInstrInfo &InstrInfo; - MCContext &Context; +class LanaiMCCodeEmitter : public MCCodeEmitter { public: - LanaiMCCodeEmitter(const MCInstrInfo &MCII, MCContext &C) - : InstrInfo(MCII), Context(C) {} - - ~LanaiMCCodeEmitter() override {} + LanaiMCCodeEmitter(const MCInstrInfo &MCII, MCContext &C) {} + LanaiMCCodeEmitter(const LanaiMCCodeEmitter &) = delete; + void operator=(const LanaiMCCodeEmitter &) = delete; + ~LanaiMCCodeEmitter() override = default; // The functions below are called by TableGen generated functions for getting // the binary encoding of instructions/opereands. @@ -86,6 +87,8 @@ public: const MCSubtargetInfo &STI) const; }; +} // end anonymous namespace + Lanai::Fixups FixupKind(const MCExpr *Expr) { if (isa<MCSymbolRefExpr>(Expr)) return Lanai::FIXUP_LANAI_21; @@ -298,8 +301,8 @@ unsigned LanaiMCCodeEmitter::getBranchTargetOpValue( } #include "LanaiGenMCCodeEmitter.inc" -} // namespace -} // namespace llvm + +} // end namespace llvm llvm::MCCodeEmitter * llvm::createLanaiMCCodeEmitter(const MCInstrInfo &InstrInfo, diff --git a/contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp b/contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp index c2f8c0f7ad50..a47ff9ff3d61 100644 --- a/contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp @@ -11,16 +11,21 @@ // //===----------------------------------------------------------------------===// +#include "LanaiMCAsmInfo.h" #include "LanaiMCTargetDesc.h" - #include "InstPrinter/LanaiInstPrinter.h" -#include "LanaiMCAsmInfo.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" +#include <cstdint> +#include <string> #define GET_INSTRINFO_MC_DESC #include "LanaiGenInstrInfo.inc" @@ -70,7 +75,7 @@ static MCInstPrinter *createLanaiMCInstPrinter(const Triple & /*T*/, const MCRegisterInfo &MRI) { if (SyntaxVariant == 0) return new LanaiInstPrinter(MAI, MII, MRI); - return 0; + return nullptr; } static MCRelocationInfo *createLanaiElfRelocation(const Triple &TheTriple, @@ -79,6 +84,7 @@ static MCRelocationInfo *createLanaiElfRelocation(const Triple &TheTriple, } namespace { + class LanaiMCInstrAnalysis : public MCInstrAnalysis { public: explicit LanaiMCInstrAnalysis(const MCInstrInfo *Info) @@ -107,6 +113,7 @@ public: } } }; + } // end anonymous namespace static MCInstrAnalysis *createLanaiInstrAnalysis(const MCInstrInfo *Info) { @@ -131,7 +138,7 @@ extern "C" void LLVMInitializeLanaiTargetMC() { // Register the MC code emitter TargetRegistry::RegisterMCCodeEmitter(getTheLanaiTarget(), - llvm::createLanaiMCCodeEmitter); + createLanaiMCCodeEmitter); // Register the ASM Backend TargetRegistry::RegisterMCAsmBackend(getTheLanaiTarget(), diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h index d3c88482f092..05acd25ae5fc 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -47,7 +47,7 @@ namespace llvm { FCTIDZ, FCTIWZ, /// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for - /// unsigned integers. 
+ /// unsigned integers with round toward zero. FCTIDUZ, FCTIWUZ, /// VEXTS, ByteWidth - takes an input in VSFRC and produces an output in diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td index 03b2257a88a8..fbec8787ef8d 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -1154,6 +1154,9 @@ defm FCFID : XForm_26r<63, 846, (outs f8rc:$frD), (ins f8rc:$frB), defm FCTID : XForm_26r<63, 814, (outs f8rc:$frD), (ins f8rc:$frB), "fctid", "$frD, $frB", IIC_FPGeneral, []>, isPPC64; +defm FCTIDU : XForm_26r<63, 942, (outs f8rc:$frD), (ins f8rc:$frB), + "fctidu", "$frD, $frB", IIC_FPGeneral, + []>, isPPC64; defm FCTIDZ : XForm_26r<63, 815, (outs f8rc:$frD), (ins f8rc:$frB), "fctidz", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (PPCfctidz f64:$frB))]>, isPPC64; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td index 99689f656c2d..ef7d2012a233 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td @@ -603,6 +603,12 @@ class XForm_17<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, let Inst{31} = 0; } +class XForm_17a<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : XForm_17<opcode, xo, OOL, IOL, asmstr, itin > { + let FRA = 0; +} + // Used for QPX class XForm_18<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td index a7231bd2e2c0..90111bbea07d 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -2172,11 +2172,19 @@ let isCompare = 1, hasSideEffects = 0 in { "fcmpu $crD, $fA, $fB", IIC_FPCompare>; } +def FTDIV: XForm_17<63, 128, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB), + "ftdiv $crD, $fA, $fB", IIC_FPCompare>; +def FTSQRT: XForm_17a<63, 160, (outs crrc:$crD), (ins f8rc:$fB), + "ftsqrt $crD, $fB", IIC_FPCompare>; + let Uses = [RM] in { let hasSideEffects = 0 in { defm FCTIW : XForm_26r<63, 14, (outs f8rc:$frD), (ins f8rc:$frB), "fctiw", "$frD, $frB", IIC_FPGeneral, []>; + defm FCTIWU : XForm_26r<63, 142, (outs f8rc:$frD), (ins f8rc:$frB), + "fctiwu", "$frD, $frB", IIC_FPGeneral, + []>; defm FCTIWZ : XForm_26r<63, 15, (outs f8rc:$frD), (ins f8rc:$frB), "fctiwz", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (PPCfctiwz f64:$frB))]>; diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp index fd2189397279..7f72ab17f619 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -16985,10 +16985,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, newSelect, zeroConst); } - if (Cond.getOpcode() == ISD::SETCC) { + if (Cond.getOpcode() == ISD::SETCC) if (SDValue NewCond = LowerSETCC(Cond, DAG)) Cond = NewCond; - } // (select (x == 0), -1, y) -> (sign_bit (x - 1)) | y // (select (x == 0), y, -1) -> ~(sign_bit (x - 1)) | y @@ -18289,6 +18288,7 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT, /// constant. Takes immediate version of shift as input. 
static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT, SDValue SrcOp, SDValue ShAmt, + const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT SVT = ShAmt.getSimpleValueType(); assert((SVT == MVT::i32 || SVT == MVT::i64) && "Unexpected value type!"); @@ -18306,27 +18306,32 @@ static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT, case X86ISD::VSRAI: Opc = X86ISD::VSRA; break; } - const X86Subtarget &Subtarget = - static_cast<const X86Subtarget &>(DAG.getSubtarget()); - if (Subtarget.hasSSE41() && ShAmt.getOpcode() == ISD::ZERO_EXTEND && - ShAmt.getOperand(0).getSimpleValueType() == MVT::i16) { - // Let the shuffle legalizer expand this shift amount node. + // Need to build a vector containing shift amount. + // SSE/AVX packed shifts only use the lower 64-bit of the shift count. + // +=================+============+=======================================+ + // | ShAmt is | HasSSE4.1? | Construct ShAmt vector as | + // +=================+============+=======================================+ + // | i64 | Yes, No | Use ShAmt as lowest elt | + // | i32 | Yes | zero-extend in-reg | + // | (i32 zext(i16)) | Yes | zero-extend in-reg | + // | i16/i32 | No | v4i32 build_vector(ShAmt, 0, ud, ud)) | + // +=================+============+=======================================+ + + if (SVT == MVT::i64) + ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v2i64, ShAmt); + else if (Subtarget.hasSSE41() && ShAmt.getOpcode() == ISD::ZERO_EXTEND && + ShAmt.getOperand(0).getSimpleValueType() == MVT::i16) { SDValue Op0 = ShAmt.getOperand(0); Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(Op0), MVT::v8i16, Op0); - ShAmt = getShuffleVectorZeroOrUndef(Op0, 0, true, Subtarget, DAG); + ShAmt = DAG.getZeroExtendVectorInReg(Op0, SDLoc(Op0), MVT::v2i64); + } else if (Subtarget.hasSSE41() && + ShAmt.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { + ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v4i32, ShAmt); + ShAmt = DAG.getZeroExtendVectorInReg(ShAmt, SDLoc(ShAmt), MVT::v2i64); } else { - // Need to build a vector containing shift amount. - // SSE/AVX packed shifts only use the lower 64-bit of the shift count. - SmallVector<SDValue, 4> ShOps; - ShOps.push_back(ShAmt); - if (SVT == MVT::i32) { - ShOps.push_back(DAG.getConstant(0, dl, SVT)); - ShOps.push_back(DAG.getUNDEF(SVT)); - } - ShOps.push_back(DAG.getUNDEF(SVT)); - - MVT BVT = SVT == MVT::i32 ? 
MVT::v4i32 : MVT::v2i64; - ShAmt = DAG.getBuildVector(BVT, dl, ShOps); + SmallVector<SDValue, 4> ShOps = {ShAmt, DAG.getConstant(0, dl, SVT), + DAG.getUNDEF(SVT), DAG.getUNDEF(SVT)}; + ShAmt = DAG.getBuildVector(MVT::v4i32, dl, ShOps); } // The return type has to be a 128-bit type with the same element @@ -19014,7 +19019,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget } case VSHIFT: return getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(), - Op.getOperand(1), Op.getOperand(2), DAG); + Op.getOperand(1), Op.getOperand(2), Subtarget, + DAG); case COMPRESS_EXPAND_IN_REG: { SDValue Mask = Op.getOperand(3); SDValue DataToCompress = Op.getOperand(1); @@ -21276,7 +21282,7 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, else if (EltVT.bitsLT(MVT::i32)) BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt); - return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, DAG); + return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, Subtarget, DAG); } } @@ -25951,12 +25957,11 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N, // instructions. // TODO: Investigate sharing more of this with shuffle lowering. static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask, + bool FloatDomain, const X86Subtarget &Subtarget, unsigned &Shuffle, MVT &SrcVT, MVT &DstVT) { unsigned NumMaskElts = Mask.size(); unsigned MaskEltSize = MaskVT.getScalarSizeInBits(); - bool FloatDomain = MaskVT.isFloatingPoint() || - (!Subtarget.hasAVX2() && MaskVT.is256BitVector()); // Match against a VZEXT_MOVL instruction, SSE1 only supports 32-bits (MOVSS). if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2())) && @@ -26067,11 +26072,11 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask, // permute instructions. // TODO: Investigate sharing more of this with shuffle lowering. static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask, + bool FloatDomain, const X86Subtarget &Subtarget, unsigned &Shuffle, MVT &ShuffleVT, unsigned &PermuteImm) { unsigned NumMaskElts = Mask.size(); - bool FloatDomain = MaskVT.isFloatingPoint(); bool ContainsZeros = false; SmallBitVector Zeroable(NumMaskElts, false); @@ -26211,11 +26216,10 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask, // shuffle instructions. // TODO: Investigate sharing more of this with shuffle lowering. static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask, - SDValue &V1, SDValue &V2, + bool FloatDomain, SDValue &V1, SDValue &V2, const X86Subtarget &Subtarget, unsigned &Shuffle, MVT &ShuffleVT, bool IsUnary) { - bool FloatDomain = MaskVT.isFloatingPoint(); unsigned EltSizeInBits = MaskVT.getScalarSizeInBits(); if (MaskVT.is128BitVector()) { @@ -26310,13 +26314,13 @@ static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask, } static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask, + bool FloatDomain, SDValue &V1, SDValue &V2, SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget, unsigned &Shuffle, MVT &ShuffleVT, unsigned &PermuteImm) { unsigned NumMaskElts = Mask.size(); - bool FloatDomain = MaskVT.isFloatingPoint(); // Attempt to match against PALIGNR byte rotate. 
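The shift-amount table added to getTargetVShiftNode above leans on the fact that SSE/AVX variable packed shifts read only the low 64 bits of their count register, so a 32- or 64-bit scalar amount can simply be placed in the lowest element. A minimal standalone sketch of that behaviour with SSE2 intrinsics; this file is illustrative only and not part of the patch.

#include <emmintrin.h>
#include <cstdint>
#include <cstdio>

int main() {
  __m128i v = _mm_set_epi32(8, 4, 2, 1);      // four 32-bit lanes

  // Put the scalar shift amount in the low element of an XMM register.
  // pslld consumes only the low 64 bits of this count operand, so the
  // upper lanes may hold zeros or anything else.
  __m128i amt = _mm_cvtsi32_si128(3);

  __m128i r = _mm_sll_epi32(v, amt);          // every lane <<= 3

  alignas(16) uint32_t out[4];
  _mm_store_si128(reinterpret_cast<__m128i *>(out), r);
  std::printf("%u %u %u %u\n", out[0], out[1], out[2], out[3]); // 8 16 32 64
  return 0;
}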
if (!FloatDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSSE3()) || @@ -26594,8 +26598,8 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, } } - if (matchUnaryVectorShuffle(MaskVT, Mask, Subtarget, Shuffle, ShuffleSrcVT, - ShuffleVT)) { + if (matchUnaryVectorShuffle(MaskVT, Mask, FloatDomain, Subtarget, Shuffle, + ShuffleSrcVT, ShuffleVT)) { if (Depth == 1 && Root.getOpcode() == Shuffle) return false; // Nothing to do! if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements())) @@ -26609,8 +26613,8 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, return true; } - if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, Subtarget, Shuffle, - ShuffleVT, PermuteImm)) { + if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, FloatDomain, Subtarget, + Shuffle, ShuffleVT, PermuteImm)) { if (Depth == 1 && Root.getOpcode() == Shuffle) return false; // Nothing to do! if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements())) @@ -26626,8 +26630,8 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, } } - if (matchBinaryVectorShuffle(MaskVT, Mask, V1, V2, Subtarget, Shuffle, - ShuffleVT, UnaryShuffle)) { + if (matchBinaryVectorShuffle(MaskVT, Mask, FloatDomain, V1, V2, Subtarget, + Shuffle, ShuffleVT, UnaryShuffle)) { if (Depth == 1 && Root.getOpcode() == Shuffle) return false; // Nothing to do! if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements())) @@ -26643,8 +26647,9 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, return true; } - if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, V1, V2, DL, DAG, Subtarget, - Shuffle, ShuffleVT, PermuteImm)) { + if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, FloatDomain, V1, V2, DL, + DAG, Subtarget, Shuffle, ShuffleVT, + PermuteImm)) { if (Depth == 1 && Root.getOpcode() == Shuffle) return false; // Nothing to do! if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements())) @@ -28742,6 +28747,27 @@ static bool combineBitcastForMaskedOp(SDValue OrigOp, SelectionDAG &DAG, DAG.getConstant(Imm, DL, MVT::i8))); return true; } + case ISD::EXTRACT_SUBVECTOR: { + unsigned EltSize = EltVT.getSizeInBits(); + if (EltSize != 32 && EltSize != 64) + return false; + MVT OpEltVT = Op.getSimpleValueType().getVectorElementType(); + // Only change element size, not type. + if (VT.isInteger() != OpEltVT.isInteger()) + return false; + uint64_t Imm = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + Imm = (Imm * OpEltVT.getSizeInBits()) / EltSize; + // Op0 needs to be bitcasted to a larger vector with the same element type. + SDValue Op0 = Op.getOperand(0); + MVT Op0VT = MVT::getVectorVT(EltVT, + Op0.getSimpleValueType().getSizeInBits() / EltSize); + Op0 = DAG.getBitcast(Op0VT, Op0); + DCI.AddToWorklist(Op0.getNode()); + DCI.CombineTo(OrigOp.getNode(), + DAG.getNode(Opcode, DL, VT, Op0, + DAG.getConstant(Imm, DL, MVT::i8))); + return true; + } } return false; @@ -30921,6 +30947,59 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG, return DAG.getNode(X86ISD::PCMPGT, SDLoc(N), VT, Shift.getOperand(0), Ones); } +/// Check if truncation with saturation form type \p SrcVT to \p DstVT +/// is valid for the given \p Subtarget. 
+static bool +isSATValidOnSubtarget(EVT SrcVT, EVT DstVT, const X86Subtarget &Subtarget) { + if (!Subtarget.hasAVX512()) + return false; + EVT SrcElVT = SrcVT.getScalarType(); + EVT DstElVT = DstVT.getScalarType(); + if (SrcElVT.getSizeInBits() < 16 || SrcElVT.getSizeInBits() > 64) + return false; + if (DstElVT.getSizeInBits() < 8 || DstElVT.getSizeInBits() > 32) + return false; + if (SrcVT.is512BitVector() || Subtarget.hasVLX()) + return SrcElVT.getSizeInBits() >= 32 || Subtarget.hasBWI(); + return false; +} + +/// Detect a pattern of truncation with saturation: +/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type). +/// Return the source value to be truncated or SDValue() if the pattern was not +/// matched or the unsupported on the current target. +static SDValue +detectUSatPattern(SDValue In, EVT VT, const X86Subtarget &Subtarget) { + if (In.getOpcode() != ISD::UMIN) + return SDValue(); + + EVT InVT = In.getValueType(); + // FIXME: Scalar type may be supported if we move it to vector register. + if (!InVT.isVector() || !InVT.isSimple()) + return SDValue(); + + if (!isSATValidOnSubtarget(InVT, VT, Subtarget)) + return SDValue(); + + //Saturation with truncation. We truncate from InVT to VT. + assert(InVT.getScalarSizeInBits() > VT.getScalarSizeInBits() && + "Unexpected types for truncate operation"); + + SDValue SrcVal; + APInt C; + if (ISD::isConstantSplatVector(In.getOperand(0).getNode(), C)) + SrcVal = In.getOperand(1); + else if (ISD::isConstantSplatVector(In.getOperand(1).getNode(), C)) + SrcVal = In.getOperand(0); + else + return SDValue(); + + // C should be equal to UINT32_MAX / UINT16_MAX / UINT8_MAX according + // the element size of the destination type. + return (C == ((uint64_t)1 << VT.getScalarSizeInBits()) - 1) ? + SrcVal : SDValue(); +} + /// This function detects the AVG pattern between vectors of unsigned i8/i16, /// which is c = (a + b + 1) / 2, and replace this operation with the efficient /// X86ISD::AVG instruction. @@ -31487,6 +31566,12 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, St->getPointerInfo(), St->getAlignment(), St->getMemOperand()->getFlags()); + if (SDValue Val = + detectUSatPattern(St->getValue(), St->getMemoryVT(), Subtarget)) + return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(), + dl, Val, St->getBasePtr(), + St->getMemoryVT(), St->getMemOperand(), DAG); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned NumElems = VT.getVectorNumElements(); assert(StVT != VT && "Cannot truncate to the same type"); @@ -31967,7 +32052,8 @@ combineVectorTruncationWithPACKUS(SDNode *N, SelectionDAG &DAG, /// Truncate a group of v4i32 into v8i16 using X86ISD::PACKSS. static SDValue -combineVectorTruncationWithPACKSS(SDNode *N, SelectionDAG &DAG, +combineVectorTruncationWithPACKSS(SDNode *N, const X86Subtarget &Subtarget, + SelectionDAG &DAG, SmallVector<SDValue, 8> &Regs) { assert(Regs.size() > 0 && Regs[0].getValueType() == MVT::v4i32); EVT OutVT = N->getValueType(0); @@ -31976,8 +32062,10 @@ combineVectorTruncationWithPACKSS(SDNode *N, SelectionDAG &DAG, // Shift left by 16 bits, then arithmetic-shift right by 16 bits. 
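detectUSatPattern above only fires when the splat operand of the umin equals the unsigned maximum of the destination element type, i.e. C == (1 << DstBits) - 1. A scalar sketch of that check and of the saturating narrow it represents, using illustrative names and a 32-bit to 8-bit example:

#include <algorithm>
#include <cassert>
#include <cstdint>

// The pattern is a valid unsigned-saturating truncation only when the
// umin constant is exactly the unsigned max of the narrower type.
constexpr bool isUSatConstant(uint64_t C, unsigned DstBits) {
  return C == ((uint64_t(1) << DstBits) - 1);
}

// Scalar equivalent of (truncate (umin x, 255)) for an i32 -> i8 narrow.
uint8_t truncUSat(uint32_t X) {
  return static_cast<uint8_t>(std::min<uint32_t>(X, 255u));
}

int main() {
  static_assert(isUSatConstant(255, 8), "i32 -> i8");
  static_assert(isUSatConstant(65535, 16), "i32 -> i16");
  assert(truncUSat(300) == 255 && truncUSat(17) == 17);
  return 0;
}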
SDValue ShAmt = DAG.getConstant(16, DL, MVT::i32); for (auto &Reg : Regs) { - Reg = getTargetVShiftNode(X86ISD::VSHLI, DL, MVT::v4i32, Reg, ShAmt, DAG); - Reg = getTargetVShiftNode(X86ISD::VSRAI, DL, MVT::v4i32, Reg, ShAmt, DAG); + Reg = getTargetVShiftNode(X86ISD::VSHLI, DL, MVT::v4i32, Reg, ShAmt, + Subtarget, DAG); + Reg = getTargetVShiftNode(X86ISD::VSRAI, DL, MVT::v4i32, Reg, ShAmt, + Subtarget, DAG); } for (unsigned i = 0, e = Regs.size() / 2; i < e; i++) @@ -32046,7 +32134,7 @@ static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG, if (Subtarget.hasSSE41() || OutSVT == MVT::i8) return combineVectorTruncationWithPACKUS(N, DAG, SubVec); else if (InSVT == MVT::i32) - return combineVectorTruncationWithPACKSS(N, DAG, SubVec); + return combineVectorTruncationWithPACKSS(N, Subtarget, DAG, SubVec); else return SDValue(); } @@ -32104,6 +32192,10 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG, if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL)) return Avg; + // Try the truncation with unsigned saturation. + if (SDValue Val = detectUSatPattern(Src, VT, Subtarget)) + return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, Val); + // The bitcast source is a direct mmx result. // Detect bitcasts between i32 to x86mmx if (Src.getOpcode() == ISD::BITCAST && VT == MVT::i32) { diff --git a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index d7792e296a58..de4839432b9a 100644 --- a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -80,9 +80,12 @@ unsigned X86TTIImpl::getNumberOfRegisters(bool Vector) { unsigned X86TTIImpl::getRegisterBitWidth(bool Vector) { if (Vector) { - if (ST->hasAVX512()) return 512; - if (ST->hasAVX()) return 256; - if (ST->hasSSE1()) return 128; + if (ST->hasAVX512()) + return 512; + if (ST->hasAVX()) + return 256; + if (ST->hasSSE1()) + return 128; return 0; } @@ -211,11 +214,9 @@ int X86TTIImpl::getArithmeticInstrCost( }; // Look for AVX512DQ lowering tricks for custom cases. - if (ST->hasDQI()) { - if (const auto *Entry = CostTableLookup(AVX512DQCostTable, ISD, - LT.second)) + if (ST->hasDQI()) + if (const auto *Entry = CostTableLookup(AVX512DQCostTable, ISD, LT.second)) return LT.first * Entry->Cost; - } static const CostTblEntry AVX512BWCostTable[] = { { ISD::MUL, MVT::v64i8, 11 }, // extend/pmullw/trunc sequence. @@ -225,37 +226,38 @@ int X86TTIImpl::getArithmeticInstrCost( // Vectorizing division is a bad idea. See the SSE2 table for more comments. { ISD::SDIV, MVT::v64i8, 64*20 }, { ISD::SDIV, MVT::v32i16, 32*20 }, - { ISD::SDIV, MVT::v16i32, 16*20 }, - { ISD::SDIV, MVT::v8i64, 8*20 }, { ISD::UDIV, MVT::v64i8, 64*20 }, - { ISD::UDIV, MVT::v32i16, 32*20 }, - { ISD::UDIV, MVT::v16i32, 16*20 }, - { ISD::UDIV, MVT::v8i64, 8*20 }, + { ISD::UDIV, MVT::v32i16, 32*20 } }; // Look for AVX512BW lowering tricks for custom cases. - if (ST->hasBWI()) { - if (const auto *Entry = CostTableLookup(AVX512BWCostTable, ISD, - LT.second)) + if (ST->hasBWI()) + if (const auto *Entry = CostTableLookup(AVX512BWCostTable, ISD, LT.second)) return LT.first * Entry->Cost; - } static const CostTblEntry AVX512CostTable[] = { - { ISD::SHL, MVT::v16i32, 1 }, - { ISD::SRL, MVT::v16i32, 1 }, - { ISD::SRA, MVT::v16i32, 1 }, - { ISD::SHL, MVT::v8i64, 1 }, - { ISD::SRL, MVT::v8i64, 1 }, - { ISD::SRA, MVT::v8i64, 1 }, - - { ISD::MUL, MVT::v32i8, 13 }, // extend/pmullw/trunc sequence. 
- { ISD::MUL, MVT::v16i8, 5 }, // extend/pmullw/trunc sequence. + { ISD::SHL, MVT::v16i32, 1 }, + { ISD::SRL, MVT::v16i32, 1 }, + { ISD::SRA, MVT::v16i32, 1 }, + { ISD::SHL, MVT::v8i64, 1 }, + { ISD::SRL, MVT::v8i64, 1 }, + { ISD::SRA, MVT::v8i64, 1 }, + + { ISD::MUL, MVT::v32i8, 13 }, // extend/pmullw/trunc sequence. + { ISD::MUL, MVT::v16i8, 5 }, // extend/pmullw/trunc sequence. + { ISD::MUL, MVT::v16i32, 1 }, // pmulld + { ISD::MUL, MVT::v8i64, 8 }, // 3*pmuludq/3*shift/2*add + + // Vectorizing division is a bad idea. See the SSE2 table for more comments. + { ISD::SDIV, MVT::v16i32, 16*20 }, + { ISD::SDIV, MVT::v8i64, 8*20 }, + { ISD::UDIV, MVT::v16i32, 16*20 }, + { ISD::UDIV, MVT::v8i64, 8*20 } }; - if (ST->hasAVX512()) { + if (ST->hasAVX512()) if (const auto *Entry = CostTableLookup(AVX512CostTable, ISD, LT.second)) return LT.first * Entry->Cost; - } static const CostTblEntry AVX2CostTable[] = { // Shifts on v4i64/v8i32 on AVX2 is legal even though we declare to @@ -315,10 +317,9 @@ int X86TTIImpl::getArithmeticInstrCost( }; // Look for XOP lowering tricks. - if (ST->hasXOP()) { + if (ST->hasXOP()) if (const auto *Entry = CostTableLookup(XOPCostTable, ISD, LT.second)) return LT.first * Entry->Cost; - } static const CostTblEntry AVX2CustomCostTable[] = { { ISD::SHL, MVT::v32i8, 11 }, // vpblendvb sequence. @@ -334,6 +335,8 @@ int X86TTIImpl::getArithmeticInstrCost( { ISD::MUL, MVT::v32i8, 17 }, // extend/pmullw/trunc sequence. { ISD::MUL, MVT::v16i8, 7 }, // extend/pmullw/trunc sequence. + { ISD::MUL, MVT::v8i32, 1 }, // pmulld + { ISD::MUL, MVT::v4i64, 8 }, // 3*pmuludq/3*shift/2*add { ISD::FDIV, MVT::f32, 7 }, // Haswell from http://www.agner.org/ { ISD::FDIV, MVT::v4f32, 7 }, // Haswell from http://www.agner.org/ @@ -344,11 +347,10 @@ int X86TTIImpl::getArithmeticInstrCost( }; // Look for AVX2 lowering tricks for custom cases. - if (ST->hasAVX2()) { + if (ST->hasAVX2()) if (const auto *Entry = CostTableLookup(AVX2CustomCostTable, ISD, LT.second)) return LT.first * Entry->Cost; - } static const CostTblEntry AVXCustomCostTable[] = { { ISD::MUL, MVT::v32i8, 26 }, // extend/pmullw/trunc sequence. @@ -372,24 +374,10 @@ int X86TTIImpl::getArithmeticInstrCost( }; // Look for AVX2 lowering tricks for custom cases. 
- if (ST->hasAVX()) { + if (ST->hasAVX()) if (const auto *Entry = CostTableLookup(AVXCustomCostTable, ISD, LT.second)) return LT.first * Entry->Cost; - } - - static const CostTblEntry SSE42FloatCostTable[] = { - { ISD::FDIV, MVT::f32, 14 }, // Nehalem from http://www.agner.org/ - { ISD::FDIV, MVT::v4f32, 14 }, // Nehalem from http://www.agner.org/ - { ISD::FDIV, MVT::f64, 22 }, // Nehalem from http://www.agner.org/ - { ISD::FDIV, MVT::v2f64, 22 }, // Nehalem from http://www.agner.org/ - }; - - if (ST->hasSSE42()) { - if (const auto *Entry = CostTableLookup(SSE42FloatCostTable, ISD, - LT.second)) - return LT.first * Entry->Cost; - } static const CostTblEntry SSE2UniformCostTable[] = { @@ -452,6 +440,17 @@ int X86TTIImpl::getArithmeticInstrCost( ISD = ISD::MUL; } + static const CostTblEntry SSE42CostTable[] = { + { ISD::FDIV, MVT::f32, 14 }, // Nehalem from http://www.agner.org/ + { ISD::FDIV, MVT::v4f32, 14 }, // Nehalem from http://www.agner.org/ + { ISD::FDIV, MVT::f64, 22 }, // Nehalem from http://www.agner.org/ + { ISD::FDIV, MVT::v2f64, 22 }, // Nehalem from http://www.agner.org/ + }; + + if (ST->hasSSE42()) + if (const auto *Entry = CostTableLookup(SSE42CostTable, ISD, LT.second)) + return LT.first * Entry->Cost; + static const CostTblEntry SSE41CostTable[] = { { ISD::SHL, MVT::v16i8, 11 }, // pblendvb sequence. { ISD::SHL, MVT::v32i8, 2*11 }, // pblendvb sequence. @@ -471,44 +470,39 @@ int X86TTIImpl::getArithmeticInstrCost( { ISD::SRA, MVT::v16i16, 2*14 }, // pblendvb sequence. { ISD::SRA, MVT::v4i32, 12 }, // Shift each lane + blend. { ISD::SRA, MVT::v8i32, 2*12 }, // Shift each lane + blend. + + { ISD::MUL, MVT::v4i32, 1 } // pmulld }; - if (ST->hasSSE41()) { + if (ST->hasSSE41()) if (const auto *Entry = CostTableLookup(SSE41CostTable, ISD, LT.second)) return LT.first * Entry->Cost; - } static const CostTblEntry SSE2CostTable[] = { // We don't correctly identify costs of casts because they are marked as // custom. { ISD::SHL, MVT::v16i8, 26 }, // cmpgtb sequence. - { ISD::SHL, MVT::v32i8, 2*26 }, // cmpgtb sequence. { ISD::SHL, MVT::v8i16, 32 }, // cmpgtb sequence. - { ISD::SHL, MVT::v16i16, 2*32 }, // cmpgtb sequence. { ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul. { ISD::SHL, MVT::v8i32, 2*2*5 }, // We optimized this using mul. { ISD::SHL, MVT::v2i64, 4 }, // splat+shuffle sequence. { ISD::SHL, MVT::v4i64, 2*4 }, // splat+shuffle sequence. { ISD::SRL, MVT::v16i8, 26 }, // cmpgtb sequence. - { ISD::SRL, MVT::v32i8, 2*26 }, // cmpgtb sequence. { ISD::SRL, MVT::v8i16, 32 }, // cmpgtb sequence. - { ISD::SRL, MVT::v16i16, 2*32 }, // cmpgtb sequence. { ISD::SRL, MVT::v4i32, 16 }, // Shift each lane + blend. - { ISD::SRL, MVT::v8i32, 2*16 }, // Shift each lane + blend. { ISD::SRL, MVT::v2i64, 4 }, // splat+shuffle sequence. { ISD::SRL, MVT::v4i64, 2*4 }, // splat+shuffle sequence. { ISD::SRA, MVT::v16i8, 54 }, // unpacked cmpgtb sequence. - { ISD::SRA, MVT::v32i8, 2*54 }, // unpacked cmpgtb sequence. { ISD::SRA, MVT::v8i16, 32 }, // cmpgtb sequence. - { ISD::SRA, MVT::v16i16, 2*32 }, // cmpgtb sequence. { ISD::SRA, MVT::v4i32, 16 }, // Shift each lane + blend. - { ISD::SRA, MVT::v8i32, 2*16 }, // Shift each lane + blend. { ISD::SRA, MVT::v2i64, 12 }, // srl/xor/sub sequence. { ISD::SRA, MVT::v4i64, 2*12 }, // srl/xor/sub sequence. { ISD::MUL, MVT::v16i8, 12 }, // extend/pmullw/trunc sequence. 
+ { ISD::MUL, MVT::v4i32, 6 }, // 3*pmuludq/4*shuffle + { ISD::MUL, MVT::v2i64, 8 }, // 3*pmuludq/3*shift/2*add { ISD::FDIV, MVT::f32, 23 }, // Pentium IV from http://www.agner.org/ { ISD::FDIV, MVT::v4f32, 39 }, // Pentium IV from http://www.agner.org/ @@ -531,10 +525,9 @@ int X86TTIImpl::getArithmeticInstrCost( { ISD::UDIV, MVT::v2i64, 2*20 }, }; - if (ST->hasSSE2()) { + if (ST->hasSSE2()) if (const auto *Entry = CostTableLookup(SSE2CostTable, ISD, LT.second)) return LT.first * Entry->Cost; - } static const CostTblEntry AVX1CostTable[] = { // We don't have to scalarize unsupported ops. We can issue two half-sized @@ -553,307 +546,278 @@ int X86TTIImpl::getArithmeticInstrCost( // A v4i64 multiply is custom lowered as two split v2i64 vectors that then // are lowered as a series of long multiplies(3), shifts(3) and adds(2) // Because we believe v4i64 to be a legal type, we must also include the - // split factor of two in the cost table. Therefore, the cost here is 16 + // extract+insert in the cost table. Therefore, the cost here is 18 // instead of 8. - { ISD::MUL, MVT::v4i64, 16 }, + { ISD::MUL, MVT::v4i64, 18 }, }; // Look for AVX1 lowering tricks. - if (ST->hasAVX() && !ST->hasAVX2()) { - MVT VT = LT.second; - - if (const auto *Entry = CostTableLookup(AVX1CostTable, ISD, VT)) + if (ST->hasAVX() && !ST->hasAVX2()) + if (const auto *Entry = CostTableLookup(AVX1CostTable, ISD, LT.second)) return LT.first * Entry->Cost; - } - // Custom lowering of vectors. - static const CostTblEntry CustomLowered[] = { - // A v2i64/v4i64 and multiply is custom lowered as a series of long - // multiplies(3), shifts(3) and adds(2). - { ISD::MUL, MVT::v2i64, 8 }, - { ISD::MUL, MVT::v4i64, 8 }, - { ISD::MUL, MVT::v8i64, 8 } - }; - if (const auto *Entry = CostTableLookup(CustomLowered, ISD, LT.second)) - return LT.first * Entry->Cost; - - // Special lowering of v4i32 mul on sse2, sse3: Lower v4i32 mul as 2x shuffle, - // 2x pmuludq, 2x shuffle. - if (ISD == ISD::MUL && LT.second == MVT::v4i32 && ST->hasSSE2() && - !ST->hasSSE41()) - return LT.first * 6; - - static const CostTblEntry SSE1FloatCostTable[] = { + static const CostTblEntry SSE1CostTable[] = { { ISD::FDIV, MVT::f32, 17 }, // Pentium III from http://www.agner.org/ { ISD::FDIV, MVT::v4f32, 34 }, // Pentium III from http://www.agner.org/ }; if (ST->hasSSE1()) - if (const auto *Entry = CostTableLookup(SSE1FloatCostTable, ISD, - LT.second)) + if (const auto *Entry = CostTableLookup(SSE1CostTable, ISD, LT.second)) return LT.first * Entry->Cost; + // Fallback to the default implementation. return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info); } int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) { - if (Kind == TTI::SK_Reverse || Kind == TTI::SK_Alternate) { - // 64-bit packed float vectors (v2f32) are widened to type v4f32. - // 64-bit packed integer vectors (v2i32) are promoted to type v2i64. - std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp); - - static const CostTblEntry AVX512VBMIShuffleTbl[] = { - { TTI::SK_Reverse, MVT::v64i8, 1 }, // vpermb - { TTI::SK_Reverse, MVT::v32i8, 1 } // vpermb - }; - - if (ST->hasVBMI()) - if (const auto *Entry = - CostTableLookup(AVX512VBMIShuffleTbl, Kind, LT.second)) - return LT.first * Entry->Cost; + // 64-bit packed float vectors (v2f32) are widened to type v4f32. + // 64-bit packed integer vectors (v2i32) are promoted to type v2i64. 
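Several of the new MUL cost entries above are annotated "3*pmuludq/3*shift/2*add"; they price the usual lowering of a per-lane 64-bit multiply out of 32x32->64 multiplies. A compilable SSE2 sketch of that exact sequence (the function name is illustrative, not part of the patch):

#include <emmintrin.h>

// lo64(a*b) = alo*blo + ((ahi*blo + alo*bhi) << 32),
// i.e. 3 pmuludq, 3 shifts and 2 adds per vector, matching the cost entry.
__m128i mul_v2i64(__m128i a, __m128i b) {
  __m128i lo    = _mm_mul_epu32(a, b);                  // mul 1: alo * blo
  __m128i ahi   = _mm_srli_epi64(a, 32);                // shift 1
  __m128i bhi   = _mm_srli_epi64(b, 32);                // shift 2
  __m128i cross = _mm_add_epi64(_mm_mul_epu32(ahi, b),  // mul 2, add 1
                                _mm_mul_epu32(a, bhi)); // mul 3
  return _mm_add_epi64(lo, _mm_slli_epi64(cross, 32));  // shift 3, add 2
}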
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp); + + // For Broadcasts we are splatting the first element from the first input + // register, so only need to reference that input and all the output + // registers are the same. + if (Kind == TTI::SK_Broadcast) + LT.first = 1; + + // We are going to permute multiple sources and the result will be in multiple + // destinations. Providing an accurate cost only for splits where the element + // type remains the same. + if (Kind == TTI::SK_PermuteSingleSrc && LT.first != 1) { + MVT LegalVT = LT.second; + if (LegalVT.getVectorElementType().getSizeInBits() == + Tp->getVectorElementType()->getPrimitiveSizeInBits() && + LegalVT.getVectorNumElements() < Tp->getVectorNumElements()) { + + unsigned VecTySize = DL.getTypeStoreSize(Tp); + unsigned LegalVTSize = LegalVT.getStoreSize(); + // Number of source vectors after legalization: + unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize; + // Number of destination vectors after legalization: + unsigned NumOfDests = LT.first; + + Type *SingleOpTy = VectorType::get(Tp->getVectorElementType(), + LegalVT.getVectorNumElements()); + + unsigned NumOfShuffles = (NumOfSrcs - 1) * NumOfDests; + return NumOfShuffles * + getShuffleCost(TTI::SK_PermuteTwoSrc, SingleOpTy, 0, nullptr); + } - static const CostTblEntry AVX512BWShuffleTbl[] = { - { TTI::SK_Reverse, MVT::v32i16, 1 }, // vpermw - { TTI::SK_Reverse, MVT::v16i16, 1 }, // vpermw - { TTI::SK_Reverse, MVT::v64i8, 6 } // vextracti64x4 + 2*vperm2i128 - // + 2*pshufb + vinserti64x4 - }; + return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); + } - if (ST->hasBWI()) - if (const auto *Entry = - CostTableLookup(AVX512BWShuffleTbl, Kind, LT.second)) - return LT.first * Entry->Cost; + // For 2-input shuffles, we must account for splitting the 2 inputs into many. + if (Kind == TTI::SK_PermuteTwoSrc && LT.first != 1) { + // We assume that source and destination have the same vector type. 
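The SK_PermuteSingleSrc splitting above charges (NumOfSrcs - 1) * NumOfDests two-source shuffles once a wide permute has been legalized into several registers of the same element type. A small worked example of that arithmetic; the concrete sizes below are only an illustration:

#include <cassert>

// Shuffle count for a single-source permute that legalization has split
// across several registers, in units of two-source shuffles.
unsigned splitPermuteCost(unsigned VecTySize,   // bytes in the original vector
                          unsigned LegalVTSize, // bytes per legal register
                          unsigned NumOfDests)  // LT.first after legalization
{
  unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize;
  return (NumOfSrcs - 1) * NumOfDests;
}

int main() {
  // A 64-byte v16i32 permute with a 16-byte legal vector (v4i32):
  // 4 sources and 4 destinations give 3 * 4 = 12 two-source shuffles.
  assert(splitPermuteCost(64, 16, 4) == 12);
  return 0;
}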
+ int NumOfDests = LT.first; + int NumOfShufflesPerDest = LT.first * 2 - 1; + LT.first = NumOfDests * NumOfShufflesPerDest; + } - static const CostTblEntry AVX512ShuffleTbl[] = { - { TTI::SK_Reverse, MVT::v8f64, 1 }, // vpermpd - { TTI::SK_Reverse, MVT::v16f32, 1 }, // vpermps - { TTI::SK_Reverse, MVT::v8i64, 1 }, // vpermq - { TTI::SK_Reverse, MVT::v16i32, 1 }, // vpermd - }; + static const CostTblEntry AVX512VBMIShuffleTbl[] = { + { TTI::SK_Reverse, MVT::v64i8, 1 }, // vpermb + { TTI::SK_Reverse, MVT::v32i8, 1 }, // vpermb - if (ST->hasAVX512()) - if (const auto *Entry = - CostTableLookup(AVX512ShuffleTbl, Kind, LT.second)) - return LT.first * Entry->Cost; + { TTI::SK_PermuteSingleSrc, MVT::v64i8, 1 }, // vpermb + { TTI::SK_PermuteSingleSrc, MVT::v32i8, 1 }, // vpermb - static const CostTblEntry AVX2ShuffleTbl[] = { - { TTI::SK_Reverse, MVT::v4f64, 1 }, // vpermpd - { TTI::SK_Reverse, MVT::v8f32, 1 }, // vpermps - { TTI::SK_Reverse, MVT::v4i64, 1 }, // vpermq - { TTI::SK_Reverse, MVT::v8i32, 1 }, // vpermd - { TTI::SK_Reverse, MVT::v16i16, 2 }, // vperm2i128 + pshufb - { TTI::SK_Reverse, MVT::v32i8, 2 }, // vperm2i128 + pshufb + { TTI::SK_PermuteTwoSrc, MVT::v64i8, 1 }, // vpermt2b + { TTI::SK_PermuteTwoSrc, MVT::v32i8, 1 }, // vpermt2b + { TTI::SK_PermuteTwoSrc, MVT::v16i8, 1 } // vpermt2b + }; - { TTI::SK_Alternate, MVT::v16i16, 1 }, // vpblendw - { TTI::SK_Alternate, MVT::v32i8, 1 } // vpblendvb - }; + if (ST->hasVBMI()) + if (const auto *Entry = + CostTableLookup(AVX512VBMIShuffleTbl, Kind, LT.second)) + return LT.first * Entry->Cost; - if (ST->hasAVX2()) - if (const auto *Entry = CostTableLookup(AVX2ShuffleTbl, Kind, LT.second)) - return LT.first * Entry->Cost; + static const CostTblEntry AVX512BWShuffleTbl[] = { + { TTI::SK_Broadcast, MVT::v32i16, 1 }, // vpbroadcastw + { TTI::SK_Broadcast, MVT::v64i8, 1 }, // vpbroadcastb + + { TTI::SK_Reverse, MVT::v32i16, 1 }, // vpermw + { TTI::SK_Reverse, MVT::v16i16, 1 }, // vpermw + { TTI::SK_Reverse, MVT::v64i8, 6 }, // vextracti64x4 + 2*vperm2i128 + // + 2*pshufb + vinserti64x4 + + { TTI::SK_PermuteSingleSrc, MVT::v32i16, 1 }, // vpermw + { TTI::SK_PermuteSingleSrc, MVT::v16i16, 1 }, // vpermw + { TTI::SK_PermuteSingleSrc, MVT::v8i16, 1 }, // vpermw + { TTI::SK_PermuteSingleSrc, MVT::v64i8, 8 }, // extend to v32i16 + { TTI::SK_PermuteSingleSrc, MVT::v32i8, 3 }, // vpermw + zext/trunc + + { TTI::SK_PermuteTwoSrc, MVT::v32i16, 1 }, // vpermt2w + { TTI::SK_PermuteTwoSrc, MVT::v16i16, 1 }, // vpermt2w + { TTI::SK_PermuteTwoSrc, MVT::v8i16, 1 }, // vpermt2w + { TTI::SK_PermuteTwoSrc, MVT::v32i8, 3 }, // zext + vpermt2w + trunc + { TTI::SK_PermuteTwoSrc, MVT::v64i8, 19 }, // 6 * v32i8 + 1 + { TTI::SK_PermuteTwoSrc, MVT::v16i8, 3 } // zext + vpermt2w + trunc + }; - static const CostTblEntry AVX1ShuffleTbl[] = { - { TTI::SK_Reverse, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd - { TTI::SK_Reverse, MVT::v8f32, 2 }, // vperm2f128 + vpermilps - { TTI::SK_Reverse, MVT::v4i64, 2 }, // vperm2f128 + vpermilpd - { TTI::SK_Reverse, MVT::v8i32, 2 }, // vperm2f128 + vpermilps - { TTI::SK_Reverse, MVT::v16i16, 4 }, // vextractf128 + 2*pshufb - // + vinsertf128 - { TTI::SK_Reverse, MVT::v32i8, 4 }, // vextractf128 + 2*pshufb - // + vinsertf128 - - { TTI::SK_Alternate, MVT::v4i64, 1 }, // vblendpd - { TTI::SK_Alternate, MVT::v4f64, 1 }, // vblendpd - { TTI::SK_Alternate, MVT::v8i32, 1 }, // vblendps - { TTI::SK_Alternate, MVT::v8f32, 1 }, // vblendps - { TTI::SK_Alternate, MVT::v16i16, 3 }, // vpand + vpandn + vpor - { TTI::SK_Alternate, MVT::v32i8, 3 } // vpand 
+ vpandn + vpor - }; + if (ST->hasBWI()) + if (const auto *Entry = + CostTableLookup(AVX512BWShuffleTbl, Kind, LT.second)) + return LT.first * Entry->Cost; - if (ST->hasAVX()) - if (const auto *Entry = CostTableLookup(AVX1ShuffleTbl, Kind, LT.second)) - return LT.first * Entry->Cost; + static const CostTblEntry AVX512ShuffleTbl[] = { + { TTI::SK_Broadcast, MVT::v8f64, 1 }, // vbroadcastpd + { TTI::SK_Broadcast, MVT::v16f32, 1 }, // vbroadcastps + { TTI::SK_Broadcast, MVT::v8i64, 1 }, // vpbroadcastq + { TTI::SK_Broadcast, MVT::v16i32, 1 }, // vpbroadcastd + + { TTI::SK_Reverse, MVT::v8f64, 1 }, // vpermpd + { TTI::SK_Reverse, MVT::v16f32, 1 }, // vpermps + { TTI::SK_Reverse, MVT::v8i64, 1 }, // vpermq + { TTI::SK_Reverse, MVT::v16i32, 1 }, // vpermd + + { TTI::SK_PermuteSingleSrc, MVT::v8f64, 1 }, // vpermpd + { TTI::SK_PermuteSingleSrc, MVT::v4f64, 1 }, // vpermpd + { TTI::SK_PermuteSingleSrc, MVT::v2f64, 1 }, // vpermpd + { TTI::SK_PermuteSingleSrc, MVT::v16f32, 1 }, // vpermps + { TTI::SK_PermuteSingleSrc, MVT::v8f32, 1 }, // vpermps + { TTI::SK_PermuteSingleSrc, MVT::v4f32, 1 }, // vpermps + { TTI::SK_PermuteSingleSrc, MVT::v8i64, 1 }, // vpermq + { TTI::SK_PermuteSingleSrc, MVT::v4i64, 1 }, // vpermq + { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // vpermq + { TTI::SK_PermuteSingleSrc, MVT::v16i32, 1 }, // vpermd + { TTI::SK_PermuteSingleSrc, MVT::v8i32, 1 }, // vpermd + { TTI::SK_PermuteSingleSrc, MVT::v4i32, 1 }, // vpermd + { TTI::SK_PermuteSingleSrc, MVT::v16i8, 1 }, // pshufb + + { TTI::SK_PermuteTwoSrc, MVT::v8f64, 1 }, // vpermt2pd + { TTI::SK_PermuteTwoSrc, MVT::v16f32, 1 }, // vpermt2ps + { TTI::SK_PermuteTwoSrc, MVT::v8i64, 1 }, // vpermt2q + { TTI::SK_PermuteTwoSrc, MVT::v16i32, 1 }, // vpermt2d + { TTI::SK_PermuteTwoSrc, MVT::v4f64, 1 }, // vpermt2pd + { TTI::SK_PermuteTwoSrc, MVT::v8f32, 1 }, // vpermt2ps + { TTI::SK_PermuteTwoSrc, MVT::v4i64, 1 }, // vpermt2q + { TTI::SK_PermuteTwoSrc, MVT::v8i32, 1 }, // vpermt2d + { TTI::SK_PermuteTwoSrc, MVT::v2f64, 1 }, // vpermt2pd + { TTI::SK_PermuteTwoSrc, MVT::v4f32, 1 }, // vpermt2ps + { TTI::SK_PermuteTwoSrc, MVT::v2i64, 1 }, // vpermt2q + { TTI::SK_PermuteTwoSrc, MVT::v4i32, 1 } // vpermt2d + }; - static const CostTblEntry SSE41ShuffleTbl[] = { - { TTI::SK_Alternate, MVT::v2i64, 1 }, // pblendw - { TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd - { TTI::SK_Alternate, MVT::v4i32, 1 }, // pblendw - { TTI::SK_Alternate, MVT::v4f32, 1 }, // blendps - { TTI::SK_Alternate, MVT::v8i16, 1 }, // pblendw - { TTI::SK_Alternate, MVT::v16i8, 1 } // pblendvb - }; - - if (ST->hasSSE41()) - if (const auto *Entry = CostTableLookup(SSE41ShuffleTbl, Kind, LT.second)) - return LT.first * Entry->Cost; + if (ST->hasAVX512()) + if (const auto *Entry = CostTableLookup(AVX512ShuffleTbl, Kind, LT.second)) + return LT.first * Entry->Cost; - static const CostTblEntry SSSE3ShuffleTbl[] = { - { TTI::SK_Reverse, MVT::v8i16, 1 }, // pshufb - { TTI::SK_Reverse, MVT::v16i8, 1 }, // pshufb + static const CostTblEntry AVX2ShuffleTbl[] = { + { TTI::SK_Broadcast, MVT::v4f64, 1 }, // vbroadcastpd + { TTI::SK_Broadcast, MVT::v8f32, 1 }, // vbroadcastps + { TTI::SK_Broadcast, MVT::v4i64, 1 }, // vpbroadcastq + { TTI::SK_Broadcast, MVT::v8i32, 1 }, // vpbroadcastd + { TTI::SK_Broadcast, MVT::v16i16, 1 }, // vpbroadcastw + { TTI::SK_Broadcast, MVT::v32i8, 1 }, // vpbroadcastb + + { TTI::SK_Reverse, MVT::v4f64, 1 }, // vpermpd + { TTI::SK_Reverse, MVT::v8f32, 1 }, // vpermps + { TTI::SK_Reverse, MVT::v4i64, 1 }, // vpermq + { TTI::SK_Reverse, MVT::v8i32, 1 }, // vpermd + 
{ TTI::SK_Reverse, MVT::v16i16, 2 }, // vperm2i128 + pshufb + { TTI::SK_Reverse, MVT::v32i8, 2 }, // vperm2i128 + pshufb + + { TTI::SK_Alternate, MVT::v16i16, 1 }, // vpblendw + { TTI::SK_Alternate, MVT::v32i8, 1 } // vpblendvb + }; - { TTI::SK_Alternate, MVT::v8i16, 3 }, // pshufb + pshufb + por - { TTI::SK_Alternate, MVT::v16i8, 3 } // pshufb + pshufb + por - }; + if (ST->hasAVX2()) + if (const auto *Entry = CostTableLookup(AVX2ShuffleTbl, Kind, LT.second)) + return LT.first * Entry->Cost; - if (ST->hasSSSE3()) - if (const auto *Entry = CostTableLookup(SSSE3ShuffleTbl, Kind, LT.second)) - return LT.first * Entry->Cost; + static const CostTblEntry AVX1ShuffleTbl[] = { + { TTI::SK_Broadcast, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd + { TTI::SK_Broadcast, MVT::v8f32, 2 }, // vperm2f128 + vpermilps + { TTI::SK_Broadcast, MVT::v4i64, 2 }, // vperm2f128 + vpermilpd + { TTI::SK_Broadcast, MVT::v8i32, 2 }, // vperm2f128 + vpermilps + { TTI::SK_Broadcast, MVT::v16i16, 3 }, // vpshuflw + vpshufd + vinsertf128 + { TTI::SK_Broadcast, MVT::v32i8, 2 }, // vpshufb + vinsertf128 + + { TTI::SK_Reverse, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd + { TTI::SK_Reverse, MVT::v8f32, 2 }, // vperm2f128 + vpermilps + { TTI::SK_Reverse, MVT::v4i64, 2 }, // vperm2f128 + vpermilpd + { TTI::SK_Reverse, MVT::v8i32, 2 }, // vperm2f128 + vpermilps + { TTI::SK_Reverse, MVT::v16i16, 4 }, // vextractf128 + 2*pshufb + // + vinsertf128 + { TTI::SK_Reverse, MVT::v32i8, 4 }, // vextractf128 + 2*pshufb + // + vinsertf128 + + { TTI::SK_Alternate, MVT::v4i64, 1 }, // vblendpd + { TTI::SK_Alternate, MVT::v4f64, 1 }, // vblendpd + { TTI::SK_Alternate, MVT::v8i32, 1 }, // vblendps + { TTI::SK_Alternate, MVT::v8f32, 1 }, // vblendps + { TTI::SK_Alternate, MVT::v16i16, 3 }, // vpand + vpandn + vpor + { TTI::SK_Alternate, MVT::v32i8, 3 } // vpand + vpandn + vpor + }; - static const CostTblEntry SSE2ShuffleTbl[] = { - { TTI::SK_Reverse, MVT::v2f64, 1 }, // shufpd - { TTI::SK_Reverse, MVT::v2i64, 1 }, // pshufd - { TTI::SK_Reverse, MVT::v4i32, 1 }, // pshufd - { TTI::SK_Reverse, MVT::v8i16, 3 }, // pshuflw + pshufhw + pshufd - { TTI::SK_Reverse, MVT::v16i8, 9 }, // 2*pshuflw + 2*pshufhw - // + 2*pshufd + 2*unpck + packus - - { TTI::SK_Alternate, MVT::v2i64, 1 }, // movsd - { TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd - { TTI::SK_Alternate, MVT::v4i32, 2 }, // 2*shufps - { TTI::SK_Alternate, MVT::v8i16, 3 }, // pand + pandn + por - { TTI::SK_Alternate, MVT::v16i8, 3 } // pand + pandn + por - }; - - if (ST->hasSSE2()) - if (const auto *Entry = CostTableLookup(SSE2ShuffleTbl, Kind, LT.second)) - return LT.first * Entry->Cost; + if (ST->hasAVX()) + if (const auto *Entry = CostTableLookup(AVX1ShuffleTbl, Kind, LT.second)) + return LT.first * Entry->Cost; - static const CostTblEntry SSE1ShuffleTbl[] = { - { TTI::SK_Reverse, MVT::v4f32, 1 }, // shufps - { TTI::SK_Alternate, MVT::v4f32, 2 } // 2*shufps - }; + static const CostTblEntry SSE41ShuffleTbl[] = { + { TTI::SK_Alternate, MVT::v2i64, 1 }, // pblendw + { TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd + { TTI::SK_Alternate, MVT::v4i32, 1 }, // pblendw + { TTI::SK_Alternate, MVT::v4f32, 1 }, // blendps + { TTI::SK_Alternate, MVT::v8i16, 1 }, // pblendw + { TTI::SK_Alternate, MVT::v16i8, 1 } // pblendvb + }; - if (ST->hasSSE1()) - if (const auto *Entry = CostTableLookup(SSE1ShuffleTbl, Kind, LT.second)) - return LT.first * Entry->Cost; + if (ST->hasSSE41()) + if (const auto *Entry = CostTableLookup(SSE41ShuffleTbl, Kind, LT.second)) + return LT.first * Entry->Cost; - } else if (Kind == 
TTI::SK_PermuteTwoSrc) { - // We assume that source and destination have the same vector type. - std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp); - int NumOfDests = LT.first; - int NumOfShufflesPerDest = LT.first * 2 - 1; - int NumOfShuffles = NumOfDests * NumOfShufflesPerDest; - - static const CostTblEntry AVX512VBMIShuffleTbl[] = { - {ISD::VECTOR_SHUFFLE, MVT::v64i8, 1}, // vpermt2b - {ISD::VECTOR_SHUFFLE, MVT::v32i8, 1}, // vpermt2b - {ISD::VECTOR_SHUFFLE, MVT::v16i8, 1} // vpermt2b - }; - - if (ST->hasVBMI()) - if (const auto *Entry = CostTableLookup(AVX512VBMIShuffleTbl, - ISD::VECTOR_SHUFFLE, LT.second)) - return NumOfShuffles * Entry->Cost; - - static const CostTblEntry AVX512BWShuffleTbl[] = { - {ISD::VECTOR_SHUFFLE, MVT::v32i16, 1}, // vpermt2w - {ISD::VECTOR_SHUFFLE, MVT::v16i16, 1}, // vpermt2w - {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1}, // vpermt2w - {ISD::VECTOR_SHUFFLE, MVT::v32i8, 3}, // zext + vpermt2w + trunc - {ISD::VECTOR_SHUFFLE, MVT::v64i8, 19}, // 6 * v32i8 + 1 - {ISD::VECTOR_SHUFFLE, MVT::v16i8, 3} // zext + vpermt2w + trunc - }; - - if (ST->hasBWI()) - if (const auto *Entry = CostTableLookup(AVX512BWShuffleTbl, - ISD::VECTOR_SHUFFLE, LT.second)) - return NumOfShuffles * Entry->Cost; - - static const CostTblEntry AVX512ShuffleTbl[] = { - {ISD::VECTOR_SHUFFLE, MVT::v8f64, 1}, // vpermt2pd - {ISD::VECTOR_SHUFFLE, MVT::v16f32, 1}, // vpermt2ps - {ISD::VECTOR_SHUFFLE, MVT::v8i64, 1}, // vpermt2q - {ISD::VECTOR_SHUFFLE, MVT::v16i32, 1}, // vpermt2d - {ISD::VECTOR_SHUFFLE, MVT::v4f64, 1}, // vpermt2pd - {ISD::VECTOR_SHUFFLE, MVT::v8f32, 1}, // vpermt2ps - {ISD::VECTOR_SHUFFLE, MVT::v4i64, 1}, // vpermt2q - {ISD::VECTOR_SHUFFLE, MVT::v8i32, 1}, // vpermt2d - {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // vpermt2pd - {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1}, // vpermt2ps - {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // vpermt2q - {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1} // vpermt2d - }; + static const CostTblEntry SSSE3ShuffleTbl[] = { + { TTI::SK_Broadcast, MVT::v8i16, 1 }, // pshufb + { TTI::SK_Broadcast, MVT::v16i8, 1 }, // pshufb - if (ST->hasAVX512()) - if (const auto *Entry = - CostTableLookup(AVX512ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second)) - return NumOfShuffles * Entry->Cost; - - } else if (Kind == TTI::SK_PermuteSingleSrc) { - std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp); - if (LT.first == 1) { - - static const CostTblEntry AVX512VBMIShuffleTbl[] = { - {ISD::VECTOR_SHUFFLE, MVT::v64i8, 1}, // vpermb - {ISD::VECTOR_SHUFFLE, MVT::v32i8, 1} // vpermb - }; - - if (ST->hasVBMI()) - if (const auto *Entry = CostTableLookup(AVX512VBMIShuffleTbl, - ISD::VECTOR_SHUFFLE, LT.second)) - return Entry->Cost; - - static const CostTblEntry AVX512BWShuffleTbl[] = { - {ISD::VECTOR_SHUFFLE, MVT::v32i16, 1}, // vpermw - {ISD::VECTOR_SHUFFLE, MVT::v16i16, 1}, // vpermw - {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1}, // vpermw - {ISD::VECTOR_SHUFFLE, MVT::v64i8, 8}, // extend to v32i16 - {ISD::VECTOR_SHUFFLE, MVT::v32i8, 3} // vpermw + zext/trunc - }; - - if (ST->hasBWI()) - if (const auto *Entry = CostTableLookup(AVX512BWShuffleTbl, - ISD::VECTOR_SHUFFLE, LT.second)) - return Entry->Cost; - - static const CostTblEntry AVX512ShuffleTbl[] = { - {ISD::VECTOR_SHUFFLE, MVT::v8f64, 1}, // vpermpd - {ISD::VECTOR_SHUFFLE, MVT::v4f64, 1}, // vpermpd - {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // vpermpd - {ISD::VECTOR_SHUFFLE, MVT::v16f32, 1}, // vpermps - {ISD::VECTOR_SHUFFLE, MVT::v8f32, 1}, // vpermps - {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1}, // vpermps - {ISD::VECTOR_SHUFFLE, MVT::v8i64, 1}, // 
vpermq - {ISD::VECTOR_SHUFFLE, MVT::v4i64, 1}, // vpermq - {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // vpermq - {ISD::VECTOR_SHUFFLE, MVT::v16i32, 1}, // vpermd - {ISD::VECTOR_SHUFFLE, MVT::v8i32, 1}, // vpermd - {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1}, // vpermd - {ISD::VECTOR_SHUFFLE, MVT::v16i8, 1} // pshufb - }; - - if (ST->hasAVX512()) - if (const auto *Entry = - CostTableLookup(AVX512ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second)) - return Entry->Cost; - - } else { - // We are going to permute multiple sources and the result will be in - // multiple destinations. Providing an accurate cost only for splits where - // the element type remains the same. - - MVT LegalVT = LT.second; - if (LegalVT.getVectorElementType().getSizeInBits() == - Tp->getVectorElementType()->getPrimitiveSizeInBits() && - LegalVT.getVectorNumElements() < Tp->getVectorNumElements()) { - - unsigned VecTySize = DL.getTypeStoreSize(Tp); - unsigned LegalVTSize = LegalVT.getStoreSize(); - // Number of source vectors after legalization: - unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize; - // Number of destination vectors after legalization: - unsigned NumOfDests = LT.first; - - Type *SingleOpTy = VectorType::get(Tp->getVectorElementType(), - LegalVT.getVectorNumElements()); - - unsigned NumOfShuffles = (NumOfSrcs - 1) * NumOfDests; - return NumOfShuffles * - getShuffleCost(TTI::SK_PermuteTwoSrc, SingleOpTy, 0, nullptr); - } - } - } + { TTI::SK_Reverse, MVT::v8i16, 1 }, // pshufb + { TTI::SK_Reverse, MVT::v16i8, 1 }, // pshufb + + { TTI::SK_Alternate, MVT::v8i16, 3 }, // pshufb + pshufb + por + { TTI::SK_Alternate, MVT::v16i8, 3 } // pshufb + pshufb + por + }; + + if (ST->hasSSSE3()) + if (const auto *Entry = CostTableLookup(SSSE3ShuffleTbl, Kind, LT.second)) + return LT.first * Entry->Cost; + + static const CostTblEntry SSE2ShuffleTbl[] = { + { TTI::SK_Broadcast, MVT::v2f64, 1 }, // shufpd + { TTI::SK_Broadcast, MVT::v2i64, 1 }, // pshufd + { TTI::SK_Broadcast, MVT::v4i32, 1 }, // pshufd + { TTI::SK_Broadcast, MVT::v8i16, 2 }, // pshuflw + pshufd + { TTI::SK_Broadcast, MVT::v16i8, 3 }, // unpck + pshuflw + pshufd + + { TTI::SK_Reverse, MVT::v2f64, 1 }, // shufpd + { TTI::SK_Reverse, MVT::v2i64, 1 }, // pshufd + { TTI::SK_Reverse, MVT::v4i32, 1 }, // pshufd + { TTI::SK_Reverse, MVT::v8i16, 3 }, // pshuflw + pshufhw + pshufd + { TTI::SK_Reverse, MVT::v16i8, 9 }, // 2*pshuflw + 2*pshufhw + // + 2*pshufd + 2*unpck + packus + + { TTI::SK_Alternate, MVT::v2i64, 1 }, // movsd + { TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd + { TTI::SK_Alternate, MVT::v4i32, 2 }, // 2*shufps + { TTI::SK_Alternate, MVT::v8i16, 3 }, // pand + pandn + por + { TTI::SK_Alternate, MVT::v16i8, 3 } // pand + pandn + por + }; + + if (ST->hasSSE2()) + if (const auto *Entry = CostTableLookup(SSE2ShuffleTbl, Kind, LT.second)) + return LT.first * Entry->Cost; + + static const CostTblEntry SSE1ShuffleTbl[] = { + { TTI::SK_Broadcast, MVT::v4f32, 1 }, // shufps + { TTI::SK_Reverse, MVT::v4f32, 1 }, // shufps + { TTI::SK_Alternate, MVT::v4f32, 2 } // 2*shufps + }; + + if (ST->hasSSE1()) + if (const auto *Entry = CostTableLookup(SSE1ShuffleTbl, Kind, LT.second)) + return LT.first * Entry->Cost; return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); } @@ -1623,17 +1587,29 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy, return Cost+LT.first; } -int X86TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) { +int X86TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE, + const SCEV *Ptr) { // Address computations in 
vectorized code with non-consecutive addresses will // likely result in more instructions compared to scalar code where the // computation can more often be merged into the index mode. The resulting // extra micro-ops can significantly decrease throughput. unsigned NumVectorInstToHideOverhead = 10; - if (Ty->isVectorTy() && IsComplex) - return NumVectorInstToHideOverhead; + // Cost modeling of Strided Access Computation is hidden by the indexing + // modes of X86 regardless of the stride value. We dont believe that there + // is a difference between constant strided access in gerenal and constant + // strided value which is less than or equal to 64. + // Even in the case of (loop invariant) stride whose value is not known at + // compile time, the address computation will not incur more than one extra + // ADD instruction. + if (Ty->isVectorTy() && SE) { + if (!BaseT::isStridedAccess(Ptr)) + return NumVectorInstToHideOverhead; + if (!BaseT::getConstantStrideStep(SE, Ptr)) + return 1; + } - return BaseT::getAddressComputationCost(Ty, IsComplex); + return BaseT::getAddressComputationCost(Ty, SE, Ptr); } int X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy, diff --git a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h index f6bcb9f569e4..c013805f4321 100644 --- a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -71,7 +71,8 @@ public: unsigned AddressSpace); int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask, unsigned Alignment); - int getAddressComputationCost(Type *PtrTy, bool IsComplex); + int getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, + const SCEV *Ptr); int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF); |
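The reworked getAddressComputationCost above uses ScalarEvolution to tell scattered vector addresses, which are expensive, apart from strided ones, whose stride is largely absorbed by x86 addressing modes. A condensed paraphrase of that decision, with an explicit stand-in for whatever the generic base implementation would return:

// Illustrative summary only; BaseCost stands in for the default
// BaseT::getAddressComputationCost result.
unsigned addrCompCost(bool IsVectorTy, bool IsStrided,
                      bool HasConstantStride, unsigned BaseCost) {
  const unsigned NumVectorInstToHideOverhead = 10;
  if (IsVectorTy) {
    if (!IsStrided)
      return NumVectorInstToHideOverhead; // scattered addresses: many extra micro-ops
    if (!HasConstantStride)
      return 1;                           // loop-invariant stride: at most one extra ADD
  }
  return BaseCost;                        // constant stride or scalar: generic cost
}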