aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm/lib/Target
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2017-01-06 20:24:06 +0000
committerDimitry Andric <dim@FreeBSD.org>2017-01-06 20:24:06 +0000
commit95ec533a1d8c450f6c6c5e84fe85423960e13382 (patch)
treebfe77b0dccd50ed2f4b4e6299d4bc4eaafced6e7 /contrib/llvm/lib/Target
parent2b532af82919b9141e7fd04becf354a0a7dfa813 (diff)
parent7e7b6700743285c0af506ac6299ddf82ebd434b9 (diff)
Merge llvm, clang, lld and lldb trunk r291274, and resolve conflicts.
Notes
Notes: svn path=/projects/clang400-import/; revision=311544
Diffstat (limited to 'contrib/llvm/lib/Target')
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp1123
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp110
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp190
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h4
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp10
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.h8
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h48
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp1
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp38
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp7
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h2
-rw-r--r--contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp89
-rw-r--r--contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.h9
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp13
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp27
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp3
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp10
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp7
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h3
-rw-r--r--contrib/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp48
-rw-r--r--contrib/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.h7
-rw-r--r--contrib/llvm/lib/Target/Lanai/InstPrinter/LanaiInstPrinter.h13
-rw-r--r--contrib/llvm/lib/Target/Lanai/LanaiISelLowering.cpp42
-rw-r--r--contrib/llvm/lib/Target/Lanai/LanaiRegisterInfo.h9
-rw-r--r--contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp12
-rw-r--r--contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp29
-rw-r--r--contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp15
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td3
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td6
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td8
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelLowering.cpp172
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp630
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h3
35 files changed, 1261 insertions, 1442 deletions
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp b/contrib/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
index 7666011f75b6..17aafa0c3d6e 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
@@ -110,72 +110,34 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
#define DEBUG_TYPE "aarch64-collect-loh"
-static cl::opt<bool>
-PreCollectRegister("aarch64-collect-loh-pre-collect-register", cl::Hidden,
- cl::desc("Restrict analysis to registers invovled"
- " in LOHs"),
- cl::init(true));
-
-static cl::opt<bool>
-BasicBlockScopeOnly("aarch64-collect-loh-bb-only", cl::Hidden,
- cl::desc("Restrict analysis at basic block scope"),
- cl::init(true));
-
STATISTIC(NumADRPSimpleCandidate,
"Number of simplifiable ADRP dominate by another");
-#ifndef NDEBUG
-STATISTIC(NumADRPComplexCandidate2,
- "Number of simplifiable ADRP reachable by 2 defs");
-STATISTIC(NumADRPComplexCandidate3,
- "Number of simplifiable ADRP reachable by 3 defs");
-STATISTIC(NumADRPComplexCandidateOther,
- "Number of simplifiable ADRP reachable by 4 or more defs");
-STATISTIC(NumADDToSTRWithImm,
- "Number of simplifiable STR with imm reachable by ADD");
-STATISTIC(NumLDRToSTRWithImm,
- "Number of simplifiable STR with imm reachable by LDR");
STATISTIC(NumADDToSTR, "Number of simplifiable STR reachable by ADD");
STATISTIC(NumLDRToSTR, "Number of simplifiable STR reachable by LDR");
-STATISTIC(NumADDToLDRWithImm,
- "Number of simplifiable LDR with imm reachable by ADD");
-STATISTIC(NumLDRToLDRWithImm,
- "Number of simplifiable LDR with imm reachable by LDR");
STATISTIC(NumADDToLDR, "Number of simplifiable LDR reachable by ADD");
STATISTIC(NumLDRToLDR, "Number of simplifiable LDR reachable by LDR");
-#endif // NDEBUG
STATISTIC(NumADRPToLDR, "Number of simplifiable LDR reachable by ADRP");
-#ifndef NDEBUG
-STATISTIC(NumCplxLvl1, "Number of complex case of level 1");
-STATISTIC(NumTooCplxLvl1, "Number of too complex case of level 1");
-STATISTIC(NumCplxLvl2, "Number of complex case of level 2");
-STATISTIC(NumTooCplxLvl2, "Number of too complex case of level 2");
-#endif // NDEBUG
STATISTIC(NumADRSimpleCandidate, "Number of simplifiable ADRP + ADD");
-STATISTIC(NumADRComplexCandidate, "Number of too complex ADRP + ADD");
#define AARCH64_COLLECT_LOH_NAME "AArch64 Collect Linker Optimization Hint (LOH)"
namespace {
+
struct AArch64CollectLOH : public MachineFunctionPass {
static char ID;
- AArch64CollectLOH() : MachineFunctionPass(ID) {
- initializeAArch64CollectLOHPass(*PassRegistry::getPassRegistry());
- }
+ AArch64CollectLOH() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -187,351 +149,57 @@ struct AArch64CollectLOH : public MachineFunctionPass {
StringRef getPassName() const override { return AARCH64_COLLECT_LOH_NAME; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
- AU.addRequired<MachineDominatorTree>();
+ AU.setPreservesAll();
}
-
-private:
};
-/// A set of MachineInstruction.
-typedef SetVector<const MachineInstr *> SetOfMachineInstr;
-/// Map a basic block to a set of instructions per register.
-/// This is used to represent the exposed uses of a basic block
-/// per register.
-typedef MapVector<const MachineBasicBlock *,
- std::unique_ptr<SetOfMachineInstr[]>>
-BlockToSetOfInstrsPerColor;
-/// Map a basic block to an instruction per register.
-/// This is used to represent the live-out definitions of a basic block
-/// per register.
-typedef MapVector<const MachineBasicBlock *,
- std::unique_ptr<const MachineInstr *[]>>
-BlockToInstrPerColor;
-/// Map an instruction to a set of instructions. Used to represent the
-/// mapping def to reachable uses or use to definitions.
-typedef MapVector<const MachineInstr *, SetOfMachineInstr> InstrToInstrs;
-/// Map a basic block to a BitVector.
-/// This is used to record the kill registers per basic block.
-typedef MapVector<const MachineBasicBlock *, BitVector> BlockToRegSet;
-
-/// Map a register to a dense id.
-typedef DenseMap<unsigned, unsigned> MapRegToId;
-/// Map a dense id to a register. Used for debug purposes.
-typedef SmallVector<unsigned, 32> MapIdToReg;
-} // end anonymous namespace.
-
char AArch64CollectLOH::ID = 0;
-INITIALIZE_PASS_BEGIN(AArch64CollectLOH, "aarch64-collect-loh",
- AARCH64_COLLECT_LOH_NAME, false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_END(AArch64CollectLOH, "aarch64-collect-loh",
- AARCH64_COLLECT_LOH_NAME, false, false)
-
-/// Given a couple (MBB, reg) get the corresponding set of instruction from
-/// the given "sets".
-/// If this couple does not reference any set, an empty set is added to "sets"
-/// for this couple and returned.
-/// \param nbRegs is used internally allocate some memory. It must be consistent
-/// with the way sets is used.
-static SetOfMachineInstr &getSet(BlockToSetOfInstrsPerColor &sets,
- const MachineBasicBlock &MBB, unsigned reg,
- unsigned nbRegs) {
- SetOfMachineInstr *result;
- BlockToSetOfInstrsPerColor::iterator it = sets.find(&MBB);
- if (it != sets.end())
- result = it->second.get();
- else
- result = (sets[&MBB] = make_unique<SetOfMachineInstr[]>(nbRegs)).get();
-
- return result[reg];
-}
-
-/// Given a couple (reg, MI) get the corresponding set of instructions from the
-/// the given "sets".
-/// This is used to get the uses record in sets of a definition identified by
-/// MI and reg, i.e., MI defines reg.
-/// If the couple does not reference anything, an empty set is added to
-/// "sets[reg]".
-/// \pre set[reg] is valid.
-static SetOfMachineInstr &getUses(InstrToInstrs *sets, unsigned reg,
- const MachineInstr &MI) {
- return sets[reg][&MI];
-}
-
-/// Same as getUses but does not modify the input map: sets.
-/// \return NULL if the couple (reg, MI) is not in sets.
-static const SetOfMachineInstr *getUses(const InstrToInstrs *sets, unsigned reg,
- const MachineInstr &MI) {
- InstrToInstrs::const_iterator Res = sets[reg].find(&MI);
- if (Res != sets[reg].end())
- return &(Res->second);
- return nullptr;
-}
-
-/// Initialize the reaching definition algorithm:
-/// For each basic block BB in MF, record:
-/// - its kill set.
-/// - its reachable uses (uses that are exposed to BB's predecessors).
-/// - its the generated definitions.
-/// \param DummyOp if not NULL, specifies a Dummy Operation to be added to
-/// the list of uses of exposed defintions.
-/// \param ADRPMode specifies to only consider ADRP instructions for generated
-/// definition. It also consider definitions of ADRP instructions as uses and
-/// ignore other uses. The ADRPMode is used to collect the information for LHO
-/// that involve ADRP operation only.
-static void initReachingDef(const MachineFunction &MF,
- InstrToInstrs *ColorOpToReachedUses,
- BlockToInstrPerColor &Gen, BlockToRegSet &Kill,
- BlockToSetOfInstrsPerColor &ReachableUses,
- const MapRegToId &RegToId,
- const MachineInstr *DummyOp, bool ADRPMode) {
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- unsigned NbReg = RegToId.size();
-
- for (const MachineBasicBlock &MBB : MF) {
- auto &BBGen = Gen[&MBB];
- BBGen = make_unique<const MachineInstr *[]>(NbReg);
- std::fill(BBGen.get(), BBGen.get() + NbReg, nullptr);
-
- BitVector &BBKillSet = Kill[&MBB];
- BBKillSet.resize(NbReg);
- for (const MachineInstr &MI : MBB) {
- bool IsADRP = MI.getOpcode() == AArch64::ADRP;
-
- // Process uses first.
- if (IsADRP || !ADRPMode)
- for (const MachineOperand &MO : MI.operands()) {
- // Treat ADRP def as use, as the goal of the analysis is to find
- // ADRP defs reached by other ADRP defs.
- if (!MO.isReg() || (!ADRPMode && !MO.isUse()) ||
- (ADRPMode && (!IsADRP || !MO.isDef())))
- continue;
- unsigned CurReg = MO.getReg();
- MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg);
- if (ItCurRegId == RegToId.end())
- continue;
- CurReg = ItCurRegId->second;
-
- // if CurReg has not been defined, this use is reachable.
- if (!BBGen[CurReg] && !BBKillSet.test(CurReg))
- getSet(ReachableUses, MBB, CurReg, NbReg).insert(&MI);
- // current basic block definition for this color, if any, is in Gen.
- if (BBGen[CurReg])
- getUses(ColorOpToReachedUses, CurReg, *BBGen[CurReg]).insert(&MI);
- }
-
- // Process clobbers.
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isRegMask())
- continue;
- // Clobbers kill the related colors.
- const uint32_t *PreservedRegs = MO.getRegMask();
-
- // Set generated regs.
- for (const auto &Entry : RegToId) {
- unsigned Reg = Entry.second;
- // Use the global register ID when querying APIs external to this
- // pass.
- if (MachineOperand::clobbersPhysReg(PreservedRegs, Entry.first)) {
- // Do not register clobbered definition for no ADRP.
- // This definition is not used anyway (otherwise register
- // allocation is wrong).
- BBGen[Reg] = ADRPMode ? &MI : nullptr;
- BBKillSet.set(Reg);
- }
- }
- }
-
- // Process register defs.
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isReg() || !MO.isDef())
- continue;
- unsigned CurReg = MO.getReg();
- MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg);
- if (ItCurRegId == RegToId.end())
- continue;
-
- for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI) {
- MapRegToId::const_iterator ItRegId = RegToId.find(*AI);
- // If this alias has not been recorded, then it is not interesting
- // for the current analysis.
- // We can end up in this situation because of tuple registers.
- // E.g., Let say we are interested in S1. When we register
- // S1, we will also register its aliases and in particular
- // the tuple Q1_Q2.
- // Now, when we encounter Q1_Q2, we will look through its aliases
- // and will find that S2 is not registered.
- if (ItRegId == RegToId.end())
- continue;
-
- BBKillSet.set(ItRegId->second);
- BBGen[ItRegId->second] = &MI;
- }
- BBGen[ItCurRegId->second] = &MI;
- }
- }
-
- // If we restrict our analysis to basic block scope, conservatively add a
- // dummy
- // use for each generated value.
- if (!ADRPMode && DummyOp && !MBB.succ_empty())
- for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg)
- if (BBGen[CurReg])
- getUses(ColorOpToReachedUses, CurReg, *BBGen[CurReg]).insert(DummyOp);
- }
-}
-
-/// Reaching def core algorithm:
-/// while an Out has changed
-/// for each bb
-/// for each color
-/// In[bb][color] = U Out[bb.predecessors][color]
-/// insert reachableUses[bb][color] in each in[bb][color]
-/// op.reachedUses
-///
-/// Out[bb] = Gen[bb] U (In[bb] - Kill[bb])
-static void reachingDefAlgorithm(const MachineFunction &MF,
- InstrToInstrs *ColorOpToReachedUses,
- BlockToSetOfInstrsPerColor &In,
- BlockToSetOfInstrsPerColor &Out,
- BlockToInstrPerColor &Gen, BlockToRegSet &Kill,
- BlockToSetOfInstrsPerColor &ReachableUses,
- unsigned NbReg) {
- bool HasChanged;
- do {
- HasChanged = false;
- for (const MachineBasicBlock &MBB : MF) {
- unsigned CurReg;
- for (CurReg = 0; CurReg < NbReg; ++CurReg) {
- SetOfMachineInstr &BBInSet = getSet(In, MBB, CurReg, NbReg);
- SetOfMachineInstr &BBReachableUses =
- getSet(ReachableUses, MBB, CurReg, NbReg);
- SetOfMachineInstr &BBOutSet = getSet(Out, MBB, CurReg, NbReg);
- unsigned Size = BBOutSet.size();
- // In[bb][color] = U Out[bb.predecessors][color]
- for (const MachineBasicBlock *PredMBB : MBB.predecessors()) {
- SetOfMachineInstr &PredOutSet = getSet(Out, *PredMBB, CurReg, NbReg);
- BBInSet.insert(PredOutSet.begin(), PredOutSet.end());
- }
- // insert reachableUses[bb][color] in each in[bb][color] op.reachedses
- for (const MachineInstr *MI : BBInSet) {
- SetOfMachineInstr &OpReachedUses =
- getUses(ColorOpToReachedUses, CurReg, *MI);
- OpReachedUses.insert(BBReachableUses.begin(), BBReachableUses.end());
- }
- // Out[bb] = Gen[bb] U (In[bb] - Kill[bb])
- if (!Kill[&MBB].test(CurReg))
- BBOutSet.insert(BBInSet.begin(), BBInSet.end());
- if (Gen[&MBB][CurReg])
- BBOutSet.insert(Gen[&MBB][CurReg]);
- HasChanged |= BBOutSet.size() != Size;
- }
- }
- } while (HasChanged);
-}
-
-/// Reaching definition algorithm.
-/// \param MF function on which the algorithm will operate.
-/// \param[out] ColorOpToReachedUses will contain the result of the reaching
-/// def algorithm.
-/// \param ADRPMode specify whether the reaching def algorithm should be tuned
-/// for ADRP optimization. \see initReachingDef for more details.
-/// \param DummyOp if not NULL, the algorithm will work at
-/// basic block scope and will set for every exposed definition a use to
-/// @p DummyOp.
-/// \pre ColorOpToReachedUses is an array of at least number of registers of
-/// InstrToInstrs.
-static void reachingDef(const MachineFunction &MF,
- InstrToInstrs *ColorOpToReachedUses,
- const MapRegToId &RegToId, bool ADRPMode = false,
- const MachineInstr *DummyOp = nullptr) {
- // structures:
- // For each basic block.
- // Out: a set per color of definitions that reach the
- // out boundary of this block.
- // In: Same as Out but for in boundary.
- // Gen: generated color in this block (one operation per color).
- // Kill: register set of killed color in this block.
- // ReachableUses: a set per color of uses (operation) reachable
- // for "In" definitions.
- BlockToSetOfInstrsPerColor Out, In, ReachableUses;
- BlockToInstrPerColor Gen;
- BlockToRegSet Kill;
-
- // Initialize Gen, kill and reachableUses.
- initReachingDef(MF, ColorOpToReachedUses, Gen, Kill, ReachableUses, RegToId,
- DummyOp, ADRPMode);
-
- // Algo.
- if (!DummyOp)
- reachingDefAlgorithm(MF, ColorOpToReachedUses, In, Out, Gen, Kill,
- ReachableUses, RegToId.size());
-}
+} // end anonymous namespace.
-#ifndef NDEBUG
-/// print the result of the reaching definition algorithm.
-static void printReachingDef(const InstrToInstrs *ColorOpToReachedUses,
- unsigned NbReg, const TargetRegisterInfo *TRI,
- const MapIdToReg &IdToReg) {
- unsigned CurReg;
- for (CurReg = 0; CurReg < NbReg; ++CurReg) {
- if (ColorOpToReachedUses[CurReg].empty())
- continue;
- DEBUG(dbgs() << "*** Reg " << PrintReg(IdToReg[CurReg], TRI) << " ***\n");
+INITIALIZE_PASS(AArch64CollectLOH, "aarch64-collect-loh",
+ AARCH64_COLLECT_LOH_NAME, false, false)
- for (const auto &DefsIt : ColorOpToReachedUses[CurReg]) {
- DEBUG(dbgs() << "Def:\n");
- DEBUG(DefsIt.first->print(dbgs()));
- DEBUG(dbgs() << "Reachable uses:\n");
- for (const MachineInstr *MI : DefsIt.second) {
- DEBUG(MI->print(dbgs()));
- }
- }
+static bool canAddBePartOfLOH(const MachineInstr &MI) {
+ // Check immediate to see if the immediate is an address.
+ switch (MI.getOperand(2).getType()) {
+ default:
+ return false;
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_JumpTableIndex:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_BlockAddress:
+ return true;
}
}
-#endif // NDEBUG
/// Answer the following question: Can Def be one of the definition
/// involved in a part of a LOH?
-static bool canDefBePartOfLOH(const MachineInstr *Def) {
- unsigned Opc = Def->getOpcode();
+static bool canDefBePartOfLOH(const MachineInstr &MI) {
// Accept ADRP, ADDLow and LOADGot.
- switch (Opc) {
+ switch (MI.getOpcode()) {
default:
return false;
case AArch64::ADRP:
return true;
case AArch64::ADDXri:
- // Check immediate to see if the immediate is an address.
- switch (Def->getOperand(2).getType()) {
- default:
- return false;
- case MachineOperand::MO_GlobalAddress:
- case MachineOperand::MO_JumpTableIndex:
- case MachineOperand::MO_ConstantPoolIndex:
- case MachineOperand::MO_BlockAddress:
- return true;
- }
+ return canAddBePartOfLOH(MI);
case AArch64::LDRXui:
// Check immediate to see if the immediate is an address.
- switch (Def->getOperand(2).getType()) {
+ switch (MI.getOperand(2).getType()) {
default:
return false;
case MachineOperand::MO_GlobalAddress:
- return true;
+ return MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT;
}
}
- // Unreachable.
- return false;
}
/// Check whether the given instruction can be the end of a LOH chain involving a
/// store.
-static bool isCandidateStore(const MachineInstr *Instr) {
- switch (Instr->getOpcode()) {
+static bool isCandidateStore(const MachineInstr &MI, const MachineOperand &MO) {
+ switch (MI.getOpcode()) {
default:
return false;
case AArch64::STRBBui:
@@ -543,109 +211,19 @@ static bool isCandidateStore(const MachineInstr *Instr) {
case AArch64::STRSui:
case AArch64::STRDui:
case AArch64::STRQui:
+ // We can only optimize the index operand.
// In case we have str xA, [xA, #imm], this is two different uses
// of xA and we cannot fold, otherwise the xA stored may be wrong,
// even if #imm == 0.
- if (Instr->getOperand(0).getReg() != Instr->getOperand(1).getReg())
- return true;
- }
- return false;
-}
-
-/// Given the result of a reaching definition algorithm in ColorOpToReachedUses,
-/// Build the Use to Defs information and filter out obvious non-LOH candidates.
-/// In ADRPMode, non-LOH candidates are "uses" with non-ADRP definitions.
-/// In non-ADRPMode, non-LOH candidates are "uses" with several definition,
-/// i.e., no simple chain.
-/// \param ADRPMode -- \see initReachingDef.
-static void reachedUsesToDefs(InstrToInstrs &UseToReachingDefs,
- const InstrToInstrs *ColorOpToReachedUses,
- const MapRegToId &RegToId,
- bool ADRPMode = false) {
-
- SetOfMachineInstr NotCandidate;
- unsigned NbReg = RegToId.size();
- MapRegToId::const_iterator EndIt = RegToId.end();
- for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg) {
- // If this color is never defined, continue.
- if (ColorOpToReachedUses[CurReg].empty())
- continue;
-
- for (const auto &DefsIt : ColorOpToReachedUses[CurReg]) {
- for (const MachineInstr *MI : DefsIt.second) {
- const MachineInstr *Def = DefsIt.first;
- MapRegToId::const_iterator It;
- // if all the reaching defs are not adrp, this use will not be
- // simplifiable.
- if ((ADRPMode && Def->getOpcode() != AArch64::ADRP) ||
- (!ADRPMode && !canDefBePartOfLOH(Def)) ||
- (!ADRPMode && isCandidateStore(MI) &&
- // store are LOH candidate iff the end of the chain is used as
- // base.
- ((It = RegToId.find((MI)->getOperand(1).getReg())) == EndIt ||
- It->second != CurReg))) {
- NotCandidate.insert(MI);
- continue;
- }
- // Do not consider self reaching as a simplifiable case for ADRP.
- if (!ADRPMode || MI != DefsIt.first) {
- UseToReachingDefs[MI].insert(DefsIt.first);
- // If UsesIt has several reaching definitions, it is not
- // candidate for simplificaton in non-ADRPMode.
- if (!ADRPMode && UseToReachingDefs[MI].size() > 1)
- NotCandidate.insert(MI);
- }
- }
- }
- }
- for (const MachineInstr *Elem : NotCandidate) {
- DEBUG(dbgs() << "Too many reaching defs: " << *Elem << "\n");
- // It would have been better if we could just remove the entry
- // from the map. Because of that, we have to filter the garbage
- // (second.empty) in the subsequence analysis.
- UseToReachingDefs[Elem].clear();
- }
-}
-
-/// Based on the use to defs information (in ADRPMode), compute the
-/// opportunities of LOH ADRP-related.
-static void computeADRP(const InstrToInstrs &UseToDefs,
- AArch64FunctionInfo &AArch64FI,
- const MachineDominatorTree *MDT) {
- DEBUG(dbgs() << "*** Compute LOH for ADRP\n");
- for (const auto &Entry : UseToDefs) {
- unsigned Size = Entry.second.size();
- if (Size == 0)
- continue;
- if (Size == 1) {
- const MachineInstr *L2 = *Entry.second.begin();
- const MachineInstr *L1 = Entry.first;
- if (!MDT->dominates(L2, L1)) {
- DEBUG(dbgs() << "Dominance check failed:\n" << *L2 << '\n' << *L1
- << '\n');
- continue;
- }
- DEBUG(dbgs() << "Record AdrpAdrp:\n" << *L2 << '\n' << *L1 << '\n');
- AArch64FI.addLOHDirective(MCLOH_AdrpAdrp, {L2, L1});
- ++NumADRPSimpleCandidate;
- }
-#ifndef NDEBUG
- else if (Size == 2)
- ++NumADRPComplexCandidate2;
- else if (Size == 3)
- ++NumADRPComplexCandidate3;
- else
- ++NumADRPComplexCandidateOther;
-#endif
- // if Size < 1, the use should have been removed from the candidates
- assert(Size >= 1 && "No reaching defs for that use!");
+ return MI.getOperandNo(&MO) == 1 &&
+ MI.getOperand(0).getReg() != MI.getOperand(1).getReg();
}
}
/// Check whether the given instruction can be the end of a LOH chain
/// involving a load.
-static bool isCandidateLoad(const MachineInstr *Instr) {
- switch (Instr->getOpcode()) {
+static bool isCandidateLoad(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
default:
return false;
case AArch64::LDRSBWui:
@@ -660,17 +238,13 @@ static bool isCandidateLoad(const MachineInstr *Instr) {
case AArch64::LDRSui:
case AArch64::LDRDui:
case AArch64::LDRQui:
- if (Instr->getOperand(2).getTargetFlags() & AArch64II::MO_GOT)
- return false;
- return true;
+ return !(MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT);
}
- // Unreachable.
- return false;
}
/// Check whether the given instruction can load a literal.
-static bool supportLoadFromLiteral(const MachineInstr *Instr) {
- switch (Instr->getOpcode()) {
+static bool supportLoadFromLiteral(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
default:
return false;
case AArch64::LDRSWui:
@@ -681,353 +255,233 @@ static bool supportLoadFromLiteral(const MachineInstr *Instr) {
case AArch64::LDRQui:
return true;
}
- // Unreachable.
- return false;
}
-/// Check whether the given instruction is a LOH candidate.
-/// \param UseToDefs is used to check that Instr is at the end of LOH supported
-/// chain.
-/// \pre UseToDefs contains only on def per use, i.e., obvious non candidate are
-/// already been filtered out.
-static bool isCandidate(const MachineInstr *Instr,
- const InstrToInstrs &UseToDefs,
- const MachineDominatorTree *MDT) {
- if (!isCandidateLoad(Instr) && !isCandidateStore(Instr))
- return false;
+/// Number of GPR registers tracked by mapRegToGPRIndex()
+static const unsigned N_GPR_REGS = 31;
+/// Map register number to index from 0-30.
+static int mapRegToGPRIndex(MCPhysReg Reg) {
+ static_assert(AArch64::X28 - AArch64::X0 + 3 == N_GPR_REGS, "Number of GPRs");
+ static_assert(AArch64::W30 - AArch64::W0 + 1 == N_GPR_REGS, "Number of GPRs");
+ if (AArch64::X0 <= Reg && Reg <= AArch64::X28)
+ return Reg - AArch64::X0;
+ if (AArch64::W0 <= Reg && Reg <= AArch64::W30)
+ return Reg - AArch64::W0;
+ // TableGen gives "FP" and "LR" an index not adjacent to X28 so we have to
+ // handle them as special cases.
+ if (Reg == AArch64::FP)
+ return 29;
+ if (Reg == AArch64::LR)
+ return 30;
+ return -1;
+}
- const MachineInstr *Def = *UseToDefs.find(Instr)->second.begin();
- if (Def->getOpcode() != AArch64::ADRP) {
- // At this point, Def is ADDXri or LDRXui of the right type of
- // symbol, because we filtered out the uses that were not defined
- // by these kind of instructions (+ ADRP).
+/// State tracked per register.
+/// The main algorithm walks backwards over a basic block maintaining this
+/// datastructure for each tracked general purpose register.
+struct LOHInfo {
+ MCLOHType Type : 8; ///< "Best" type of LOH possible.
+ bool IsCandidate : 1; ///< Possible LOH candidate.
+ bool OneUser : 1; ///< Found exactly one user (yet).
+ bool MultiUsers : 1; ///< Found multiple users.
+ const MachineInstr *MI0; ///< First instruction involved in the LOH.
+ const MachineInstr *MI1; ///< Second instruction involved in the LOH
+ /// (if any).
+ const MachineInstr *LastADRP; ///< Last ADRP in same register.
+};
- // Check if this forms a simple chain: each intermediate node must
- // dominates the next one.
- if (!MDT->dominates(Def, Instr))
- return false;
- // Move one node up in the simple chain.
- if (UseToDefs.find(Def) ==
- UseToDefs.end()
- // The map may contain garbage we have to ignore.
- ||
- UseToDefs.find(Def)->second.empty())
- return false;
- Instr = Def;
- Def = *UseToDefs.find(Def)->second.begin();
+/// Update state \p Info given \p MI uses the tracked register.
+static void handleUse(const MachineInstr &MI, const MachineOperand &MO,
+ LOHInfo &Info) {
+ // We have multiple uses if we already found one before.
+ if (Info.MultiUsers || Info.OneUser) {
+ Info.IsCandidate = false;
+ Info.MultiUsers = true;
+ return;
}
- // Check if we reached the top of the simple chain:
- // - top is ADRP.
- // - check the simple chain property: each intermediate node must
- // dominates the next one.
- if (Def->getOpcode() == AArch64::ADRP)
- return MDT->dominates(Def, Instr);
- return false;
-}
-
-static bool registerADRCandidate(const MachineInstr &Use,
- const InstrToInstrs &UseToDefs,
- const InstrToInstrs *DefsPerColorToUses,
- AArch64FunctionInfo &AArch64FI,
- SetOfMachineInstr *InvolvedInLOHs,
- const MapRegToId &RegToId) {
- // Look for opportunities to turn ADRP -> ADD or
- // ADRP -> LDR GOTPAGEOFF into ADR.
- // If ADRP has more than one use. Give up.
- if (Use.getOpcode() != AArch64::ADDXri &&
- (Use.getOpcode() != AArch64::LDRXui ||
- !(Use.getOperand(2).getTargetFlags() & AArch64II::MO_GOT)))
- return false;
- InstrToInstrs::const_iterator It = UseToDefs.find(&Use);
- // The map may contain garbage that we need to ignore.
- if (It == UseToDefs.end() || It->second.empty())
- return false;
- const MachineInstr &Def = **It->second.begin();
- if (Def.getOpcode() != AArch64::ADRP)
- return false;
- // Check the number of users of ADRP.
- const SetOfMachineInstr *Users =
- getUses(DefsPerColorToUses,
- RegToId.find(Def.getOperand(0).getReg())->second, Def);
- if (Users->size() > 1) {
- ++NumADRComplexCandidate;
- return false;
+ Info.OneUser = true;
+
+ // Start new LOHInfo if applicable.
+ if (isCandidateLoad(MI)) {
+ Info.Type = MCLOH_AdrpLdr;
+ Info.IsCandidate = true;
+ Info.MI0 = &MI;
+ // Note that even though this is AdrpLdr now, we can switch to a Ldr variant
+ // later.
+ } else if (isCandidateStore(MI, MO)) {
+ Info.Type = MCLOH_AdrpAddStr;
+ Info.IsCandidate = true;
+ Info.MI0 = &MI;
+ Info.MI1 = nullptr;
+ } else if (MI.getOpcode() == AArch64::ADDXri) {
+ Info.Type = MCLOH_AdrpAdd;
+ Info.IsCandidate = true;
+ Info.MI0 = &MI;
+ } else if (MI.getOpcode() == AArch64::LDRXui &&
+ MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) {
+ Info.Type = MCLOH_AdrpLdrGot;
+ Info.IsCandidate = true;
+ Info.MI0 = &MI;
}
- ++NumADRSimpleCandidate;
- assert((!InvolvedInLOHs || InvolvedInLOHs->insert(&Def)) &&
- "ADRP already involved in LOH.");
- assert((!InvolvedInLOHs || InvolvedInLOHs->insert(&Use)) &&
- "ADD already involved in LOH.");
- DEBUG(dbgs() << "Record AdrpAdd\n" << Def << '\n' << Use << '\n');
-
- AArch64FI.addLOHDirective(
- Use.getOpcode() == AArch64::ADDXri ? MCLOH_AdrpAdd : MCLOH_AdrpLdrGot,
- {&Def, &Use});
- return true;
}
-/// Based on the use to defs information (in non-ADRPMode), compute the
-/// opportunities of LOH non-ADRP-related
-static void computeOthers(const InstrToInstrs &UseToDefs,
- const InstrToInstrs *DefsPerColorToUses,
- AArch64FunctionInfo &AArch64FI, const MapRegToId &RegToId,
- const MachineDominatorTree *MDT) {
- SetOfMachineInstr *InvolvedInLOHs = nullptr;
-#ifndef NDEBUG
- SetOfMachineInstr InvolvedInLOHsStorage;
- InvolvedInLOHs = &InvolvedInLOHsStorage;
-#endif // NDEBUG
- DEBUG(dbgs() << "*** Compute LOH for Others\n");
- // ADRP -> ADD/LDR -> LDR/STR pattern.
- // Fall back to ADRP -> ADD pattern if we fail to catch the bigger pattern.
+/// Update state \p Info given the tracked register is clobbered.
+static void handleClobber(LOHInfo &Info) {
+ Info.IsCandidate = false;
+ Info.OneUser = false;
+ Info.MultiUsers = false;
+ Info.LastADRP = nullptr;
+}
- // FIXME: When the statistics are not important,
- // This initial filtering loop can be merged into the next loop.
- // Currently, we didn't do it to have the same code for both DEBUG and
- // NDEBUG builds. Indeed, the iterator of the second loop would need
- // to be changed.
- SetOfMachineInstr PotentialCandidates;
- SetOfMachineInstr PotentialADROpportunities;
- for (auto &Use : UseToDefs) {
- // If no definition is available, this is a non candidate.
- if (Use.second.empty())
- continue;
- // Keep only instructions that are load or store and at the end of
- // a ADRP -> ADD/LDR/Nothing chain.
- // We already filtered out the no-chain cases.
- if (!isCandidate(Use.first, UseToDefs, MDT)) {
- PotentialADROpportunities.insert(Use.first);
- continue;
+/// Update state \p Info given that \p MI is possibly the middle instruction
+/// of an LOH involving 3 instructions.
+static bool handleMiddleInst(const MachineInstr &MI, LOHInfo &DefInfo,
+ LOHInfo &OpInfo) {
+ if (!DefInfo.IsCandidate || (&DefInfo != &OpInfo && OpInfo.OneUser))
+ return false;
+ // Copy LOHInfo for dest register to LOHInfo for source register.
+ if (&DefInfo != &OpInfo) {
+ OpInfo = DefInfo;
+ // Invalidate \p DefInfo because we track it in \p OpInfo now.
+ handleClobber(DefInfo);
+ } else
+ DefInfo.LastADRP = nullptr;
+
+ // Advance state machine.
+ assert(OpInfo.IsCandidate && "Expect valid state");
+ if (MI.getOpcode() == AArch64::ADDXri && canAddBePartOfLOH(MI)) {
+ if (OpInfo.Type == MCLOH_AdrpLdr) {
+ OpInfo.Type = MCLOH_AdrpAddLdr;
+ OpInfo.IsCandidate = true;
+ OpInfo.MI1 = &MI;
+ return true;
+ } else if (OpInfo.Type == MCLOH_AdrpAddStr && OpInfo.MI1 == nullptr) {
+ OpInfo.Type = MCLOH_AdrpAddStr;
+ OpInfo.IsCandidate = true;
+ OpInfo.MI1 = &MI;
+ return true;
}
- PotentialCandidates.insert(Use.first);
- }
-
- // Make the following distinctions for statistics as the linker does
- // know how to decode instructions:
- // - ADD/LDR/Nothing make there different patterns.
- // - LDR/STR make two different patterns.
- // Hence, 6 - 1 base patterns.
- // (because ADRP-> Nothing -> STR is not simplifiable)
-
- // The linker is only able to have a simple semantic, i.e., if pattern A
- // do B.
- // However, we want to see the opportunity we may miss if we were able to
- // catch more complex cases.
-
- // PotentialCandidates are result of a chain ADRP -> ADD/LDR ->
- // A potential candidate becomes a candidate, if its current immediate
- // operand is zero and all nodes of the chain have respectively only one user
-#ifndef NDEBUG
- SetOfMachineInstr DefsOfPotentialCandidates;
-#endif
- for (const MachineInstr *Candidate : PotentialCandidates) {
- // Get the definition of the candidate i.e., ADD or LDR.
- const MachineInstr *Def = *UseToDefs.find(Candidate)->second.begin();
- // Record the elements of the chain.
- const MachineInstr *L1 = Def;
- const MachineInstr *L2 = nullptr;
- unsigned ImmediateDefOpc = Def->getOpcode();
- if (Def->getOpcode() != AArch64::ADRP) {
- // Check the number of users of this node.
- const SetOfMachineInstr *Users =
- getUses(DefsPerColorToUses,
- RegToId.find(Def->getOperand(0).getReg())->second, *Def);
- if (Users->size() > 1) {
-#ifndef NDEBUG
- // if all the uses of this def are in potential candidate, this is
- // a complex candidate of level 2.
- bool IsLevel2 = true;
- for (const MachineInstr *MI : *Users) {
- if (!PotentialCandidates.count(MI)) {
- ++NumTooCplxLvl2;
- IsLevel2 = false;
- break;
- }
- }
- if (IsLevel2)
- ++NumCplxLvl2;
-#endif // NDEBUG
- PotentialADROpportunities.insert(Def);
- continue;
- }
- L2 = Def;
- Def = *UseToDefs.find(Def)->second.begin();
- L1 = Def;
- } // else the element in the middle of the chain is nothing, thus
- // Def already contains the first element of the chain.
-
- // Check the number of users of the first node in the chain, i.e., ADRP
- const SetOfMachineInstr *Users =
- getUses(DefsPerColorToUses,
- RegToId.find(Def->getOperand(0).getReg())->second, *Def);
- if (Users->size() > 1) {
-#ifndef NDEBUG
- // if all the uses of this def are in the defs of the potential candidate,
- // this is a complex candidate of level 1
- if (DefsOfPotentialCandidates.empty()) {
- // lazy init
- DefsOfPotentialCandidates = PotentialCandidates;
- for (const MachineInstr *Candidate : PotentialCandidates) {
- if (!UseToDefs.find(Candidate)->second.empty())
- DefsOfPotentialCandidates.insert(
- *UseToDefs.find(Candidate)->second.begin());
- }
- }
- bool Found = false;
- for (auto &Use : *Users) {
- if (!DefsOfPotentialCandidates.count(Use)) {
- ++NumTooCplxLvl1;
- Found = true;
- break;
- }
- }
- if (!Found)
- ++NumCplxLvl1;
-#endif // NDEBUG
- continue;
+ } else {
+ assert(MI.getOpcode() == AArch64::LDRXui && "Expect LDRXui");
+ assert((MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) &&
+ "Expected GOT relocation");
+ if (OpInfo.Type == MCLOH_AdrpAddStr && OpInfo.MI1 == nullptr) {
+ OpInfo.Type = MCLOH_AdrpLdrGotStr;
+ OpInfo.IsCandidate = true;
+ OpInfo.MI1 = &MI;
+ return true;
+ } else if (OpInfo.Type == MCLOH_AdrpLdr) {
+ OpInfo.Type = MCLOH_AdrpLdrGotLdr;
+ OpInfo.IsCandidate = true;
+ OpInfo.MI1 = &MI;
+ return true;
}
+ }
+ return false;
+}
- bool IsL2Add = (ImmediateDefOpc == AArch64::ADDXri);
- // If the chain is three instructions long and ldr is the second element,
- // then this ldr must load form GOT, otherwise this is not a correct chain.
- if (L2 && !IsL2Add &&
- !(L2->getOperand(2).getTargetFlags() & AArch64II::MO_GOT))
- continue;
- SmallVector<const MachineInstr *, 3> Args;
- MCLOHType Kind;
- if (isCandidateLoad(Candidate)) {
- if (!L2) {
- // At this point, the candidate LOH indicates that the ldr instruction
- // may use a direct access to the symbol. There is not such encoding
- // for loads of byte and half.
- if (!supportLoadFromLiteral(Candidate))
- continue;
+/// Update state when seeing an ADRP instruction.
+static void handleADRP(const MachineInstr &MI, AArch64FunctionInfo &AFI,
+ LOHInfo &Info) {
+ if (Info.LastADRP != nullptr) {
+ DEBUG(dbgs() << "Adding MCLOH_AdrpAdrp:\n" << '\t' << MI << '\t'
+ << *Info.LastADRP);
+ AFI.addLOHDirective(MCLOH_AdrpAdrp, {&MI, Info.LastADRP});
+ ++NumADRPSimpleCandidate;
+ }
- DEBUG(dbgs() << "Record AdrpLdr:\n" << *L1 << '\n' << *Candidate
- << '\n');
- Kind = MCLOH_AdrpLdr;
- Args.push_back(L1);
- Args.push_back(Candidate);
- assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
- "L1 already involved in LOH.");
- assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
- "Candidate already involved in LOH.");
+ // Produce LOH directive if possible.
+ if (Info.IsCandidate) {
+ switch (Info.Type) {
+ case MCLOH_AdrpAdd:
+ DEBUG(dbgs() << "Adding MCLOH_AdrpAdd:\n" << '\t' << MI << '\t'
+ << *Info.MI0);
+ AFI.addLOHDirective(MCLOH_AdrpAdd, {&MI, Info.MI0});
+ ++NumADRSimpleCandidate;
+ break;
+ case MCLOH_AdrpLdr:
+ if (supportLoadFromLiteral(*Info.MI0)) {
+ DEBUG(dbgs() << "Adding MCLOH_AdrpLdr:\n" << '\t' << MI << '\t'
+ << *Info.MI0);
+ AFI.addLOHDirective(MCLOH_AdrpLdr, {&MI, Info.MI0});
++NumADRPToLDR;
- } else {
- DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot")
- << "Ldr:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate
- << '\n');
-
- Kind = IsL2Add ? MCLOH_AdrpAddLdr : MCLOH_AdrpLdrGotLdr;
- Args.push_back(L1);
- Args.push_back(L2);
- Args.push_back(Candidate);
-
- PotentialADROpportunities.remove(L2);
- assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
- "L1 already involved in LOH.");
- assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) &&
- "L2 already involved in LOH.");
- assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
- "Candidate already involved in LOH.");
-#ifndef NDEBUG
- // get the immediate of the load
- if (Candidate->getOperand(2).getImm() == 0)
- if (ImmediateDefOpc == AArch64::ADDXri)
- ++NumADDToLDR;
- else
- ++NumLDRToLDR;
- else if (ImmediateDefOpc == AArch64::ADDXri)
- ++NumADDToLDRWithImm;
- else
- ++NumLDRToLDRWithImm;
-#endif // NDEBUG
}
- } else {
- if (ImmediateDefOpc == AArch64::ADRP)
- continue;
- else {
-
- DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot")
- << "Str:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate
- << '\n');
-
- Kind = IsL2Add ? MCLOH_AdrpAddStr : MCLOH_AdrpLdrGotStr;
- Args.push_back(L1);
- Args.push_back(L2);
- Args.push_back(Candidate);
-
- PotentialADROpportunities.remove(L2);
- assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
- "L1 already involved in LOH.");
- assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) &&
- "L2 already involved in LOH.");
- assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
- "Candidate already involved in LOH.");
-#ifndef NDEBUG
- // get the immediate of the store
- if (Candidate->getOperand(2).getImm() == 0)
- if (ImmediateDefOpc == AArch64::ADDXri)
- ++NumADDToSTR;
- else
- ++NumLDRToSTR;
- else if (ImmediateDefOpc == AArch64::ADDXri)
- ++NumADDToSTRWithImm;
- else
- ++NumLDRToSTRWithImm;
-#endif // DEBUG
+ break;
+ case MCLOH_AdrpAddLdr:
+ DEBUG(dbgs() << "Adding MCLOH_AdrpAddLdr:\n" << '\t' << MI << '\t'
+ << *Info.MI1 << '\t' << *Info.MI0);
+ AFI.addLOHDirective(MCLOH_AdrpAddLdr, {&MI, Info.MI1, Info.MI0});
+ ++NumADDToLDR;
+ break;
+ case MCLOH_AdrpAddStr:
+ if (Info.MI1 != nullptr) {
+ DEBUG(dbgs() << "Adding MCLOH_AdrpAddStr:\n" << '\t' << MI << '\t'
+ << *Info.MI1 << '\t' << *Info.MI0);
+ AFI.addLOHDirective(MCLOH_AdrpAddStr, {&MI, Info.MI1, Info.MI0});
+ ++NumADDToSTR;
}
+ break;
+ case MCLOH_AdrpLdrGotLdr:
+ DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGotLdr:\n" << '\t' << MI << '\t'
+ << *Info.MI1 << '\t' << *Info.MI0);
+ AFI.addLOHDirective(MCLOH_AdrpLdrGotLdr, {&MI, Info.MI1, Info.MI0});
+ ++NumLDRToLDR;
+ break;
+ case MCLOH_AdrpLdrGotStr:
+ DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGotStr:\n" << '\t' << MI << '\t'
+ << *Info.MI1 << '\t' << *Info.MI0);
+ AFI.addLOHDirective(MCLOH_AdrpLdrGotStr, {&MI, Info.MI1, Info.MI0});
+ ++NumLDRToSTR;
+ break;
+ case MCLOH_AdrpLdrGot:
+ DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGot:\n" << '\t' << MI << '\t'
+ << *Info.MI0);
+ AFI.addLOHDirective(MCLOH_AdrpLdrGot, {&MI, Info.MI0});
+ break;
+ case MCLOH_AdrpAdrp:
+ llvm_unreachable("MCLOH_AdrpAdrp not used in state machine");
}
- AArch64FI.addLOHDirective(Kind, Args);
}
- // Now, we grabbed all the big patterns, check ADR opportunities.
- for (const MachineInstr *Candidate : PotentialADROpportunities)
- registerADRCandidate(*Candidate, UseToDefs, DefsPerColorToUses, AArch64FI,
- InvolvedInLOHs, RegToId);
+ handleClobber(Info);
+ Info.LastADRP = &MI;
}
-/// Look for every register defined by potential LOHs candidates.
-/// Map these registers with dense id in @p RegToId and vice-versa in
-/// @p IdToReg. @p IdToReg is populated only in DEBUG mode.
-static void collectInvolvedReg(const MachineFunction &MF, MapRegToId &RegToId,
- MapIdToReg &IdToReg,
- const TargetRegisterInfo *TRI) {
- unsigned CurRegId = 0;
- if (!PreCollectRegister) {
- unsigned NbReg = TRI->getNumRegs();
- for (; CurRegId < NbReg; ++CurRegId) {
- RegToId[CurRegId] = CurRegId;
- DEBUG(IdToReg.push_back(CurRegId));
- DEBUG(assert(IdToReg[CurRegId] == CurRegId && "Reg index mismatches"));
- }
+static void handleRegMaskClobber(const uint32_t *RegMask, MCPhysReg Reg,
+ LOHInfo *LOHInfos) {
+ if (!MachineOperand::clobbersPhysReg(RegMask, Reg))
return;
- }
-
- DEBUG(dbgs() << "** Collect Involved Register\n");
- for (const auto &MBB : MF) {
- for (const MachineInstr &MI : MBB) {
- if (!canDefBePartOfLOH(&MI) &&
- !isCandidateLoad(&MI) && !isCandidateStore(&MI))
- continue;
+ int Idx = mapRegToGPRIndex(Reg);
+ if (Idx >= 0)
+ handleClobber(LOHInfos[Idx]);
+}
- // Process defs
- for (MachineInstr::const_mop_iterator IO = MI.operands_begin(),
- IOEnd = MI.operands_end();
- IO != IOEnd; ++IO) {
- if (!IO->isReg() || !IO->isDef())
- continue;
- unsigned CurReg = IO->getReg();
- for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI)
- if (RegToId.find(*AI) == RegToId.end()) {
- DEBUG(IdToReg.push_back(*AI);
- assert(IdToReg[CurRegId] == *AI &&
- "Reg index mismatches insertion index."));
- RegToId[*AI] = CurRegId++;
- DEBUG(dbgs() << "Register: " << PrintReg(*AI, TRI) << '\n');
- }
- }
+static void handleNormalInst(const MachineInstr &MI, LOHInfo *LOHInfos) {
+ // Handle defs and regmasks.
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isRegMask()) {
+ const uint32_t *RegMask = MO.getRegMask();
+ for (MCPhysReg Reg : AArch64::GPR32RegClass)
+ handleRegMaskClobber(RegMask, Reg, LOHInfos);
+ for (MCPhysReg Reg : AArch64::GPR64RegClass)
+ handleRegMaskClobber(RegMask, Reg, LOHInfos);
+ continue;
}
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ int Idx = mapRegToGPRIndex(MO.getReg());
+ if (Idx < 0)
+ continue;
+ handleClobber(LOHInfos[Idx]);
+ }
+ // Handle uses.
+ for (const MachineOperand &MO : MI.uses()) {
+ if (!MO.isReg() || !MO.readsReg())
+ continue;
+ int Idx = mapRegToGPRIndex(MO.getReg());
+ if (Idx < 0)
+ continue;
+ handleUse(MI, MO, LOHInfos[Idx]);
}
}
@@ -1035,74 +489,59 @@ bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(*MF.getFunction()))
return false;
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- const MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>();
-
- MapRegToId RegToId;
- MapIdToReg IdToReg;
- AArch64FunctionInfo *AArch64FI = MF.getInfo<AArch64FunctionInfo>();
- assert(AArch64FI && "No MachineFunctionInfo for this function!");
-
- DEBUG(dbgs() << "Looking for LOH in " << MF.getName() << '\n');
+ DEBUG(dbgs() << "********** AArch64 Collect LOH **********\n"
+ << "Looking in function " << MF.getName() << '\n');
- collectInvolvedReg(MF, RegToId, IdToReg, TRI);
- if (RegToId.empty())
- return false;
+ LOHInfo LOHInfos[N_GPR_REGS];
+ AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
+ for (const MachineBasicBlock &MBB : MF) {
+ // Reset register tracking state.
+ memset(LOHInfos, 0, sizeof(LOHInfos));
+ // Live-out registers are used.
+ for (const MachineBasicBlock *Succ : MBB.successors()) {
+ for (const auto &LI : Succ->liveins()) {
+ int RegIdx = mapRegToGPRIndex(LI.PhysReg);
+ if (RegIdx >= 0)
+ LOHInfos[RegIdx].OneUser = true;
+ }
+ }
- MachineInstr *DummyOp = nullptr;
- if (BasicBlockScopeOnly) {
- const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
- // For local analysis, create a dummy operation to record uses that are not
- // local.
- DummyOp = MF.CreateMachineInstr(TII->get(AArch64::COPY), DebugLoc());
+ // Walk the basic block backwards and update the per register state machine
+ // in the process.
+ for (const MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ case AArch64::ADDXri:
+ case AArch64::LDRXui:
+ if (canDefBePartOfLOH(MI)) {
+ const MachineOperand &Def = MI.getOperand(0);
+ const MachineOperand &Op = MI.getOperand(1);
+ assert(Def.isReg() && Def.isDef() && "Expected reg def");
+ assert(Op.isReg() && Op.isUse() && "Expected reg use");
+ int DefIdx = mapRegToGPRIndex(Def.getReg());
+ int OpIdx = mapRegToGPRIndex(Op.getReg());
+ if (DefIdx >= 0 && OpIdx >= 0 &&
+ handleMiddleInst(MI, LOHInfos[DefIdx], LOHInfos[OpIdx]))
+ continue;
+ }
+ break;
+ case AArch64::ADRP:
+ const MachineOperand &Op0 = MI.getOperand(0);
+ int Idx = mapRegToGPRIndex(Op0.getReg());
+ if (Idx >= 0) {
+ handleADRP(MI, AFI, LOHInfos[Idx]);
+ continue;
+ }
+ break;
+ }
+ handleNormalInst(MI, LOHInfos);
+ }
}
- unsigned NbReg = RegToId.size();
- bool Modified = false;
-
- // Start with ADRP.
- InstrToInstrs *ColorOpToReachedUses = new InstrToInstrs[NbReg];
-
- // Compute the reaching def in ADRP mode, meaning ADRP definitions
- // are first considered as uses.
- reachingDef(MF, ColorOpToReachedUses, RegToId, true, DummyOp);
- DEBUG(dbgs() << "ADRP reaching defs\n");
- DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg));
-
- // Translate the definition to uses map into a use to definitions map to ease
- // statistic computation.
- InstrToInstrs ADRPToReachingDefs;
- reachedUsesToDefs(ADRPToReachingDefs, ColorOpToReachedUses, RegToId, true);
-
- // Compute LOH for ADRP.
- computeADRP(ADRPToReachingDefs, *AArch64FI, MDT);
- delete[] ColorOpToReachedUses;
-
- // Continue with general ADRP -> ADD/LDR -> LDR/STR pattern.
- ColorOpToReachedUses = new InstrToInstrs[NbReg];
-
- // first perform a regular reaching def analysis.
- reachingDef(MF, ColorOpToReachedUses, RegToId, false, DummyOp);
- DEBUG(dbgs() << "All reaching defs\n");
- DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg));
-
- // Turn that into a use to defs to ease statistic computation.
- InstrToInstrs UsesToReachingDefs;
- reachedUsesToDefs(UsesToReachingDefs, ColorOpToReachedUses, RegToId, false);
-
- // Compute other than AdrpAdrp LOH.
- computeOthers(UsesToReachingDefs, ColorOpToReachedUses, *AArch64FI, RegToId,
- MDT);
- delete[] ColorOpToReachedUses;
-
- if (BasicBlockScopeOnly)
- MF.DeleteMachineInstr(DummyOp);
-
- return Modified;
+ // Return "no change": The pass only collects information.
+ return false;
}
-/// createAArch64CollectLOHPass - returns an instance of the Statistic for
-/// linker optimization pass.
FunctionPass *llvm::createAArch64CollectLOHPass() {
return new AArch64CollectLOH();
}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 4c98253878e4..74a01835171b 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -11,28 +11,79 @@
//
//===----------------------------------------------------------------------===//
-#include "AArch64ISelLowering.h"
#include "AArch64CallingConvention.h"
#include "AArch64MachineFunctionInfo.h"
+#include "AArch64ISelLowering.h"
#include "AArch64PerfectShuffle.h"
+#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
-#include "AArch64TargetMachine.h"
-#include "AArch64TargetObjectFile.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/OperandTraits.h"
#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetCallingConv.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include <algorithm>
+#include <bitset>
+#include <cassert>
+#include <cctype>
+#include <cstdint>
+#include <cstdlib>
+#include <iterator>
+#include <limits>
+#include <tuple>
+#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "aarch64-lower"
@@ -59,7 +110,6 @@ static const MVT MVT_CC = MVT::i32;
AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
const AArch64Subtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
-
// AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
// we have to make something up. Arbitrarily, choose ZeroOrOne.
setBooleanContents(ZeroOrOneBooleanContent);
@@ -218,7 +268,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
-
setOperationAction(ISD::CTPOP, MVT::i32, Custom);
setOperationAction(ISD::CTPOP, MVT::i64, Custom);
@@ -3632,6 +3681,7 @@ SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
llvm_unreachable("Unexpected platform trying to use TLS");
}
+
SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
@@ -4549,7 +4599,6 @@ SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op,
return DAG.getMergeValues(Ops, dl);
}
-
/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
/// i64 values and take a 2 x i64 value to shift plus a shift amount.
SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op,
@@ -5074,10 +5123,11 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
int WindowBase;
int WindowScale;
- bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
ShuffleSourceInfo(SDValue Vec)
- : Vec(Vec), MinElt(UINT_MAX), MaxElt(0), ShuffleVec(Vec), WindowBase(0),
- WindowScale(1) {}
+ : Vec(Vec), MinElt(std::numeric_limits<unsigned>::max()), MaxElt(0),
+ ShuffleVec(Vec), WindowBase(0), WindowScale(1) {}
+
+ bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
};
// First gather all vectors used as an immediate source for this BUILD_VECTOR
@@ -7028,7 +7078,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
return true;
}
case Intrinsic::aarch64_ldaxp:
- case Intrinsic::aarch64_ldxp: {
+ case Intrinsic::aarch64_ldxp:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i128;
Info.ptrVal = I.getArgOperand(0);
@@ -7038,9 +7088,8 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.readMem = true;
Info.writeMem = false;
return true;
- }
case Intrinsic::aarch64_stlxp:
- case Intrinsic::aarch64_stxp: {
+ case Intrinsic::aarch64_stxp:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i128;
Info.ptrVal = I.getArgOperand(2);
@@ -7050,7 +7099,6 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.readMem = false;
Info.writeMem = true;
return true;
- }
default:
break;
}
@@ -8044,13 +8092,13 @@ static SDValue tryCombineToEXTR(SDNode *N,
SDValue LHS;
uint32_t ShiftLHS = 0;
- bool LHSFromHi = 0;
+ bool LHSFromHi = false;
if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi))
return SDValue();
SDValue RHS;
uint32_t ShiftRHS = 0;
- bool RHSFromHi = 0;
+ bool RHSFromHi = false;
if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi))
return SDValue();
@@ -9732,52 +9780,51 @@ static bool isEquivalentMaskless(unsigned CC, unsigned width,
switch(CC) {
case AArch64CC::LE:
- case AArch64CC::GT: {
+ case AArch64CC::GT:
if ((AddConstant == 0) ||
(CompConstant == MaxUInt - 1 && AddConstant < 0) ||
(AddConstant >= 0 && CompConstant < 0) ||
(AddConstant <= 0 && CompConstant <= 0 && CompConstant < AddConstant))
return true;
- } break;
+ break;
case AArch64CC::LT:
- case AArch64CC::GE: {
+ case AArch64CC::GE:
if ((AddConstant == 0) ||
(AddConstant >= 0 && CompConstant <= 0) ||
(AddConstant <= 0 && CompConstant <= 0 && CompConstant <= AddConstant))
return true;
- } break;
+ break;
case AArch64CC::HI:
- case AArch64CC::LS: {
+ case AArch64CC::LS:
if ((AddConstant >= 0 && CompConstant < 0) ||
(AddConstant <= 0 && CompConstant >= -1 &&
CompConstant < AddConstant + MaxUInt))
return true;
- } break;
+ break;
case AArch64CC::PL:
- case AArch64CC::MI: {
+ case AArch64CC::MI:
if ((AddConstant == 0) ||
(AddConstant > 0 && CompConstant <= 0) ||
(AddConstant < 0 && CompConstant <= AddConstant))
return true;
- } break;
+ break;
case AArch64CC::LO:
- case AArch64CC::HS: {
+ case AArch64CC::HS:
if ((AddConstant >= 0 && CompConstant <= 0) ||
(AddConstant <= 0 && CompConstant >= 0 &&
CompConstant <= AddConstant + MaxUInt))
return true;
- } break;
+ break;
case AArch64CC::EQ:
- case AArch64CC::NE: {
+ case AArch64CC::NE:
if ((AddConstant > 0 && CompConstant < 0) ||
(AddConstant < 0 && CompConstant >= 0 &&
CompConstant < AddConstant + MaxUInt) ||
(AddConstant >= 0 && CompConstant >= 0 &&
CompConstant >= AddConstant) ||
(AddConstant <= 0 && CompConstant < 0 && CompConstant < AddConstant))
-
return true;
- } break;
+ break;
case AArch64CC::VS:
case AArch64CC::VC:
case AArch64CC::AL:
@@ -10501,7 +10548,7 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
if (ValTy->getPrimitiveSizeInBits() == 128) {
Intrinsic::ID Int =
IsAcquire ? Intrinsic::aarch64_ldaxp : Intrinsic::aarch64_ldxp;
- Function *Ldxr = llvm::Intrinsic::getDeclaration(M, Int);
+ Function *Ldxr = Intrinsic::getDeclaration(M, Int);
Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
Value *LoHi = Builder.CreateCall(Ldxr, Addr, "lohi");
@@ -10517,7 +10564,7 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
Type *Tys[] = { Addr->getType() };
Intrinsic::ID Int =
IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr;
- Function *Ldxr = llvm::Intrinsic::getDeclaration(M, Int, Tys);
+ Function *Ldxr = Intrinsic::getDeclaration(M, Int, Tys);
return Builder.CreateTruncOrBitCast(
Builder.CreateCall(Ldxr, Addr),
@@ -10527,8 +10574,7 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
void AArch64TargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
IRBuilder<> &Builder) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
- Builder.CreateCall(
- llvm::Intrinsic::getDeclaration(M, Intrinsic::aarch64_clrex));
+ Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::aarch64_clrex));
}
Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder,
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 626c934f236e..5c8acba26aab 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -14,16 +14,37 @@
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
-#include <algorithm>
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <utility>
using namespace llvm;
@@ -529,19 +550,19 @@ void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
default:
llvm_unreachable("Unknown branch opcode in Cond");
case AArch64::CBZW:
- Is64Bit = 0;
+ Is64Bit = false;
CC = AArch64CC::EQ;
break;
case AArch64::CBZX:
- Is64Bit = 1;
+ Is64Bit = true;
CC = AArch64CC::EQ;
break;
case AArch64::CBNZW:
- Is64Bit = 0;
+ Is64Bit = false;
CC = AArch64CC::NE;
break;
case AArch64::CBNZX:
- Is64Bit = 1;
+ Is64Bit = true;
CC = AArch64CC::NE;
break;
}
@@ -1044,7 +1065,7 @@ static unsigned sForm(MachineInstr &Instr) {
case AArch64::SUBSWri:
case AArch64::SUBSXrr:
case AArch64::SUBSXri:
- return Instr.getOpcode();;
+ return Instr.getOpcode();
case AArch64::ADDWrr: return AArch64::ADDSWrr;
case AArch64::ADDWri: return AArch64::ADDSWri;
@@ -1072,12 +1093,15 @@ static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
}
namespace {
+
struct UsedNZCV {
- bool N;
- bool Z;
- bool C;
- bool V;
- UsedNZCV(): N(false), Z(false), C(false), V(false) {}
+ bool N = false;
+ bool Z = false;
+ bool C = false;
+ bool V = false;
+
+ UsedNZCV() = default;
+
UsedNZCV& operator |=(const UsedNZCV& UsedFlags) {
this->N |= UsedFlags.N;
this->Z |= UsedFlags.Z;
@@ -1086,6 +1110,7 @@ struct UsedNZCV {
return *this;
}
};
+
} // end anonymous namespace
/// Find a condition code used by the instruction.
@@ -1561,7 +1586,7 @@ bool AArch64InstrInfo::isScaledAddr(const MachineInstr &MI) const {
/// Check all MachineMemOperands for a hint to suppress pairing.
bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) const {
- return any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
+ return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
return MMO->getFlags() & MOSuppressPair;
});
}
@@ -1994,7 +2019,7 @@ static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
void AArch64InstrInfo::copyPhysRegTuple(
MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL,
unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode,
- llvm::ArrayRef<unsigned> Indices) const {
+ ArrayRef<unsigned> Indices) const {
assert(Subtarget.hasNEON() &&
"Unexpected register copy without NEON");
const TargetRegisterInfo *TRI = &getRegisterInfo();
@@ -2583,7 +2608,7 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
//
// <rdar://problem/11522048>
//
- if (MI.isCopy()) {
+ if (MI.isFullCopy()) {
unsigned DstReg = MI.getOperand(0).getReg();
unsigned SrcReg = MI.getOperand(1).getReg();
if (SrcReg == AArch64::SP &&
@@ -2598,7 +2623,7 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
}
}
- // Handle the case where a copy is being spilled or refilled but the source
+ // Handle the case where a copy is being spilled or filled but the source
// and destination register class don't match. For example:
//
// %vreg0<def> = COPY %XZR; GPR64common:%vreg0
@@ -2613,7 +2638,7 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
//
// %vreg0<def> = COPY %vreg1; GPR64:%vreg0, FPR64:%vreg1
//
- // will be refilled as
+ // will be filled as
//
// LDRDui %vreg0, fi<#0>
//
@@ -2622,9 +2647,11 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
// LDRXui %vregTemp, fi<#0>
// %vreg0 = FMOV %vregTemp
//
- if (MI.isFullCopy() && Ops.size() == 1 &&
+ if (MI.isCopy() && Ops.size() == 1 &&
// Make sure we're only folding the explicit COPY defs/uses.
(Ops[0] == 0 || Ops[0] == 1)) {
+ bool IsSpill = Ops[0] == 0;
+ bool IsFill = !IsSpill;
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
const MachineRegisterInfo &MRI = MF.getRegInfo();
MachineBasicBlock &MBB = *MI.getParent();
@@ -2632,21 +2659,112 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
const MachineOperand &SrcMO = MI.getOperand(1);
unsigned DstReg = DstMO.getReg();
unsigned SrcReg = SrcMO.getReg();
+ // This is slightly expensive to compute for physical regs since
+ // getMinimalPhysRegClass is slow.
auto getRegClass = [&](unsigned Reg) {
return TargetRegisterInfo::isVirtualRegister(Reg)
? MRI.getRegClass(Reg)
: TRI.getMinimalPhysRegClass(Reg);
};
- const TargetRegisterClass &DstRC = *getRegClass(DstReg);
- const TargetRegisterClass &SrcRC = *getRegClass(SrcReg);
- if (DstRC.getSize() == SrcRC.getSize()) {
- if (Ops[0] == 0)
+
+ if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
+ assert(getRegClass(DstReg)->getSize() == getRegClass(SrcReg)->getSize() &&
+ "Mismatched register size in non subreg COPY");
+ if (IsSpill)
storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
- &SrcRC, &TRI);
+ getRegClass(SrcReg), &TRI);
else
- loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, &DstRC, &TRI);
+ loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
+ getRegClass(DstReg), &TRI);
return &*--InsertPt;
}
+
+ // Handle cases like spilling def of:
+ //
+ // %vreg0:sub_32<def,read-undef> = COPY %WZR; GPR64common:%vreg0
+ //
+ // where the physical register source can be widened and stored to the full
+ // virtual reg destination stack slot, in this case producing:
+ //
+ // STRXui %XZR, <fi#0>
+ //
+ if (IsSpill && DstMO.isUndef() &&
+ TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
+ assert(SrcMO.getSubReg() == 0 &&
+ "Unexpected subreg on physical register");
+ const TargetRegisterClass *SpillRC;
+ unsigned SpillSubreg;
+ switch (DstMO.getSubReg()) {
+ default:
+ SpillRC = nullptr;
+ break;
+ case AArch64::sub_32:
+ case AArch64::ssub:
+ if (AArch64::GPR32RegClass.contains(SrcReg)) {
+ SpillRC = &AArch64::GPR64RegClass;
+ SpillSubreg = AArch64::sub_32;
+ } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
+ SpillRC = &AArch64::FPR64RegClass;
+ SpillSubreg = AArch64::ssub;
+ } else
+ SpillRC = nullptr;
+ break;
+ case AArch64::dsub:
+ if (AArch64::FPR64RegClass.contains(SrcReg)) {
+ SpillRC = &AArch64::FPR128RegClass;
+ SpillSubreg = AArch64::dsub;
+ } else
+ SpillRC = nullptr;
+ break;
+ }
+
+ if (SpillRC)
+ if (unsigned WidenedSrcReg =
+ TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
+ storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
+ FrameIndex, SpillRC, &TRI);
+ return &*--InsertPt;
+ }
+ }
+
+ // Handle cases like filling use of:
+ //
+ // %vreg0:sub_32<def,read-undef> = COPY %vreg1; GPR64:%vreg0, GPR32:%vreg1
+ //
+ // where we can load the full virtual reg source stack slot, into the subreg
+ // destination, in this case producing:
+ //
+ // LDRWui %vreg0:sub_32<def,read-undef>, <fi#0>
+ //
+ if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
+ const TargetRegisterClass *FillRC;
+ switch (DstMO.getSubReg()) {
+ default:
+ FillRC = nullptr;
+ break;
+ case AArch64::sub_32:
+ FillRC = &AArch64::GPR32RegClass;
+ break;
+ case AArch64::ssub:
+ FillRC = &AArch64::FPR32RegClass;
+ break;
+ case AArch64::dsub:
+ FillRC = &AArch64::FPR64RegClass;
+ break;
+ }
+
+ if (FillRC) {
+ assert(getRegClass(SrcReg)->getSize() == FillRC->getSize() &&
+ "Mismatched regclass size on folded subreg COPY");
+ loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
+ MachineInstr &LoadMI = *--InsertPt;
+ MachineOperand &LoadDst = LoadMI.getOperand(0);
+ assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
+ LoadDst.setSubReg(DstMO.getSubReg());
+ LoadDst.setIsUndef();
+ return &LoadMI;
+ }
+ }
}
// Cannot fold.
@@ -2936,7 +3054,7 @@ bool AArch64InstrInfo::useMachineCombiner() const {
return true;
}
-//
+
// True when Opc sets flag
static bool isCombineInstrSettingFlag(unsigned Opc) {
switch (Opc) {
@@ -2955,7 +3073,7 @@ static bool isCombineInstrSettingFlag(unsigned Opc) {
}
return false;
}
-//
+
// 32b Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate32(unsigned Opc) {
switch (Opc) {
@@ -2974,7 +3092,7 @@ static bool isCombineInstrCandidate32(unsigned Opc) {
}
return false;
}
-//
+
// 64b Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate64(unsigned Opc) {
switch (Opc) {
@@ -2993,7 +3111,7 @@ static bool isCombineInstrCandidate64(unsigned Opc) {
}
return false;
}
-//
+
// FP Opcodes that can be combined with a FMUL
static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
switch (Inst.getOpcode()) {
@@ -3009,13 +3127,13 @@ static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
case AArch64::FSUBv2f32:
case AArch64::FSUBv2f64:
case AArch64::FSUBv4f32:
- TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
- return (Options.UnsafeFPMath ||
- Options.AllowFPOpFusion == FPOpFusion::Fast);
+ TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
+ return (Options.UnsafeFPMath ||
+ Options.AllowFPOpFusion == FPOpFusion::Fast);
}
return false;
}
-//
+
// Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate(unsigned Opc) {
return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
@@ -3205,7 +3323,7 @@ static bool getFMAPatterns(MachineInstr &Root,
SmallVectorImpl<MachineCombinerPattern> &Patterns) {
if (!isCombineInstrCandidateFP(Root))
- return 0;
+ return false;
MachineBasicBlock &MBB = *Root.getParent();
bool Found = false;
@@ -3971,8 +4089,6 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
// Record MUL and ADD/SUB for deletion
DelInstrs.push_back(MUL);
DelInstrs.push_back(&Root);
-
- return;
}
/// \brief Replace csincr-branch sequence by simple conditional branch
@@ -4148,6 +4264,7 @@ AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
using namespace AArch64II;
+
static const std::pair<unsigned, const char *> TargetFlags[] = {
{MO_PAGE, "aarch64-page"},
{MO_PAGEOFF, "aarch64-pageoff"},
@@ -4162,6 +4279,7 @@ AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
using namespace AArch64II;
+
static const std::pair<unsigned, const char *> TargetFlags[] = {
{MO_GOT, "aarch64-got"},
{MO_NC, "aarch64-nc"},
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 90b2c0896872..5037866925d3 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -162,6 +162,10 @@ public:
int FrameIndex, const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
+ // This tells target independent code that it is okay to pass instructions
+ // with subreg operands to foldMemoryOperandImpl.
+ bool isSubregFoldable() const override { return true; }
+
using TargetInstrInfo::foldMemoryOperandImpl;
MachineInstr *
foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 20de07424c53..b51473524c72 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -1071,8 +1071,12 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
return false;
}
- const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
- (CmpInst::Predicate)I.getOperand(1).getPredicate());
+ // CSINC increments the result by one when the condition code is false.
+ // Therefore, we have to invert the predicate to get an increment by 1 when
+ // the predicate is true.
+ const AArch64CC::CondCode invCC =
+ changeICMPPredToAArch64CC(CmpInst::getInversePredicate(
+ (CmpInst::Predicate)I.getOperand(1).getPredicate()));
MachineInstr &CmpMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
.addDef(ZReg)
@@ -1084,7 +1088,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
.addDef(I.getOperand(0).getReg())
.addUse(AArch64::WZR)
.addUse(AArch64::WZR)
- .addImm(CC);
+ .addImm(invCC);
constrainSelectedInstRegOperands(CmpMI, TII, TRI, RBI);
constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.h b/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.h
index 0d44e696ac20..2c6e5a912fb7 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.h
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
namespace llvm {
+
class AArch64InstrInfo;
class AArch64RegisterBankInfo;
class AArch64RegisterInfo;
@@ -29,7 +30,7 @@ public:
const AArch64Subtarget &STI,
const AArch64RegisterBankInfo &RBI);
- virtual bool select(MachineInstr &I) const override;
+ bool select(MachineInstr &I) const override;
private:
/// tblgen-erated 'select' implementation, used as the initial selector for
@@ -43,5 +44,6 @@ private:
const AArch64RegisterBankInfo &RBI;
};
-} // End llvm namespace.
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AARCH64_AARCH64INSTRUCTIONSELECTOR_H
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index ca2860afe13d..f0bffe544158 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -14,17 +14,18 @@
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64MACHINEFUNCTIONINFO_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64MACHINEFUNCTIONINFO_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/MC/MCLinkerOptimizationHint.h"
+#include <cassert>
namespace llvm {
/// AArch64FunctionInfo - This class is derived from MachineFunctionInfo and
/// contains private AArch64-specific information for each MachineFunction.
class AArch64FunctionInfo final : public MachineFunctionInfo {
-
/// Number of bytes of arguments this function has on the stack. If the callee
/// is expected to restore the argument stack this should be a multiple of 16,
/// all usable during a tail call.
@@ -34,16 +35,16 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
/// space to a function with 16-bytes then misalignment of this value would
/// make a stack adjustment necessary, which could not be undone by the
/// callee.
- unsigned BytesInStackArgArea;
+ unsigned BytesInStackArgArea = 0;
/// The number of bytes to restore to deallocate space for incoming
/// arguments. Canonically 0 in the C calling convention, but non-zero when
/// callee is expected to pop the args.
- unsigned ArgumentStackToRestore;
+ unsigned ArgumentStackToRestore = 0;
/// HasStackFrame - True if this function has a stack frame. Set by
/// determineCalleeSaves().
- bool HasStackFrame;
+ bool HasStackFrame = false;
/// \brief Amount of stack frame size, not including callee-saved registers.
unsigned LocalStackSize;
@@ -53,54 +54,44 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
/// \brief Number of TLS accesses using the special (combinable)
/// _TLS_MODULE_BASE_ symbol.
- unsigned NumLocalDynamicTLSAccesses;
+ unsigned NumLocalDynamicTLSAccesses = 0;
/// \brief FrameIndex for start of varargs area for arguments passed on the
/// stack.
- int VarArgsStackIndex;
+ int VarArgsStackIndex = 0;
/// \brief FrameIndex for start of varargs area for arguments passed in
/// general purpose registers.
- int VarArgsGPRIndex;
+ int VarArgsGPRIndex = 0;
/// \brief Size of the varargs area for arguments passed in general purpose
/// registers.
- unsigned VarArgsGPRSize;
+ unsigned VarArgsGPRSize = 0;
/// \brief FrameIndex for start of varargs area for arguments passed in
/// floating-point registers.
- int VarArgsFPRIndex;
+ int VarArgsFPRIndex = 0;
/// \brief Size of the varargs area for arguments passed in floating-point
/// registers.
- unsigned VarArgsFPRSize;
+ unsigned VarArgsFPRSize = 0;
/// True if this function has a subset of CSRs that is handled explicitly via
/// copies.
- bool IsSplitCSR;
+ bool IsSplitCSR = false;
/// True when the stack gets realigned dynamically because the size of stack
/// frame is unknown at compile time. e.g., in case of VLAs.
- bool StackRealigned;
+ bool StackRealigned = false;
/// True when the callee-save stack area has unused gaps that may be used for
/// other stack allocations.
- bool CalleeSaveStackHasFreeSpace;
+ bool CalleeSaveStackHasFreeSpace = false;
public:
- AArch64FunctionInfo()
- : BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false),
- NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0),
- VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0),
- IsSplitCSR(false), StackRealigned(false),
- CalleeSaveStackHasFreeSpace(false) {}
-
- explicit AArch64FunctionInfo(MachineFunction &MF)
- : BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false),
- NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0),
- VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0),
- IsSplitCSR(false), StackRealigned(false),
- CalleeSaveStackHasFreeSpace(false) {
+ AArch64FunctionInfo() = default;
+
+ explicit AArch64FunctionInfo(MachineFunction &MF) {
(void)MF;
}
@@ -193,6 +184,7 @@ private:
MILOHContainer LOHContainerSet;
SetOfInstructions LOHRelated;
};
-} // End llvm namespace
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AARCH64_AARCH64MACHINEFUNCTIONINFO_H
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index f58bbbd26132..03e01329e036 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -71,6 +71,7 @@ void AArch64Subtarget::initializeProperties() {
break;
case Falkor:
MaxInterleaveFactor = 4;
+ VectorInsertExtractBaseCost = 2;
break;
case Kryo:
MaxInterleaveFactor = 4;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index e4ef0d4bb8db..d2883941e2c4 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -15,24 +15,35 @@
#include "AArch64InstructionSelector.h"
#include "AArch64LegalizerInfo.h"
#include "AArch64RegisterBankInfo.h"
+#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "AArch64TargetObjectFile.h"
#include "AArch64TargetTransformInfo.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Scalar.h"
+#include <memory>
+#include <string>
+
using namespace llvm;
static cl::opt<bool> EnableCCMP("aarch64-enable-ccmp",
@@ -154,9 +165,9 @@ extern "C" void LLVMInitializeAArch64Target() {
//===----------------------------------------------------------------------===//
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
if (TT.isOSBinFormatMachO())
- return make_unique<AArch64_MachoTargetObjectFile>();
+ return llvm::make_unique<AArch64_MachoTargetObjectFile>();
- return make_unique<AArch64_ELFTargetObjectFile>();
+ return llvm::make_unique<AArch64_ELFTargetObjectFile>();
}
// Helper function to build a DataLayout string
@@ -202,29 +213,35 @@ AArch64TargetMachine::AArch64TargetMachine(
initAsmInfo();
}
-AArch64TargetMachine::~AArch64TargetMachine() {}
+AArch64TargetMachine::~AArch64TargetMachine() = default;
#ifdef LLVM_BUILD_GLOBAL_ISEL
namespace {
+
struct AArch64GISelActualAccessor : public GISelAccessor {
std::unique_ptr<CallLowering> CallLoweringInfo;
std::unique_ptr<InstructionSelector> InstSelector;
std::unique_ptr<LegalizerInfo> Legalizer;
std::unique_ptr<RegisterBankInfo> RegBankInfo;
+
const CallLowering *getCallLowering() const override {
return CallLoweringInfo.get();
}
+
const InstructionSelector *getInstructionSelector() const override {
return InstSelector.get();
}
+
const LegalizerInfo *getLegalizerInfo() const override {
return Legalizer.get();
}
+
const RegisterBankInfo *getRegBankInfo() const override {
return RegBankInfo.get();
}
};
-} // End anonymous namespace.
+
+} // end anonymous namespace
#endif
const AArch64Subtarget *
@@ -287,6 +304,7 @@ AArch64beTargetMachine::AArch64beTargetMachine(
: AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
namespace {
+
/// AArch64 Code Generator Pass Configuration Options.
class AArch64PassConfig : public TargetPassConfig {
public:
@@ -324,7 +342,8 @@ public:
void addPreSched2() override;
void addPreEmitPass() override;
};
-} // namespace
+
+} // end anonymous namespace
TargetIRAnalysis AArch64TargetMachine::getTargetIRAnalysis() {
return TargetIRAnalysis([this](const Function &F) {
@@ -414,14 +433,17 @@ bool AArch64PassConfig::addIRTranslator() {
addPass(new IRTranslator());
return false;
}
+
bool AArch64PassConfig::addLegalizeMachineIR() {
addPass(new Legalizer());
return false;
}
+
bool AArch64PassConfig::addRegBankSelect() {
addPass(new RegBankSelect());
return false;
}
+
bool AArch64PassConfig::addGlobalInstructionSelect() {
addPass(new InstructionSelect());
return false;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 88c98865bbc6..1a17691fc584 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -417,14 +417,17 @@ int AArch64TTIImpl::getArithmeticInstrCost(
}
}
-int AArch64TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
+int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
+ const SCEV *Ptr) {
// Address computations in vectorized code with non-consecutive addresses will
// likely result in more instructions compared to scalar code where the
// computation can more often be merged into the index mode. The resulting
// extra micro-ops can significantly decrease throughput.
unsigned NumVectorInstToHideOverhead = 10;
+ int MaxMergeDistance = 64;
- if (Ty->isVectorTy() && IsComplex)
+ if (Ty->isVectorTy() && SE &&
+ !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
return NumVectorInstToHideOverhead;
// In many cases the address computation is not merged into the instruction
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 24642cb1698e..849fd3d9b44a 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -104,7 +104,7 @@ public:
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
- int getAddressComputationCost(Type *Ty, bool IsComplex);
+ int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr);
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
diff --git a/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index db84afacf30e..b86a283b40d4 100644
--- a/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -9,45 +9,62 @@
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCExpr.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "MCTargetDesc/AArch64TargetStreamer.h"
#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCLinkerOptimizationHint.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cctype>
+#include <cstdint>
#include <cstdio>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
+
using namespace llvm;
namespace {
-class AArch64Operand;
-
class AArch64AsmParser : public MCTargetAsmParser {
private:
StringRef Mnemonic; ///< Instruction mnemonic.
// Map of register aliases registers via the .req directive.
- StringMap<std::pair<bool, unsigned> > RegisterReqs;
+ StringMap<std::pair<bool, unsigned>> RegisterReqs;
AArch64TargetStreamer &getTargetStreamer() {
MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
@@ -118,6 +135,7 @@ public:
#include "AArch64GenAsmMatcher.inc"
};
bool IsILP32;
+
AArch64AsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
const MCInstrInfo &MII, const MCTargetOptions &Options)
: MCTargetAsmParser(Options, STI) {
@@ -143,9 +161,6 @@ public:
MCSymbolRefExpr::VariantKind &DarwinRefKind,
int64_t &Addend);
};
-} // end anonymous namespace
-
-namespace {
/// AArch64Operand - Instances of this class represent a parsed AArch64 machine
/// instruction.
@@ -531,6 +546,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 0 && Val < 2);
}
+
bool isImm0_7() const {
if (!isImm())
return false;
@@ -540,6 +556,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 0 && Val < 8);
}
+
bool isImm1_8() const {
if (!isImm())
return false;
@@ -549,6 +566,7 @@ public:
int64_t Val = MCE->getValue();
return (Val > 0 && Val < 9);
}
+
bool isImm0_15() const {
if (!isImm())
return false;
@@ -558,6 +576,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 0 && Val < 16);
}
+
bool isImm1_16() const {
if (!isImm())
return false;
@@ -567,6 +586,7 @@ public:
int64_t Val = MCE->getValue();
return (Val > 0 && Val < 17);
}
+
bool isImm0_31() const {
if (!isImm())
return false;
@@ -576,6 +596,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 0 && Val < 32);
}
+
bool isImm1_31() const {
if (!isImm())
return false;
@@ -585,6 +606,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 1 && Val < 32);
}
+
bool isImm1_32() const {
if (!isImm())
return false;
@@ -594,6 +616,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 1 && Val < 33);
}
+
bool isImm0_63() const {
if (!isImm())
return false;
@@ -603,6 +626,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 0 && Val < 64);
}
+
bool isImm1_63() const {
if (!isImm())
return false;
@@ -612,6 +636,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 1 && Val < 64);
}
+
bool isImm1_64() const {
if (!isImm())
return false;
@@ -621,6 +646,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 1 && Val < 65);
}
+
bool isImm0_127() const {
if (!isImm())
return false;
@@ -630,6 +656,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 0 && Val < 128);
}
+
bool isImm0_255() const {
if (!isImm())
return false;
@@ -639,6 +666,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 0 && Val < 256);
}
+
bool isImm0_65535() const {
if (!isImm())
return false;
@@ -648,6 +676,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 0 && Val < 65536);
}
+
bool isImm32_63() const {
if (!isImm())
return false;
@@ -657,6 +686,7 @@ public:
int64_t Val = MCE->getValue();
return (Val >= 32 && Val < 64);
}
+
bool isLogicalImm32() const {
if (!isImm())
return false;
@@ -669,6 +699,7 @@ public:
Val &= 0xFFFFFFFF;
return AArch64_AM::isLogicalImmediate(Val, 32);
}
+
bool isLogicalImm64() const {
if (!isImm())
return false;
@@ -677,6 +708,7 @@ public:
return false;
return AArch64_AM::isLogicalImmediate(MCE->getValue(), 64);
}
+
bool isLogicalImm32Not() const {
if (!isImm())
return false;
@@ -686,6 +718,7 @@ public:
int64_t Val = ~MCE->getValue() & 0xFFFFFFFF;
return AArch64_AM::isLogicalImmediate(Val, 32);
}
+
bool isLogicalImm64Not() const {
if (!isImm())
return false;
@@ -694,7 +727,9 @@ public:
return false;
return AArch64_AM::isLogicalImmediate(~MCE->getValue(), 64);
}
+
bool isShiftedImm() const { return Kind == k_ShiftedImm; }
+
bool isAddSubImm() const {
if (!isShiftedImm() && !isImm())
return false;
@@ -737,6 +772,7 @@ public:
// code deal with it.
return true;
}
+
bool isAddSubImmNeg() const {
if (!isShiftedImm() && !isImm())
return false;
@@ -756,7 +792,9 @@ public:
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr);
return CE != nullptr && CE->getValue() < 0 && -CE->getValue() <= 0xfff;
}
+
bool isCondCode() const { return Kind == k_CondCode; }
+
bool isSIMDImmType10() const {
if (!isImm())
return false;
@@ -765,6 +803,7 @@ public:
return false;
return AArch64_AM::isAdvSIMDModImmType10(MCE->getValue());
}
+
bool isBranchTarget26() const {
if (!isImm())
return false;
@@ -776,6 +815,7 @@ public:
return false;
return (Val >= -(0x2000000 << 2) && Val <= (0x1ffffff << 2));
}
+
bool isPCRelLabel19() const {
if (!isImm())
return false;
@@ -787,6 +827,7 @@ public:
return false;
return (Val >= -(0x40000 << 2) && Val <= (0x3ffff << 2));
}
+
bool isBranchTarget14() const {
if (!isImm())
return false;
@@ -891,40 +932,49 @@ public:
bool isFPImm() const { return Kind == k_FPImm; }
bool isBarrier() const { return Kind == k_Barrier; }
bool isSysReg() const { return Kind == k_SysReg; }
+
bool isMRSSystemRegister() const {
if (!isSysReg()) return false;
return SysReg.MRSReg != -1U;
}
+
bool isMSRSystemRegister() const {
if (!isSysReg()) return false;
return SysReg.MSRReg != -1U;
}
+
bool isSystemPStateFieldWithImm0_1() const {
if (!isSysReg()) return false;
return (SysReg.PStateField == AArch64PState::PAN ||
SysReg.PStateField == AArch64PState::UAO);
}
+
bool isSystemPStateFieldWithImm0_15() const {
if (!isSysReg() || isSystemPStateFieldWithImm0_1()) return false;
return SysReg.PStateField != -1U;
}
+
bool isReg() const override { return Kind == k_Register && !Reg.isVector; }
bool isVectorReg() const { return Kind == k_Register && Reg.isVector; }
+
bool isVectorRegLo() const {
return Kind == k_Register && Reg.isVector &&
AArch64MCRegisterClasses[AArch64::FPR128_loRegClassID].contains(
Reg.RegNum);
}
+
bool isGPR32as64() const {
return Kind == k_Register && !Reg.isVector &&
AArch64MCRegisterClasses[AArch64::GPR64RegClassID].contains(Reg.RegNum);
}
+
bool isWSeqPair() const {
return Kind == k_Register && !Reg.isVector &&
AArch64MCRegisterClasses[AArch64::WSeqPairsClassRegClassID].contains(
Reg.RegNum);
}
+
bool isXSeqPair() const {
return Kind == k_Register && !Reg.isVector &&
AArch64MCRegisterClasses[AArch64::XSeqPairsClassRegClassID].contains(
@@ -957,19 +1007,25 @@ public:
bool isVectorIndex1() const {
return Kind == k_VectorIndex && VectorIndex.Val == 1;
}
+
bool isVectorIndexB() const {
return Kind == k_VectorIndex && VectorIndex.Val < 16;
}
+
bool isVectorIndexH() const {
return Kind == k_VectorIndex && VectorIndex.Val < 8;
}
+
bool isVectorIndexS() const {
return Kind == k_VectorIndex && VectorIndex.Val < 4;
}
+
bool isVectorIndexD() const {
return Kind == k_VectorIndex && VectorIndex.Val < 2;
}
+
bool isToken() const override { return Kind == k_Token; }
+
bool isTokenEqual(StringRef Str) const {
return Kind == k_Token && getToken() == Str;
}
@@ -1006,6 +1062,7 @@ public:
AArch64_AM::ShiftExtendType ET = getShiftExtendType();
return ET != AArch64_AM::UXTX && ET != AArch64_AM::SXTX;
}
+
bool isExtendLSL64() const {
if (!isExtend())
return false;
@@ -1836,11 +1893,10 @@ void AArch64Operand::print(raw_ostream &OS) const {
OS << "<prfop invalid #" << getPrefetch() << ">";
break;
}
- case k_PSBHint: {
+ case k_PSBHint:
OS << getPSBHintName();
break;
- }
- case k_ShiftExtend: {
+ case k_ShiftExtend:
OS << "<" << AArch64_AM::getShiftExtendName(getShiftExtendType()) << " #"
<< getShiftExtendAmount();
if (!hasShiftExtendAmount())
@@ -1848,7 +1904,6 @@ void AArch64Operand::print(raw_ostream &OS) const {
OS << '>';
break;
}
- }
}
/// @name Auto-generated Match Functions
@@ -2469,7 +2524,7 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
Expr = MCConstantExpr::create(op2, getContext()); \
Operands.push_back( \
AArch64Operand::CreateImm(Expr, S, getLoc(), getContext())); \
- } while (0)
+ } while (false)
if (Mnemonic == "ic") {
if (!Op.compare_lower("ialluis")) {
@@ -3979,7 +4034,6 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
}
}
-
switch (MatchResult) {
case Match_Success: {
// Perform range checking and other semantic validations
@@ -4550,7 +4604,6 @@ unsigned AArch64AsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
return Match_InvalidOperand;
}
-
OperandMatchResultTy
AArch64AsmParser::tryParseGPRSeqPair(OperandVector &Operands) {
@@ -4601,7 +4654,7 @@ AArch64AsmParser::tryParseGPRSeqPair(OperandVector &Operands) {
return MatchOperand_ParseFail;
}
- if (RI->getEncodingValue(SecondReg) != FirstEncoding + 1 ||
+ if (RI->getEncodingValue(SecondReg) != FirstEncoding + 1 ||
(isXReg && !XRegClass.contains(SecondReg)) ||
(isWReg && !WRegClass.contains(SecondReg))) {
Error(E,"expected second odd register of a "
@@ -4610,7 +4663,7 @@ AArch64AsmParser::tryParseGPRSeqPair(OperandVector &Operands) {
}
unsigned Pair = 0;
- if(isXReg) {
+ if (isXReg) {
Pair = RI->getMatchingSuperReg(FirstReg, AArch64::sube64,
&AArch64MCRegisterClasses[AArch64::XSeqPairsClassRegClassID]);
} else {
diff --git a/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.h b/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.h
index 24e353cf4b96..bc2f7f181699 100644
--- a/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.h
+++ b/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.h
@@ -17,15 +17,12 @@
namespace llvm {
-class MCInst;
-class raw_ostream;
-
class AArch64Disassembler : public MCDisassembler {
public:
AArch64Disassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
: MCDisassembler(STI, Ctx) {}
- ~AArch64Disassembler() {}
+ ~AArch64Disassembler() override = default;
MCDisassembler::DecodeStatus
getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes,
@@ -33,6 +30,6 @@ public:
raw_ostream &CStream) const override;
};
-} // namespace llvm
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_AARCH64_DISASSEMBLER_AARCH64DISASSEMBLER_H
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
index a1edb3cef46a..c954c0eb2c6b 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -17,25 +17,30 @@
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+#include <cstdint>
using namespace llvm;
namespace {
+
class AArch64ELFObjectWriter : public MCELFObjectTargetWriter {
public:
AArch64ELFObjectWriter(uint8_t OSABI, bool IsLittleEndian, bool IsILP32);
- ~AArch64ELFObjectWriter() override;
+ ~AArch64ELFObjectWriter() override = default;
protected:
unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup, bool IsPCRel) const override;
bool IsILP32;
-private:
};
-}
+
+} // end anonymous namespace
AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI,
bool IsLittleEndian,
@@ -44,8 +49,6 @@ AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI,
/*HasRelocationAddend*/ true),
IsILP32(IsILP32) {}
-AArch64ELFObjectWriter::~AArch64ELFObjectWriter() {}
-
#define R_CLS(rtype) \
IsILP32 ? ELF::R_AARCH64_P32_##rtype : ELF::R_AARCH64_##rtype
#define BAD_ILP32_MOV(lp64rtype) "ILP32 absolute MOV relocation not "\
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index f7058cdf2373..62dfa59483eb 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -15,15 +15,23 @@
#include "MCTargetDesc/AArch64FixupKinds.h"
#include "MCTargetDesc/AArch64MCExpr.h"
#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
+
using namespace llvm;
#define DEBUG_TYPE "mccodeemitter"
@@ -37,13 +45,12 @@ class AArch64MCCodeEmitter : public MCCodeEmitter {
MCContext &Ctx;
const MCInstrInfo &MCII;
- AArch64MCCodeEmitter(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT
- void operator=(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT
public:
AArch64MCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
: Ctx(ctx), MCII(mcii) {}
-
- ~AArch64MCCodeEmitter() override {}
+ AArch64MCCodeEmitter(const AArch64MCCodeEmitter &) = delete;
+ void operator=(const AArch64MCCodeEmitter &) = delete;
+ ~AArch64MCCodeEmitter() override = default;
// getBinaryCodeForInstr - TableGen'erated function for getting the
// binary encoding for an instruction.
@@ -181,12 +188,6 @@ private:
} // end anonymous namespace
-MCCodeEmitter *llvm::createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- MCContext &Ctx) {
- return new AArch64MCCodeEmitter(MCII, Ctx);
-}
-
/// getMachineOpValue - Return binary encoding of operand. If the machine
/// operand requires relocation, record the relocation and return zero.
unsigned
@@ -601,3 +602,9 @@ unsigned AArch64MCCodeEmitter::fixOneOperandFPComparison(
#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "AArch64GenMCCodeEmitter.inc"
+
+MCCodeEmitter *llvm::createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx) {
+ return new AArch64MCCodeEmitter(MCII, Ctx);
+}
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
index 3e86a42d5be6..1b949b54590c 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
@@ -13,6 +13,7 @@
#include "AArch64TargetStreamer.h"
#include "llvm/MC/ConstantPools.h"
+
using namespace llvm;
//
@@ -21,7 +22,7 @@ using namespace llvm;
AArch64TargetStreamer::AArch64TargetStreamer(MCStreamer &S)
: MCTargetStreamer(S), ConstantPools(new AssemblerConstantPools()) {}
-AArch64TargetStreamer::~AArch64TargetStreamer() {}
+AArch64TargetStreamer::~AArch64TargetStreamer() = default;
// The constant pool handling is shared by all AArch64TargetStreamer
// implementations.
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index a8e6902c252b..4acd55eb6120 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -176,12 +176,14 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
SetupMachineFunction(MF);
+ const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
MCContext &Context = getObjFileLowering().getContext();
- MCSectionELF *ConfigSection =
- Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0);
- OutStreamer->SwitchSection(ConfigSection);
+ if (!STM.isAmdHsaOS()) {
+ MCSectionELF *ConfigSection =
+ Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0);
+ OutStreamer->SwitchSection(ConfigSection);
+ }
- const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
SIProgramInfo KernelInfo;
if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
getSIProgramInfo(KernelInfo, MF);
diff --git a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 85cbadf0a570..5f651d4da5d2 100644
--- a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -269,7 +269,7 @@ unsigned encodeLgkmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Lgkmcnt) {
unsigned encodeWaitcnt(IsaVersion Version,
unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
- unsigned Waitcnt = getWaitcntBitMask(Version);;
+ unsigned Waitcnt = getWaitcntBitMask(Version);
Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 10e6297ef1ed..cc001b596785 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -338,14 +338,17 @@ int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
}
-int ARMTTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
+int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
+ const SCEV *Ptr) {
// Address computations in vectorized code with non-consecutive addresses will
// likely result in more instructions compared to scalar code where the
// computation can more often be merged into the index mode. The resulting
// extra micro-ops can significantly decrease throughput.
unsigned NumVectorInstToHideOverhead = 10;
+ int MaxMergeDistance = 64;
- if (Ty->isVectorTy() && IsComplex)
+ if (Ty->isVectorTy() && SE &&
+ !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
return NumVectorInstToHideOverhead;
// In many cases the address computation is not merged into the instruction
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index d83228afb0ab..731a5adf3d73 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -104,7 +104,8 @@ public:
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
- int getAddressComputationCost(Type *Val, bool IsComplex);
+ int getAddressComputationCost(Type *Val, ScalarEvolution *SE,
+ const SCEV *Ptr);
int getFPOpCost(Type *Ty);
diff --git a/contrib/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp b/contrib/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
index 903f92a04431..57ead973b56e 100644
--- a/contrib/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
+++ b/contrib/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
@@ -8,23 +8,41 @@
//===----------------------------------------------------------------------===//
#include "Lanai.h"
+#include "LanaiAluCode.h"
+#include "LanaiCondCode.h"
#include "MCTargetDesc/LanaiMCExpr.h"
-#include "MCTargetDesc/LanaiMCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TargetRegistry.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
namespace llvm {
+
+// Auto-generated by TableGen
+static unsigned MatchRegisterName(StringRef Name);
+
namespace {
+
struct LanaiOperand;
class LanaiAsmParser : public MCTargetAsmParser {
@@ -80,9 +98,6 @@ private:
const MCSubtargetInfo &SubtargetInfo;
};
-// Auto-generated by TableGen
-static unsigned MatchRegisterName(llvm::StringRef Name);
-
// LanaiOperand - Instances of this class represent a parsed machine
// instruction
struct LanaiOperand : public MCParsedAsmOperand {
@@ -627,6 +642,8 @@ public:
}
};
+} // end anonymous namespace
+
bool LanaiAsmParser::ParseDirective(AsmToken /*DirectiveId*/) { return true; }
bool LanaiAsmParser::MatchAndEmitInstruction(SMLoc IdLoc, unsigned &Opcode,
@@ -680,11 +697,11 @@ std::unique_ptr<LanaiOperand> LanaiAsmParser::parseRegister() {
if (Lexer.getKind() == AsmToken::Identifier) {
RegNum = MatchRegisterName(Lexer.getTok().getIdentifier());
if (RegNum == 0)
- return 0;
+ return nullptr;
Parser.Lex(); // Eat identifier token
return LanaiOperand::createReg(RegNum, Start, End);
}
- return 0;
+ return nullptr;
}
bool LanaiAsmParser::ParseRegister(unsigned &RegNum, SMLoc &StartLoc,
@@ -701,15 +718,15 @@ bool LanaiAsmParser::ParseRegister(unsigned &RegNum, SMLoc &StartLoc,
std::unique_ptr<LanaiOperand> LanaiAsmParser::parseIdentifier() {
SMLoc Start = Parser.getTok().getLoc();
SMLoc End = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
- const MCExpr *Res, *RHS = 0;
+ const MCExpr *Res, *RHS = nullptr;
LanaiMCExpr::VariantKind Kind = LanaiMCExpr::VK_Lanai_None;
if (Lexer.getKind() != AsmToken::Identifier)
- return 0;
+ return nullptr;
StringRef Identifier;
if (Parser.parseIdentifier(Identifier))
- return 0;
+ return nullptr;
// Check if identifier has a modifier
if (Identifier.equals_lower("hi"))
@@ -722,24 +739,24 @@ std::unique_ptr<LanaiOperand> LanaiAsmParser::parseIdentifier() {
if (Kind != LanaiMCExpr::VK_Lanai_None) {
if (Lexer.getKind() != AsmToken::LParen) {
Error(Lexer.getLoc(), "Expected '('");
- return 0;
+ return nullptr;
}
Lexer.Lex(); // lex '('
// Parse identifier
if (Parser.parseIdentifier(Identifier))
- return 0;
+ return nullptr;
}
// If addition parse the RHS.
if (Lexer.getKind() == AsmToken::Plus && Parser.parseExpression(RHS))
- return 0;
+ return nullptr;
// For variants parse the final ')'
if (Kind != LanaiMCExpr::VK_Lanai_None) {
if (Lexer.getKind() != AsmToken::RParen) {
Error(Lexer.getLoc(), "Expected ')'");
- return 0;
+ return nullptr;
}
Lexer.Lex(); // lex ')'
}
@@ -771,7 +788,7 @@ std::unique_ptr<LanaiOperand> LanaiAsmParser::parseImmediate() {
if (!Parser.parseExpression(ExprVal))
return LanaiOperand::createImm(ExprVal, Start, End);
default:
- return 0;
+ return nullptr;
}
}
@@ -1204,10 +1221,9 @@ bool LanaiAsmParser::ParseInstruction(ParseInstructionInfo & /*Info*/,
#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#include "LanaiGenAsmMatcher.inc"
-} // namespace
extern "C" void LLVMInitializeLanaiAsmParser() {
RegisterMCAsmParser<LanaiAsmParser> x(getTheLanaiTarget());
}
-} // namespace llvm
+} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.h b/contrib/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.h
index a317cd88ad63..e0c19e8ea644 100644
--- a/contrib/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.h
+++ b/contrib/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.h
@@ -20,14 +20,11 @@
namespace llvm {
-class MCInst;
-class raw_ostream;
-
class LanaiDisassembler : public MCDisassembler {
public:
LanaiDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx);
- ~LanaiDisassembler() override {}
+ ~LanaiDisassembler() override = default;
// getInstruction - See MCDisassembler.
MCDisassembler::DecodeStatus
@@ -36,6 +33,6 @@ public:
raw_ostream &CStream) const override;
};
-} // namespace llvm
+} // end namespace llvm
#endif // LLVM_LIB_TARGET_LANAI_DISASSEMBLER_LANAIDISASSEMBLER_H
diff --git a/contrib/llvm/lib/Target/Lanai/InstPrinter/LanaiInstPrinter.h b/contrib/llvm/lib/Target/Lanai/InstPrinter/LanaiInstPrinter.h
index 1c9d186ad819..59904fbaa318 100644
--- a/contrib/llvm/lib/Target/Lanai/InstPrinter/LanaiInstPrinter.h
+++ b/contrib/llvm/lib/Target/Lanai/InstPrinter/LanaiInstPrinter.h
@@ -14,10 +14,10 @@
#ifndef LLVM_LIB_TARGET_LANAI_INSTPRINTER_LANAIINSTPRINTER_H
#define LLVM_LIB_TARGET_LANAI_INSTPRINTER_LANAIINSTPRINTER_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCInstPrinter.h"
namespace llvm {
-class MCOperand;
class LanaiInstPrinter : public MCInstPrinter {
public:
@@ -28,14 +28,14 @@ public:
void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
const MCSubtargetInfo &STI) override;
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
- const char *Modifier = 0);
+ const char *Modifier = nullptr);
void printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printMemRiOperand(const MCInst *MI, int OpNo, raw_ostream &O,
- const char *Modifier = 0);
+ const char *Modifier = nullptr);
void printMemRrOperand(const MCInst *MI, int OpNo, raw_ostream &O,
- const char *Modifier = 0);
+ const char *Modifier = nullptr);
void printMemSplsOperand(const MCInst *MI, int OpNo, raw_ostream &O,
- const char *Modifier = 0);
+ const char *Modifier = nullptr);
void printCCOperand(const MCInst *MI, int OpNo, raw_ostream &O);
void printAluOperand(const MCInst *MI, int OpNo, raw_ostream &O);
void printHi16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
@@ -60,6 +60,7 @@ private:
bool printMemoryStoreIncrement(const MCInst *MI, raw_ostream &Ostream,
StringRef Opcode, int AddOffset);
};
-} // namespace llvm
+
+} // end namespace llvm
#endif // LLVM_LIB_TARGET_LANAI_INSTPRINTER_LANAIINSTPRINTER_H
diff --git a/contrib/llvm/lib/Target/Lanai/LanaiISelLowering.cpp b/contrib/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
index ae7870e07d42..d156294a0b0c 100644
--- a/contrib/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
@@ -11,31 +11,46 @@
//
//===----------------------------------------------------------------------===//
-#include "LanaiISelLowering.h"
-
#include "Lanai.h"
+#include "LanaiCondCode.h"
+#include "LanaiISelLowering.h"
#include "LanaiMachineFunctionInfo.h"
#include "LanaiSubtarget.h"
-#include "LanaiTargetMachine.h"
#include "LanaiTargetObjectFile.h"
+#include "MCTargetDesc/LanaiBaseInfo.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalAlias.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetCallingConv.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+#include <cmath>
+#include <cstdint>
+#include <cstdlib>
+#include <utility>
#define DEBUG_TYPE "lanai-lower"
@@ -195,6 +210,7 @@ SDValue LanaiTargetLowering::LowerOperation(SDValue Op,
llvm_unreachable("unimplemented operand");
}
}
+
//===----------------------------------------------------------------------===//
// Lanai Inline Assembly Support
//===----------------------------------------------------------------------===//
@@ -244,7 +260,7 @@ LanaiTargetLowering::getSingleConstraintMatchWeight(
Value *CallOperandVal = Info.CallOperandVal;
// If we don't have a value, we can't do a match,
// but allow it at the lowest weight.
- if (CallOperandVal == NULL)
+ if (CallOperandVal == nullptr)
return CW_Default;
// Look at the constraint type.
switch (*Constraint) {
@@ -270,7 +286,7 @@ LanaiTargetLowering::getSingleConstraintMatchWeight(
void LanaiTargetLowering::LowerAsmOperandForConstraint(
SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
- SDValue Result(0, 0);
+ SDValue Result(nullptr, 0);
// Only support length 1 constraints for now.
if (Constraint.length() > 1)
@@ -676,7 +692,7 @@ SDValue LanaiTargetLowering::LowerCCCCallTo(
} else {
assert(VA.isMemLoc());
- if (StackPtr.getNode() == 0)
+ if (StackPtr.getNode() == nullptr)
StackPtr = DAG.getCopyFromReg(Chain, DL, Lanai::SP,
getPointerTy(DAG.getDataLayout()));
@@ -1120,7 +1136,7 @@ const char *LanaiTargetLowering::getTargetNodeName(unsigned Opcode) const {
case LanaiISD::SMALL:
return "LanaiISD::SMALL";
default:
- return NULL;
+ return nullptr;
}
}
diff --git a/contrib/llvm/lib/Target/Lanai/LanaiRegisterInfo.h b/contrib/llvm/lib/Target/Lanai/LanaiRegisterInfo.h
index 8b84bbc460e8..c6e459076ebc 100644
--- a/contrib/llvm/lib/Target/Lanai/LanaiRegisterInfo.h
+++ b/contrib/llvm/lib/Target/Lanai/LanaiRegisterInfo.h
@@ -21,9 +21,6 @@
namespace llvm {
-class TargetInstrInfo;
-class Type;
-
struct LanaiRegisterInfo : public LanaiGenRegisterInfo {
LanaiRegisterInfo();
@@ -32,7 +29,7 @@ struct LanaiRegisterInfo : public LanaiGenRegisterInfo {
// Code Generation virtual methods.
const uint16_t *
- getCalleeSavedRegs(const MachineFunction *MF = 0) const override;
+ getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
BitVector getReservedRegs(const MachineFunction &MF) const override;
@@ -42,7 +39,7 @@ struct LanaiRegisterInfo : public LanaiGenRegisterInfo {
void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
unsigned FIOperandNum,
- RegScavenger *RS = NULL) const override;
+ RegScavenger *RS = nullptr) const override;
bool canRealignStack(const MachineFunction &MF) const override;
@@ -58,6 +55,6 @@ struct LanaiRegisterInfo : public LanaiGenRegisterInfo {
int getDwarfRegNum(unsigned RegNum, bool IsEH) const;
};
-} // namespace llvm
+} // end namespace llvm
#endif // LLVM_LIB_TARGET_LANAI_LANAIREGISTERINFO_H
diff --git a/contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp b/contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp
index e30d5e9a18eb..e02bba529bd5 100644
--- a/contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp
@@ -9,20 +9,19 @@
#include "MCTargetDesc/LanaiBaseInfo.h"
#include "MCTargetDesc/LanaiFixupKinds.h"
-#include "MCTargetDesc/LanaiMCTargetDesc.h"
#include "llvm/MC/MCELFObjectWriter.h"
-#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
using namespace llvm;
namespace {
+
class LanaiELFObjectWriter : public MCELFObjectTargetWriter {
public:
explicit LanaiELFObjectWriter(uint8_t OSABI);
- ~LanaiELFObjectWriter() override;
+ ~LanaiELFObjectWriter() override = default;
protected:
unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
@@ -30,14 +29,13 @@ protected:
bool needsRelocateWithSymbol(const MCSymbol &SD,
unsigned Type) const override;
};
-} // namespace
+
+} // end anonymous namespace
LanaiELFObjectWriter::LanaiELFObjectWriter(uint8_t OSABI)
: MCELFObjectTargetWriter(/*Is64Bit_=*/false, OSABI, ELF::EM_LANAI,
/*HasRelocationAddend=*/true) {}
-LanaiELFObjectWriter::~LanaiELFObjectWriter() {}
-
unsigned LanaiELFObjectWriter::getRelocType(MCContext & /*Ctx*/,
const MCValue & /*Target*/,
const MCFixup &Fixup,
diff --git a/contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp b/contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
index ce68b7e24dba..f5b5335bb989 100644
--- a/contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
@@ -12,37 +12,38 @@
//===----------------------------------------------------------------------===//
#include "Lanai.h"
+#include "LanaiAluCode.h"
#include "MCTargetDesc/LanaiBaseInfo.h"
#include "MCTargetDesc/LanaiFixupKinds.h"
#include "MCTargetDesc/LanaiMCExpr.h"
-#include "MCTargetDesc/LanaiMCTargetDesc.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
#define DEBUG_TYPE "mccodeemitter"
STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
namespace llvm {
+
namespace {
-class LanaiMCCodeEmitter : public MCCodeEmitter {
- LanaiMCCodeEmitter(const LanaiMCCodeEmitter &); // DO NOT IMPLEMENT
- void operator=(const LanaiMCCodeEmitter &); // DO NOT IMPLEMENT
- const MCInstrInfo &InstrInfo;
- MCContext &Context;
+class LanaiMCCodeEmitter : public MCCodeEmitter {
public:
- LanaiMCCodeEmitter(const MCInstrInfo &MCII, MCContext &C)
- : InstrInfo(MCII), Context(C) {}
-
- ~LanaiMCCodeEmitter() override {}
+ LanaiMCCodeEmitter(const MCInstrInfo &MCII, MCContext &C) {}
+ LanaiMCCodeEmitter(const LanaiMCCodeEmitter &) = delete;
+ void operator=(const LanaiMCCodeEmitter &) = delete;
+ ~LanaiMCCodeEmitter() override = default;
// The functions below are called by TableGen generated functions for getting
// the binary encoding of instructions/operands.
@@ -86,6 +87,8 @@ public:
const MCSubtargetInfo &STI) const;
};
+} // end anonymous namespace
+
Lanai::Fixups FixupKind(const MCExpr *Expr) {
if (isa<MCSymbolRefExpr>(Expr))
return Lanai::FIXUP_LANAI_21;
@@ -298,8 +301,8 @@ unsigned LanaiMCCodeEmitter::getBranchTargetOpValue(
}
#include "LanaiGenMCCodeEmitter.inc"
-} // namespace
-} // namespace llvm
+
+} // end namespace llvm
llvm::MCCodeEmitter *
llvm::createLanaiMCCodeEmitter(const MCInstrInfo &InstrInfo,
diff --git a/contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp b/contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
index c2f8c0f7ad50..a47ff9ff3d61 100644
--- a/contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
@@ -11,16 +11,21 @@
//
//===----------------------------------------------------------------------===//
+#include "LanaiMCAsmInfo.h"
#include "LanaiMCTargetDesc.h"
-
#include "InstPrinter/LanaiInstPrinter.h"
-#include "LanaiMCAsmInfo.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
+#include <cstdint>
+#include <string>
#define GET_INSTRINFO_MC_DESC
#include "LanaiGenInstrInfo.inc"
@@ -70,7 +75,7 @@ static MCInstPrinter *createLanaiMCInstPrinter(const Triple & /*T*/,
const MCRegisterInfo &MRI) {
if (SyntaxVariant == 0)
return new LanaiInstPrinter(MAI, MII, MRI);
- return 0;
+ return nullptr;
}
static MCRelocationInfo *createLanaiElfRelocation(const Triple &TheTriple,
@@ -79,6 +84,7 @@ static MCRelocationInfo *createLanaiElfRelocation(const Triple &TheTriple,
}
namespace {
+
class LanaiMCInstrAnalysis : public MCInstrAnalysis {
public:
explicit LanaiMCInstrAnalysis(const MCInstrInfo *Info)
@@ -107,6 +113,7 @@ public:
}
}
};
+
} // end anonymous namespace
static MCInstrAnalysis *createLanaiInstrAnalysis(const MCInstrInfo *Info) {
@@ -131,7 +138,7 @@ extern "C" void LLVMInitializeLanaiTargetMC() {
// Register the MC code emitter
TargetRegistry::RegisterMCCodeEmitter(getTheLanaiTarget(),
- llvm::createLanaiMCCodeEmitter);
+ createLanaiMCCodeEmitter);
// Register the ASM Backend
TargetRegistry::RegisterMCAsmBackend(getTheLanaiTarget(),
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
index d3c88482f092..05acd25ae5fc 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -47,7 +47,7 @@ namespace llvm {
FCTIDZ, FCTIWZ,
/// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for
- /// unsigned integers.
+ /// unsigned integers with round toward zero.
FCTIDUZ, FCTIWUZ,
/// VEXTS, ByteWidth - takes an input in VSFRC and produces an output in
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 03b2257a88a8..fbec8787ef8d 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1154,6 +1154,9 @@ defm FCFID : XForm_26r<63, 846, (outs f8rc:$frD), (ins f8rc:$frB),
defm FCTID : XForm_26r<63, 814, (outs f8rc:$frD), (ins f8rc:$frB),
"fctid", "$frD, $frB", IIC_FPGeneral,
[]>, isPPC64;
+defm FCTIDU : XForm_26r<63, 942, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fctidu", "$frD, $frB", IIC_FPGeneral,
+ []>, isPPC64;
defm FCTIDZ : XForm_26r<63, 815, (outs f8rc:$frD), (ins f8rc:$frB),
"fctidz", "$frD, $frB", IIC_FPGeneral,
[(set f64:$frD, (PPCfctidz f64:$frB))]>, isPPC64;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
index 99689f656c2d..ef7d2012a233 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
@@ -603,6 +603,12 @@ class XForm_17<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = 0;
}
+class XForm_17a<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : XForm_17<opcode, xo, OOL, IOL, asmstr, itin > {
+ let FRA = 0;
+}
+
// Used for QPX
class XForm_18<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index a7231bd2e2c0..90111bbea07d 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -2172,11 +2172,19 @@ let isCompare = 1, hasSideEffects = 0 in {
"fcmpu $crD, $fA, $fB", IIC_FPCompare>;
}
+def FTDIV: XForm_17<63, 128, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB),
+ "ftdiv $crD, $fA, $fB", IIC_FPCompare>;
+def FTSQRT: XForm_17a<63, 160, (outs crrc:$crD), (ins f8rc:$fB),
+ "ftsqrt $crD, $fB", IIC_FPCompare>;
+
let Uses = [RM] in {
let hasSideEffects = 0 in {
defm FCTIW : XForm_26r<63, 14, (outs f8rc:$frD), (ins f8rc:$frB),
"fctiw", "$frD, $frB", IIC_FPGeneral,
[]>;
+ defm FCTIWU : XForm_26r<63, 142, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fctiwu", "$frD, $frB", IIC_FPGeneral,
+ []>;
defm FCTIWZ : XForm_26r<63, 15, (outs f8rc:$frD), (ins f8rc:$frB),
"fctiwz", "$frD, $frB", IIC_FPGeneral,
[(set f64:$frD, (PPCfctiwz f64:$frB))]>;
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
index fd2189397279..7f72ab17f619 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -16985,10 +16985,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, newSelect, zeroConst);
}
- if (Cond.getOpcode() == ISD::SETCC) {
+ if (Cond.getOpcode() == ISD::SETCC)
if (SDValue NewCond = LowerSETCC(Cond, DAG))
Cond = NewCond;
- }
// (select (x == 0), -1, y) -> (sign_bit (x - 1)) | y
// (select (x == 0), y, -1) -> ~(sign_bit (x - 1)) | y
@@ -18289,6 +18288,7 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT,
/// constant. Takes immediate version of shift as input.
static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
SDValue SrcOp, SDValue ShAmt,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT SVT = ShAmt.getSimpleValueType();
assert((SVT == MVT::i32 || SVT == MVT::i64) && "Unexpected value type!");
@@ -18306,27 +18306,32 @@ static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
case X86ISD::VSRAI: Opc = X86ISD::VSRA; break;
}
- const X86Subtarget &Subtarget =
- static_cast<const X86Subtarget &>(DAG.getSubtarget());
- if (Subtarget.hasSSE41() && ShAmt.getOpcode() == ISD::ZERO_EXTEND &&
- ShAmt.getOperand(0).getSimpleValueType() == MVT::i16) {
- // Let the shuffle legalizer expand this shift amount node.
+ // Need to build a vector containing shift amount.
+ // SSE/AVX packed shifts only use the lower 64-bit of the shift count.
+ // +=================+============+=======================================+
+ // | ShAmt is | HasSSE4.1? | Construct ShAmt vector as |
+ // +=================+============+=======================================+
+ // | i64 | Yes, No | Use ShAmt as lowest elt |
+ // | i32 | Yes | zero-extend in-reg |
+ // | (i32 zext(i16)) | Yes | zero-extend in-reg |
+ // | i16/i32 | No | v4i32 build_vector(ShAmt, 0, ud, ud) |
+ // +=================+============+=======================================+
+
+ if (SVT == MVT::i64)
+ ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v2i64, ShAmt);
+ else if (Subtarget.hasSSE41() && ShAmt.getOpcode() == ISD::ZERO_EXTEND &&
+ ShAmt.getOperand(0).getSimpleValueType() == MVT::i16) {
SDValue Op0 = ShAmt.getOperand(0);
Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(Op0), MVT::v8i16, Op0);
- ShAmt = getShuffleVectorZeroOrUndef(Op0, 0, true, Subtarget, DAG);
+ ShAmt = DAG.getZeroExtendVectorInReg(Op0, SDLoc(Op0), MVT::v2i64);
+ } else if (Subtarget.hasSSE41() &&
+ ShAmt.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v4i32, ShAmt);
+ ShAmt = DAG.getZeroExtendVectorInReg(ShAmt, SDLoc(ShAmt), MVT::v2i64);
} else {
- // Need to build a vector containing shift amount.
- // SSE/AVX packed shifts only use the lower 64-bit of the shift count.
- SmallVector<SDValue, 4> ShOps;
- ShOps.push_back(ShAmt);
- if (SVT == MVT::i32) {
- ShOps.push_back(DAG.getConstant(0, dl, SVT));
- ShOps.push_back(DAG.getUNDEF(SVT));
- }
- ShOps.push_back(DAG.getUNDEF(SVT));
-
- MVT BVT = SVT == MVT::i32 ? MVT::v4i32 : MVT::v2i64;
- ShAmt = DAG.getBuildVector(BVT, dl, ShOps);
+ SmallVector<SDValue, 4> ShOps = {ShAmt, DAG.getConstant(0, dl, SVT),
+ DAG.getUNDEF(SVT), DAG.getUNDEF(SVT)};
+ ShAmt = DAG.getBuildVector(MVT::v4i32, dl, ShOps);
}
// The return type has to be a 128-bit type with the same element
@@ -19014,7 +19019,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
}
case VSHIFT:
return getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(),
- Op.getOperand(1), Op.getOperand(2), DAG);
+ Op.getOperand(1), Op.getOperand(2), Subtarget,
+ DAG);
case COMPRESS_EXPAND_IN_REG: {
SDValue Mask = Op.getOperand(3);
SDValue DataToCompress = Op.getOperand(1);
@@ -21276,7 +21282,7 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
else if (EltVT.bitsLT(MVT::i32))
BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt);
- return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, DAG);
+ return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, Subtarget, DAG);
}
}
@@ -25951,12 +25957,11 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N,
// instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
+ bool FloatDomain,
const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &SrcVT, MVT &DstVT) {
unsigned NumMaskElts = Mask.size();
unsigned MaskEltSize = MaskVT.getScalarSizeInBits();
- bool FloatDomain = MaskVT.isFloatingPoint() ||
- (!Subtarget.hasAVX2() && MaskVT.is256BitVector());
// Match against a VZEXT_MOVL instruction, SSE1 only supports 32-bits (MOVSS).
if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2())) &&
@@ -26067,11 +26072,11 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
// permute instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
+ bool FloatDomain,
const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &ShuffleVT,
unsigned &PermuteImm) {
unsigned NumMaskElts = Mask.size();
- bool FloatDomain = MaskVT.isFloatingPoint();
bool ContainsZeros = false;
SmallBitVector Zeroable(NumMaskElts, false);
@@ -26211,11 +26216,10 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
// shuffle instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
- SDValue &V1, SDValue &V2,
+ bool FloatDomain, SDValue &V1, SDValue &V2,
const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &ShuffleVT,
bool IsUnary) {
- bool FloatDomain = MaskVT.isFloatingPoint();
unsigned EltSizeInBits = MaskVT.getScalarSizeInBits();
if (MaskVT.is128BitVector()) {
@@ -26310,13 +26314,13 @@ static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
}
static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
+ bool FloatDomain,
SDValue &V1, SDValue &V2,
SDLoc &DL, SelectionDAG &DAG,
const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &ShuffleVT,
unsigned &PermuteImm) {
unsigned NumMaskElts = Mask.size();
- bool FloatDomain = MaskVT.isFloatingPoint();
// Attempt to match against PALIGNR byte rotate.
if (!FloatDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSSE3()) ||
@@ -26594,8 +26598,8 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
}
}
- if (matchUnaryVectorShuffle(MaskVT, Mask, Subtarget, Shuffle, ShuffleSrcVT,
- ShuffleVT)) {
+ if (matchUnaryVectorShuffle(MaskVT, Mask, FloatDomain, Subtarget, Shuffle,
+ ShuffleSrcVT, ShuffleVT)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
@@ -26609,8 +26613,8 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
return true;
}
- if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, Subtarget, Shuffle,
- ShuffleVT, PermuteImm)) {
+ if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, FloatDomain, Subtarget,
+ Shuffle, ShuffleVT, PermuteImm)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
@@ -26626,8 +26630,8 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
}
}
- if (matchBinaryVectorShuffle(MaskVT, Mask, V1, V2, Subtarget, Shuffle,
- ShuffleVT, UnaryShuffle)) {
+ if (matchBinaryVectorShuffle(MaskVT, Mask, FloatDomain, V1, V2, Subtarget,
+ Shuffle, ShuffleVT, UnaryShuffle)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
@@ -26643,8 +26647,9 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
return true;
}
- if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, V1, V2, DL, DAG, Subtarget,
- Shuffle, ShuffleVT, PermuteImm)) {
+ if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, FloatDomain, V1, V2, DL,
+ DAG, Subtarget, Shuffle, ShuffleVT,
+ PermuteImm)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
@@ -28742,6 +28747,27 @@ static bool combineBitcastForMaskedOp(SDValue OrigOp, SelectionDAG &DAG,
DAG.getConstant(Imm, DL, MVT::i8)));
return true;
}
+ case ISD::EXTRACT_SUBVECTOR: {
+ unsigned EltSize = EltVT.getSizeInBits();
+ if (EltSize != 32 && EltSize != 64)
+ return false;
+ MVT OpEltVT = Op.getSimpleValueType().getVectorElementType();
+ // Only change element size, not type.
+ if (VT.isInteger() != OpEltVT.isInteger())
+ return false;
+ uint64_t Imm = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ Imm = (Imm * OpEltVT.getSizeInBits()) / EltSize;
+ // Op0 needs to be bitcasted to a larger vector with the same element type.
+ SDValue Op0 = Op.getOperand(0);
+ MVT Op0VT = MVT::getVectorVT(EltVT,
+ Op0.getSimpleValueType().getSizeInBits() / EltSize);
+ Op0 = DAG.getBitcast(Op0VT, Op0);
+ DCI.AddToWorklist(Op0.getNode());
+ DCI.CombineTo(OrigOp.getNode(),
+ DAG.getNode(Opcode, DL, VT, Op0,
+ DAG.getConstant(Imm, DL, MVT::i8)));
+ return true;
+ }
}
return false;
@@ -30921,6 +30947,59 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(X86ISD::PCMPGT, SDLoc(N), VT, Shift.getOperand(0), Ones);
}
+/// Check if truncation with saturation from type \p SrcVT to \p DstVT
+/// is valid for the given \p Subtarget.
+static bool
+isSATValidOnSubtarget(EVT SrcVT, EVT DstVT, const X86Subtarget &Subtarget) {
+ if (!Subtarget.hasAVX512())
+ return false;
+ EVT SrcElVT = SrcVT.getScalarType();
+ EVT DstElVT = DstVT.getScalarType();
+ if (SrcElVT.getSizeInBits() < 16 || SrcElVT.getSizeInBits() > 64)
+ return false;
+ if (DstElVT.getSizeInBits() < 8 || DstElVT.getSizeInBits() > 32)
+ return false;
+ if (SrcVT.is512BitVector() || Subtarget.hasVLX())
+ return SrcElVT.getSizeInBits() >= 32 || Subtarget.hasBWI();
+ return false;
+}
+
+/// Detect a pattern of truncation with saturation:
+/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
+/// Return the source value to be truncated or SDValue() if the pattern was not
+/// matched or is unsupported on the current target.
+static SDValue
+detectUSatPattern(SDValue In, EVT VT, const X86Subtarget &Subtarget) {
+ if (In.getOpcode() != ISD::UMIN)
+ return SDValue();
+
+ EVT InVT = In.getValueType();
+ // FIXME: Scalar types may be supported if we move them to a vector register.
+ if (!InVT.isVector() || !InVT.isSimple())
+ return SDValue();
+
+ if (!isSATValidOnSubtarget(InVT, VT, Subtarget))
+ return SDValue();
+
+ // Saturation with truncation. We truncate from InVT to VT.
+ assert(InVT.getScalarSizeInBits() > VT.getScalarSizeInBits() &&
+ "Unexpected types for truncate operation");
+
+ SDValue SrcVal;
+ APInt C;
+ if (ISD::isConstantSplatVector(In.getOperand(0).getNode(), C))
+ SrcVal = In.getOperand(1);
+ else if (ISD::isConstantSplatVector(In.getOperand(1).getNode(), C))
+ SrcVal = In.getOperand(0);
+ else
+ return SDValue();
+
+ // C should be equal to UINT32_MAX / UINT16_MAX / UINT8_MAX according to
+ // the element size of the destination type.
+ return (C == ((uint64_t)1 << VT.getScalarSizeInBits()) - 1) ?
+ SrcVal : SDValue();
+}
+
/// This function detects the AVG pattern between vectors of unsigned i8/i16,
/// which is c = (a + b + 1) / 2, and replace this operation with the efficient
/// X86ISD::AVG instruction.
@@ -31487,6 +31566,12 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
St->getPointerInfo(), St->getAlignment(),
St->getMemOperand()->getFlags());
+ if (SDValue Val =
+ detectUSatPattern(St->getValue(), St->getMemoryVT(), Subtarget))
+ return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(),
+ dl, Val, St->getBasePtr(),
+ St->getMemoryVT(), St->getMemOperand(), DAG);
+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned NumElems = VT.getVectorNumElements();
assert(StVT != VT && "Cannot truncate to the same type");
@@ -31967,7 +32052,8 @@ combineVectorTruncationWithPACKUS(SDNode *N, SelectionDAG &DAG,
/// Truncate a group of v4i32 into v8i16 using X86ISD::PACKSS.
static SDValue
-combineVectorTruncationWithPACKSS(SDNode *N, SelectionDAG &DAG,
+combineVectorTruncationWithPACKSS(SDNode *N, const X86Subtarget &Subtarget,
+ SelectionDAG &DAG,
SmallVector<SDValue, 8> &Regs) {
assert(Regs.size() > 0 && Regs[0].getValueType() == MVT::v4i32);
EVT OutVT = N->getValueType(0);
@@ -31976,8 +32062,10 @@ combineVectorTruncationWithPACKSS(SDNode *N, SelectionDAG &DAG,
// Shift left by 16 bits, then arithmetic-shift right by 16 bits.
SDValue ShAmt = DAG.getConstant(16, DL, MVT::i32);
for (auto &Reg : Regs) {
- Reg = getTargetVShiftNode(X86ISD::VSHLI, DL, MVT::v4i32, Reg, ShAmt, DAG);
- Reg = getTargetVShiftNode(X86ISD::VSRAI, DL, MVT::v4i32, Reg, ShAmt, DAG);
+ Reg = getTargetVShiftNode(X86ISD::VSHLI, DL, MVT::v4i32, Reg, ShAmt,
+ Subtarget, DAG);
+ Reg = getTargetVShiftNode(X86ISD::VSRAI, DL, MVT::v4i32, Reg, ShAmt,
+ Subtarget, DAG);
}
for (unsigned i = 0, e = Regs.size() / 2; i < e; i++)
@@ -32046,7 +32134,7 @@ static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG,
if (Subtarget.hasSSE41() || OutSVT == MVT::i8)
return combineVectorTruncationWithPACKUS(N, DAG, SubVec);
else if (InSVT == MVT::i32)
- return combineVectorTruncationWithPACKSS(N, DAG, SubVec);
+ return combineVectorTruncationWithPACKSS(N, Subtarget, DAG, SubVec);
else
return SDValue();
}
@@ -32104,6 +32192,10 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL))
return Avg;
+ // Try the truncation with unsigned saturation.
+ if (SDValue Val = detectUSatPattern(Src, VT, Subtarget))
+ return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, Val);
+
// The bitcast source is a direct mmx result.
// Detect bitcasts between i32 to x86mmx
if (Src.getOpcode() == ISD::BITCAST && VT == MVT::i32) {
diff --git a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index d7792e296a58..de4839432b9a 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -80,9 +80,12 @@ unsigned X86TTIImpl::getNumberOfRegisters(bool Vector) {
unsigned X86TTIImpl::getRegisterBitWidth(bool Vector) {
if (Vector) {
- if (ST->hasAVX512()) return 512;
- if (ST->hasAVX()) return 256;
- if (ST->hasSSE1()) return 128;
+ if (ST->hasAVX512())
+ return 512;
+ if (ST->hasAVX())
+ return 256;
+ if (ST->hasSSE1())
+ return 128;
return 0;
}
@@ -211,11 +214,9 @@ int X86TTIImpl::getArithmeticInstrCost(
};
// Look for AVX512DQ lowering tricks for custom cases.
- if (ST->hasDQI()) {
- if (const auto *Entry = CostTableLookup(AVX512DQCostTable, ISD,
- LT.second))
+ if (ST->hasDQI())
+ if (const auto *Entry = CostTableLookup(AVX512DQCostTable, ISD, LT.second))
return LT.first * Entry->Cost;
- }
static const CostTblEntry AVX512BWCostTable[] = {
{ ISD::MUL, MVT::v64i8, 11 }, // extend/pmullw/trunc sequence.
@@ -225,37 +226,38 @@ int X86TTIImpl::getArithmeticInstrCost(
// Vectorizing division is a bad idea. See the SSE2 table for more comments.
{ ISD::SDIV, MVT::v64i8, 64*20 },
{ ISD::SDIV, MVT::v32i16, 32*20 },
- { ISD::SDIV, MVT::v16i32, 16*20 },
- { ISD::SDIV, MVT::v8i64, 8*20 },
{ ISD::UDIV, MVT::v64i8, 64*20 },
- { ISD::UDIV, MVT::v32i16, 32*20 },
- { ISD::UDIV, MVT::v16i32, 16*20 },
- { ISD::UDIV, MVT::v8i64, 8*20 },
+ { ISD::UDIV, MVT::v32i16, 32*20 }
};
// Look for AVX512BW lowering tricks for custom cases.
- if (ST->hasBWI()) {
- if (const auto *Entry = CostTableLookup(AVX512BWCostTable, ISD,
- LT.second))
+ if (ST->hasBWI())
+ if (const auto *Entry = CostTableLookup(AVX512BWCostTable, ISD, LT.second))
return LT.first * Entry->Cost;
- }
static const CostTblEntry AVX512CostTable[] = {
- { ISD::SHL, MVT::v16i32, 1 },
- { ISD::SRL, MVT::v16i32, 1 },
- { ISD::SRA, MVT::v16i32, 1 },
- { ISD::SHL, MVT::v8i64, 1 },
- { ISD::SRL, MVT::v8i64, 1 },
- { ISD::SRA, MVT::v8i64, 1 },
-
- { ISD::MUL, MVT::v32i8, 13 }, // extend/pmullw/trunc sequence.
- { ISD::MUL, MVT::v16i8, 5 }, // extend/pmullw/trunc sequence.
+ { ISD::SHL, MVT::v16i32, 1 },
+ { ISD::SRL, MVT::v16i32, 1 },
+ { ISD::SRA, MVT::v16i32, 1 },
+ { ISD::SHL, MVT::v8i64, 1 },
+ { ISD::SRL, MVT::v8i64, 1 },
+ { ISD::SRA, MVT::v8i64, 1 },
+
+ { ISD::MUL, MVT::v32i8, 13 }, // extend/pmullw/trunc sequence.
+ { ISD::MUL, MVT::v16i8, 5 }, // extend/pmullw/trunc sequence.
+ { ISD::MUL, MVT::v16i32, 1 }, // pmulld
+ { ISD::MUL, MVT::v8i64, 8 }, // 3*pmuludq/3*shift/2*add
+
+ // Vectorizing division is a bad idea. See the SSE2 table for more comments.
+ { ISD::SDIV, MVT::v16i32, 16*20 },
+ { ISD::SDIV, MVT::v8i64, 8*20 },
+ { ISD::UDIV, MVT::v16i32, 16*20 },
+ { ISD::UDIV, MVT::v8i64, 8*20 }
};
- if (ST->hasAVX512()) {
+ if (ST->hasAVX512())
if (const auto *Entry = CostTableLookup(AVX512CostTable, ISD, LT.second))
return LT.first * Entry->Cost;
- }
static const CostTblEntry AVX2CostTable[] = {
// Shifts on v4i64/v8i32 on AVX2 is legal even though we declare to
@@ -315,10 +317,9 @@ int X86TTIImpl::getArithmeticInstrCost(
};
// Look for XOP lowering tricks.
- if (ST->hasXOP()) {
+ if (ST->hasXOP())
if (const auto *Entry = CostTableLookup(XOPCostTable, ISD, LT.second))
return LT.first * Entry->Cost;
- }
static const CostTblEntry AVX2CustomCostTable[] = {
{ ISD::SHL, MVT::v32i8, 11 }, // vpblendvb sequence.
@@ -334,6 +335,8 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::MUL, MVT::v32i8, 17 }, // extend/pmullw/trunc sequence.
{ ISD::MUL, MVT::v16i8, 7 }, // extend/pmullw/trunc sequence.
+ { ISD::MUL, MVT::v8i32, 1 }, // pmulld
+ { ISD::MUL, MVT::v4i64, 8 }, // 3*pmuludq/3*shift/2*add
{ ISD::FDIV, MVT::f32, 7 }, // Haswell from http://www.agner.org/
{ ISD::FDIV, MVT::v4f32, 7 }, // Haswell from http://www.agner.org/
@@ -344,11 +347,10 @@ int X86TTIImpl::getArithmeticInstrCost(
};
// Look for AVX2 lowering tricks for custom cases.
- if (ST->hasAVX2()) {
+ if (ST->hasAVX2())
if (const auto *Entry = CostTableLookup(AVX2CustomCostTable, ISD,
LT.second))
return LT.first * Entry->Cost;
- }
static const CostTblEntry AVXCustomCostTable[] = {
{ ISD::MUL, MVT::v32i8, 26 }, // extend/pmullw/trunc sequence.
@@ -372,24 +374,10 @@ int X86TTIImpl::getArithmeticInstrCost(
};
// Look for AVX2 lowering tricks for custom cases.
- if (ST->hasAVX()) {
+ if (ST->hasAVX())
if (const auto *Entry = CostTableLookup(AVXCustomCostTable, ISD,
LT.second))
return LT.first * Entry->Cost;
- }
-
- static const CostTblEntry SSE42FloatCostTable[] = {
- { ISD::FDIV, MVT::f32, 14 }, // Nehalem from http://www.agner.org/
- { ISD::FDIV, MVT::v4f32, 14 }, // Nehalem from http://www.agner.org/
- { ISD::FDIV, MVT::f64, 22 }, // Nehalem from http://www.agner.org/
- { ISD::FDIV, MVT::v2f64, 22 }, // Nehalem from http://www.agner.org/
- };
-
- if (ST->hasSSE42()) {
- if (const auto *Entry = CostTableLookup(SSE42FloatCostTable, ISD,
- LT.second))
- return LT.first * Entry->Cost;
- }
static const CostTblEntry
SSE2UniformCostTable[] = {
@@ -452,6 +440,17 @@ int X86TTIImpl::getArithmeticInstrCost(
ISD = ISD::MUL;
}
+ static const CostTblEntry SSE42CostTable[] = {
+ { ISD::FDIV, MVT::f32, 14 }, // Nehalem from http://www.agner.org/
+ { ISD::FDIV, MVT::v4f32, 14 }, // Nehalem from http://www.agner.org/
+ { ISD::FDIV, MVT::f64, 22 }, // Nehalem from http://www.agner.org/
+ { ISD::FDIV, MVT::v2f64, 22 }, // Nehalem from http://www.agner.org/
+ };
+
+ if (ST->hasSSE42())
+ if (const auto *Entry = CostTableLookup(SSE42CostTable, ISD, LT.second))
+ return LT.first * Entry->Cost;
+
static const CostTblEntry SSE41CostTable[] = {
{ ISD::SHL, MVT::v16i8, 11 }, // pblendvb sequence.
{ ISD::SHL, MVT::v32i8, 2*11 }, // pblendvb sequence.
@@ -471,44 +470,39 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::SRA, MVT::v16i16, 2*14 }, // pblendvb sequence.
{ ISD::SRA, MVT::v4i32, 12 }, // Shift each lane + blend.
{ ISD::SRA, MVT::v8i32, 2*12 }, // Shift each lane + blend.
+
+ { ISD::MUL, MVT::v4i32, 1 } // pmulld
};
- if (ST->hasSSE41()) {
+ if (ST->hasSSE41())
if (const auto *Entry = CostTableLookup(SSE41CostTable, ISD, LT.second))
return LT.first * Entry->Cost;
- }
static const CostTblEntry SSE2CostTable[] = {
// We don't correctly identify costs of casts because they are marked as
// custom.
{ ISD::SHL, MVT::v16i8, 26 }, // cmpgtb sequence.
- { ISD::SHL, MVT::v32i8, 2*26 }, // cmpgtb sequence.
{ ISD::SHL, MVT::v8i16, 32 }, // cmpgtb sequence.
- { ISD::SHL, MVT::v16i16, 2*32 }, // cmpgtb sequence.
{ ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul.
{ ISD::SHL, MVT::v8i32, 2*2*5 }, // We optimized this using mul.
{ ISD::SHL, MVT::v2i64, 4 }, // splat+shuffle sequence.
{ ISD::SHL, MVT::v4i64, 2*4 }, // splat+shuffle sequence.
{ ISD::SRL, MVT::v16i8, 26 }, // cmpgtb sequence.
- { ISD::SRL, MVT::v32i8, 2*26 }, // cmpgtb sequence.
{ ISD::SRL, MVT::v8i16, 32 }, // cmpgtb sequence.
- { ISD::SRL, MVT::v16i16, 2*32 }, // cmpgtb sequence.
{ ISD::SRL, MVT::v4i32, 16 }, // Shift each lane + blend.
- { ISD::SRL, MVT::v8i32, 2*16 }, // Shift each lane + blend.
{ ISD::SRL, MVT::v2i64, 4 }, // splat+shuffle sequence.
{ ISD::SRL, MVT::v4i64, 2*4 }, // splat+shuffle sequence.
{ ISD::SRA, MVT::v16i8, 54 }, // unpacked cmpgtb sequence.
- { ISD::SRA, MVT::v32i8, 2*54 }, // unpacked cmpgtb sequence.
{ ISD::SRA, MVT::v8i16, 32 }, // cmpgtb sequence.
- { ISD::SRA, MVT::v16i16, 2*32 }, // cmpgtb sequence.
{ ISD::SRA, MVT::v4i32, 16 }, // Shift each lane + blend.
- { ISD::SRA, MVT::v8i32, 2*16 }, // Shift each lane + blend.
{ ISD::SRA, MVT::v2i64, 12 }, // srl/xor/sub sequence.
{ ISD::SRA, MVT::v4i64, 2*12 }, // srl/xor/sub sequence.
{ ISD::MUL, MVT::v16i8, 12 }, // extend/pmullw/trunc sequence.
+ { ISD::MUL, MVT::v4i32, 6 }, // 3*pmuludq/4*shuffle
+ { ISD::MUL, MVT::v2i64, 8 }, // 3*pmuludq/3*shift/2*add
{ ISD::FDIV, MVT::f32, 23 }, // Pentium IV from http://www.agner.org/
{ ISD::FDIV, MVT::v4f32, 39 }, // Pentium IV from http://www.agner.org/
@@ -531,10 +525,9 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::UDIV, MVT::v2i64, 2*20 },
};
- if (ST->hasSSE2()) {
+ if (ST->hasSSE2())
if (const auto *Entry = CostTableLookup(SSE2CostTable, ISD, LT.second))
return LT.first * Entry->Cost;
- }
static const CostTblEntry AVX1CostTable[] = {
// We don't have to scalarize unsupported ops. We can issue two half-sized
@@ -553,307 +546,278 @@ int X86TTIImpl::getArithmeticInstrCost(
// A v4i64 multiply is custom lowered as two split v2i64 vectors that then
// are lowered as a series of long multiplies(3), shifts(3) and adds(2)
// Because we believe v4i64 to be a legal type, we must also include the
- // split factor of two in the cost table. Therefore, the cost here is 16
+ // extract+insert in the cost table. Therefore, the cost here is 18
// instead of 8.
- { ISD::MUL, MVT::v4i64, 16 },
+ { ISD::MUL, MVT::v4i64, 18 },
};
// Look for AVX1 lowering tricks.
- if (ST->hasAVX() && !ST->hasAVX2()) {
- MVT VT = LT.second;
-
- if (const auto *Entry = CostTableLookup(AVX1CostTable, ISD, VT))
+ if (ST->hasAVX() && !ST->hasAVX2())
+ if (const auto *Entry = CostTableLookup(AVX1CostTable, ISD, LT.second))
return LT.first * Entry->Cost;
- }
- // Custom lowering of vectors.
- static const CostTblEntry CustomLowered[] = {
- // A v2i64/v4i64 and multiply is custom lowered as a series of long
- // multiplies(3), shifts(3) and adds(2).
- { ISD::MUL, MVT::v2i64, 8 },
- { ISD::MUL, MVT::v4i64, 8 },
- { ISD::MUL, MVT::v8i64, 8 }
- };
- if (const auto *Entry = CostTableLookup(CustomLowered, ISD, LT.second))
- return LT.first * Entry->Cost;
-
- // Special lowering of v4i32 mul on sse2, sse3: Lower v4i32 mul as 2x shuffle,
- // 2x pmuludq, 2x shuffle.
- if (ISD == ISD::MUL && LT.second == MVT::v4i32 && ST->hasSSE2() &&
- !ST->hasSSE41())
- return LT.first * 6;
-
- static const CostTblEntry SSE1FloatCostTable[] = {
+ static const CostTblEntry SSE1CostTable[] = {
{ ISD::FDIV, MVT::f32, 17 }, // Pentium III from http://www.agner.org/
{ ISD::FDIV, MVT::v4f32, 34 }, // Pentium III from http://www.agner.org/
};
if (ST->hasSSE1())
- if (const auto *Entry = CostTableLookup(SSE1FloatCostTable, ISD,
- LT.second))
+ if (const auto *Entry = CostTableLookup(SSE1CostTable, ISD, LT.second))
return LT.first * Entry->Cost;
+
// Fallback to the default implementation.
return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info);
}
int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
- if (Kind == TTI::SK_Reverse || Kind == TTI::SK_Alternate) {
- // 64-bit packed float vectors (v2f32) are widened to type v4f32.
- // 64-bit packed integer vectors (v2i32) are promoted to type v2i64.
- std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
-
- static const CostTblEntry AVX512VBMIShuffleTbl[] = {
- { TTI::SK_Reverse, MVT::v64i8, 1 }, // vpermb
- { TTI::SK_Reverse, MVT::v32i8, 1 } // vpermb
- };
-
- if (ST->hasVBMI())
- if (const auto *Entry =
- CostTableLookup(AVX512VBMIShuffleTbl, Kind, LT.second))
- return LT.first * Entry->Cost;
+ // 64-bit packed float vectors (v2f32) are widened to type v4f32.
+ // 64-bit packed integer vectors (v2i32) are promoted to type v2i64.
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+
+ // For Broadcasts we are splatting the first element from the first input
+ // register, so only need to reference that input and all the output
+ // registers are the same.
+ if (Kind == TTI::SK_Broadcast)
+ LT.first = 1;
+
+ // We are going to permute multiple sources and the result will be in multiple
+ // destinations. Providing an accurate cost only for splits where the element
+ // type remains the same.
+ if (Kind == TTI::SK_PermuteSingleSrc && LT.first != 1) {
+ MVT LegalVT = LT.second;
+ if (LegalVT.getVectorElementType().getSizeInBits() ==
+ Tp->getVectorElementType()->getPrimitiveSizeInBits() &&
+ LegalVT.getVectorNumElements() < Tp->getVectorNumElements()) {
+
+ unsigned VecTySize = DL.getTypeStoreSize(Tp);
+ unsigned LegalVTSize = LegalVT.getStoreSize();
+ // Number of source vectors after legalization:
+ unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize;
+ // Number of destination vectors after legalization:
+ unsigned NumOfDests = LT.first;
+
+ Type *SingleOpTy = VectorType::get(Tp->getVectorElementType(),
+ LegalVT.getVectorNumElements());
+
+ unsigned NumOfShuffles = (NumOfSrcs - 1) * NumOfDests;
+ return NumOfShuffles *
+ getShuffleCost(TTI::SK_PermuteTwoSrc, SingleOpTy, 0, nullptr);
+ }
- static const CostTblEntry AVX512BWShuffleTbl[] = {
- { TTI::SK_Reverse, MVT::v32i16, 1 }, // vpermw
- { TTI::SK_Reverse, MVT::v16i16, 1 }, // vpermw
- { TTI::SK_Reverse, MVT::v64i8, 6 } // vextracti64x4 + 2*vperm2i128
- // + 2*pshufb + vinserti64x4
- };
+ return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
+ }
- if (ST->hasBWI())
- if (const auto *Entry =
- CostTableLookup(AVX512BWShuffleTbl, Kind, LT.second))
- return LT.first * Entry->Cost;
+ // For 2-input shuffles, we must account for splitting the 2 inputs into many.
+ if (Kind == TTI::SK_PermuteTwoSrc && LT.first != 1) {
+ // We assume that source and destination have the same vector type.
+ int NumOfDests = LT.first;
+ int NumOfShufflesPerDest = LT.first * 2 - 1;
+ LT.first = NumOfDests * NumOfShufflesPerDest;
+ }
- static const CostTblEntry AVX512ShuffleTbl[] = {
- { TTI::SK_Reverse, MVT::v8f64, 1 }, // vpermpd
- { TTI::SK_Reverse, MVT::v16f32, 1 }, // vpermps
- { TTI::SK_Reverse, MVT::v8i64, 1 }, // vpermq
- { TTI::SK_Reverse, MVT::v16i32, 1 }, // vpermd
- };
+ static const CostTblEntry AVX512VBMIShuffleTbl[] = {
+ { TTI::SK_Reverse, MVT::v64i8, 1 }, // vpermb
+ { TTI::SK_Reverse, MVT::v32i8, 1 }, // vpermb
- if (ST->hasAVX512())
- if (const auto *Entry =
- CostTableLookup(AVX512ShuffleTbl, Kind, LT.second))
- return LT.first * Entry->Cost;
+ { TTI::SK_PermuteSingleSrc, MVT::v64i8, 1 }, // vpermb
+ { TTI::SK_PermuteSingleSrc, MVT::v32i8, 1 }, // vpermb
- static const CostTblEntry AVX2ShuffleTbl[] = {
- { TTI::SK_Reverse, MVT::v4f64, 1 }, // vpermpd
- { TTI::SK_Reverse, MVT::v8f32, 1 }, // vpermps
- { TTI::SK_Reverse, MVT::v4i64, 1 }, // vpermq
- { TTI::SK_Reverse, MVT::v8i32, 1 }, // vpermd
- { TTI::SK_Reverse, MVT::v16i16, 2 }, // vperm2i128 + pshufb
- { TTI::SK_Reverse, MVT::v32i8, 2 }, // vperm2i128 + pshufb
+ { TTI::SK_PermuteTwoSrc, MVT::v64i8, 1 }, // vpermt2b
+ { TTI::SK_PermuteTwoSrc, MVT::v32i8, 1 }, // vpermt2b
+ { TTI::SK_PermuteTwoSrc, MVT::v16i8, 1 } // vpermt2b
+ };
- { TTI::SK_Alternate, MVT::v16i16, 1 }, // vpblendw
- { TTI::SK_Alternate, MVT::v32i8, 1 } // vpblendvb
- };
+ if (ST->hasVBMI())
+ if (const auto *Entry =
+ CostTableLookup(AVX512VBMIShuffleTbl, Kind, LT.second))
+ return LT.first * Entry->Cost;
- if (ST->hasAVX2())
- if (const auto *Entry = CostTableLookup(AVX2ShuffleTbl, Kind, LT.second))
- return LT.first * Entry->Cost;
+ static const CostTblEntry AVX512BWShuffleTbl[] = {
+ { TTI::SK_Broadcast, MVT::v32i16, 1 }, // vpbroadcastw
+ { TTI::SK_Broadcast, MVT::v64i8, 1 }, // vpbroadcastb
+
+ { TTI::SK_Reverse, MVT::v32i16, 1 }, // vpermw
+ { TTI::SK_Reverse, MVT::v16i16, 1 }, // vpermw
+ { TTI::SK_Reverse, MVT::v64i8, 6 }, // vextracti64x4 + 2*vperm2i128
+ // + 2*pshufb + vinserti64x4
+
+ { TTI::SK_PermuteSingleSrc, MVT::v32i16, 1 }, // vpermw
+ { TTI::SK_PermuteSingleSrc, MVT::v16i16, 1 }, // vpermw
+ { TTI::SK_PermuteSingleSrc, MVT::v8i16, 1 }, // vpermw
+ { TTI::SK_PermuteSingleSrc, MVT::v64i8, 8 }, // extend to v32i16
+ { TTI::SK_PermuteSingleSrc, MVT::v32i8, 3 }, // vpermw + zext/trunc
+
+ { TTI::SK_PermuteTwoSrc, MVT::v32i16, 1 }, // vpermt2w
+ { TTI::SK_PermuteTwoSrc, MVT::v16i16, 1 }, // vpermt2w
+ { TTI::SK_PermuteTwoSrc, MVT::v8i16, 1 }, // vpermt2w
+ { TTI::SK_PermuteTwoSrc, MVT::v32i8, 3 }, // zext + vpermt2w + trunc
+ { TTI::SK_PermuteTwoSrc, MVT::v64i8, 19 }, // 6 * v32i8 + 1
+ { TTI::SK_PermuteTwoSrc, MVT::v16i8, 3 } // zext + vpermt2w + trunc
+ };
- static const CostTblEntry AVX1ShuffleTbl[] = {
- { TTI::SK_Reverse, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd
- { TTI::SK_Reverse, MVT::v8f32, 2 }, // vperm2f128 + vpermilps
- { TTI::SK_Reverse, MVT::v4i64, 2 }, // vperm2f128 + vpermilpd
- { TTI::SK_Reverse, MVT::v8i32, 2 }, // vperm2f128 + vpermilps
- { TTI::SK_Reverse, MVT::v16i16, 4 }, // vextractf128 + 2*pshufb
- // + vinsertf128
- { TTI::SK_Reverse, MVT::v32i8, 4 }, // vextractf128 + 2*pshufb
- // + vinsertf128
-
- { TTI::SK_Alternate, MVT::v4i64, 1 }, // vblendpd
- { TTI::SK_Alternate, MVT::v4f64, 1 }, // vblendpd
- { TTI::SK_Alternate, MVT::v8i32, 1 }, // vblendps
- { TTI::SK_Alternate, MVT::v8f32, 1 }, // vblendps
- { TTI::SK_Alternate, MVT::v16i16, 3 }, // vpand + vpandn + vpor
- { TTI::SK_Alternate, MVT::v32i8, 3 } // vpand + vpandn + vpor
- };
+ if (ST->hasBWI())
+ if (const auto *Entry =
+ CostTableLookup(AVX512BWShuffleTbl, Kind, LT.second))
+ return LT.first * Entry->Cost;
- if (ST->hasAVX())
- if (const auto *Entry = CostTableLookup(AVX1ShuffleTbl, Kind, LT.second))
- return LT.first * Entry->Cost;
+ static const CostTblEntry AVX512ShuffleTbl[] = {
+ { TTI::SK_Broadcast, MVT::v8f64, 1 }, // vbroadcastpd
+ { TTI::SK_Broadcast, MVT::v16f32, 1 }, // vbroadcastps
+ { TTI::SK_Broadcast, MVT::v8i64, 1 }, // vpbroadcastq
+ { TTI::SK_Broadcast, MVT::v16i32, 1 }, // vpbroadcastd
+
+ { TTI::SK_Reverse, MVT::v8f64, 1 }, // vpermpd
+ { TTI::SK_Reverse, MVT::v16f32, 1 }, // vpermps
+ { TTI::SK_Reverse, MVT::v8i64, 1 }, // vpermq
+ { TTI::SK_Reverse, MVT::v16i32, 1 }, // vpermd
+
+ { TTI::SK_PermuteSingleSrc, MVT::v8f64, 1 }, // vpermpd
+ { TTI::SK_PermuteSingleSrc, MVT::v4f64, 1 }, // vpermpd
+ { TTI::SK_PermuteSingleSrc, MVT::v2f64, 1 }, // vpermpd
+ { TTI::SK_PermuteSingleSrc, MVT::v16f32, 1 }, // vpermps
+ { TTI::SK_PermuteSingleSrc, MVT::v8f32, 1 }, // vpermps
+ { TTI::SK_PermuteSingleSrc, MVT::v4f32, 1 }, // vpermps
+ { TTI::SK_PermuteSingleSrc, MVT::v8i64, 1 }, // vpermq
+ { TTI::SK_PermuteSingleSrc, MVT::v4i64, 1 }, // vpermq
+ { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // vpermq
+ { TTI::SK_PermuteSingleSrc, MVT::v16i32, 1 }, // vpermd
+ { TTI::SK_PermuteSingleSrc, MVT::v8i32, 1 }, // vpermd
+ { TTI::SK_PermuteSingleSrc, MVT::v4i32, 1 }, // vpermd
+ { TTI::SK_PermuteSingleSrc, MVT::v16i8, 1 }, // pshufb
+
+ { TTI::SK_PermuteTwoSrc, MVT::v8f64, 1 }, // vpermt2pd
+ { TTI::SK_PermuteTwoSrc, MVT::v16f32, 1 }, // vpermt2ps
+ { TTI::SK_PermuteTwoSrc, MVT::v8i64, 1 }, // vpermt2q
+ { TTI::SK_PermuteTwoSrc, MVT::v16i32, 1 }, // vpermt2d
+ { TTI::SK_PermuteTwoSrc, MVT::v4f64, 1 }, // vpermt2pd
+ { TTI::SK_PermuteTwoSrc, MVT::v8f32, 1 }, // vpermt2ps
+ { TTI::SK_PermuteTwoSrc, MVT::v4i64, 1 }, // vpermt2q
+ { TTI::SK_PermuteTwoSrc, MVT::v8i32, 1 }, // vpermt2d
+ { TTI::SK_PermuteTwoSrc, MVT::v2f64, 1 }, // vpermt2pd
+ { TTI::SK_PermuteTwoSrc, MVT::v4f32, 1 }, // vpermt2ps
+ { TTI::SK_PermuteTwoSrc, MVT::v2i64, 1 }, // vpermt2q
+ { TTI::SK_PermuteTwoSrc, MVT::v4i32, 1 } // vpermt2d
+ };
- static const CostTblEntry SSE41ShuffleTbl[] = {
- { TTI::SK_Alternate, MVT::v2i64, 1 }, // pblendw
- { TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd
- { TTI::SK_Alternate, MVT::v4i32, 1 }, // pblendw
- { TTI::SK_Alternate, MVT::v4f32, 1 }, // blendps
- { TTI::SK_Alternate, MVT::v8i16, 1 }, // pblendw
- { TTI::SK_Alternate, MVT::v16i8, 1 } // pblendvb
- };
-
- if (ST->hasSSE41())
- if (const auto *Entry = CostTableLookup(SSE41ShuffleTbl, Kind, LT.second))
- return LT.first * Entry->Cost;
+ if (ST->hasAVX512())
+ if (const auto *Entry = CostTableLookup(AVX512ShuffleTbl, Kind, LT.second))
+ return LT.first * Entry->Cost;
- static const CostTblEntry SSSE3ShuffleTbl[] = {
- { TTI::SK_Reverse, MVT::v8i16, 1 }, // pshufb
- { TTI::SK_Reverse, MVT::v16i8, 1 }, // pshufb
+ static const CostTblEntry AVX2ShuffleTbl[] = {
+ { TTI::SK_Broadcast, MVT::v4f64, 1 }, // vbroadcastpd
+ { TTI::SK_Broadcast, MVT::v8f32, 1 }, // vbroadcastps
+ { TTI::SK_Broadcast, MVT::v4i64, 1 }, // vpbroadcastq
+ { TTI::SK_Broadcast, MVT::v8i32, 1 }, // vpbroadcastd
+ { TTI::SK_Broadcast, MVT::v16i16, 1 }, // vpbroadcastw
+ { TTI::SK_Broadcast, MVT::v32i8, 1 }, // vpbroadcastb
+
+ { TTI::SK_Reverse, MVT::v4f64, 1 }, // vpermpd
+ { TTI::SK_Reverse, MVT::v8f32, 1 }, // vpermps
+ { TTI::SK_Reverse, MVT::v4i64, 1 }, // vpermq
+ { TTI::SK_Reverse, MVT::v8i32, 1 }, // vpermd
+ { TTI::SK_Reverse, MVT::v16i16, 2 }, // vperm2i128 + pshufb
+ { TTI::SK_Reverse, MVT::v32i8, 2 }, // vperm2i128 + pshufb
+
+ { TTI::SK_Alternate, MVT::v16i16, 1 }, // vpblendw
+ { TTI::SK_Alternate, MVT::v32i8, 1 } // vpblendvb
+ };
- { TTI::SK_Alternate, MVT::v8i16, 3 }, // pshufb + pshufb + por
- { TTI::SK_Alternate, MVT::v16i8, 3 } // pshufb + pshufb + por
- };
+ if (ST->hasAVX2())
+ if (const auto *Entry = CostTableLookup(AVX2ShuffleTbl, Kind, LT.second))
+ return LT.first * Entry->Cost;
- if (ST->hasSSSE3())
- if (const auto *Entry = CostTableLookup(SSSE3ShuffleTbl, Kind, LT.second))
- return LT.first * Entry->Cost;
+ static const CostTblEntry AVX1ShuffleTbl[] = {
+ { TTI::SK_Broadcast, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd
+ { TTI::SK_Broadcast, MVT::v8f32, 2 }, // vperm2f128 + vpermilps
+ { TTI::SK_Broadcast, MVT::v4i64, 2 }, // vperm2f128 + vpermilpd
+ { TTI::SK_Broadcast, MVT::v8i32, 2 }, // vperm2f128 + vpermilps
+ { TTI::SK_Broadcast, MVT::v16i16, 3 }, // vpshuflw + vpshufd + vinsertf128
+ { TTI::SK_Broadcast, MVT::v32i8, 2 }, // vpshufb + vinsertf128
+
+ { TTI::SK_Reverse, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd
+ { TTI::SK_Reverse, MVT::v8f32, 2 }, // vperm2f128 + vpermilps
+ { TTI::SK_Reverse, MVT::v4i64, 2 }, // vperm2f128 + vpermilpd
+ { TTI::SK_Reverse, MVT::v8i32, 2 }, // vperm2f128 + vpermilps
+ { TTI::SK_Reverse, MVT::v16i16, 4 }, // vextractf128 + 2*pshufb
+ // + vinsertf128
+ { TTI::SK_Reverse, MVT::v32i8, 4 }, // vextractf128 + 2*pshufb
+ // + vinsertf128
+
+ { TTI::SK_Alternate, MVT::v4i64, 1 }, // vblendpd
+ { TTI::SK_Alternate, MVT::v4f64, 1 }, // vblendpd
+ { TTI::SK_Alternate, MVT::v8i32, 1 }, // vblendps
+ { TTI::SK_Alternate, MVT::v8f32, 1 }, // vblendps
+ { TTI::SK_Alternate, MVT::v16i16, 3 }, // vpand + vpandn + vpor
+ { TTI::SK_Alternate, MVT::v32i8, 3 } // vpand + vpandn + vpor
+ };
- static const CostTblEntry SSE2ShuffleTbl[] = {
- { TTI::SK_Reverse, MVT::v2f64, 1 }, // shufpd
- { TTI::SK_Reverse, MVT::v2i64, 1 }, // pshufd
- { TTI::SK_Reverse, MVT::v4i32, 1 }, // pshufd
- { TTI::SK_Reverse, MVT::v8i16, 3 }, // pshuflw + pshufhw + pshufd
- { TTI::SK_Reverse, MVT::v16i8, 9 }, // 2*pshuflw + 2*pshufhw
- // + 2*pshufd + 2*unpck + packus
-
- { TTI::SK_Alternate, MVT::v2i64, 1 }, // movsd
- { TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd
- { TTI::SK_Alternate, MVT::v4i32, 2 }, // 2*shufps
- { TTI::SK_Alternate, MVT::v8i16, 3 }, // pand + pandn + por
- { TTI::SK_Alternate, MVT::v16i8, 3 } // pand + pandn + por
- };
-
- if (ST->hasSSE2())
- if (const auto *Entry = CostTableLookup(SSE2ShuffleTbl, Kind, LT.second))
- return LT.first * Entry->Cost;
+ if (ST->hasAVX())
+ if (const auto *Entry = CostTableLookup(AVX1ShuffleTbl, Kind, LT.second))
+ return LT.first * Entry->Cost;
- static const CostTblEntry SSE1ShuffleTbl[] = {
- { TTI::SK_Reverse, MVT::v4f32, 1 }, // shufps
- { TTI::SK_Alternate, MVT::v4f32, 2 } // 2*shufps
- };
+ static const CostTblEntry SSE41ShuffleTbl[] = {
+ { TTI::SK_Alternate, MVT::v2i64, 1 }, // pblendw
+ { TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd
+ { TTI::SK_Alternate, MVT::v4i32, 1 }, // pblendw
+ { TTI::SK_Alternate, MVT::v4f32, 1 }, // blendps
+ { TTI::SK_Alternate, MVT::v8i16, 1 }, // pblendw
+ { TTI::SK_Alternate, MVT::v16i8, 1 } // pblendvb
+ };
- if (ST->hasSSE1())
- if (const auto *Entry = CostTableLookup(SSE1ShuffleTbl, Kind, LT.second))
- return LT.first * Entry->Cost;
+ if (ST->hasSSE41())
+ if (const auto *Entry = CostTableLookup(SSE41ShuffleTbl, Kind, LT.second))
+ return LT.first * Entry->Cost;
- } else if (Kind == TTI::SK_PermuteTwoSrc) {
- // We assume that source and destination have the same vector type.
- std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
- int NumOfDests = LT.first;
- int NumOfShufflesPerDest = LT.first * 2 - 1;
- int NumOfShuffles = NumOfDests * NumOfShufflesPerDest;
-
- static const CostTblEntry AVX512VBMIShuffleTbl[] = {
- {ISD::VECTOR_SHUFFLE, MVT::v64i8, 1}, // vpermt2b
- {ISD::VECTOR_SHUFFLE, MVT::v32i8, 1}, // vpermt2b
- {ISD::VECTOR_SHUFFLE, MVT::v16i8, 1} // vpermt2b
- };
-
- if (ST->hasVBMI())
- if (const auto *Entry = CostTableLookup(AVX512VBMIShuffleTbl,
- ISD::VECTOR_SHUFFLE, LT.second))
- return NumOfShuffles * Entry->Cost;
-
- static const CostTblEntry AVX512BWShuffleTbl[] = {
- {ISD::VECTOR_SHUFFLE, MVT::v32i16, 1}, // vpermt2w
- {ISD::VECTOR_SHUFFLE, MVT::v16i16, 1}, // vpermt2w
- {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1}, // vpermt2w
- {ISD::VECTOR_SHUFFLE, MVT::v32i8, 3}, // zext + vpermt2w + trunc
- {ISD::VECTOR_SHUFFLE, MVT::v64i8, 19}, // 6 * v32i8 + 1
- {ISD::VECTOR_SHUFFLE, MVT::v16i8, 3} // zext + vpermt2w + trunc
- };
-
- if (ST->hasBWI())
- if (const auto *Entry = CostTableLookup(AVX512BWShuffleTbl,
- ISD::VECTOR_SHUFFLE, LT.second))
- return NumOfShuffles * Entry->Cost;
-
- static const CostTblEntry AVX512ShuffleTbl[] = {
- {ISD::VECTOR_SHUFFLE, MVT::v8f64, 1}, // vpermt2pd
- {ISD::VECTOR_SHUFFLE, MVT::v16f32, 1}, // vpermt2ps
- {ISD::VECTOR_SHUFFLE, MVT::v8i64, 1}, // vpermt2q
- {ISD::VECTOR_SHUFFLE, MVT::v16i32, 1}, // vpermt2d
- {ISD::VECTOR_SHUFFLE, MVT::v4f64, 1}, // vpermt2pd
- {ISD::VECTOR_SHUFFLE, MVT::v8f32, 1}, // vpermt2ps
- {ISD::VECTOR_SHUFFLE, MVT::v4i64, 1}, // vpermt2q
- {ISD::VECTOR_SHUFFLE, MVT::v8i32, 1}, // vpermt2d
- {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // vpermt2pd
- {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1}, // vpermt2ps
- {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // vpermt2q
- {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1} // vpermt2d
- };
+ static const CostTblEntry SSSE3ShuffleTbl[] = {
+ { TTI::SK_Broadcast, MVT::v8i16, 1 }, // pshufb
+ { TTI::SK_Broadcast, MVT::v16i8, 1 }, // pshufb
- if (ST->hasAVX512())
- if (const auto *Entry =
- CostTableLookup(AVX512ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
- return NumOfShuffles * Entry->Cost;
-
- } else if (Kind == TTI::SK_PermuteSingleSrc) {
- std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
- if (LT.first == 1) {
-
- static const CostTblEntry AVX512VBMIShuffleTbl[] = {
- {ISD::VECTOR_SHUFFLE, MVT::v64i8, 1}, // vpermb
- {ISD::VECTOR_SHUFFLE, MVT::v32i8, 1} // vpermb
- };
-
- if (ST->hasVBMI())
- if (const auto *Entry = CostTableLookup(AVX512VBMIShuffleTbl,
- ISD::VECTOR_SHUFFLE, LT.second))
- return Entry->Cost;
-
- static const CostTblEntry AVX512BWShuffleTbl[] = {
- {ISD::VECTOR_SHUFFLE, MVT::v32i16, 1}, // vpermw
- {ISD::VECTOR_SHUFFLE, MVT::v16i16, 1}, // vpermw
- {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1}, // vpermw
- {ISD::VECTOR_SHUFFLE, MVT::v64i8, 8}, // extend to v32i16
- {ISD::VECTOR_SHUFFLE, MVT::v32i8, 3} // vpermw + zext/trunc
- };
-
- if (ST->hasBWI())
- if (const auto *Entry = CostTableLookup(AVX512BWShuffleTbl,
- ISD::VECTOR_SHUFFLE, LT.second))
- return Entry->Cost;
-
- static const CostTblEntry AVX512ShuffleTbl[] = {
- {ISD::VECTOR_SHUFFLE, MVT::v8f64, 1}, // vpermpd
- {ISD::VECTOR_SHUFFLE, MVT::v4f64, 1}, // vpermpd
- {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // vpermpd
- {ISD::VECTOR_SHUFFLE, MVT::v16f32, 1}, // vpermps
- {ISD::VECTOR_SHUFFLE, MVT::v8f32, 1}, // vpermps
- {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1}, // vpermps
- {ISD::VECTOR_SHUFFLE, MVT::v8i64, 1}, // vpermq
- {ISD::VECTOR_SHUFFLE, MVT::v4i64, 1}, // vpermq
- {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // vpermq
- {ISD::VECTOR_SHUFFLE, MVT::v16i32, 1}, // vpermd
- {ISD::VECTOR_SHUFFLE, MVT::v8i32, 1}, // vpermd
- {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1}, // vpermd
- {ISD::VECTOR_SHUFFLE, MVT::v16i8, 1} // pshufb
- };
-
- if (ST->hasAVX512())
- if (const auto *Entry =
- CostTableLookup(AVX512ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
- return Entry->Cost;
-
- } else {
- // We are going to permute multiple sources and the result will be in
- // multiple destinations. Providing an accurate cost only for splits where
- // the element type remains the same.
-
- MVT LegalVT = LT.second;
- if (LegalVT.getVectorElementType().getSizeInBits() ==
- Tp->getVectorElementType()->getPrimitiveSizeInBits() &&
- LegalVT.getVectorNumElements() < Tp->getVectorNumElements()) {
-
- unsigned VecTySize = DL.getTypeStoreSize(Tp);
- unsigned LegalVTSize = LegalVT.getStoreSize();
- // Number of source vectors after legalization:
- unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize;
- // Number of destination vectors after legalization:
- unsigned NumOfDests = LT.first;
-
- Type *SingleOpTy = VectorType::get(Tp->getVectorElementType(),
- LegalVT.getVectorNumElements());
-
- unsigned NumOfShuffles = (NumOfSrcs - 1) * NumOfDests;
- return NumOfShuffles *
- getShuffleCost(TTI::SK_PermuteTwoSrc, SingleOpTy, 0, nullptr);
- }
- }
- }
+ { TTI::SK_Reverse, MVT::v8i16, 1 }, // pshufb
+ { TTI::SK_Reverse, MVT::v16i8, 1 }, // pshufb
+
+ { TTI::SK_Alternate, MVT::v8i16, 3 }, // pshufb + pshufb + por
+ { TTI::SK_Alternate, MVT::v16i8, 3 } // pshufb + pshufb + por
+ };
+
+ if (ST->hasSSSE3())
+ if (const auto *Entry = CostTableLookup(SSSE3ShuffleTbl, Kind, LT.second))
+ return LT.first * Entry->Cost;
+
+ static const CostTblEntry SSE2ShuffleTbl[] = {
+ { TTI::SK_Broadcast, MVT::v2f64, 1 }, // shufpd
+ { TTI::SK_Broadcast, MVT::v2i64, 1 }, // pshufd
+ { TTI::SK_Broadcast, MVT::v4i32, 1 }, // pshufd
+ { TTI::SK_Broadcast, MVT::v8i16, 2 }, // pshuflw + pshufd
+ { TTI::SK_Broadcast, MVT::v16i8, 3 }, // unpck + pshuflw + pshufd
+
+ { TTI::SK_Reverse, MVT::v2f64, 1 }, // shufpd
+ { TTI::SK_Reverse, MVT::v2i64, 1 }, // pshufd
+ { TTI::SK_Reverse, MVT::v4i32, 1 }, // pshufd
+ { TTI::SK_Reverse, MVT::v8i16, 3 }, // pshuflw + pshufhw + pshufd
+ { TTI::SK_Reverse, MVT::v16i8, 9 }, // 2*pshuflw + 2*pshufhw
+ // + 2*pshufd + 2*unpck + packus
+
+ { TTI::SK_Alternate, MVT::v2i64, 1 }, // movsd
+ { TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd
+ { TTI::SK_Alternate, MVT::v4i32, 2 }, // 2*shufps
+ { TTI::SK_Alternate, MVT::v8i16, 3 }, // pand + pandn + por
+ { TTI::SK_Alternate, MVT::v16i8, 3 } // pand + pandn + por
+ };
+
+ if (ST->hasSSE2())
+ if (const auto *Entry = CostTableLookup(SSE2ShuffleTbl, Kind, LT.second))
+ return LT.first * Entry->Cost;
+
+ static const CostTblEntry SSE1ShuffleTbl[] = {
+ { TTI::SK_Broadcast, MVT::v4f32, 1 }, // shufps
+ { TTI::SK_Reverse, MVT::v4f32, 1 }, // shufps
+ { TTI::SK_Alternate, MVT::v4f32, 2 } // 2*shufps
+ };
+
+ if (ST->hasSSE1())
+ if (const auto *Entry = CostTableLookup(SSE1ShuffleTbl, Kind, LT.second))
+ return LT.first * Entry->Cost;
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}
@@ -1623,17 +1587,29 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
return Cost+LT.first;
}
-int X86TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
+int X86TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
+ const SCEV *Ptr) {
// Address computations in vectorized code with non-consecutive addresses will
// likely result in more instructions compared to scalar code where the
// computation can more often be merged into the index mode. The resulting
// extra micro-ops can significantly decrease throughput.
unsigned NumVectorInstToHideOverhead = 10;
- if (Ty->isVectorTy() && IsComplex)
- return NumVectorInstToHideOverhead;
+ // Cost modeling of Strided Access Computation is hidden by the indexing
+ // modes of X86 regardless of the stride value. We don't believe that there
+ // is a difference between constant strided access in general and constant
+ // strided value which is less than or equal to 64.
+ // Even in the case of (loop invariant) stride whose value is not known at
+ // compile time, the address computation will not incur more than one extra
+ // ADD instruction.
+ if (Ty->isVectorTy() && SE) {
+ if (!BaseT::isStridedAccess(Ptr))
+ return NumVectorInstToHideOverhead;
+ if (!BaseT::getConstantStrideStep(SE, Ptr))
+ return 1;
+ }
- return BaseT::getAddressComputationCost(Ty, IsComplex);
+ return BaseT::getAddressComputationCost(Ty, SE, Ptr);
}
int X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy,
diff --git a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h
index f6bcb9f569e4..c013805f4321 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -71,7 +71,8 @@ public:
unsigned AddressSpace);
int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
bool VariableMask, unsigned Alignment);
- int getAddressComputationCost(Type *PtrTy, bool IsComplex);
+ int getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE,
+ const SCEV *Ptr);
int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Type *> Tys, FastMathFlags FMF);