author    Dimitry Andric <dim@FreeBSD.org>  2017-01-02 21:25:48 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2017-01-02 21:25:48 +0000
commit    d88c1a5a572cdb661c111098831fa526e933756f (patch)
tree      97b32c3372106ac47ded3d1a99f9c023a8530073 /contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
parent    715652a404ee99f10c09c0a5edbb5883961b8c25 (diff)
parent    b915e9e0fc85ba6f398b3fab0db6a81a8913af94 (diff)
Update llvm to trunk r290819 and resolve conflicts.
Notes: svn path=/projects/clang400-import/; revision=311142
Diffstat (limited to 'contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp')
-rw-r--r--  contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 299
1 file changed, 236 insertions(+), 63 deletions(-)
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index d39542a8e4eb..b50749a29b89 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
@@ -29,16 +30,28 @@ using namespace llvm;
#define GET_INSTRINFO_CTOR_DTOR
#include "AArch64GenInstrInfo.inc"
-static LLVM_CONSTEXPR MachineMemOperand::Flags MOSuppressPair =
+static const MachineMemOperand::Flags MOSuppressPair =
MachineMemOperand::MOTargetFlag1;
+static cl::opt<unsigned>
+TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
+ cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
+
+static cl::opt<unsigned>
+CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
+ cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
+
+static cl::opt<unsigned>
+BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
+ cl::desc("Restrict range of Bcc instructions (DEBUG)"));
+
AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
: AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
RI(STI.getTargetTriple()), Subtarget(STI) {}
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
-unsigned AArch64InstrInfo::GetInstSizeInBytes(const MachineInstr &MI) const {
+unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
const MachineBasicBlock &MBB = *MI.getParent();
const MachineFunction *MF = MBB.getParent();
const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
@@ -46,22 +59,41 @@ unsigned AArch64InstrInfo::GetInstSizeInBytes(const MachineInstr &MI) const {
if (MI.getOpcode() == AArch64::INLINEASM)
return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
+ // FIXME: We currently only handle pseudoinstructions that don't get expanded
+ // before the assembly printer.
+ unsigned NumBytes = 0;
const MCInstrDesc &Desc = MI.getDesc();
switch (Desc.getOpcode()) {
default:
- // Anything not explicitly designated otherwise is a nomal 4-byte insn.
- return 4;
+ // Anything not explicitly designated otherwise is a normal 4-byte insn.
+ NumBytes = 4;
+ break;
case TargetOpcode::DBG_VALUE:
case TargetOpcode::EH_LABEL:
case TargetOpcode::IMPLICIT_DEF:
case TargetOpcode::KILL:
- return 0;
+ NumBytes = 0;
+ break;
+ case TargetOpcode::STACKMAP:
+ // The upper bound for a stackmap intrinsic is the full length of its shadow
+ NumBytes = StackMapOpers(&MI).getNumPatchBytes();
+ assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
+ break;
+ case TargetOpcode::PATCHPOINT:
+ // The size of the patchpoint intrinsic is the number of bytes requested
+ NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
+ assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
+ break;
+ case AArch64::TLSDESC_CALLSEQ:
+ // This gets lowered to an instruction sequence which takes 16 bytes
+ NumBytes = 16;
+ break;
- case AArch64::TLSDESC_CALLSEQ:
- // This gets lowered to an instruction sequence which takes 16 bytes
- return 16;
}
- llvm_unreachable("GetInstSizeInBytes()- Unable to determin insn size");
+ return NumBytes;
}
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
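
The STACKMAP/PATCHPOINT cases above size a patchable intrinsic by the NOP shadow it requested, and the assert encodes the fact that A64 code is fixed-width: the shadow must decompose into whole 4-byte instructions. A minimal standalone sketch of that invariant (plain C++, no LLVM APIs; the 16-byte shadow is illustrative):

    #include <cassert>

    // Toy model of the sizing rule: an AArch64 stackmap/patchpoint shadow
    // is materialized as NumBytes/4 NOPs, so it must be a multiple of 4.
    unsigned nopsForShadow(unsigned NumBytes) {
      assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
      return NumBytes / 4;
    }

    int main() {
      assert(nopsForShadow(16) == 4); // a 16-byte shadow becomes four NOPs
      return 0;
    }
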
@@ -95,6 +127,56 @@ static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
}
}
+static unsigned getBranchDisplacementBits(unsigned Opc) {
+ switch (Opc) {
+ default:
+ llvm_unreachable("unexpected opcode!");
+ case AArch64::B:
+ return 64;
+ case AArch64::TBNZW:
+ case AArch64::TBZW:
+ case AArch64::TBNZX:
+ case AArch64::TBZX:
+ return TBZDisplacementBits;
+ case AArch64::CBNZW:
+ case AArch64::CBZW:
+ case AArch64::CBNZX:
+ case AArch64::CBZX:
+ return CBZDisplacementBits;
+ case AArch64::Bcc:
+ return BCCDisplacementBits;
+ }
+}
+
+bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
+ int64_t BrOffset) const {
+ unsigned Bits = getBranchDisplacementBits(BranchOp);
+ assert(Bits >= 3 && "max branch displacement must be enough to jump"
+ "over conditional branch expansion");
+ return isIntN(Bits, BrOffset / 4);
+}
+
+MachineBasicBlock *AArch64InstrInfo::getBranchDestBlock(
+ const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable("unexpected opcode!");
+ case AArch64::B:
+ return MI.getOperand(0).getMBB();
+ case AArch64::TBZW:
+ case AArch64::TBNZW:
+ case AArch64::TBZX:
+ case AArch64::TBNZX:
+ return MI.getOperand(2).getMBB();
+ case AArch64::CBZW:
+ case AArch64::CBNZW:
+ case AArch64::CBZX:
+ case AArch64::CBNZX:
+ case AArch64::Bcc:
+ return MI.getOperand(1).getMBB();
+ }
+}
+
// Branch analysis.
bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
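
isBranchOffsetInRange divides the byte offset by 4 before the width test because A64 branch displacements are encoded in instruction units, not bytes. A self-contained sketch of the arithmetic (isIntN reimplemented locally; the byte ranges are the ones implied by the 14-bit TB[N]Z and 19-bit CB[N]Z/Bcc defaults above):

    #include <cassert>
    #include <cstdint>

    // Same check as llvm::isIntN: does Value fit in Bits signed bits?
    static bool fitsInSignedBits(unsigned Bits, int64_t Value) {
      return Value >= -(int64_t(1) << (Bits - 1)) &&
             Value < (int64_t(1) << (Bits - 1));
    }

    static bool branchOffsetInRange(unsigned Bits, int64_t BrOffset) {
      return fitsInSignedBits(Bits, BrOffset / 4); // offsets are insn-scaled
    }

    int main() {
      // TB(N)Z, 14 bits: [-2^13, 2^13 - 1] instructions, i.e. [-32768, +32764] bytes.
      assert(branchOffsetInRange(14, 32764));
      assert(!branchOffsetInRange(14, 32768));
      // CB(N)Z / B.cc, 19 bits: roughly +/- 1 MiB.
      assert(branchOffsetInRange(19, (int64_t(1) << 20) - 4));
      assert(!branchOffsetInRange(19, int64_t(1) << 20));
      return 0;
    }
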
@@ -183,7 +265,7 @@ bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
return true;
}
-bool AArch64InstrInfo::ReverseBranchCondition(
+bool AArch64InstrInfo::reverseBranchCondition(
SmallVectorImpl<MachineOperand> &Cond) const {
if (Cond[0].getImm() != -1) {
// Regular Bcc
@@ -224,7 +306,8 @@ bool AArch64InstrInfo::ReverseBranchCondition(
return false;
}
-unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved) const {
MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
if (I == MBB.end())
return 0;
@@ -238,14 +321,23 @@ unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
I = MBB.end();
- if (I == MBB.begin())
+ if (I == MBB.begin()) {
+ if (BytesRemoved)
+ *BytesRemoved = 4;
return 1;
+ }
--I;
- if (!isCondBranchOpcode(I->getOpcode()))
+ if (!isCondBranchOpcode(I->getOpcode())) {
+ if (BytesRemoved)
+ *BytesRemoved = 4;
return 1;
+ }
// Remove the branch.
I->eraseFromParent();
+ if (BytesRemoved)
+ *BytesRemoved = 8;
+
return 2;
}
@@ -266,25 +358,34 @@ void AArch64InstrInfo::instantiateCondBranch(
}
}
-unsigned AArch64InstrInfo::InsertBranch(MachineBasicBlock &MBB,
+unsigned AArch64InstrInfo::insertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL) const {
+ const DebugLoc &DL,
+ int *BytesAdded) const {
// Shouldn't be a fall through.
- assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert(TBB && "insertBranch must not be told to insert a fallthrough");
if (!FBB) {
if (Cond.empty()) // Unconditional branch?
BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
else
instantiateCondBranch(MBB, DL, TBB, Cond);
+
+ if (BytesAdded)
+ *BytesAdded = 4;
+
return 1;
}
// Two-way conditional branch.
instantiateCondBranch(MBB, DL, TBB, Cond);
BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
+
+ if (BytesAdded)
+ *BytesAdded = 8;
+
return 2;
}
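
removeBranch and insertBranch now report byte deltas through out-parameters so that a caller such as branch relaxation can keep block offsets up to date incrementally instead of re-measuring whole blocks. Every AArch64 branch is a single 4-byte instruction, hence the hard-coded 4 and 8 above. A toy model of that bookkeeping (the struct is illustrative, not an LLVM type):

    #include <cassert>

    struct Block { int NumTerminators; int SizeInBytes; };

    // Mirrors the reporting contract: 4 bytes per terminator removed.
    static int removeTerminators(Block &B) {
      int BytesRemoved = 4 * B.NumTerminators;
      B.SizeInBytes -= BytesRemoved;
      B.NumTerminators = 0;
      return BytesRemoved;
    }

    int main() {
      Block B{2, 40}; // ends in Bcc + B: 8 bytes of terminators
      int Removed = removeTerminators(B);
      assert(Removed == 8 && B.SizeInBytes == 32);
      return 0;
    }
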
@@ -318,7 +419,8 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
// if NZCV is used, do not fold.
if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
return 0;
- // fall-through to ADDXri and ADDWri.
+ // fall-through to ADDXri and ADDWri.
+ LLVM_FALLTHROUGH;
case AArch64::ADDXri:
case AArch64::ADDWri:
// add x, 1 -> csinc.
@@ -345,7 +447,8 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
// if NZCV is used, do not fold.
if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
return 0;
- // fall-through to SUBXrr and SUBWrr.
+ // fall-through to SUBXrr and SUBWrr.
+ LLVM_FALLTHROUGH;
case AArch64::SUBXrr:
case AArch64::SUBWrr: {
// neg x -> csneg, represented as sub dst, xzr, src.
@@ -861,9 +964,9 @@ static bool areCFlagsAccessedBetweenInstrs(
return true;
// From must be above To.
- assert(std::find_if(MachineBasicBlock::reverse_iterator(To),
- To->getParent()->rend(), [From](MachineInstr &MI) {
- return MachineBasicBlock::iterator(MI) == From;
+ assert(std::find_if(++To.getReverse(), To->getParent()->rend(),
+ [From](MachineInstr &MI) {
+ return MI.getIterator() == From;
}) != To->getParent()->rend());
// We iterate backward starting \p To until we hit \p From.
@@ -971,6 +1074,7 @@ static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
return false;
}
+namespace {
struct UsedNZCV {
bool N;
bool Z;
@@ -985,6 +1089,7 @@ struct UsedNZCV {
return *this;
}
};
+} // end anonymous namespace
/// Find a condition code used by the instruction.
/// Returns AArch64CC::Invalid if either the instruction does not use condition
@@ -1529,7 +1634,6 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
switch (MI.getOpcode()) {
default:
break;
-
case AArch64::LDURQi:
case AArch64::STURQi:
case AArch64::LDRQui:
@@ -1544,36 +1648,8 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
bool AArch64InstrInfo::getMemOpBaseRegImmOfs(
MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset,
const TargetRegisterInfo *TRI) const {
- switch (LdSt.getOpcode()) {
- default:
- return false;
- // Scaled instructions.
- case AArch64::STRSui:
- case AArch64::STRDui:
- case AArch64::STRQui:
- case AArch64::STRXui:
- case AArch64::STRWui:
- case AArch64::LDRSui:
- case AArch64::LDRDui:
- case AArch64::LDRQui:
- case AArch64::LDRXui:
- case AArch64::LDRWui:
- case AArch64::LDRSWui:
- // Unscaled instructions.
- case AArch64::STURSi:
- case AArch64::STURDi:
- case AArch64::STURQi:
- case AArch64::STURXi:
- case AArch64::STURWi:
- case AArch64::LDURSi:
- case AArch64::LDURDi:
- case AArch64::LDURQi:
- case AArch64::LDURWi:
- case AArch64::LDURXi:
- case AArch64::LDURSWi:
- unsigned Width;
- return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI);
- };
+ unsigned Width;
+ return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI);
}
bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
@@ -1772,6 +1848,9 @@ bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
if (NumLoads > 1)
return false;
+ if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
+ return false;
+
// Can we pair these instructions based on their opcodes?
unsigned FirstOpc = FirstLdSt.getOpcode();
unsigned SecondOpc = SecondLdSt.getOpcode();
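
Further down, shouldClusterMemOps accepts a pair only when Offset1 + 1 == Offset2, i.e. the two accesses are adjacent, which is what lets the scheduler place them back to back for later LDP/STP formation. A simplified sketch, assuming (as the scaled opcodes suggest) that the offsets compared are in units of the access width, so 8 bytes for LDRXui:

    #include <cassert>
    #include <cstdint>

    // Adjacency test only; the real code first matches opcodes and
    // checks that both instructions are pairable at all.
    static bool shouldCluster(int64_t Offset1, int64_t Offset2) {
      return Offset1 + 1 == Offset2;
    }

    int main() {
      // ldr x0, [x9, #16] -> unit offset 2;  ldr x1, [x9, #24] -> unit offset 3
      assert(shouldCluster(2, 3));
      assert(!shouldCluster(2, 4)); // one slot apart: not adjacent
      return 0;
    }
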
@@ -1802,41 +1881,82 @@ bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
return Offset1 + 1 == Offset2;
}
-bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr &First,
- MachineInstr &Second) const {
- if (Subtarget.hasMacroOpFusion()) {
+bool AArch64InstrInfo::shouldScheduleAdjacent(
+ const MachineInstr &First, const MachineInstr &Second) const {
+ if (Subtarget.hasArithmeticBccFusion()) {
// Fuse CMN, CMP, TST followed by Bcc.
unsigned SecondOpcode = Second.getOpcode();
if (SecondOpcode == AArch64::Bcc) {
switch (First.getOpcode()) {
default:
return false;
- case AArch64::SUBSWri:
case AArch64::ADDSWri:
- case AArch64::ANDSWri:
- case AArch64::SUBSXri:
+ case AArch64::ADDSWrr:
case AArch64::ADDSXri:
+ case AArch64::ADDSXrr:
+ case AArch64::ANDSWri:
+ case AArch64::ANDSWrr:
case AArch64::ANDSXri:
+ case AArch64::ANDSXrr:
+ case AArch64::SUBSWri:
+ case AArch64::SUBSWrr:
+ case AArch64::SUBSXri:
+ case AArch64::SUBSXrr:
+ case AArch64::BICSWrr:
+ case AArch64::BICSXrr:
return true;
+ case AArch64::ADDSWrs:
+ case AArch64::ADDSXrs:
+ case AArch64::ANDSWrs:
+ case AArch64::ANDSXrs:
+ case AArch64::SUBSWrs:
+ case AArch64::SUBSXrs:
+ case AArch64::BICSWrs:
+ case AArch64::BICSXrs:
+ // Shift value can be 0 making these behave like the "rr" variant...
+ return !hasShiftedReg(Second);
}
}
+ }
+ if (Subtarget.hasArithmeticCbzFusion()) {
// Fuse ALU operations followed by CBZ/CBNZ.
+ unsigned SecondOpcode = Second.getOpcode();
if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX ||
SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX) {
switch (First.getOpcode()) {
default:
return false;
case AArch64::ADDWri:
+ case AArch64::ADDWrr:
case AArch64::ADDXri:
+ case AArch64::ADDXrr:
case AArch64::ANDWri:
+ case AArch64::ANDWrr:
case AArch64::ANDXri:
+ case AArch64::ANDXrr:
case AArch64::EORWri:
+ case AArch64::EORWrr:
case AArch64::EORXri:
+ case AArch64::EORXrr:
case AArch64::ORRWri:
+ case AArch64::ORRWrr:
case AArch64::ORRXri:
+ case AArch64::ORRXrr:
case AArch64::SUBWri:
+ case AArch64::SUBWrr:
case AArch64::SUBXri:
+ case AArch64::SUBXrr:
return true;
+ case AArch64::ADDWrs:
+ case AArch64::ADDXrs:
+ case AArch64::ANDWrs:
+ case AArch64::ANDXrs:
+ case AArch64::SUBWrs:
+ case AArch64::SUBXrs:
+ case AArch64::BICWrs:
+ case AArch64::BICXrs:
+ // Shift value can be 0 making these behave like the "rr" variant...
+ return !hasShiftedReg(Second);
}
}
}
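
The rewrite splits the old hasMacroOpFusion check into separate Bcc and CBZ fusion predicates and widens the opcode lists, with one subtlety flagged by the comments: a shifted-register "rs" form behaves like the plain "rr" register-register form only when its shift amount is zero, so only then does fusing make sense. A simplified model of that rule (the struct and names are illustrative, not LLVM API):

    #include <cassert>

    struct AluOp { bool SetsFlags; unsigned ShiftAmount; };

    // A flag-setting ALU op pairs with a following Bcc only if any
    // register shift it carries is the degenerate zero shift.
    static bool fusesWithBcc(const AluOp &Op) {
      return Op.SetsFlags && Op.ShiftAmount == 0;
    }

    int main() {
      assert(fusesWithBcc({true, 0}));   // subs w0, w1, w2         ; b.eq
      assert(!fusesWithBcc({true, 1}));  // subs w0, w1, w2, lsl #1 ; b.eq
      return 0;
    }
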
@@ -2189,7 +2309,7 @@ void AArch64InstrInfo::storeRegToStackSlot(
if (MBBI != MBB.end())
DL = MBBI->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
@@ -2293,7 +2413,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
if (MBBI != MBB.end())
DL = MBBI->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
MachineMemOperand *MMO = MF.getMachineMemOperand(
@@ -2481,6 +2601,57 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
}
}
+ // Handle the case where a copy is being spilled or refilled but the source
+ // and destination register class don't match. For example:
+ //
+ // %vreg0<def> = COPY %XZR; GPR64common:%vreg0
+ //
+ // In this case we can still safely fold away the COPY and generate the
+ // following spill code:
+ //
+ // STRXui %XZR, <fi#0>
+ //
+ // This also eliminates spilled cross register class COPYs (e.g. between x and
+ // d regs) of the same size. For example:
+ //
+ // %vreg0<def> = COPY %vreg1; GPR64:%vreg0, FPR64:%vreg1
+ //
+ // will be refilled as
+ //
+ // LDRDui %vreg0, fi<#0>
+ //
+ // instead of
+ //
+ // LDRXui %vregTemp, fi<#0>
+ // %vreg0 = FMOV %vregTemp
+ //
+ if (MI.isFullCopy() && Ops.size() == 1 &&
+ // Make sure we're only folding the explicit COPY defs/uses.
+ (Ops[0] == 0 || Ops[0] == 1)) {
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ MachineBasicBlock &MBB = *MI.getParent();
+ const MachineOperand &DstMO = MI.getOperand(0);
+ const MachineOperand &SrcMO = MI.getOperand(1);
+ unsigned DstReg = DstMO.getReg();
+ unsigned SrcReg = SrcMO.getReg();
+ auto getRegClass = [&](unsigned Reg) {
+ return TargetRegisterInfo::isVirtualRegister(Reg)
+ ? MRI.getRegClass(Reg)
+ : TRI.getMinimalPhysRegClass(Reg);
+ };
+ const TargetRegisterClass &DstRC = *getRegClass(DstReg);
+ const TargetRegisterClass &SrcRC = *getRegClass(SrcReg);
+ if (DstRC.getSize() == SrcRC.getSize()) {
+ if (Ops[0] == 0)
+ storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
+ &SrcRC, &TRI);
+ else
+ loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, &DstRC, &TRI);
+ return &*--InsertPt;
+ }
+ }
+
// Cannot fold.
return nullptr;
}
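
The whole fold above is gated on the source and destination classes having equal spill sizes; when they match, the spill or refill is emitted directly in the surviving register's class (an x-to-d copy refills as LDRDui, with no FMOV needed). A minimal sketch of that gate, with illustrative class records standing in for TargetRegisterClass::getSize():

    #include <cassert>

    struct RegClass { const char *Name; unsigned SizeInBytes; };

    // Folding a COPY into a spill/refill is only sound when both classes
    // store the same number of bytes in a stack slot.
    static bool canFoldCopyThroughStack(const RegClass &Dst,
                                        const RegClass &Src) {
      return Dst.SizeInBytes == Src.SizeInBytes;
    }

    int main() {
      RegClass GPR64{"GPR64", 8}, FPR64{"FPR64", 8}, FPR128{"FPR128", 16};
      assert(canFoldCopyThroughStack(GPR64, FPR64));   // x <-> d: fold away
      assert(!canFoldCopyThroughStack(GPR64, FPR128)); // sizes differ: keep COPY
      return 0;
    }
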
@@ -2829,6 +3000,8 @@ static bool isCombineInstrCandidate64(unsigned Opc) {
// FP Opcodes that can be combined with a FMUL
static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
switch (Inst.getOpcode()) {
+ default:
+ break;
case AArch64::FADDSrr:
case AArch64::FADDDrr:
case AArch64::FADDv2f32:
@@ -2839,9 +3012,9 @@ static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
case AArch64::FSUBv2f32:
case AArch64::FSUBv2f64:
case AArch64::FSUBv4f32:
- return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
- default:
- break;
+ TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
+ return (Options.UnsafeFPMath ||
+ Options.AllowFPOpFusion == FPOpFusion::Fast);
}
return false;
}
@@ -3465,7 +3638,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
unsigned Val = Root.getOperand(3).getImm();
Imm = Imm << Val;
}
- uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
+ uint64_t UImm = SignExtend64(Imm, BitSize);
uint64_t Encoding;
if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
MachineInstrBuilder MIB1 =
@@ -3551,12 +3724,12 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
RC = &AArch64::GPR64RegClass;
}
unsigned NewVR = MRI.createVirtualRegister(OrrRC);
- int Imm = Root.getOperand(2).getImm();
+ uint64_t Imm = Root.getOperand(2).getImm();
if (Root.getOperand(3).isImm()) {
unsigned Val = Root.getOperand(3).getImm();
Imm = Imm << Val;
}
- uint64_t UImm = -Imm << (64 - BitSize) >> (64 - BitSize);
+ uint64_t UImm = SignExtend64(-Imm, BitSize);
uint64_t Encoding;
if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
MachineInstrBuilder MIB1 =
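
Both hunks replace the open-coded shift pair with SignExtend64(Imm, BitSize), which interprets the low BitSize bits of its argument as a signed value. The old expression applied a logical (zero-extending) right shift whenever Imm was unsigned, and left-shifting a negative signed Imm is undefined behavior, which is presumably what motivated the change. A self-contained check of the semantics, with SignExtend64 reimplemented locally the way MathExtras.h defines it:

    #include <cassert>
    #include <cstdint>

    // Sign-extend the low B bits of X: cast to a signed type so the
    // right shift is arithmetic rather than logical.
    static int64_t signExtend64(uint64_t X, unsigned B) {
      return int64_t(X << (64 - B)) >> (64 - B);
    }

    int main() {
      assert(signExtend64(0xFFFFFFFFu, 32) == -1);         // 32-bit -1 widens
      assert(signExtend64(0x7FFFFFFFu, 32) == 0x7FFFFFFF); // positive unchanged
      uint64_t ZExt = (0xFFFFFFFFull << 32) >> 32;         // logical shifts
      assert(ZExt == 0xFFFFFFFFull); // zero-extends: the old unsigned result
      return 0;
    }
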