author     Dimitry Andric <dim@FreeBSD.org>    2017-01-02 21:25:48 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2017-01-02 21:25:48 +0000
commit     d88c1a5a572cdb661c111098831fa526e933756f (patch)
tree       97b32c3372106ac47ded3d1a99f9c023a8530073 /contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
parent     715652a404ee99f10c09c0a5edbb5883961b8c25 (diff)
parent     b915e9e0fc85ba6f398b3fab0db6a81a8913af94 (diff)
Update llvm to trunk r290819 and resolve conflicts.
Notes:
svn path=/projects/clang400-import/; revision=311142
Diffstat (limited to 'contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 299 |
1 file changed, 236 insertions, 63 deletions
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index d39542a8e4eb..b50749a29b89 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -19,6 +19,7 @@
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/StackMaps.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TargetRegistry.h"
@@ -29,16 +30,28 @@ using namespace llvm;
 #define GET_INSTRINFO_CTOR_DTOR
 #include "AArch64GenInstrInfo.inc"

-static LLVM_CONSTEXPR MachineMemOperand::Flags MOSuppressPair =
+static const MachineMemOperand::Flags MOSuppressPair =
     MachineMemOperand::MOTargetFlag1;

+static cl::opt<unsigned>
+TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
+                    cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
+
+static cl::opt<unsigned>
+CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
+                    cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
+
+static cl::opt<unsigned>
+BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
+                    cl::desc("Restrict range of Bcc instructions (DEBUG)"));
+
 AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
     : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
       RI(STI.getTargetTriple()), Subtarget(STI) {}

 /// GetInstSize - Return the number of bytes of code the specified
 /// instruction may be. This returns the maximum number of bytes.
-unsigned AArch64InstrInfo::GetInstSizeInBytes(const MachineInstr &MI) const {
+unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
   const MachineBasicBlock &MBB = *MI.getParent();
   const MachineFunction *MF = MBB.getParent();
   const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
@@ -46,22 +59,41 @@ unsigned AArch64InstrInfo::GetInstSizeInBytes(const MachineInstr &MI) const {
   if (MI.getOpcode() == AArch64::INLINEASM)
     return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);

+  // FIXME: We currently only handle pseudoinstructions that don't get expanded
+  //        before the assembly printer.
+  unsigned NumBytes = 0;
   const MCInstrDesc &Desc = MI.getDesc();
   switch (Desc.getOpcode()) {
   default:
-    // Anything not explicitly designated otherwise is a nomal 4-byte insn.
-    return 4;
+    // Anything not explicitly designated otherwise is a normal 4-byte insn.
+    NumBytes = 4;
+    break;
   case TargetOpcode::DBG_VALUE:
   case TargetOpcode::EH_LABEL:
   case TargetOpcode::IMPLICIT_DEF:
   case TargetOpcode::KILL:
-    return 0;
+    NumBytes = 0;
+    break;
+  case TargetOpcode::STACKMAP:
+    // The upper bound for a stackmap intrinsic is the full length of its shadow
+    NumBytes = StackMapOpers(&MI).getNumPatchBytes();
+    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
+    break;
+  case TargetOpcode::PATCHPOINT:
+    // The size of the patchpoint intrinsic is the number of bytes requested
+    NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
+    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
+    break;
+  case AArch64::TLSDESC_CALLSEQ:
+    // This gets lowered to an instruction sequence which takes 16 bytes
+    NumBytes = 16;
+    break;
-  case AArch64::TLSDESC_CALLSEQ:
-    // This gets lowered to an instruction sequence which takes 16 bytes
-    return 16;
   }

-  llvm_unreachable("GetInstSizeInBytes()- Unable to determin insn size");
+  return NumBytes;
 }

 static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
@@ -95,6 +127,56 @@ static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
   }
 }

+static unsigned getBranchDisplacementBits(unsigned Opc) {
+  switch (Opc) {
+  default:
+    llvm_unreachable("unexpected opcode!");
+  case AArch64::B:
+    return 64;
+  case AArch64::TBNZW:
+  case AArch64::TBZW:
+  case AArch64::TBNZX:
+  case AArch64::TBZX:
+    return TBZDisplacementBits;
+  case AArch64::CBNZW:
+  case AArch64::CBZW:
+  case AArch64::CBNZX:
+  case AArch64::CBZX:
+    return CBZDisplacementBits;
+  case AArch64::Bcc:
+    return BCCDisplacementBits;
+  }
+}
+
+bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
+                                             int64_t BrOffset) const {
+  unsigned Bits = getBranchDisplacementBits(BranchOp);
+  assert(Bits >= 3 && "max branch displacement must be enough to jump"
+                      "over conditional branch expansion");
+  return isIntN(Bits, BrOffset / 4);
+}
+
+MachineBasicBlock *AArch64InstrInfo::getBranchDestBlock(
+    const MachineInstr &MI) const {
+  switch (MI.getOpcode()) {
+  default:
+    llvm_unreachable("unexpected opcode!");
+  case AArch64::B:
+    return MI.getOperand(0).getMBB();
+  case AArch64::TBZW:
+  case AArch64::TBNZW:
+  case AArch64::TBZX:
+  case AArch64::TBNZX:
+    return MI.getOperand(2).getMBB();
+  case AArch64::CBZW:
+  case AArch64::CBNZW:
+  case AArch64::CBZX:
+  case AArch64::CBNZX:
+  case AArch64::Bcc:
+    return MI.getOperand(1).getMBB();
+  }
+}
+
 // Branch analysis.
 bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                      MachineBasicBlock *&TBB,
@@ -183,7 +265,7 @@ bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
   return true;
 }

-bool AArch64InstrInfo::ReverseBranchCondition(
+bool AArch64InstrInfo::reverseBranchCondition(
     SmallVectorImpl<MachineOperand> &Cond) const {
   if (Cond[0].getImm() != -1) {
     // Regular Bcc
@@ -224,7 +306,8 @@ bool AArch64InstrInfo::ReverseBranchCondition(
   return false;
 }

-unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
+                                        int *BytesRemoved) const {
   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
   if (I == MBB.end())
     return 0;
@@ -238,14 +321,23 @@ unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
   I = MBB.end();

-  if (I == MBB.begin())
+  if (I == MBB.begin()) {
+    if (BytesRemoved)
+      *BytesRemoved = 4;
     return 1;
+  }
   --I;
-  if (!isCondBranchOpcode(I->getOpcode()))
+  if (!isCondBranchOpcode(I->getOpcode())) {
+    if (BytesRemoved)
+      *BytesRemoved = 4;
     return 1;
+  }

   // Remove the branch.
   I->eraseFromParent();
+  if (BytesRemoved)
+    *BytesRemoved = 8;
+
   return 2;
 }

@@ -266,25 +358,34 @@ void AArch64InstrInfo::instantiateCondBranch(
   }
 }

-unsigned AArch64InstrInfo::InsertBranch(MachineBasicBlock &MBB,
+unsigned AArch64InstrInfo::insertBranch(MachineBasicBlock &MBB,
                                         MachineBasicBlock *TBB,
                                         MachineBasicBlock *FBB,
                                         ArrayRef<MachineOperand> Cond,
-                                        const DebugLoc &DL) const {
+                                        const DebugLoc &DL,
+                                        int *BytesAdded) const {
   // Shouldn't be a fall through.
-  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+  assert(TBB && "insertBranch must not be told to insert a fallthrough");

   if (!FBB) {
     if (Cond.empty()) // Unconditional branch?
       BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
     else
       instantiateCondBranch(MBB, DL, TBB, Cond);
+
+    if (BytesAdded)
+      *BytesAdded = 4;
+
     return 1;
   }

   // Two-way conditional branch.
   instantiateCondBranch(MBB, DL, TBB, Cond);
   BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
+
+  if (BytesAdded)
+    *BytesAdded = 8;
+
   return 2;
 }

@@ -318,7 +419,8 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
     // if NZCV is used, do not fold.
     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
       return 0;
-    // fall-through to ADDXri and ADDWri.
+    // fall-through to ADDXri and ADDWri.
+    LLVM_FALLTHROUGH;
   case AArch64::ADDXri:
   case AArch64::ADDWri:
     // add x, 1 -> csinc.
@@ -345,7 +447,8 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
     // if NZCV is used, do not fold.
     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
       return 0;
-    // fall-through to SUBXrr and SUBWrr.
+    // fall-through to SUBXrr and SUBWrr.
+    LLVM_FALLTHROUGH;
   case AArch64::SUBXrr:
   case AArch64::SUBWrr: {
     // neg x -> csneg, represented as sub dst, xzr, src.
@@ -861,9 +964,9 @@ static bool areCFlagsAccessedBetweenInstrs(
     return true;

   // From must be above To.
-  assert(std::find_if(MachineBasicBlock::reverse_iterator(To),
-                      To->getParent()->rend(), [From](MachineInstr &MI) {
-                        return MachineBasicBlock::iterator(MI) == From;
+  assert(std::find_if(++To.getReverse(), To->getParent()->rend(),
+                      [From](MachineInstr &MI) {
+                        return MI.getIterator() == From;
                       }) != To->getParent()->rend());

   // We iterate backward starting \p To until we hit \p From.
@@ -971,6 +1074,7 @@ static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
   return false;
 }

+namespace {
 struct UsedNZCV {
   bool N;
   bool Z;
@@ -985,6 +1089,7 @@ struct UsedNZCV {
     return *this;
   }
 };
+} // end anonymous namespace

 /// Find a condition code used by the instruction.
 /// Returns AArch64CC::Invalid if either the instruction does not use condition
@@ -1529,7 +1634,6 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
   switch (MI.getOpcode()) {
   default:
     break;
-
   case AArch64::LDURQi:
   case AArch64::STURQi:
   case AArch64::LDRQui:
@@ -1544,36 +1648,8 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
 bool AArch64InstrInfo::getMemOpBaseRegImmOfs(
     MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset,
     const TargetRegisterInfo *TRI) const {
-  switch (LdSt.getOpcode()) {
-  default:
-    return false;
-  // Scaled instructions.
-  case AArch64::STRSui:
-  case AArch64::STRDui:
-  case AArch64::STRQui:
-  case AArch64::STRXui:
-  case AArch64::STRWui:
-  case AArch64::LDRSui:
-  case AArch64::LDRDui:
-  case AArch64::LDRQui:
-  case AArch64::LDRXui:
-  case AArch64::LDRWui:
-  case AArch64::LDRSWui:
-  // Unscaled instructions.
-  case AArch64::STURSi:
-  case AArch64::STURDi:
-  case AArch64::STURQi:
-  case AArch64::STURXi:
-  case AArch64::STURWi:
-  case AArch64::LDURSi:
-  case AArch64::LDURDi:
-  case AArch64::LDURQi:
-  case AArch64::LDURWi:
-  case AArch64::LDURXi:
-  case AArch64::LDURSWi:
-    unsigned Width;
-    return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI);
-  };
+  unsigned Width;
+  return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI);
 }

 bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
@@ -1772,6 +1848,9 @@ bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
   if (NumLoads > 1)
     return false;

+  if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
+    return false;
+
   // Can we pair these instructions based on their opcodes?
   unsigned FirstOpc = FirstLdSt.getOpcode();
   unsigned SecondOpc = SecondLdSt.getOpcode();
@@ -1802,41 +1881,82 @@ bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
   return Offset1 + 1 == Offset2;
 }

-bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr &First,
-                                              MachineInstr &Second) const {
-  if (Subtarget.hasMacroOpFusion()) {
+bool AArch64InstrInfo::shouldScheduleAdjacent(
+    const MachineInstr &First, const MachineInstr &Second) const {
+  if (Subtarget.hasArithmeticBccFusion()) {
     // Fuse CMN, CMP, TST followed by Bcc.
     unsigned SecondOpcode = Second.getOpcode();
     if (SecondOpcode == AArch64::Bcc) {
       switch (First.getOpcode()) {
       default:
         return false;
-      case AArch64::SUBSWri:
       case AArch64::ADDSWri:
-      case AArch64::ANDSWri:
-      case AArch64::SUBSXri:
+      case AArch64::ADDSWrr:
       case AArch64::ADDSXri:
+      case AArch64::ADDSXrr:
+      case AArch64::ANDSWri:
+      case AArch64::ANDSWrr:
       case AArch64::ANDSXri:
+      case AArch64::ANDSXrr:
+      case AArch64::SUBSWri:
+      case AArch64::SUBSWrr:
+      case AArch64::SUBSXri:
+      case AArch64::SUBSXrr:
+      case AArch64::BICSWrr:
+      case AArch64::BICSXrr:
         return true;
+      case AArch64::ADDSWrs:
+      case AArch64::ADDSXrs:
+      case AArch64::ANDSWrs:
+      case AArch64::ANDSXrs:
+      case AArch64::SUBSWrs:
+      case AArch64::SUBSXrs:
+      case AArch64::BICSWrs:
+      case AArch64::BICSXrs:
+        // Shift value can be 0 making these behave like the "rr" variant...
+        return !hasShiftedReg(Second);
       }
     }
+  }
+  if (Subtarget.hasArithmeticCbzFusion()) {
     // Fuse ALU operations followed by CBZ/CBNZ.
+    unsigned SecondOpcode = Second.getOpcode();
     if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX ||
         SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX) {
       switch (First.getOpcode()) {
       default:
         return false;
       case AArch64::ADDWri:
+      case AArch64::ADDWrr:
       case AArch64::ADDXri:
+      case AArch64::ADDXrr:
       case AArch64::ANDWri:
+      case AArch64::ANDWrr:
       case AArch64::ANDXri:
+      case AArch64::ANDXrr:
       case AArch64::EORWri:
+      case AArch64::EORWrr:
       case AArch64::EORXri:
+      case AArch64::EORXrr:
       case AArch64::ORRWri:
+      case AArch64::ORRWrr:
       case AArch64::ORRXri:
+      case AArch64::ORRXrr:
       case AArch64::SUBWri:
+      case AArch64::SUBWrr:
       case AArch64::SUBXri:
+      case AArch64::SUBXrr:
         return true;
+      case AArch64::ADDWrs:
+      case AArch64::ADDXrs:
+      case AArch64::ANDWrs:
+      case AArch64::ANDXrs:
+      case AArch64::SUBWrs:
+      case AArch64::SUBXrs:
+      case AArch64::BICWrs:
+      case AArch64::BICXrs:
+        // Shift value can be 0 making these behave like the "rr" variant...
+        return !hasShiftedReg(Second);
       }
     }
   }
@@ -2189,7 +2309,7 @@ void AArch64InstrInfo::storeRegToStackSlot(
   if (MBBI != MBB.end())
     DL = MBBI->getDebugLoc();
   MachineFunction &MF = *MBB.getParent();
-  MachineFrameInfo &MFI = *MF.getFrameInfo();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
   unsigned Align = MFI.getObjectAlignment(FI);

   MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
@@ -2293,7 +2413,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
   if (MBBI != MBB.end())
     DL = MBBI->getDebugLoc();
   MachineFunction &MF = *MBB.getParent();
-  MachineFrameInfo &MFI = *MF.getFrameInfo();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
   unsigned Align = MFI.getObjectAlignment(FI);
   MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
   MachineMemOperand *MMO = MF.getMachineMemOperand(
@@ -2481,6 +2601,57 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
     }
   }

+  // Handle the case where a copy is being spilled or refilled but the source
+  // and destination register class don't match. For example:
+  //
+  //   %vreg0<def> = COPY %XZR; GPR64common:%vreg0
+  //
+  // In this case we can still safely fold away the COPY and generate the
+  // following spill code:
+  //
+  //   STRXui %XZR, <fi#0>
+  //
+  // This also eliminates spilled cross register class COPYs (e.g. between x and
+  // d regs) of the same size. For example:
+  //
+  //   %vreg0<def> = COPY %vreg1; GPR64:%vreg0, FPR64:%vreg1
+  //
+  // will be refilled as
+  //
+  //   LDRDui %vreg0, fi<#0>
+  //
+  // instead of
+  //
+  //   LDRXui %vregTemp, fi<#0>
+  //   %vreg0 = FMOV %vregTemp
+  //
+  if (MI.isFullCopy() && Ops.size() == 1 &&
+      // Make sure we're only folding the explicit COPY defs/uses.
+      (Ops[0] == 0 || Ops[0] == 1)) {
+    const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+    const MachineRegisterInfo &MRI = MF.getRegInfo();
+    MachineBasicBlock &MBB = *MI.getParent();
+    const MachineOperand &DstMO = MI.getOperand(0);
+    const MachineOperand &SrcMO = MI.getOperand(1);
+    unsigned DstReg = DstMO.getReg();
+    unsigned SrcReg = SrcMO.getReg();
+    auto getRegClass = [&](unsigned Reg) {
+      return TargetRegisterInfo::isVirtualRegister(Reg)
+                 ? MRI.getRegClass(Reg)
+                 : TRI.getMinimalPhysRegClass(Reg);
+    };
+    const TargetRegisterClass &DstRC = *getRegClass(DstReg);
+    const TargetRegisterClass &SrcRC = *getRegClass(SrcReg);
+    if (DstRC.getSize() == SrcRC.getSize()) {
+      if (Ops[0] == 0)
+        storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
+                            &SrcRC, &TRI);
+      else
+        loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, &DstRC, &TRI);
+      return &*--InsertPt;
+    }
+  }
+
   // Cannot fold.
   return nullptr;
 }
@@ -2829,6 +3000,8 @@ static bool isCombineInstrCandidate64(unsigned Opc) {
 // FP Opcodes that can be combined with a FMUL
 static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
   switch (Inst.getOpcode()) {
+  default:
+    break;
   case AArch64::FADDSrr:
   case AArch64::FADDDrr:
   case AArch64::FADDv2f32:
@@ -2839,9 +3012,9 @@ static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
   case AArch64::FSUBv2f32:
   case AArch64::FSUBv2f64:
   case AArch64::FSUBv4f32:
-    return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
-  default:
-    break;
+    TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
+    return (Options.UnsafeFPMath ||
+            Options.AllowFPOpFusion == FPOpFusion::Fast);
   }
   return false;
 }
@@ -3465,7 +3638,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
       unsigned Val = Root.getOperand(3).getImm();
       Imm = Imm << Val;
     }
-    uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
+    uint64_t UImm = SignExtend64(Imm, BitSize);
     uint64_t Encoding;
     if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
       MachineInstrBuilder MIB1 =
@@ -3551,12 +3724,12 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
       RC = &AArch64::GPR64RegClass;
     }
     unsigned NewVR = MRI.createVirtualRegister(OrrRC);
-    int Imm = Root.getOperand(2).getImm();
+    uint64_t Imm = Root.getOperand(2).getImm();
     if (Root.getOperand(3).isImm()) {
       unsigned Val = Root.getOperand(3).getImm();
       Imm = Imm << Val;
     }
-    uint64_t UImm = -Imm << (64 - BitSize) >> (64 - BitSize);
+    uint64_t UImm = SignExtend64(-Imm, BitSize);
     uint64_t Encoding;
     if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
       MachineInstrBuilder MIB1 =