diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2017-04-16 16:25:46 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-04-16 16:25:46 +0000 |
commit | 7a7e6055035bfd93ab507051819373a6f171258b (patch) | |
tree | dc9ac22b4fea4f445748feaf7232a146623f0dfa /contrib/llvm/lib/Target/ARM | |
parent | b96a714f453e7f5aeeb3c2df2c3e1e8ad749f96f (diff) | |
parent | 71d5a2540a98c81f5bcaeb48805e0e2881f530ef (diff) | |
download | src-7a7e6055035bfd93ab507051819373a6f171258b.tar.gz src-7a7e6055035bfd93ab507051819373a6f171258b.zip |
Merge llvm trunk r300422 and resolve conflicts.
Notes
Notes:
svn path=/projects/clang500-import/; revision=317029
Diffstat (limited to 'contrib/llvm/lib/Target/ARM')
75 files changed, 5421 insertions, 3193 deletions
diff --git a/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp b/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp index 89859ba063d9..8640c873f441 100644 --- a/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp +++ b/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp @@ -427,13 +427,11 @@ unsigned A15SDOptimizer::createDupLane(MachineBasicBlock &MBB, unsigned Lane, bool QPR) { unsigned Out = MRI->createVirtualRegister(QPR ? &ARM::QPRRegClass : &ARM::DPRRegClass); - AddDefaultPred(BuildMI(MBB, - InsertBefore, - DL, - TII->get(QPR ? ARM::VDUPLN32q : ARM::VDUPLN32d), - Out) - .addReg(Reg) - .addImm(Lane)); + BuildMI(MBB, InsertBefore, DL, + TII->get(QPR ? ARM::VDUPLN32q : ARM::VDUPLN32d), Out) + .addReg(Reg) + .addImm(Lane) + .add(predOps(ARMCC::AL)); return Out; } @@ -476,13 +474,11 @@ unsigned A15SDOptimizer::createVExt(MachineBasicBlock &MBB, const DebugLoc &DL, unsigned Ssub0, unsigned Ssub1) { unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass); - AddDefaultPred(BuildMI(MBB, - InsertBefore, - DL, - TII->get(ARM::VEXTd32), Out) - .addReg(Ssub0) - .addReg(Ssub1) - .addImm(1)); + BuildMI(MBB, InsertBefore, DL, TII->get(ARM::VEXTd32), Out) + .addReg(Ssub0) + .addReg(Ssub1) + .addImm(1) + .add(predOps(ARMCC::AL)); return Out; } diff --git a/contrib/llvm/lib/Target/ARM/ARM.h b/contrib/llvm/lib/Target/ARM/ARM.h index be3048252bbc..39f7988200ea 100644 --- a/contrib/llvm/lib/Target/ARM/ARM.h +++ b/contrib/llvm/lib/Target/ARM/ARM.h @@ -16,21 +16,21 @@ #define LLVM_LIB_TARGET_ARM_ARM_H #include "llvm/Support/CodeGen.h" -#include "ARMBasicBlockInfo.h" #include <functional> +#include <vector> namespace llvm { class ARMAsmPrinter; class ARMBaseTargetMachine; +struct BasicBlockInfo; class Function; class FunctionPass; -class ImmutablePass; +class MachineBasicBlock; +class MachineFunction; class MachineInstr; class MCInst; class PassRegistry; -class TargetLowering; -class TargetMachine; FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM, CodeGenOpt::Level OptLevel); @@ -53,7 +53,8 @@ std::vector<BasicBlockInfo> computeAllBlockSizes(MachineFunction *MF); void initializeARMLoadStoreOptPass(PassRegistry &); void initializeARMPreAllocLoadStoreOptPass(PassRegistry &); +void initializeARMConstantIslandsPass(PassRegistry &); -} // end namespace llvm; +} // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_ARM_ARM_H diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td index 2a090faeee6a..57f9d1c6b610 100644 --- a/contrib/llvm/lib/Target/ARM/ARM.td +++ b/contrib/llvm/lib/Target/ARM/ARM.td @@ -72,8 +72,6 @@ def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true", def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm", "HasHardwareDivideInARM", "true", "Enable divide instructions in ARM mode">; -def FeatureT2XtPk : SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true", - "Enable Thumb2 extract and pack instructions">; def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true", "Has data barrier (dmb / dsb) instructions">; def FeatureV7Clrex : SubtargetFeature<"v7clrex", "HasV7Clrex", "true", @@ -263,6 +261,12 @@ def FeatureNoMovt : SubtargetFeature<"no-movt", "NoMovt", "true", "Don't use movt/movw pairs for 32-bit " "imms">; +def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates", + "NegativeImmediates", "false", + "Convert immediates and instructions " + "to their negated or complemented " + "equivalent when the immediate does " + "not fit in the encoding.">; //===----------------------------------------------------------------------===// // ARM ISAa. @@ -297,8 +301,7 @@ def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true", FeatureV7Clrex]>; def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true", "Support ARM v8 instructions", - [HasV7Ops, FeatureAcquireRelease, - FeatureT2XtPk]>; + [HasV7Ops, FeatureAcquireRelease]>; def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true", "Support ARM v8.1a instructions", [HasV8Ops]>; @@ -342,7 +345,9 @@ def ProcA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73", "Cortex-A73 ARM processors", []>; def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait", - "Qualcomm ARM processors", []>; + "Qualcomm Krait processors", []>; +def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo", + "Qualcomm Kryo processors", []>; def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift", "Swift ARM processors", []>; @@ -393,8 +398,7 @@ def ARMv5tej : Architecture<"armv5tej", "ARMv5tej", [HasV5TEOps]>; def ARMv6 : Architecture<"armv6", "ARMv6", [HasV6Ops]>; def ARMv6t2 : Architecture<"armv6t2", "ARMv6t2", [HasV6T2Ops, - FeatureDSP, - FeatureT2XtPk]>; + FeatureDSP]>; def ARMv6k : Architecture<"armv6k", "ARMv6k", [HasV6KOps]>; @@ -415,15 +419,22 @@ def ARMv7a : Architecture<"armv7-a", "ARMv7a", [HasV7Ops, FeatureNEON, FeatureDB, FeatureDSP, - FeatureAClass, - FeatureT2XtPk]>; + FeatureAClass]>; + +def ARMv7ve : Architecture<"armv7ve", "ARMv7ve", [HasV7Ops, + FeatureNEON, + FeatureDB, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureAClass]>; def ARMv7r : Architecture<"armv7-r", "ARMv7r", [HasV7Ops, FeatureDB, FeatureDSP, FeatureHWDiv, - FeatureRClass, - FeatureT2XtPk]>; + FeatureRClass]>; def ARMv7m : Architecture<"armv7-m", "ARMv7m", [HasV7Ops, FeatureThumb2, @@ -438,8 +449,7 @@ def ARMv7em : Architecture<"armv7e-m", "ARMv7em", [HasV7Ops, FeatureDB, FeatureHWDiv, FeatureMClass, - FeatureDSP, - FeatureT2XtPk]>; + FeatureDSP]>; def ARMv8a : Architecture<"armv8-a", "ARMv8a", [HasV8Ops, FeatureAClass, @@ -481,9 +491,6 @@ def ARMv82a : Architecture<"armv8.2-a", "ARMv82a", [HasV8_2aOps, def ARMv8r : Architecture<"armv8-r", "ARMv8r", [HasV8Ops, FeatureRClass, FeatureDB, - FeatureHWDiv, - FeatureHWDivARM, - FeatureT2XtPk, FeatureDSP, FeatureCRC, FeatureMP, @@ -603,8 +610,6 @@ def : ProcessorModel<"cortex-a7", CortexA8Model, [ARMv7a, ProcA7, FeatureVMLxForwarding, FeatureMP, FeatureVFP4, - FeatureHWDiv, - FeatureHWDivARM, FeatureVirtualization]>; def : ProcessorModel<"cortex-a8", CortexA8Model, [ARMv7a, ProcA8, @@ -636,8 +641,6 @@ def : ProcessorModel<"cortex-a12", CortexA9Model, [ARMv7a, ProcA12, FeatureTrustZone, FeatureVMLxForwarding, FeatureVFP4, - FeatureHWDiv, - FeatureHWDivARM, FeatureAvoidPartialCPSR, FeatureVirtualization, FeatureMP]>; @@ -651,8 +654,6 @@ def : ProcessorModel<"cortex-a15", CortexA9Model, [ARMv7a, ProcA15, FeatureVFP4, FeatureMP, FeatureCheckVLDnAlign, - FeatureHWDiv, - FeatureHWDivARM, FeatureAvoidPartialCPSR, FeatureVirtualization]>; @@ -663,8 +664,6 @@ def : ProcessorModel<"cortex-a17", CortexA9Model, [ARMv7a, ProcA17, FeatureMP, FeatureVMLxForwarding, FeatureVFP4, - FeatureHWDiv, - FeatureHWDivARM, FeatureAvoidPartialCPSR, FeatureVirtualization]>; @@ -759,6 +758,15 @@ def : ProcNoItin<"cortex-m7", [ARMv7em, FeatureFPARMv8, FeatureD16]>; +def : ProcNoItin<"cortex-m23", [ARMv8mBaseline, + FeatureNoMovt]>; + +def : ProcNoItin<"cortex-m33", [ARMv8mMainline, + FeatureDSP, + FeatureFPARMv8, + FeatureD16, + FeatureVFPOnlySP]>; + def : ProcNoItin<"cortex-a32", [ARMv8a, FeatureHWDiv, FeatureHWDivARM, @@ -829,6 +837,12 @@ def : ProcNoItin<"exynos-m3", [ARMv8a, ProcExynosM1, FeatureCrypto, FeatureCRC]>; +def : ProcNoItin<"kryo", [ARMv8a, ProcKryo, + FeatureHWDiv, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC]>; + def : ProcessorModel<"cortex-r52", CortexR52Model, [ARMv8r, ProcR52, FeatureFPAO]>; @@ -838,6 +852,8 @@ def : ProcessorModel<"cortex-r52", CortexR52Model, [ARMv8r, ProcR52, include "ARMRegisterInfo.td" +include "ARMRegisterBanks.td" + include "ARMCallingConv.td" //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index 95db35ce8ffb..eb0d410b596b 100644 --- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -844,7 +844,7 @@ void ARMAsmPrinter::emitAttributes() { ARMBuildAttrs::Allowed); else ATS.emitAttribute(ARMBuildAttrs::ABI_FP_number_model, - ARMBuildAttrs::AllowIEE754); + ARMBuildAttrs::AllowIEEE754); if (STI.allowsUnalignedMem()) ATS.emitAttribute(ARMBuildAttrs::CPU_unaligned_access, @@ -1142,6 +1142,11 @@ void ARMAsmPrinter::EmitJumpTableInsts(const MachineInstr *MI) { const MachineOperand &MO1 = MI->getOperand(1); unsigned JTI = MO1.getIndex(); + // Make sure the Thumb jump table is 4-byte aligned. This will be a nop for + // ARM mode tables. + EmitAlignment(2); + + // Emit a label for the jump table. MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI); OutStreamer->EmitLabel(JTISymbol); @@ -1255,7 +1260,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { switch (Opc) { default: - MI->dump(); + MI->print(errs()); llvm_unreachable("Unsupported opcode for unwinding information"); case ARM::tPUSH: // Special case here: no src & dst reg, but two extra imp ops. @@ -1291,7 +1296,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { int64_t Offset = 0; switch (Opc) { default: - MI->dump(); + MI->print(errs()); llvm_unreachable("Unsupported opcode for unwinding information"); case ARM::MOVr: case ARM::tMOVr: @@ -1346,11 +1351,11 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { } } } else if (DstReg == ARM::SP) { - MI->dump(); + MI->print(errs()); llvm_unreachable("Unsupported opcode for unwinding information"); } else { - MI->dump(); + MI->print(errs()); llvm_unreachable("Unsupported opcode for unwinding information"); } } diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 70a3246e34f1..4f5711ca9a79 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -11,34 +11,58 @@ // //===----------------------------------------------------------------------===// -#include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" #include "ARMConstantPoolValue.h" #include "ARMFeatures.h" #include "ARMHazardRecognizer.h" #include "ARMMachineFunctionInfo.h" +#include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "MCTargetDesc/ARMBaseInfo.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Triple.h" #include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/ScoreboardHazardRecognizer.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetSchedule.h" +#include "llvm/IR/Attributes.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <iterator> +#include <new> +#include <utility> +#include <vector> using namespace llvm; @@ -168,9 +192,8 @@ MachineInstr *ARMBaseInstrInfo::convertToThreeAddress( get(isSub ? ARM::SUBri : ARM::ADDri), WBReg) .addReg(BaseReg) .addImm(Amt) - .addImm(Pred) - .addReg(0) - .addReg(0); + .add(predOps(Pred)) + .add(condCodeOp()); } else if (Amt != 0) { ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm); unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt); @@ -180,17 +203,15 @@ MachineInstr *ARMBaseInstrInfo::convertToThreeAddress( .addReg(OffReg) .addReg(0) .addImm(SOOpc) - .addImm(Pred) - .addReg(0) - .addReg(0); + .add(predOps(Pred)) + .add(condCodeOp()); } else UpdateMI = BuildMI(MF, MI.getDebugLoc(), get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg) .addReg(BaseReg) .addReg(OffReg) - .addImm(Pred) - .addReg(0) - .addReg(0); + .add(predOps(Pred)) + .add(condCodeOp()); break; } case ARMII::AddrMode3 : { @@ -202,17 +223,15 @@ MachineInstr *ARMBaseInstrInfo::convertToThreeAddress( get(isSub ? ARM::SUBri : ARM::ADDri), WBReg) .addReg(BaseReg) .addImm(Amt) - .addImm(Pred) - .addReg(0) - .addReg(0); + .add(predOps(Pred)) + .add(condCodeOp()); else UpdateMI = BuildMI(MF, MI.getDebugLoc(), get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg) .addReg(BaseReg) .addReg(OffReg) - .addImm(Pred) - .addReg(0) - .addReg(0); + .add(predOps(Pred)) + .add(condCodeOp()); break; } } @@ -306,7 +325,6 @@ bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB, // Walk backwards from the end of the basic block until the branch is // analyzed or we give up. while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) { - // Flag to be raised on unanalyzeable instructions. This is useful in cases // where we want to clean up on the end of the basic block before we bail // out. @@ -381,7 +399,6 @@ bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB, return false; } - unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved) const { assert(!BytesRemoved && "code size not handled"); @@ -433,20 +450,24 @@ unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB, if (!FBB) { if (Cond.empty()) { // Unconditional branch? if (isThumb) - BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).addImm(ARMCC::AL).addReg(0); + BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL)); else BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB); } else - BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB) - .addImm(Cond[0].getImm()).addOperand(Cond[1]); + BuildMI(&MBB, DL, get(BccOpc)) + .addMBB(TBB) + .addImm(Cond[0].getImm()) + .add(Cond[1]); return 1; } // Two-way conditional branch. - BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB) - .addImm(Cond[0].getImm()).addOperand(Cond[1]); + BuildMI(&MBB, DL, get(BccOpc)) + .addMBB(TBB) + .addImm(Cond[0].getImm()) + .add(Cond[1]); if (isThumb) - BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).addImm(ARMCC::AL).addReg(0); + BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL)); else BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB); return 2; @@ -576,7 +597,7 @@ static bool isEligibleForITBlock(const MachineInstr *MI) { /// isPredicable - Return true if the specified instruction can be predicated. /// By default, this returns true for every instruction with a /// PredicateOperand. -bool ARMBaseInstrInfo::isPredicable(MachineInstr &MI) const { +bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const { if (!MI.isPredicable()) return false; @@ -586,7 +607,7 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr &MI) const { if (!isEligibleForITBlock(&MI)) return false; - ARMFunctionInfo *AFI = + const ARMFunctionInfo *AFI = MI.getParent()->getParent()->getInfo<ARMFunctionInfo>(); if (AFI->isThumb2Function()) { @@ -601,7 +622,8 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr &MI) const { } namespace llvm { -template <> bool IsCPSRDead<MachineInstr>(MachineInstr *MI) { + +template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || MO.isUndef() || MO.isUse()) @@ -614,7 +636,8 @@ template <> bool IsCPSRDead<MachineInstr>(MachineInstr *MI) { // all definitions of CPSR are dead return true; } -} + +} // end namespace llvm /// GetInstSize - Return the size of the specified MachineInstr. /// @@ -698,9 +721,8 @@ void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB, if (Subtarget.isMClass()) MIB.addImm(0x800); - AddDefaultPred(MIB); - - MIB.addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc)); + MIB.add(predOps(ARMCC::AL)) + .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc)); } void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB, @@ -718,11 +740,9 @@ void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB, else MIB.addImm(8); - MIB.addReg(SrcReg, getKillRegState(KillSrc)); - - AddDefaultPred(MIB); - - MIB.addReg(ARM::CPSR, RegState::Implicit | RegState::Define); + MIB.addReg(SrcReg, getKillRegState(KillSrc)) + .add(predOps(ARMCC::AL)) + .addReg(ARM::CPSR, RegState::Implicit | RegState::Define); } void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, @@ -733,8 +753,10 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, bool GPRSrc = ARM::GPRRegClass.contains(SrcReg); if (GPRDest && GPRSrc) { - AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)))); + BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); return; } @@ -758,7 +780,7 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MIB.addReg(SrcReg, getKillRegState(KillSrc)); if (Opc == ARM::VORRq) MIB.addReg(SrcReg, getKillRegState(KillSrc)); - AddDefaultPred(MIB); + MIB.add(predOps(ARMCC::AL)); return; } @@ -845,10 +867,10 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, // VORR takes two source operands. if (Opc == ARM::VORRq) Mov.addReg(Src); - Mov = AddDefaultPred(Mov); + Mov = Mov.add(predOps(ARMCC::AL)); // MOVr can set CC. if (Opc == ARM::MOVr) - Mov = AddDefaultCC(Mov); + Mov = Mov.add(condCodeOp()); } // Add implicit super-register defs and kills to the last instruction. Mov->addRegisterDefined(DestReg, TRI); @@ -886,35 +908,44 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, switch (RC->getSize()) { case 4: if (ARM::GPRRegClass.hasSubClassEq(RC)) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STRi12)) - .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + BuildMI(MBB, I, DL, get(ARM::STRi12)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI) + .addImm(0) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); } else if (ARM::SPRRegClass.hasSubClassEq(RC)) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS)) - .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + BuildMI(MBB, I, DL, get(ARM::VSTRS)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI) + .addImm(0) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); } else llvm_unreachable("Unknown reg class!"); break; case 8: if (ARM::DPRRegClass.hasSubClassEq(RC)) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD)) - .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + BuildMI(MBB, I, DL, get(ARM::VSTRD)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI) + .addImm(0) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { if (Subtarget.hasV5TEOps()) { MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STRD)); AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); - MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO); - - AddDefaultPred(MIB); + MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO) + .add(predOps(ARMCC::AL)); } else { // Fallback to STM instruction, which has existed since the dawn of // time. - MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA)) - .addFrameIndex(FI).addMemOperand(MMO)); + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STMIA)) + .addFrameIndex(FI) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); } @@ -925,15 +956,18 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, if (ARM::DPairRegClass.hasSubClassEq(RC)) { // Use aligned spills if the stack can be realigned. if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64)) - .addFrameIndex(FI).addImm(16) - .addReg(SrcReg, getKillRegState(isKill)) - .addMemOperand(MMO)); + BuildMI(MBB, I, DL, get(ARM::VST1q64)) + .addFrameIndex(FI) + .addImm(16) + .addReg(SrcReg, getKillRegState(isKill)) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); } else { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQIA)) - .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI) - .addMemOperand(MMO)); + BuildMI(MBB, I, DL, get(ARM::VSTMQIA)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); } } else llvm_unreachable("Unknown reg class!"); @@ -942,15 +976,17 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, if (ARM::DTripleRegClass.hasSubClassEq(RC)) { // Use aligned spills if the stack can be realigned. if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo)) - .addFrameIndex(FI).addImm(16) - .addReg(SrcReg, getKillRegState(isKill)) - .addMemOperand(MMO)); + BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo)) + .addFrameIndex(FI) + .addImm(16) + .addReg(SrcReg, getKillRegState(isKill)) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); } else { - MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) - .addFrameIndex(FI)) - .addMemOperand(MMO); + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) + .addFrameIndex(FI) + .add(predOps(ARMCC::AL)) + .addMemOperand(MMO); MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); @@ -963,15 +999,17 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { // FIXME: It's possible to only store part of the QQ register if the // spilled def has a sub-register index. - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo)) - .addFrameIndex(FI).addImm(16) - .addReg(SrcReg, getKillRegState(isKill)) - .addMemOperand(MMO)); + BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo)) + .addFrameIndex(FI) + .addImm(16) + .addReg(SrcReg, getKillRegState(isKill)) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); } else { - MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) - .addFrameIndex(FI)) - .addMemOperand(MMO); + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) + .addFrameIndex(FI) + .add(predOps(ARMCC::AL)) + .addMemOperand(MMO); MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); @@ -982,10 +1020,10 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, break; case 64: if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) { - MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) - .addFrameIndex(FI)) - .addMemOperand(MMO); + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) + .addFrameIndex(FI) + .add(predOps(ARMCC::AL)) + .addMemOperand(MMO); MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); @@ -1068,19 +1106,28 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, switch (RC->getSize()) { case 4: if (ARM::GPRRegClass.hasSubClassEq(RC)) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg) + .addFrameIndex(FI) + .addImm(0) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); } else if (ARM::SPRRegClass.hasSubClassEq(RC)) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg) + .addFrameIndex(FI) + .addImm(0) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); } else llvm_unreachable("Unknown reg class!"); break; case 8: if (ARM::DPRRegClass.hasSubClassEq(RC)) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg) + .addFrameIndex(FI) + .addImm(0) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { MachineInstrBuilder MIB; @@ -1088,14 +1135,15 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MIB = BuildMI(MBB, I, DL, get(ARM::LDRD)); AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); - MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO); - - AddDefaultPred(MIB); + MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO) + .add(predOps(ARMCC::AL)); } else { // Fallback to LDM instruction, which has existed since the dawn of // time. - MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDMIA)) - .addFrameIndex(FI).addMemOperand(MMO)); + MIB = BuildMI(MBB, I, DL, get(ARM::LDMIA)) + .addFrameIndex(FI) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); } @@ -1108,13 +1156,16 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, case 16: if (ARM::DPairRegClass.hasSubClassEq(RC)) { if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg) - .addFrameIndex(FI).addImm(16) - .addMemOperand(MMO)); + BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg) + .addFrameIndex(FI) + .addImm(16) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); } else { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg) - .addFrameIndex(FI) - .addMemOperand(MMO)); + BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg) + .addFrameIndex(FI) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); } } else llvm_unreachable("Unknown reg class!"); @@ -1122,14 +1173,16 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, case 24: if (ARM::DTripleRegClass.hasSubClassEq(RC)) { if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg) - .addFrameIndex(FI).addImm(16) - .addMemOperand(MMO)); + BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg) + .addFrameIndex(FI) + .addImm(16) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); } else { - MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) - .addFrameIndex(FI) - .addMemOperand(MMO)); + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) + .addFrameIndex(FI) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI); @@ -1142,14 +1195,16 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, case 32: if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) { if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg) - .addFrameIndex(FI).addImm(16) - .addMemOperand(MMO)); + BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg) + .addFrameIndex(FI) + .addImm(16) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); } else { - MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) - .addFrameIndex(FI)) - .addMemOperand(MMO); + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) + .addFrameIndex(FI) + .add(predOps(ARMCC::AL)) + .addMemOperand(MMO); MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI); @@ -1162,10 +1217,10 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, break; case 64: if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) { - MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) - .addFrameIndex(FI)) - .addMemOperand(MMO); + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) + .addFrameIndex(FI) + .add(predOps(ARMCC::AL)) + .addMemOperand(MMO); MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI); @@ -1248,7 +1303,7 @@ void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const { LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD : isThumb1 ? ARM::tLDMIA_UPD : ARM::LDMIA_UPD)) - .addOperand(MI->getOperand(1)); + .add(MI->getOperand(1)); } else { LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA)); } @@ -1257,17 +1312,17 @@ void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const { STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD : isThumb1 ? ARM::tSTMIA_UPD : ARM::STMIA_UPD)) - .addOperand(MI->getOperand(0)); + .add(MI->getOperand(0)); } else { STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA)); } - AddDefaultPred(LDM.addOperand(MI->getOperand(3))); - AddDefaultPred(STM.addOperand(MI->getOperand(2))); + LDM.add(MI->getOperand(3)).add(predOps(ARMCC::AL)); + STM.add(MI->getOperand(2)).add(predOps(ARMCC::AL)); // Sort the scratch registers into ascending order. const TargetRegisterInfo &TRI = getRegisterInfo(); - llvm::SmallVector<unsigned, 6> ScratchRegs; + SmallVector<unsigned, 6> ScratchRegs; for(unsigned I = 5; I < MI->getNumOperands(); ++I) ScratchRegs.push_back(MI->getOperand(I).getReg()); std::sort(ScratchRegs.begin(), ScratchRegs.end(), @@ -1285,7 +1340,6 @@ void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const { BB->erase(MI); } - bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) { assert(getSubtarget().getTargetTriple().isOSBinFormatMachO() && @@ -1346,7 +1400,7 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { MI.setDesc(get(ARM::VMOVD)); MI.getOperand(0).setReg(DstRegD); MI.getOperand(1).setReg(SrcRegD); - AddDefaultPred(MIB); + MIB.add(predOps(ARMCC::AL)); // We are now reading SrcRegD instead of SrcRegS. This may upset the // register scavenger and machine verifier, so we need to indicate that we @@ -1735,25 +1789,17 @@ isProfitableToIfCvt(MachineBasicBlock &MBB, } } } - - // Attempt to estimate the relative costs of predication versus branching. - // Here we scale up each component of UnpredCost to avoid precision issue when - // scaling NumCycles by Probability. - const unsigned ScalingUpFactor = 1024; - unsigned UnpredCost = Probability.scale(NumCycles * ScalingUpFactor); - UnpredCost += ScalingUpFactor; // The branch itself - UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10; - - return (NumCycles + ExtraPredCycles) * ScalingUpFactor <= UnpredCost; + return isProfitableToIfCvt(MBB, NumCycles, ExtraPredCycles, + MBB, 0, 0, Probability); } bool ARMBaseInstrInfo:: -isProfitableToIfCvt(MachineBasicBlock &TMBB, +isProfitableToIfCvt(MachineBasicBlock &, unsigned TCycles, unsigned TExtra, - MachineBasicBlock &FMBB, + MachineBasicBlock &, unsigned FCycles, unsigned FExtra, BranchProbability Probability) const { - if (!TCycles || !FCycles) + if (!TCycles) return false; // Attempt to estimate the relative costs of predication versus branching. @@ -1793,7 +1839,6 @@ ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI, return (ARMCC::CondCodes)MI.getOperand(PIdx).getImm(); } - unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) { if (Opc == ARM::B) return ARM::Bcc; @@ -1920,25 +1965,25 @@ ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI, const MCInstrDesc &DefDesc = DefMI->getDesc(); for (unsigned i = 1, e = DefDesc.getNumOperands(); i != e && !DefDesc.OpInfo[i].isPredicate(); ++i) - NewMI.addOperand(DefMI->getOperand(i)); + NewMI.add(DefMI->getOperand(i)); unsigned CondCode = MI.getOperand(3).getImm(); if (Invert) NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode))); else NewMI.addImm(CondCode); - NewMI.addOperand(MI.getOperand(4)); + NewMI.add(MI.getOperand(4)); // DefMI is not the -S version that sets CPSR, so add an optional %noreg. if (NewMI->hasOptionalDef()) - AddDefaultCC(NewMI); + NewMI.add(condCodeOp()); // The output register value when the predicate is false is an implicit // register operand tied to the first def. // The tie makes the register allocator ensure the FalseReg is allocated the // same register as operand 0. FalseReg.setImplicit(); - NewMI.addOperand(FalseReg); + NewMI.add(FalseReg); NewMI->tieOperands(0, NewMI->getNumOperands() - 1); // Update SeenMIs set: register newly created MI and erase removed DefMI. @@ -1983,6 +2028,16 @@ static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = { {ARM::RSBSrsi, ARM::RSBrsi}, {ARM::RSBSrsr, ARM::RSBrsr}, + {ARM::tADDSi3, ARM::tADDi3}, + {ARM::tADDSi8, ARM::tADDi8}, + {ARM::tADDSrr, ARM::tADDrr}, + {ARM::tADCS, ARM::tADC}, + + {ARM::tSUBSi3, ARM::tSUBi3}, + {ARM::tSUBSi8, ARM::tSUBi8}, + {ARM::tSUBSrr, ARM::tSUBrr}, + {ARM::tSBCS, ARM::tSBC}, + {ARM::t2ADDSri, ARM::t2ADDri}, {ARM::t2ADDSrr, ARM::t2ADDrr}, {ARM::t2ADDSrs, ARM::t2ADDrs}, @@ -2011,9 +2066,10 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, unsigned MIFlags) { if (NumBytes == 0 && DestReg != BaseReg) { BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg) - .addReg(BaseReg, RegState::Kill) - .addImm((unsigned)Pred).addReg(PredReg).addReg(0) - .setMIFlags(MIFlags); + .addReg(BaseReg, RegState::Kill) + .add(predOps(Pred, PredReg)) + .add(condCodeOp()) + .setMIFlags(MIFlags); return; } @@ -2033,9 +2089,11 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, // Build the new ADD / SUB. unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri; BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) - .addReg(BaseReg, RegState::Kill).addImm(ThisVal) - .addImm((unsigned)Pred).addReg(PredReg).addReg(0) - .setMIFlags(MIFlags); + .addReg(BaseReg, RegState::Kill) + .addImm(ThisVal) + .add(predOps(Pred, PredReg)) + .add(condCodeOp()) + .setMIFlags(MIFlags); BaseReg = DestReg; } } @@ -2154,7 +2212,7 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, // Add the complete list back in. MachineInstrBuilder MIB(MF, &*MI); for (int i = RegList.size() - 1; i >= 0; --i) - MIB.addOperand(RegList[i]); + MIB.add(RegList[i]); return true; } @@ -2213,33 +2271,30 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned NumBits = 0; unsigned Scale = 1; switch (AddrMode) { - case ARMII::AddrMode_i12: { + case ARMII::AddrMode_i12: ImmIdx = FrameRegIdx + 1; InstrOffs = MI.getOperand(ImmIdx).getImm(); NumBits = 12; break; - } - case ARMII::AddrMode2: { + case ARMII::AddrMode2: ImmIdx = FrameRegIdx+2; InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm()); if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) InstrOffs *= -1; NumBits = 12; break; - } - case ARMII::AddrMode3: { + case ARMII::AddrMode3: ImmIdx = FrameRegIdx+2; InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm()); if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) InstrOffs *= -1; NumBits = 8; break; - } case ARMII::AddrMode4: case ARMII::AddrMode6: // Can't fold any offset even if it's zero. return false; - case ARMII::AddrMode5: { + case ARMII::AddrMode5: ImmIdx = FrameRegIdx+1; InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm()); if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) @@ -2247,7 +2302,6 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, NumBits = 8; Scale = 4; break; - } default: llvm_unreachable("Unsupported addressing mode!"); } @@ -2401,6 +2455,63 @@ inline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg, return false; } +static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) { + switch (MI->getOpcode()) { + default: return false; + case ARM::tLSLri: + case ARM::tLSRri: + case ARM::tLSLrr: + case ARM::tLSRrr: + case ARM::tSUBrr: + case ARM::tADDrr: + case ARM::tADDi3: + case ARM::tADDi8: + case ARM::tSUBi3: + case ARM::tSUBi8: + case ARM::tMUL: + IsThumb1 = true; + LLVM_FALLTHROUGH; + case ARM::RSBrr: + case ARM::RSBri: + case ARM::RSCrr: + case ARM::RSCri: + case ARM::ADDrr: + case ARM::ADDri: + case ARM::ADCrr: + case ARM::ADCri: + case ARM::SUBrr: + case ARM::SUBri: + case ARM::SBCrr: + case ARM::SBCri: + case ARM::t2RSBri: + case ARM::t2ADDrr: + case ARM::t2ADDri: + case ARM::t2ADCrr: + case ARM::t2ADCri: + case ARM::t2SUBrr: + case ARM::t2SUBri: + case ARM::t2SBCrr: + case ARM::t2SBCri: + case ARM::ANDrr: + case ARM::ANDri: + case ARM::t2ANDrr: + case ARM::t2ANDri: + case ARM::ORRrr: + case ARM::ORRri: + case ARM::t2ORRrr: + case ARM::t2ORRri: + case ARM::EORrr: + case ARM::EORri: + case ARM::t2EORrr: + case ARM::t2EORri: + case ARM::t2LSRri: + case ARM::t2LSRrr: + case ARM::t2LSLri: + case ARM::t2LSLrr: + return true; + } +} + /// optimizeCompareInstr - Convert the instruction supplying the argument to the /// comparison into one that sets the zero bit in the flags register; /// Remove a redundant Compare instruction if an earlier instruction can set the @@ -2462,6 +2573,41 @@ bool ARMBaseInstrInfo::optimizeCompareInstr( return false; } + bool IsThumb1 = false; + if (MI && !isOptimizeCompareCandidate(MI, IsThumb1)) + return false; + + // We also want to do this peephole for cases like this: if (a*b == 0), + // and optimise away the CMP instruction from the generated code sequence: + // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values + // resulting from the select instruction, but these MOVS instructions for + // Thumb1 (V6M) are flag setting and are thus preventing this optimisation. + // However, if we only have MOVS instructions in between the CMP and the + // other instruction (the MULS in this example), then the CPSR is dead so we + // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this + // reordering and then continue the analysis hoping we can eliminate the + // CMP. This peephole works on the vregs, so is still in SSA form. As a + // consequence, the movs won't redefine/kill the MUL operands which would + // make this reordering illegal. + if (MI && IsThumb1) { + --I; + bool CanReorder = true; + const bool HasStmts = I != E; + for (; I != E; --I) { + if (I->getOpcode() != ARM::tMOVi8) { + CanReorder = false; + break; + } + } + if (HasStmts && CanReorder) { + MI = MI->removeFromParent(); + E = CmpInstr; + CmpInstr.getParent()->insert(E, MI); + } + I = CmpInstr; + E = MI; + } + // Check that CPSR isn't set between the comparison instruction and the one we // want to change. At the same time, search for Sub. const TargetRegisterInfo *TRI = &getRegisterInfo(); @@ -2497,183 +2643,128 @@ bool ARMBaseInstrInfo::optimizeCompareInstr( if (isPredicated(*MI)) return false; - bool IsThumb1 = false; - switch (MI->getOpcode()) { - default: break; - case ARM::tLSLri: - case ARM::tLSRri: - case ARM::tLSLrr: - case ARM::tLSRrr: - case ARM::tSUBrr: - case ARM::tADDrr: - case ARM::tADDi3: - case ARM::tADDi8: - case ARM::tSUBi3: - case ARM::tSUBi8: - IsThumb1 = true; - LLVM_FALLTHROUGH; - case ARM::RSBrr: - case ARM::RSBri: - case ARM::RSCrr: - case ARM::RSCri: - case ARM::ADDrr: - case ARM::ADDri: - case ARM::ADCrr: - case ARM::ADCri: - case ARM::SUBrr: - case ARM::SUBri: - case ARM::SBCrr: - case ARM::SBCri: - case ARM::t2RSBri: - case ARM::t2ADDrr: - case ARM::t2ADDri: - case ARM::t2ADCrr: - case ARM::t2ADCri: - case ARM::t2SUBrr: - case ARM::t2SUBri: - case ARM::t2SBCrr: - case ARM::t2SBCri: - case ARM::ANDrr: - case ARM::ANDri: - case ARM::t2ANDrr: - case ARM::t2ANDri: - case ARM::ORRrr: - case ARM::ORRri: - case ARM::t2ORRrr: - case ARM::t2ORRri: - case ARM::EORrr: - case ARM::EORri: - case ARM::t2EORrr: - case ARM::t2EORri: - case ARM::t2LSRri: - case ARM::t2LSRrr: - case ARM::t2LSLri: - case ARM::t2LSLrr: { - // Scan forward for the use of CPSR - // When checking against MI: if it's a conditional code that requires - // checking of the V bit or C bit, then this is not safe to do. - // It is safe to remove CmpInstr if CPSR is redefined or killed. - // If we are done with the basic block, we need to check whether CPSR is - // live-out. - SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4> - OperandsToUpdate; - bool isSafe = false; - I = CmpInstr; - E = CmpInstr.getParent()->end(); - while (!isSafe && ++I != E) { - const MachineInstr &Instr = *I; - for (unsigned IO = 0, EO = Instr.getNumOperands(); - !isSafe && IO != EO; ++IO) { - const MachineOperand &MO = Instr.getOperand(IO); - if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) { - isSafe = true; - break; - } - if (!MO.isReg() || MO.getReg() != ARM::CPSR) - continue; - if (MO.isDef()) { - isSafe = true; - break; - } - // Condition code is after the operand before CPSR except for VSELs. - ARMCC::CondCodes CC; - bool IsInstrVSel = true; - switch (Instr.getOpcode()) { - default: - IsInstrVSel = false; - CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm(); - break; - case ARM::VSELEQD: - case ARM::VSELEQS: - CC = ARMCC::EQ; - break; - case ARM::VSELGTD: - case ARM::VSELGTS: - CC = ARMCC::GT; - break; - case ARM::VSELGED: - case ARM::VSELGES: - CC = ARMCC::GE; - break; - case ARM::VSELVSS: - case ARM::VSELVSD: - CC = ARMCC::VS; - break; - } + // Scan forward for the use of CPSR + // When checking against MI: if it's a conditional code that requires + // checking of the V bit or C bit, then this is not safe to do. + // It is safe to remove CmpInstr if CPSR is redefined or killed. + // If we are done with the basic block, we need to check whether CPSR is + // live-out. + SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4> + OperandsToUpdate; + bool isSafe = false; + I = CmpInstr; + E = CmpInstr.getParent()->end(); + while (!isSafe && ++I != E) { + const MachineInstr &Instr = *I; + for (unsigned IO = 0, EO = Instr.getNumOperands(); + !isSafe && IO != EO; ++IO) { + const MachineOperand &MO = Instr.getOperand(IO); + if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) { + isSafe = true; + break; + } + if (!MO.isReg() || MO.getReg() != ARM::CPSR) + continue; + if (MO.isDef()) { + isSafe = true; + break; + } + // Condition code is after the operand before CPSR except for VSELs. + ARMCC::CondCodes CC; + bool IsInstrVSel = true; + switch (Instr.getOpcode()) { + default: + IsInstrVSel = false; + CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm(); + break; + case ARM::VSELEQD: + case ARM::VSELEQS: + CC = ARMCC::EQ; + break; + case ARM::VSELGTD: + case ARM::VSELGTS: + CC = ARMCC::GT; + break; + case ARM::VSELGED: + case ARM::VSELGES: + CC = ARMCC::GE; + break; + case ARM::VSELVSS: + case ARM::VSELVSD: + CC = ARMCC::VS; + break; + } - if (Sub) { - ARMCC::CondCodes NewCC = getSwappedCondition(CC); - if (NewCC == ARMCC::AL) - return false; - // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based - // on CMP needs to be updated to be based on SUB. - // Push the condition code operands to OperandsToUpdate. - // If it is safe to remove CmpInstr, the condition code of these - // operands will be modified. - if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 && - Sub->getOperand(2).getReg() == SrcReg) { - // VSel doesn't support condition code update. - if (IsInstrVSel) - return false; - OperandsToUpdate.push_back( - std::make_pair(&((*I).getOperand(IO - 1)), NewCC)); - } - } else { - // No Sub, so this is x = <op> y, z; cmp x, 0. - switch (CC) { - case ARMCC::EQ: // Z - case ARMCC::NE: // Z - case ARMCC::MI: // N - case ARMCC::PL: // N - case ARMCC::AL: // none - // CPSR can be used multiple times, we should continue. - break; - case ARMCC::HS: // C - case ARMCC::LO: // C - case ARMCC::VS: // V - case ARMCC::VC: // V - case ARMCC::HI: // C Z - case ARMCC::LS: // C Z - case ARMCC::GE: // N V - case ARMCC::LT: // N V - case ARMCC::GT: // Z N V - case ARMCC::LE: // Z N V - // The instruction uses the V bit or C bit which is not safe. + if (Sub) { + ARMCC::CondCodes NewCC = getSwappedCondition(CC); + if (NewCC == ARMCC::AL) + return false; + // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based + // on CMP needs to be updated to be based on SUB. + // Push the condition code operands to OperandsToUpdate. + // If it is safe to remove CmpInstr, the condition code of these + // operands will be modified. + if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 && + Sub->getOperand(2).getReg() == SrcReg) { + // VSel doesn't support condition code update. + if (IsInstrVSel) return false; - } + OperandsToUpdate.push_back( + std::make_pair(&((*I).getOperand(IO - 1)), NewCC)); } - } - } - - // If CPSR is not killed nor re-defined, we should check whether it is - // live-out. If it is live-out, do not optimize. - if (!isSafe) { - MachineBasicBlock *MBB = CmpInstr.getParent(); - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) - if ((*SI)->isLiveIn(ARM::CPSR)) + } else { + // No Sub, so this is x = <op> y, z; cmp x, 0. + switch (CC) { + case ARMCC::EQ: // Z + case ARMCC::NE: // Z + case ARMCC::MI: // N + case ARMCC::PL: // N + case ARMCC::AL: // none + // CPSR can be used multiple times, we should continue. + break; + case ARMCC::HS: // C + case ARMCC::LO: // C + case ARMCC::VS: // V + case ARMCC::VC: // V + case ARMCC::HI: // C Z + case ARMCC::LS: // C Z + case ARMCC::GE: // N V + case ARMCC::LT: // N V + case ARMCC::GT: // Z N V + case ARMCC::LE: // Z N V + // The instruction uses the V bit or C bit which is not safe. return false; + } + } } + } - // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always - // set CPSR so this is represented as an explicit output) - if (!IsThumb1) { - MI->getOperand(5).setReg(ARM::CPSR); - MI->getOperand(5).setIsDef(true); - } - assert(!isPredicated(*MI) && "Can't use flags from predicated instruction"); - CmpInstr.eraseFromParent(); - - // Modify the condition code of operands in OperandsToUpdate. - // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to - // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc. - for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++) - OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second); - return true; + // If CPSR is not killed nor re-defined, we should check whether it is + // live-out. If it is live-out, do not optimize. + if (!isSafe) { + MachineBasicBlock *MBB = CmpInstr.getParent(); + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) + if ((*SI)->isLiveIn(ARM::CPSR)) + return false; } + + // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always + // set CPSR so this is represented as an explicit output) + if (!IsThumb1) { + MI->getOperand(5).setReg(ARM::CPSR); + MI->getOperand(5).setIsDef(true); } - - return false; + assert(!isPredicated(*MI) && "Can't use flags from predicated instruction"); + CmpInstr.eraseFromParent(); + + // Modify the condition code of operands in OperandsToUpdate. + // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to + // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc. + for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++) + OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second); + + return true; } bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, @@ -2728,7 +2819,7 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, switch (UseOpc) { default: break; case ARM::ADDrr: - case ARM::SUBrr: { + case ARM::SUBrr: if (UseOpc == ARM::SUBrr && Commute) return false; @@ -2744,9 +2835,8 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal); SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal); break; - } case ARM::ORRrr: - case ARM::EORrr: { + case ARM::EORrr: if (!ARM_AM::isSOImmTwoPartVal(ImmVal)) return false; SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal); @@ -2757,9 +2847,8 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, case ARM::EORrr: NewUseOpc = ARM::EORri; break; } break; - } case ARM::t2ADDrr: - case ARM::t2SUBrr: { + case ARM::t2SUBrr: if (UseOpc == ARM::t2SUBrr && Commute) return false; @@ -2775,9 +2864,8 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal); SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal); break; - } case ARM::t2ORRrr: - case ARM::t2EORrr: { + case ARM::t2EORrr: if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal)) return false; SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal); @@ -2789,7 +2877,6 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, } break; } - } } } @@ -2797,11 +2884,12 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg1 = UseMI.getOperand(OpIdx).getReg(); bool isKill = UseMI.getOperand(OpIdx).isKill(); unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg)); - AddDefaultCC( - AddDefaultPred(BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), - get(NewUseOpc), NewReg) - .addReg(Reg1, getKillRegState(isKill)) - .addImm(SOImmValV1))); + BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc), + NewReg) + .addReg(Reg1, getKillRegState(isKill)) + .addImm(SOImmValV1) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); UseMI.setDesc(get(NewUseOpc)); UseMI.getOperand(1).setReg(NewReg); UseMI.getOperand(1).setIsKill(); @@ -3413,7 +3501,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::t2LDMDB: case ARM::t2LDMIA_UPD: case ARM::t2LDMDB_UPD: - LdmBypass = 1; + LdmBypass = true; DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign); break; } @@ -3888,12 +3976,11 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::t2LDRs: case ARM::t2LDRBs: case ARM::t2LDRHs: - case ARM::t2LDRSHs: { + case ARM::t2LDRSHs: // Thumb2 mode: lsl 0-3 only. Latency -= 2; break; } - } } if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) @@ -4180,14 +4267,14 @@ void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI, MachineMemOperand::MOInvariant; MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand( MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, 4); - MIB.addMemOperand(MMO); - AddDefaultPred(MIB); + MIB.addMemOperand(MMO).add(predOps(ARMCC::AL)); } MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg); - MIB.addReg(Reg, RegState::Kill).addImm(0); - MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); - AddDefaultPred(MIB); + MIB.addReg(Reg, RegState::Kill) + .addImm(0) + .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()) + .add(predOps(ARMCC::AL)); } bool @@ -4222,6 +4309,7 @@ enum ARMExeDomain { ExeVFP = 1, ExeNEON = 2 }; + // // Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h // @@ -4345,8 +4433,10 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI, // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits) MI.setDesc(get(ARM::VORRd)); - AddDefaultPred( - MIB.addReg(DstReg, RegState::Define).addReg(SrcReg).addReg(SrcReg)); + MIB.addReg(DstReg, RegState::Define) + .addReg(SrcReg) + .addReg(SrcReg) + .add(predOps(ARMCC::AL)); break; case ARM::VMOVRS: if (Domain != ExeNEON) @@ -4366,9 +4456,10 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI, // Note that DSrc has been widened and the other lane may be undef, which // contaminates the entire register. MI.setDesc(get(ARM::VGETLNi32)); - AddDefaultPred(MIB.addReg(DstReg, RegState::Define) - .addReg(DReg, RegState::Undef) - .addImm(Lane)); + MIB.addReg(DstReg, RegState::Define) + .addReg(DReg, RegState::Undef) + .addImm(Lane) + .add(predOps(ARMCC::AL)); // The old source should be an implicit use, otherwise we might think it // was dead before here. @@ -4398,8 +4489,8 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI, MIB.addReg(DReg, RegState::Define) .addReg(DReg, getUndefRegState(!MI.readsRegister(DReg, TRI))) .addReg(SrcReg) - .addImm(Lane); - AddDefaultPred(MIB); + .addImm(Lane) + .add(predOps(ARMCC::AL)); // The narrower destination must be marked as set to keep previous chains // in place. @@ -4433,8 +4524,8 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI, MI.setDesc(get(ARM::VDUPLN32d)); MIB.addReg(DDst, RegState::Define) .addReg(DDst, getUndefRegState(!MI.readsRegister(DDst, TRI))) - .addImm(SrcLane); - AddDefaultPred(MIB); + .addImm(SrcLane) + .add(predOps(ARMCC::AL)); // Neither the source or the destination are naturally represented any // more, so add them in manually. @@ -4470,10 +4561,9 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI, CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst; CurUndef = !MI.readsRegister(CurReg, TRI); - NewMIB.addReg(CurReg, getUndefRegState(CurUndef)); - - NewMIB.addImm(1); - AddDefaultPred(NewMIB); + NewMIB.addReg(CurReg, getUndefRegState(CurUndef)) + .addImm(1) + .add(predOps(ARMCC::AL)); if (SrcLane == DstLane) NewMIB.addReg(SrcReg, RegState::Implicit); @@ -4489,10 +4579,9 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI, CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst; CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI); - MIB.addReg(CurReg, getUndefRegState(CurUndef)); - - MIB.addImm(1); - AddDefaultPred(MIB); + MIB.addReg(CurReg, getUndefRegState(CurUndef)) + .addImm(1) + .add(predOps(ARMCC::AL)); if (SrcLane != DstLane) MIB.addReg(SrcReg, RegState::Implicit); @@ -4505,7 +4594,6 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI, break; } } - } //===----------------------------------------------------------------------===// @@ -4613,9 +4701,9 @@ void ARMBaseInstrInfo::breakPartialRegDependency( // Insert the dependency-breaking FCONSTD before MI. // 96 is the encoding of 0.5, but the actual value doesn't matter here. - AddDefaultPred( - BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg) - .addImm(96)); + BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg) + .addImm(96) + .add(predOps(ARMCC::AL)); MI.addRegisterKilled(DReg, TRI, true); } diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index b01d5c8ec85f..23777b821f9f 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -17,16 +17,21 @@ #include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/Support/CodeGen.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/Target/TargetInstrInfo.h" +#include <array> +#include <cstdint> #define GET_INSTRINFO_HEADER #include "ARMGenInstrInfo.inc" namespace llvm { - class ARMSubtarget; - class ARMBaseRegisterInfo; + +class ARMBaseRegisterInfo; +class ARMSubtarget; class ARMBaseInstrInfo : public ARMGenInstrInfo { const ARMSubtarget &Subtarget; @@ -106,7 +111,7 @@ public: // Return the non-pre/post incrementing version of 'Opc'. Return 0 // if there is not such an opcode. - virtual unsigned getUnindexedOpcode(unsigned Opc) const =0; + virtual unsigned getUnindexedOpcode(unsigned Opc) const = 0; MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI, MachineInstr &MI, @@ -156,7 +161,7 @@ public: bool DefinesPredicate(MachineInstr &MI, std::vector<MachineOperand> &Pred) const override; - bool isPredicable(MachineInstr &MI) const override; + bool isPredicable(const MachineInstr &MI) const override; /// GetInstSize - Returns the size of the specified MachineInstr. /// @@ -401,25 +406,28 @@ public: bool isSwiftFastImmShift(const MachineInstr *MI) const; }; -static inline -const MachineInstrBuilder &AddDefaultPred(const MachineInstrBuilder &MIB) { - return MIB.addImm((int64_t)ARMCC::AL).addReg(0); +/// Get the operands corresponding to the given \p Pred value. By default, the +/// predicate register is assumed to be 0 (no register), but you can pass in a +/// \p PredReg if that is not the case. +static inline std::array<MachineOperand, 2> predOps(ARMCC::CondCodes Pred, + unsigned PredReg = 0) { + return {{MachineOperand::CreateImm(static_cast<int64_t>(Pred)), + MachineOperand::CreateReg(PredReg, false)}}; } -static inline -const MachineInstrBuilder &AddDefaultCC(const MachineInstrBuilder &MIB) { - return MIB.addReg(0); +/// Get the operand corresponding to the conditional code result. By default, +/// this is 0 (no register). +static inline MachineOperand condCodeOp(unsigned CCReg = 0) { + return MachineOperand::CreateReg(CCReg, false); } -static inline -const MachineInstrBuilder &AddDefaultT1CC(const MachineInstrBuilder &MIB, - bool isDead = false) { - return MIB.addReg(ARM::CPSR, getDefRegState(true) | getDeadRegState(isDead)); -} - -static inline -const MachineInstrBuilder &AddNoT1CC(const MachineInstrBuilder &MIB) { - return MIB.addReg(0); +/// Get the operand corresponding to the conditional code result for Thumb1. +/// This operand will always refer to CPSR and it will have the Define flag set. +/// You can optionally set the Dead flag by means of \p isDead. +static inline MachineOperand t1CondCodeOp(bool isDead = false) { + return MachineOperand::CreateReg(ARM::CPSR, + /*Define*/ true, /*Implicit*/ false, + /*Kill*/ false, isDead); } static inline @@ -517,6 +525,6 @@ bool rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, int &Offset, const ARMBaseInstrInfo &TII); -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp index d995c631dd1c..70a44eaaceb8 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -11,32 +11,42 @@ // //===----------------------------------------------------------------------===// -#include "ARMBaseRegisterInfo.h" #include "ARM.h" #include "ARMBaseInstrInfo.h" +#include "ARMBaseRegisterInfo.h" #include "ARMFrameLowering.h" #include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/IR/Attributes.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" -#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Type.h" +#include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <cassert> +#include <utility> #define DEBUG_TYPE "arm-register-info" @@ -46,7 +56,7 @@ using namespace llvm; ARMBaseRegisterInfo::ARMBaseRegisterInfo() - : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), BasePtr(ARM::R6) {} + : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC) {} static unsigned getFramePointerReg(const ARMSubtarget &STI) { return STI.useR7AsFramePointer() ? ARM::R7 : ARM::R11; @@ -140,7 +150,6 @@ ARMBaseRegisterInfo::getSjLjDispatchPreservedMask(const MachineFunction &MF) con return CSR_FPRegs_RegMask; } - const uint32_t * ARMBaseRegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const { @@ -425,10 +434,11 @@ void ARMBaseRegisterInfo::emitLoadConstPool( unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4); BuildMI(MBB, MBBI, dl, TII.get(ARM::LDRcp)) - .addReg(DestReg, getDefRegState(true), SubIdx) - .addConstantPoolIndex(Idx) - .addImm(0).addImm(Pred).addReg(PredReg) - .setMIFlags(MIFlags); + .addReg(DestReg, getDefRegState(true), SubIdx) + .addConstantPoolIndex(Idx) + .addImm(0) + .add(predOps(Pred, PredReg)) + .setMIFlags(MIFlags); } bool ARMBaseRegisterInfo:: @@ -474,26 +484,23 @@ getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const { Scale = 4; break; } - case ARMII::AddrMode2: { + case ARMII::AddrMode2: ImmIdx = Idx+2; InstrOffs = ARM_AM::getAM2Offset(MI->getOperand(ImmIdx).getImm()); if (ARM_AM::getAM2Op(MI->getOperand(ImmIdx).getImm()) == ARM_AM::sub) InstrOffs = -InstrOffs; break; - } - case ARMII::AddrMode3: { + case ARMII::AddrMode3: ImmIdx = Idx+2; InstrOffs = ARM_AM::getAM3Offset(MI->getOperand(ImmIdx).getImm()); if (ARM_AM::getAM3Op(MI->getOperand(ImmIdx).getImm()) == ARM_AM::sub) InstrOffs = -InstrOffs; break; - } - case ARMII::AddrModeT1_s: { + case ARMII::AddrModeT1_s: ImmIdx = Idx+1; InstrOffs = MI->getOperand(ImmIdx).getImm(); Scale = 4; break; - } default: llvm_unreachable("Unsupported addressing mode!"); } @@ -609,7 +616,7 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB, .addFrameIndex(FrameIdx).addImm(Offset); if (!AFI->isThumb1OnlyFunction()) - AddDefaultCC(AddDefaultPred(MIB)); + MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); } void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, @@ -636,7 +643,7 @@ void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, assert(AFI->isThumb2Function()); Done = rewriteT2FrameIndex(MI, i, BaseReg, Off, TII); } - assert (Done && "Unable to resolve frame index!"); + assert(Done && "Unable to resolve frame index!"); (void)Done; } diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h index 330e1535e863..2e91d9d4be24 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -15,24 +15,33 @@ #define LLVM_LIB_TARGET_ARM_ARMBASEREGISTERINFO_H #include "MCTargetDesc/ARMBaseInfo.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Target/TargetRegisterInfo.h" +#include <cstdint> #define GET_REGINFO_HEADER #include "ARMGenRegisterInfo.inc" namespace llvm { + /// Register allocation hints. namespace ARMRI { + enum { RegPairOdd = 1, RegPairEven = 2 }; -} + +} // end namespace ARMRI /// isARMArea1Register - Returns true if the register is a low register (r0-r7) /// or a stack/pc register that we should push/pop. static inline bool isARMArea1Register(unsigned Reg, bool isIOS) { using namespace ARM; + switch (Reg) { case R0: case R1: case R2: case R3: case R4: case R5: case R6: case R7: @@ -48,6 +57,7 @@ static inline bool isARMArea1Register(unsigned Reg, bool isIOS) { static inline bool isARMArea2Register(unsigned Reg, bool isIOS) { using namespace ARM; + switch (Reg) { case R8: case R9: case R10: case R11: case R12: // iOS has this second area. @@ -59,6 +69,7 @@ static inline bool isARMArea2Register(unsigned Reg, bool isIOS) { static inline bool isARMArea3Register(unsigned Reg, bool isIOS) { using namespace ARM; + switch (Reg) { case D15: case D14: case D13: case D12: case D11: case D10: case D9: case D8: @@ -87,7 +98,7 @@ protected: /// BasePtr - ARM physical register used as a base ptr in complex stack /// frames. I.e., when we need a 3rd base, not just SP and FP, due to /// variable size stack objects. - unsigned BasePtr; + unsigned BasePtr = ARM::R6; // Can be only subclassed. explicit ARMBaseRegisterInfo(); @@ -198,4 +209,4 @@ public: } // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_ARM_ARMBASEREGISTERINFO_H diff --git a/contrib/llvm/lib/Target/ARM/ARMBasicBlockInfo.h b/contrib/llvm/lib/Target/ARM/ARMBasicBlockInfo.h index 780544f865df..e0cb0aa676a6 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBasicBlockInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMBasicBlockInfo.h @@ -14,9 +14,9 @@ #ifndef LLVM_LIB_TARGET_ARM_ARMBASICBLOCKINFO_H #define LLVM_LIB_TARGET_ARM_ARMBASICBLOCKINFO_H -#include "ARM.h" -#include "ARMMachineFunctionInfo.h" -using namespace llvm; +#include "llvm/Support/MathExtras.h" +#include <algorithm> +#include <cstdint> namespace llvm { @@ -44,31 +44,30 @@ struct BasicBlockInfo { /// /// Because worst case padding is used, the computed offset of an aligned /// block may not actually be aligned. - unsigned Offset; + unsigned Offset = 0; /// Size - Size of the basic block in bytes. If the block contains /// inline assembly, this is a worst case estimate. /// /// The size does not include any alignment padding whether from the /// beginning of the block, or from an aligned jump table at the end. - unsigned Size; + unsigned Size = 0; /// KnownBits - The number of low bits in Offset that are known to be /// exact. The remaining bits of Offset are an upper bound. - uint8_t KnownBits; + uint8_t KnownBits = 0; /// Unalign - When non-zero, the block contains instructions (inline asm) /// of unknown size. The real size may be smaller than Size bytes by a /// multiple of 1 << Unalign. - uint8_t Unalign; + uint8_t Unalign = 0; /// PostAlign - When non-zero, the block terminator contains a .align /// directive, so the end of the block is aligned to 1 << PostAlign /// bytes. - uint8_t PostAlign; + uint8_t PostAlign = 0; - BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0), - PostAlign(0) {} + BasicBlockInfo() = default; /// Compute the number of known offset bits internally to this block. /// This number should be used to predict worst case padding when @@ -107,4 +106,4 @@ struct BasicBlockInfo { } // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_ARM_ARMBASICBLOCKINFO_H diff --git a/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp index 52c95b6244ac..94b317a8f986 100644 --- a/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp @@ -17,7 +17,9 @@ #include "ARMBaseInstrInfo.h" #include "ARMISelLowering.h" +#include "ARMSubtarget.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -30,25 +32,47 @@ using namespace llvm; ARMCallLowering::ARMCallLowering(const ARMTargetLowering &TLI) : CallLowering(&TLI) {} -static bool isSupportedType(const DataLayout DL, const ARMTargetLowering &TLI, +static bool isSupportedType(const DataLayout &DL, const ARMTargetLowering &TLI, Type *T) { - EVT VT = TLI.getValueType(DL, T); - if (!VT.isSimple() || !VT.isInteger() || VT.isVector()) + EVT VT = TLI.getValueType(DL, T, true); + if (!VT.isSimple() || VT.isVector()) return false; unsigned VTSize = VT.getSimpleVT().getSizeInBits(); - return VTSize == 8 || VTSize == 16 || VTSize == 32; + + if (VTSize == 64) + // FIXME: Support i64 too + return VT.isFloatingPoint(); + + return VTSize == 1 || VTSize == 8 || VTSize == 16 || VTSize == 32; } namespace { -struct FuncReturnHandler : public CallLowering::ValueHandler { - FuncReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, - MachineInstrBuilder &MIB) - : ValueHandler(MIRBuilder, MRI), MIB(MIB) {} +/// Helper class for values going out through an ABI boundary (used for handling +/// function return values and call parameters). +struct OutgoingValueHandler : public CallLowering::ValueHandler { + OutgoingValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, + MachineInstrBuilder &MIB, CCAssignFn *AssignFn) + : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB), StackSize(0) {} unsigned getStackAddress(uint64_t Size, int64_t Offset, MachinePointerInfo &MPO) override { - llvm_unreachable("Don't know how to get a stack address yet"); + assert((Size == 1 || Size == 2 || Size == 4 || Size == 8) && + "Unsupported size"); + + LLT p0 = LLT::pointer(0, 32); + LLT s32 = LLT::scalar(32); + unsigned SPReg = MRI.createGenericVirtualRegister(p0); + MIRBuilder.buildCopy(SPReg, ARM::SP); + + unsigned OffsetReg = MRI.createGenericVirtualRegister(s32); + MIRBuilder.buildConstant(OffsetReg, Offset); + + unsigned AddrReg = MRI.createGenericVirtualRegister(p0); + MIRBuilder.buildGEP(AddrReg, SPReg, OffsetReg); + + MPO = MachinePointerInfo::getStack(MIRBuilder.getMF(), Offset); + return AddrReg; } void assignValueToReg(unsigned ValVReg, unsigned PhysReg, @@ -56,26 +80,92 @@ struct FuncReturnHandler : public CallLowering::ValueHandler { assert(VA.isRegLoc() && "Value shouldn't be assigned to reg"); assert(VA.getLocReg() == PhysReg && "Assigning to the wrong reg?"); - assert(VA.getValVT().getSizeInBits() <= 32 && "Unsupported value size"); - assert(VA.getLocVT().getSizeInBits() == 32 && "Unsupported location size"); - - assert(VA.getLocInfo() != CCValAssign::SExt && - VA.getLocInfo() != CCValAssign::ZExt && - "ABI extensions not supported yet"); + assert(VA.getValVT().getSizeInBits() <= 64 && "Unsupported value size"); + assert(VA.getLocVT().getSizeInBits() <= 64 && "Unsupported location size"); - MIRBuilder.buildCopy(PhysReg, ValVReg); + unsigned ExtReg = extendRegister(ValVReg, VA); + MIRBuilder.buildCopy(PhysReg, ExtReg); MIB.addUse(PhysReg, RegState::Implicit); } void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size, MachinePointerInfo &MPO, CCValAssign &VA) override { - llvm_unreachable("Don't know how to assign a value to an address yet"); + assert((Size == 1 || Size == 2 || Size == 4 || Size == 8) && + "Unsupported size"); + + unsigned ExtReg = extendRegister(ValVReg, VA); + auto MMO = MIRBuilder.getMF().getMachineMemOperand( + MPO, MachineMemOperand::MOStore, VA.getLocVT().getStoreSize(), + /* Alignment */ 0); + MIRBuilder.buildStore(ExtReg, Addr, *MMO); + } + + unsigned assignCustomValue(const CallLowering::ArgInfo &Arg, + ArrayRef<CCValAssign> VAs) override { + CCValAssign VA = VAs[0]; + assert(VA.needsCustom() && "Value doesn't need custom handling"); + assert(VA.getValVT() == MVT::f64 && "Unsupported type"); + + CCValAssign NextVA = VAs[1]; + assert(NextVA.needsCustom() && "Value doesn't need custom handling"); + assert(NextVA.getValVT() == MVT::f64 && "Unsupported type"); + + assert(VA.getValNo() == NextVA.getValNo() && + "Values belong to different arguments"); + + assert(VA.isRegLoc() && "Value should be in reg"); + assert(NextVA.isRegLoc() && "Value should be in reg"); + + unsigned NewRegs[] = {MRI.createGenericVirtualRegister(LLT::scalar(32)), + MRI.createGenericVirtualRegister(LLT::scalar(32))}; + MIRBuilder.buildExtract(NewRegs[0], Arg.Reg, 0); + MIRBuilder.buildExtract(NewRegs[1], Arg.Reg, 32); + + bool IsLittle = MIRBuilder.getMF().getSubtarget<ARMSubtarget>().isLittle(); + if (!IsLittle) + std::swap(NewRegs[0], NewRegs[1]); + + assignValueToReg(NewRegs[0], VA.getLocReg(), VA); + assignValueToReg(NewRegs[1], NextVA.getLocReg(), NextVA); + + return 1; + } + + bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, + const CallLowering::ArgInfo &Info, CCState &State) override { + if (AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State)) + return true; + + StackSize = + std::max(StackSize, static_cast<uint64_t>(State.getNextStackOffset())); + return false; } MachineInstrBuilder &MIB; + uint64_t StackSize; }; } // End anonymous namespace. +void ARMCallLowering::splitToValueTypes(const ArgInfo &OrigArg, + SmallVectorImpl<ArgInfo> &SplitArgs, + const DataLayout &DL, + MachineRegisterInfo &MRI) const { + const ARMTargetLowering &TLI = *getTLI<ARMTargetLowering>(); + LLVMContext &Ctx = OrigArg.Ty->getContext(); + + SmallVector<EVT, 4> SplitVTs; + SmallVector<uint64_t, 4> Offsets; + ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0); + + assert(SplitVTs.size() == 1 && "Unsupported type"); + + // Even if there is no splitting to do, we still want to replace the original + // type (e.g. pointer type -> integer). + SplitArgs.emplace_back(OrigArg.Reg, SplitVTs[0].getTypeForEVT(Ctx), + OrigArg.Flags, OrigArg.IsFixed); +} + /// Lower the return value for the already existing \p Ret. This assumes that /// \p MIRBuilder's insertion point is correct. bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder, @@ -93,21 +183,23 @@ bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder, if (!isSupportedType(DL, TLI, Val->getType())) return false; + SmallVector<ArgInfo, 4> SplitVTs; + ArgInfo RetInfo(VReg, Val->getType()); + setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F); + splitToValueTypes(RetInfo, SplitVTs, DL, MF.getRegInfo()); + CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(F.getCallingConv(), F.isVarArg()); - ArgInfo RetInfo(VReg, Val->getType()); - setArgFlags(RetInfo, AttributeSet::ReturnIndex, DL, F); - - FuncReturnHandler RetHandler(MIRBuilder, MF.getRegInfo(), Ret); - return handleAssignments(MIRBuilder, AssignFn, RetInfo, RetHandler); + OutgoingValueHandler RetHandler(MIRBuilder, MF.getRegInfo(), Ret, AssignFn); + return handleAssignments(MIRBuilder, SplitVTs, RetHandler); } bool ARMCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val, unsigned VReg) const { assert(!Val == !VReg && "Return value without a vreg"); - auto Ret = AddDefaultPred(MIRBuilder.buildInstrNoInsert(ARM::BX_RET)); + auto Ret = MIRBuilder.buildInstrNoInsert(ARM::BX_RET).add(predOps(ARMCC::AL)); if (!lowerReturnVal(MIRBuilder, Val, VReg, Ret)) return false; @@ -117,13 +209,17 @@ bool ARMCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, } namespace { -struct FormalArgHandler : public CallLowering::ValueHandler { - FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI) - : ValueHandler(MIRBuilder, MRI) {} +/// Helper class for values coming in through an ABI boundary (used for handling +/// formal arguments and call return values). +struct IncomingValueHandler : public CallLowering::ValueHandler { + IncomingValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, + CCAssignFn AssignFn) + : ValueHandler(MIRBuilder, MRI, AssignFn) {} unsigned getStackAddress(uint64_t Size, int64_t Offset, MachinePointerInfo &MPO) override { - assert(Size == 4 && "Unsupported size"); + assert((Size == 1 || Size == 2 || Size == 4 || Size == 8) && + "Unsupported size"); auto &MFI = MIRBuilder.getMF().getFrameInfo(); @@ -139,7 +235,17 @@ struct FormalArgHandler : public CallLowering::ValueHandler { void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size, MachinePointerInfo &MPO, CCValAssign &VA) override { - assert(Size == 4 && "Unsupported size"); + assert((Size == 1 || Size == 2 || Size == 4 || Size == 8) && + "Unsupported size"); + + if (VA.getLocInfo() == CCValAssign::SExt || + VA.getLocInfo() == CCValAssign::ZExt) { + // If the value is zero- or sign-extended, its size becomes 4 bytes, so + // that's what we should load. + Size = 4; + assert(MRI.getType(ValVReg).isScalar() && "Only scalars supported atm"); + MRI.setType(ValVReg, LLT::scalar(32)); + } auto MMO = MIRBuilder.getMF().getMachineMemOperand( MPO, MachineMemOperand::MOLoad, Size, /* Alignment */ 0); @@ -151,12 +257,60 @@ struct FormalArgHandler : public CallLowering::ValueHandler { assert(VA.isRegLoc() && "Value shouldn't be assigned to reg"); assert(VA.getLocReg() == PhysReg && "Assigning to the wrong reg?"); - assert(VA.getValVT().getSizeInBits() <= 32 && "Unsupported value size"); - assert(VA.getLocVT().getSizeInBits() == 32 && "Unsupported location size"); + assert(VA.getValVT().getSizeInBits() <= 64 && "Unsupported value size"); + assert(VA.getLocVT().getSizeInBits() <= 64 && "Unsupported location size"); - MIRBuilder.getMBB().addLiveIn(PhysReg); + // The necesary extensions are handled on the other side of the ABI + // boundary. + markPhysRegUsed(PhysReg); MIRBuilder.buildCopy(ValVReg, PhysReg); } + + unsigned assignCustomValue(const ARMCallLowering::ArgInfo &Arg, + ArrayRef<CCValAssign> VAs) override { + CCValAssign VA = VAs[0]; + assert(VA.needsCustom() && "Value doesn't need custom handling"); + assert(VA.getValVT() == MVT::f64 && "Unsupported type"); + + CCValAssign NextVA = VAs[1]; + assert(NextVA.needsCustom() && "Value doesn't need custom handling"); + assert(NextVA.getValVT() == MVT::f64 && "Unsupported type"); + + assert(VA.getValNo() == NextVA.getValNo() && + "Values belong to different arguments"); + + assert(VA.isRegLoc() && "Value should be in reg"); + assert(NextVA.isRegLoc() && "Value should be in reg"); + + unsigned NewRegs[] = {MRI.createGenericVirtualRegister(LLT::scalar(32)), + MRI.createGenericVirtualRegister(LLT::scalar(32))}; + + assignValueToReg(NewRegs[0], VA.getLocReg(), VA); + assignValueToReg(NewRegs[1], NextVA.getLocReg(), NextVA); + + bool IsLittle = MIRBuilder.getMF().getSubtarget<ARMSubtarget>().isLittle(); + if (!IsLittle) + std::swap(NewRegs[0], NewRegs[1]); + + MIRBuilder.buildSequence(Arg.Reg, NewRegs, {0, 32}); + + return 1; + } + + /// Marking a physical register as used is different between formal + /// parameters, where it's a basic block live-in, and call returns, where it's + /// an implicit-def of the call instruction. + virtual void markPhysRegUsed(unsigned PhysReg) = 0; +}; + +struct FormalArgHandler : public IncomingValueHandler { + FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, + CCAssignFn AssignFn) + : IncomingValueHandler(MIRBuilder, MRI, AssignFn) {} + + void markPhysRegUsed(unsigned PhysReg) override { + MIRBuilder.getMBB().addLiveIn(PhysReg); + } }; } // End anonymous namespace @@ -170,34 +324,111 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, if (F.isVarArg()) return false; - auto DL = MIRBuilder.getMF().getDataLayout(); + auto &MF = MIRBuilder.getMF(); + auto DL = MF.getDataLayout(); auto &TLI = *getTLI<ARMTargetLowering>(); - auto &Args = F.getArgumentList(); - unsigned ArgIdx = 0; - for (auto &Arg : Args) { - ArgIdx++; - if (!isSupportedType(DL, TLI, Arg.getType())) - return false; + auto Subtarget = TLI.getSubtarget(); - // FIXME: This check as well as ArgIdx are going away as soon as we support - // loading values < 32 bits. - if (ArgIdx > 4 && Arg.getType()->getIntegerBitWidth() != 32) + if (Subtarget->isThumb()) + return false; + + for (auto &Arg : F.args()) + if (!isSupportedType(DL, TLI, Arg.getType())) return false; - } CCAssignFn *AssignFn = TLI.CCAssignFnForCall(F.getCallingConv(), F.isVarArg()); SmallVector<ArgInfo, 8> ArgInfos; unsigned Idx = 0; - for (auto &Arg : Args) { + for (auto &Arg : F.args()) { ArgInfo AInfo(VRegs[Idx], Arg.getType()); setArgFlags(AInfo, Idx + 1, DL, F); - ArgInfos.push_back(AInfo); + splitToValueTypes(AInfo, ArgInfos, DL, MF.getRegInfo()); Idx++; } - FormalArgHandler ArgHandler(MIRBuilder, MIRBuilder.getMF().getRegInfo()); - return handleAssignments(MIRBuilder, AssignFn, ArgInfos, ArgHandler); + FormalArgHandler ArgHandler(MIRBuilder, MIRBuilder.getMF().getRegInfo(), + AssignFn); + return handleAssignments(MIRBuilder, ArgInfos, ArgHandler); +} + +namespace { +struct CallReturnHandler : public IncomingValueHandler { + CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, + MachineInstrBuilder MIB, CCAssignFn *AssignFn) + : IncomingValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {} + + void markPhysRegUsed(unsigned PhysReg) override { + MIB.addDef(PhysReg, RegState::Implicit); + } + + MachineInstrBuilder MIB; +}; +} // End anonymous namespace. + +bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, + CallingConv::ID CallConv, + const MachineOperand &Callee, + const ArgInfo &OrigRet, + ArrayRef<ArgInfo> OrigArgs) const { + MachineFunction &MF = MIRBuilder.getMF(); + const auto &TLI = *getTLI<ARMTargetLowering>(); + const auto &DL = MF.getDataLayout(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + if (MF.getSubtarget<ARMSubtarget>().genLongCalls()) + return false; + + auto CallSeqStart = MIRBuilder.buildInstr(ARM::ADJCALLSTACKDOWN); + + // Create the call instruction so we can add the implicit uses of arg + // registers, but don't insert it yet. + auto MIB = MIRBuilder.buildInstrNoInsert(ARM::BLX).add(Callee).addRegMask( + TRI->getCallPreservedMask(MF, CallConv)); + + SmallVector<ArgInfo, 8> ArgInfos; + for (auto Arg : OrigArgs) { + if (!isSupportedType(DL, TLI, Arg.Ty)) + return false; + + if (!Arg.IsFixed) + return false; + + splitToValueTypes(Arg, ArgInfos, DL, MRI); + } + + auto ArgAssignFn = TLI.CCAssignFnForCall(CallConv, /*IsVarArg=*/false); + OutgoingValueHandler ArgHandler(MIRBuilder, MRI, MIB, ArgAssignFn); + if (!handleAssignments(MIRBuilder, ArgInfos, ArgHandler)) + return false; + + // Now we can add the actual call instruction to the correct basic block. + MIRBuilder.insertInstr(MIB); + + if (!OrigRet.Ty->isVoidTy()) { + if (!isSupportedType(DL, TLI, OrigRet.Ty)) + return false; + + ArgInfos.clear(); + splitToValueTypes(OrigRet, ArgInfos, DL, MRI); + + auto RetAssignFn = TLI.CCAssignFnForReturn(CallConv, /*IsVarArg=*/false); + CallReturnHandler RetHandler(MIRBuilder, MRI, MIB, RetAssignFn); + if (!handleAssignments(MIRBuilder, ArgInfos, RetHandler)) + return false; + } + + // We now know the size of the stack - update the ADJCALLSTACKDOWN + // accordingly. + CallSeqStart.addImm(ArgHandler.StackSize).add(predOps(ARMCC::AL)); + + MIRBuilder.buildInstr(ARM::ADJCALLSTACKUP) + .addImm(ArgHandler.StackSize) + .addImm(0) + .add(predOps(ARMCC::AL)); + + return true; } diff --git a/contrib/llvm/lib/Target/ARM/ARMCallLowering.h b/contrib/llvm/lib/Target/ARM/ARMCallLowering.h index 6a1b886b501f..6404c7a2689e 100644 --- a/contrib/llvm/lib/Target/ARM/ARMCallLowering.h +++ b/contrib/llvm/lib/Target/ARM/ARMCallLowering.h @@ -34,9 +34,19 @@ public: bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, ArrayRef<unsigned> VRegs) const override; + bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv, + const MachineOperand &Callee, const ArgInfo &OrigRet, + ArrayRef<ArgInfo> OrigArgs) const override; + private: bool lowerReturnVal(MachineIRBuilder &MIRBuilder, const Value *Val, unsigned VReg, MachineInstrBuilder &Ret) const; + + /// Split an argument into one or more arguments that the CC lowering can cope + /// with (e.g. replace pointers with integers). + void splitToValueTypes(const ArgInfo &OrigArg, + SmallVectorImpl<ArgInfo> &SplitArgs, + const DataLayout &DL, MachineRegisterInfo &MRI) const; }; } // End of namespace llvm #endif diff --git a/contrib/llvm/lib/Target/ARM/ARMComputeBlockSize.cpp b/contrib/llvm/lib/Target/ARM/ARMComputeBlockSize.cpp index 64f187d17e64..e145d0a49ae6 100644 --- a/contrib/llvm/lib/Target/ARM/ARMComputeBlockSize.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMComputeBlockSize.cpp @@ -8,7 +8,15 @@ //===----------------------------------------------------------------------===// #include "ARM.h" +#include "ARMBaseInstrInfo.h" #include "ARMBasicBlockInfo.h" +#include "ARMMachineFunctionInfo.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <vector> + using namespace llvm; namespace llvm { @@ -69,4 +77,4 @@ std::vector<BasicBlockInfo> computeAllBlockSizes(MachineFunction *MF) { return BBInfo; } -} // end namespace +} // end namespace llvm diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp index be1a37e3e362..23722f1b7f3f 100644 --- a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -14,30 +14,50 @@ //===----------------------------------------------------------------------===// #include "ARM.h" +#include "ARMBaseInstrInfo.h" #include "ARMBasicBlockInfo.h" #include "ARMMachineFunctionInfo.h" -#include "MCTargetDesc/ARMAddressingModes.h" +#include "ARMSubtarget.h" +#include "MCTargetDesc/ARMBaseInfo.h" #include "Thumb2InstrInfo.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" #include <algorithm> +#include <cassert> +#include <cstdint> +#include <iterator> +#include <new> +#include <utility> +#include <vector> + using namespace llvm; #define DEBUG_TYPE "arm-cp-islands" +#define ARM_CP_ISLANDS_OPT_NAME \ + "ARM constant island placement and branch shortening pass" STATISTIC(NumCPEs, "Number of constpool entries"); STATISTIC(NumSplit, "Number of uncond branches inserted"); STATISTIC(NumCBrFixed, "Number of cond branches fixed"); @@ -49,7 +69,6 @@ STATISTIC(NumCBZ, "Number of CBZ / CBNZ formed"); STATISTIC(NumJTMoved, "Number of jump table destination blocks moved"); STATISTIC(NumJTInserted, "Number of jump table intermediate blocks inserted"); - static cl::opt<bool> AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true), cl::desc("Adjust basic block layout to better use TB[BH]")); @@ -64,6 +83,7 @@ static cl::opt<bool> SynthesizeThumb1TBB( "equivalent to the TBB/TBH instructions")); namespace { + /// ARMConstantIslands - Due to limited PC-relative displacements, ARM /// requires constant pool entries to be scattered among the instructions /// inside a function. To do this, it completely ignores the normal LLVM @@ -76,7 +96,6 @@ namespace { /// CPE - A constant pool entry that has been placed somewhere, which /// tracks a list of users. class ARMConstantIslands : public MachineFunctionPass { - std::vector<BasicBlockInfo> BBInfo; /// WaterList - A sorted list of basic blocks where islands could be placed @@ -110,12 +129,14 @@ namespace { bool NegOk; bool IsSoImm; bool KnownAlignment; + CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp, bool neg, bool soimm) : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm), KnownAlignment(false) { HighWaterMark = CPEMI->getParent(); } + /// getMaxDisp - Returns the maximum displacement supported by MI. /// Correct for unknown alignment. /// Conservatively subtract 2 bytes to handle weird alignment effects. @@ -135,6 +156,7 @@ namespace { MachineInstr *CPEMI; unsigned CPI; unsigned RefCount; + CPEntry(MachineInstr *cpemi, unsigned cpi, unsigned rc = 0) : CPEMI(cpemi), CPI(cpi), RefCount(rc) {} }; @@ -148,7 +170,7 @@ namespace { /// The first half of CPEntries contains generic constants, the second half /// contains jump tables. Use getCombinedIndex on a generic CPEMI to look up /// which vector it will be in here. - std::vector<std::vector<CPEntry> > CPEntries; + std::vector<std::vector<CPEntry>> CPEntries; /// Maps a JT index to the offset in CPEntries containing copies of that /// table. The equivalent map for a CONSTPOOL_ENTRY is the identity. @@ -167,6 +189,7 @@ namespace { unsigned MaxDisp : 31; bool isCond : 1; unsigned UncondBr; + ImmBranch(MachineInstr *mi, unsigned maxdisp, bool cond, unsigned ubr) : MI(mi), MaxDisp(maxdisp), isCond(cond), UncondBr(ubr) {} }; @@ -195,8 +218,10 @@ namespace { bool isThumb1; bool isThumb2; bool isPositionIndependentOrROPI; + public: static char ID; + ARMConstantIslands() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override; @@ -207,7 +232,7 @@ namespace { } StringRef getPassName() const override { - return "ARM constant island placement and branch shortening pass"; + return ARM_CP_ISLANDS_OPT_NAME; } private: @@ -264,8 +289,10 @@ namespace { U.getMaxDisp(), U.NegOk, U.IsSoImm); } }; + char ARMConstantIslands::ID = 0; -} + +} // end anonymous namespace /// verify - check BBOffsets, BBSizes, alignment of islands void ARMConstantIslands::verify() { @@ -295,8 +322,9 @@ void ARMConstantIslands::verify() { #endif } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// print block size and offset information - debugging -void ARMConstantIslands::dumpBBs() { +LLVM_DUMP_METHOD void ARMConstantIslands::dumpBBs() { DEBUG({ for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) { const BasicBlockInfo &BBI = BBInfo[J]; @@ -308,12 +336,7 @@ void ARMConstantIslands::dumpBBs() { } }); } - -/// createARMConstantIslandPass - returns an instance of the constpool -/// island pass. -FunctionPass *llvm::createARMConstantIslandPass() { - return new ARMConstantIslands(); -} +#endif bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { MF = &mf; @@ -782,6 +805,7 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) { case ARM::LDRcp: case ARM::t2LDRpci: case ARM::t2LDRHpci: + case ARM::t2LDRBpci: Bits = 12; // +-offset_12 NegOk = true; break; @@ -873,7 +897,6 @@ void ARMConstantIslands::updateForInsertedWaterBlock(MachineBasicBlock *NewBB) { WaterList.insert(IP, NewBB); } - /// Split the basic block containing MI into two blocks, which are joined by /// an unconditional branch. Update data structures and renumber blocks to /// account for this change and returns the newly created block. @@ -897,8 +920,9 @@ MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) { if (!isThumb) BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB); else - BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB) - .addImm(ARMCC::AL).addReg(0); + BuildMI(OrigBB, DebugLoc(), TII->get(Opc)) + .addMBB(NewBB) + .add(predOps(ARMCC::AL)); ++NumSplit; // Update the CFG. All succs of OrigBB are now succs of NewBB. @@ -1296,8 +1320,9 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, if (!isThumb) BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB); else - BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB) - .addImm(ARMCC::AL).addReg(0); + BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)) + .addMBB(NewMBB) + .add(predOps(ARMCC::AL)); unsigned MaxDisp = getUnconditionalBrDisp(UncondBr); ImmBranches.push_back(ImmBranch(&UserMBB->back(), MaxDisp, false, UncondBr)); @@ -1477,7 +1502,9 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex, // add it to the island. U.HighWaterMark = NewIsland; U.CPEMI = BuildMI(NewIsland, DebugLoc(), CPEMI->getDesc()) - .addImm(ID).addOperand(CPEMI->getOperand(1)).addImm(Size); + .addImm(ID) + .add(CPEMI->getOperand(1)) + .addImm(Size); CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1)); ++NumCPEs; @@ -1681,8 +1708,9 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) { Br.MI = &MBB->back(); BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back()); if (isThumb) - BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB) - .addImm(ARMCC::AL).addReg(0); + BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)) + .addMBB(DestBB) + .add(predOps(ARMCC::AL)); else BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB); BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back()); @@ -1709,8 +1737,15 @@ bool ARMConstantIslands::undoLRSpillRestore() { MI->getNumExplicitOperands() == 3) { // Create the new insn and copy the predicate from the old. BuildMI(MI->getParent(), MI->getDebugLoc(), TII->get(ARM::tBX_RET)) - .addOperand(MI->getOperand(0)) - .addOperand(MI->getOperand(1)); + .add(MI->getOperand(0)) + .add(MI->getOperand(1)); + MI->eraseFromParent(); + MadeChange = true; + } + if (MI->getOpcode() == ARM::tPUSH && + MI->getOperand(2).getReg() == ARM::LR && + MI->getNumExplicitOperands() == 3) { + // Just remove the push. MI->eraseFromParent(); MadeChange = true; } @@ -1792,13 +1827,12 @@ bool ARMConstantIslands::optimizeThumb2Branches() { Bits = 11; Scale = 2; break; - case ARM::t2Bcc: { + case ARM::t2Bcc: NewOpc = ARM::tBcc; Bits = 8; Scale = 2; break; } - } if (NewOpc) { unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale; MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB(); @@ -1983,6 +2017,54 @@ static bool jumpTableFollowsTB(MachineInstr *JTMI, MachineInstr *CPEMI) { &*MBB->begin() == CPEMI; } +static void RemoveDeadAddBetweenLEAAndJT(MachineInstr *LEAMI, + MachineInstr *JumpMI, + unsigned &DeadSize) { + // Remove a dead add between the LEA and JT, which used to compute EntryReg, + // but the JT now uses PC. Finds the last ADD (if any) that def's EntryReg + // and is not clobbered / used. + MachineInstr *RemovableAdd = nullptr; + unsigned EntryReg = JumpMI->getOperand(0).getReg(); + + // Find the last ADD to set EntryReg + MachineBasicBlock::iterator I(LEAMI); + for (++I; &*I != JumpMI; ++I) { + if (I->getOpcode() == ARM::t2ADDrs && I->getOperand(0).getReg() == EntryReg) + RemovableAdd = &*I; + } + + if (!RemovableAdd) + return; + + // Ensure EntryReg is not clobbered or used. + MachineBasicBlock::iterator J(RemovableAdd); + for (++J; &*J != JumpMI; ++J) { + for (unsigned K = 0, E = J->getNumOperands(); K != E; ++K) { + const MachineOperand &MO = J->getOperand(K); + if (!MO.isReg() || !MO.getReg()) + continue; + if (MO.isDef() && MO.getReg() == EntryReg) + return; + if (MO.isUse() && MO.getReg() == EntryReg) + return; + } + } + + DEBUG(dbgs() << "Removing Dead Add: " << *RemovableAdd); + RemovableAdd->eraseFromParent(); + DeadSize += 4; +} + +static bool registerDefinedBetween(unsigned Reg, + MachineBasicBlock::iterator From, + MachineBasicBlock::iterator To, + const TargetRegisterInfo *TRI) { + for (auto I = From; I != To; ++I) + if (I->modifiesRegister(Reg, TRI)) + return true; + return false; +} + /// optimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller /// jumptables when it's possible. bool ARMConstantIslands::optimizeThumb2JumpTables() { @@ -2060,6 +2142,12 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() { IdxReg = Shift->getOperand(2).getReg(); unsigned ShiftedIdxReg = Shift->getOperand(0).getReg(); + // It's important that IdxReg is live until the actual TBB/TBH. Most of + // the range is checked later, but the LEA might still clobber it and not + // actually get removed. + if (BaseReg == IdxReg && !jumpTableFollowsTB(MI, User.CPEMI)) + continue; + MachineInstr *Load = User.MI->getNextNode(); if (Load->getOpcode() != ARM::tLDRr) continue; @@ -2069,6 +2157,7 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() { continue; // If we're in PIC mode, there should be another ADD following. + auto *TRI = STI->getRegisterInfo(); if (isPositionIndependentOrROPI) { MachineInstr *Add = Load->getNextNode(); if (Add->getOpcode() != ARM::tADDrr || @@ -2078,22 +2167,26 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() { continue; if (Add->getOperand(0).getReg() != MI->getOperand(0).getReg()) continue; - + if (registerDefinedBetween(IdxReg, Add->getNextNode(), MI, TRI)) + // IdxReg gets redefined in the middle of the sequence. + continue; Add->eraseFromParent(); DeadSize += 2; } else { if (Load->getOperand(0).getReg() != MI->getOperand(0).getReg()) continue; + if (registerDefinedBetween(IdxReg, Load->getNextNode(), MI, TRI)) + // IdxReg gets redefined in the middle of the sequence. + continue; } - - + // Now safe to delete the load and lsl. The LEA will be removed later. CanDeleteLEA = true; Shift->eraseFromParent(); Load->eraseFromParent(); DeadSize += 4; } - + DEBUG(dbgs() << "Shrink JT: " << *MI); MachineInstr *CPEMI = User.CPEMI; unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT; @@ -2117,7 +2210,10 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() { NewJTMI->getOperand(0).setReg(ARM::PC); NewJTMI->getOperand(0).setIsKill(false); - if (CanDeleteLEA) { + if (CanDeleteLEA) { + if (isThumb2) + RemoveDeadAddBetweenLEAAndJT(User.MI, MI, DeadSize); + User.MI->eraseFromParent(); DeadSize += isThumb2 ? 4 : 2; @@ -2238,13 +2334,11 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) { if (isThumb2) BuildMI(NewBB, DebugLoc(), TII->get(ARM::t2B)) .addMBB(BB) - .addImm(ARMCC::AL) - .addReg(0); + .add(predOps(ARMCC::AL)); else BuildMI(NewBB, DebugLoc(), TII->get(ARM::tB)) .addMBB(BB) - .addImm(ARMCC::AL) - .addReg(0); + .add(predOps(ARMCC::AL)); // Update internal data structures to account for the newly inserted MBB. MF->RenumberBlocks(NewBB); @@ -2256,3 +2350,12 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) { ++NumJTInserted; return NewBB; } + +/// createARMConstantIslandPass - returns an instance of the constpool +/// island pass. +FunctionPass *llvm::createARMConstantIslandPass() { + return new ARMConstantIslands(); +} + +INITIALIZE_PASS(ARMConstantIslands, "arm-cp-islands", ARM_CP_ISLANDS_OPT_NAME, + false, false) diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp index 2d1602873ce0..9705c8b718b7 100644 --- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp @@ -13,13 +13,17 @@ #include "ARMConstantPoolValue.h" #include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Type.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include <cstdlib> + using namespace llvm; //===----------------------------------------------------------------------===// @@ -44,7 +48,7 @@ ARMConstantPoolValue::ARMConstantPoolValue(LLVMContext &C, unsigned id, LabelId(id), Kind(kind), PCAdjust(PCAdj), Modifier(modifier), AddCurrentAddress(addCurrentAddress) {} -ARMConstantPoolValue::~ARMConstantPoolValue() {} +ARMConstantPoolValue::~ARMConstantPoolValue() = default; StringRef ARMConstantPoolValue::getModifierText() const { switch (Modifier) { @@ -94,9 +98,11 @@ ARMConstantPoolValue::hasSameValue(ARMConstantPoolValue *ACPV) { return false; } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void ARMConstantPoolValue::dump() const { errs() << " " << *this; } +#endif void ARMConstantPoolValue::print(raw_ostream &O) const { if (Modifier) O << "(" << getModifierText() << ")"; diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h index 5f61832aa740..61c521581f79 100644 --- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h +++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h @@ -14,10 +14,11 @@ #ifndef LLVM_LIB_TARGET_ARM_ARMCONSTANTPOOLVALUE_H #define LLVM_LIB_TARGET_ARM_ARMCONSTANTPOOLVALUE_H +#include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ErrorHandling.h" -#include <cstddef> +#include <string> +#include <vector> namespace llvm { @@ -29,6 +30,7 @@ class LLVMContext; class MachineBasicBlock; namespace ARMCP { + enum ARMCPKind { CPValue, CPExtSymbol, @@ -47,7 +49,8 @@ namespace ARMCP { SECREL, /// Section Relative (Windows TLS) SBREL, /// Static Base Relative (RWPI) }; -} + +} // end namespace ARMCP /// ARMConstantPoolValue - ARM specific constantpool value. This is used to /// represent PC-relative displacement between the address of the load @@ -169,9 +172,11 @@ public: const GlobalValue *getGV() const; const BlockAddress *getBlockAddress() const; + const GlobalVariable *getPromotedGlobal() const { return dyn_cast_or_null<GlobalVariable>(GVar); } + const Constant *getPromotedGlobalInit() const { return CVal; } @@ -186,6 +191,7 @@ public: void addSelectionDAGCSEId(FoldingSetNodeID &ID) override; void print(raw_ostream &O) const override; + static bool classof(const ARMConstantPoolValue *APV) { return APV->isGlobalValue() || APV->isBlockAddress() || APV->isLSDA() || APV->isPromotedGlobal(); @@ -267,6 +273,6 @@ public: } }; -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_ARM_ARMCONSTANTPOOLVALUE_H diff --git a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index baa4e0330cf4..e0aecff2633b 100644 --- a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -19,6 +19,7 @@ #include "ARMBaseRegisterInfo.h" #include "ARMConstantPoolValue.h" #include "ARMMachineFunctionInfo.h" +#include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -30,6 +31,7 @@ #include "llvm/Support/raw_ostream.h" // FIXME: for debug only. remove! #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetRegisterInfo.h" + using namespace llvm; #define DEBUG_TYPE "arm-pseudo" @@ -97,9 +99,9 @@ void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI, const MachineOperand &MO = OldMI.getOperand(i); assert(MO.isReg() && MO.getReg()); if (MO.isUse()) - UseMI.addOperand(MO); + UseMI.add(MO); else - DefMI.addOperand(MO); + DefMI.add(MO); } } @@ -415,14 +417,14 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) { MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead)); if (TableEntry->isUpdating) - MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); // Copy the addrmode6 operands. - MIB.addOperand(MI.getOperand(OpIdx++)); - MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); // Copy the am6offset operand. if (TableEntry->hasWritebackOperand) - MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); // For an instruction writing double-spaced subregs, the pseudo instruction // has an extra operand that is a use of the super-register. Record the @@ -432,15 +434,15 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) { SrcOpIdx = OpIdx++; // Copy the predicate operands. - MIB.addOperand(MI.getOperand(OpIdx++)); - MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); // Copy the super-register source operand used for double-spaced subregs over // to the new instruction as an implicit operand. if (SrcOpIdx != 0) { MachineOperand MO = MI.getOperand(SrcOpIdx); MO.setImplicit(true); - MIB.addOperand(MO); + MIB.add(MO); } // Add an implicit def for the super-register. MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead)); @@ -467,14 +469,14 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) { TII->get(TableEntry->RealOpc)); unsigned OpIdx = 0; if (TableEntry->isUpdating) - MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); // Copy the addrmode6 operands. - MIB.addOperand(MI.getOperand(OpIdx++)); - MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); // Copy the am6offset operand. if (TableEntry->hasWritebackOperand) - MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); bool SrcIsKill = MI.getOperand(OpIdx).isKill(); bool SrcIsUndef = MI.getOperand(OpIdx).isUndef(); @@ -490,8 +492,8 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) { MIB.addReg(D3, getUndefRegState(SrcIsUndef)); // Copy the predicate operands. - MIB.addOperand(MI.getOperand(OpIdx++)); - MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); if (SrcIsKill && !SrcIsUndef) // Add an implicit kill for the super-reg. MIB->addRegisterKilled(SrcReg, TRI, true); @@ -549,14 +551,14 @@ void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) { } if (TableEntry->isUpdating) - MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); // Copy the addrmode6 operands. - MIB.addOperand(MI.getOperand(OpIdx++)); - MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); // Copy the am6offset operand. if (TableEntry->hasWritebackOperand) - MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); // Grab the super-register source. MachineOperand MO = MI.getOperand(OpIdx++); @@ -579,12 +581,12 @@ void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) { OpIdx += 1; // Copy the predicate operands. - MIB.addOperand(MI.getOperand(OpIdx++)); - MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); // Copy the super-register source to be an implicit source. MO.setImplicit(true); - MIB.addOperand(MO); + MIB.add(MO); if (TableEntry->IsLoad) // Add an implicit def for the super-register. MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead)); @@ -605,9 +607,9 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI, unsigned OpIdx = 0; // Transfer the destination register operand. - MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); if (IsExt) - MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); bool SrcIsKill = MI.getOperand(OpIdx).isKill(); unsigned SrcReg = MI.getOperand(OpIdx++).getReg(); @@ -616,11 +618,11 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI, MIB.addReg(D0); // Copy the other source register operand. - MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); // Copy the predicate operands. - MIB.addOperand(MI.getOperand(OpIdx++)); - MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); // Add an implicit kill and use for the super-reg. MIB.addReg(SrcReg, RegState::Implicit | getKillRegState(SrcIsKill)); @@ -659,6 +661,7 @@ static bool IsAnAddressOperand(const MachineOperand &MO) { return false; case MachineOperand::MO_IntrinsicID: case MachineOperand::MO_Predicate: + case MachineOperand::MO_Placeholder: llvm_unreachable("should not exist post-isel"); } llvm_unreachable("unhandled machine operand type"); @@ -696,8 +699,8 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, HI16 = HI16.addImm(SOImmValV2); LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); HI16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); - LO16.addImm(Pred).addReg(PredReg).addReg(0); - HI16.addImm(Pred).addReg(PredReg).addReg(0); + LO16.addImm(Pred).addReg(PredReg).add(condCodeOp()); + HI16.addImm(Pred).addReg(PredReg).add(condCodeOp()); TransferImpOps(MI, LO16, HI16); MI.eraseFromParent(); return; @@ -797,7 +800,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, .addReg(Desired.getReg(), RegState::Kill); if (!IsThumb) MIB.addImm(0); - AddDefaultPred(MIB); + MIB.add(predOps(ARMCC::AL)); } // .Lloadcmp: @@ -814,12 +817,13 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, MIB.addReg(Addr.getReg()); if (LdrexOp == ARM::t2LDREX) MIB.addImm(0); // a 32-bit Thumb ldrex (only) allows an offset. - AddDefaultPred(MIB); + MIB.add(predOps(ARMCC::AL)); unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr; - AddDefaultPred(BuildMI(LoadCmpBB, DL, TII->get(CMPrr)) - .addReg(Dest.getReg(), getKillRegState(Dest.isDead())) - .addOperand(Desired)); + BuildMI(LoadCmpBB, DL, TII->get(CMPrr)) + .addReg(Dest.getReg(), getKillRegState(Dest.isDead())) + .add(Desired) + .add(predOps(ARMCC::AL)); unsigned Bcc = IsThumb ? ARM::tBcc : ARM::Bcc; BuildMI(LoadCmpBB, DL, TII->get(Bcc)) .addMBB(DoneBB) @@ -838,16 +842,17 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, MIB = BuildMI(StoreBB, DL, TII->get(StrexOp), StatusReg); - MIB.addOperand(New); - MIB.addOperand(Addr); + MIB.add(New); + MIB.add(Addr); if (StrexOp == ARM::t2STREX) MIB.addImm(0); // a 32-bit Thumb strex (only) allows an offset. - AddDefaultPred(MIB); + MIB.add(predOps(ARMCC::AL)); unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri; - AddDefaultPred(BuildMI(StoreBB, DL, TII->get(CMPri)) - .addReg(StatusReg, RegState::Kill) - .addImm(0)); + BuildMI(StoreBB, DL, TII->get(CMPri)) + .addReg(StatusReg, RegState::Kill) + .addImm(0) + .add(predOps(ARMCC::AL)); BuildMI(StoreBB, DL, TII->get(Bcc)) .addMBB(LoadCmpBB) .addImm(ARMCC::NE) @@ -927,13 +932,13 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB, MachineInstrBuilder MIB; MIB = BuildMI(LoadCmpBB, DL, TII->get(LDREXD)); addExclusiveRegPair(MIB, Dest, RegState::Define, IsThumb, TRI); - MIB.addReg(Addr.getReg()); - AddDefaultPred(MIB); + MIB.addReg(Addr.getReg()).add(predOps(ARMCC::AL)); unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr; - AddDefaultPred(BuildMI(LoadCmpBB, DL, TII->get(CMPrr)) - .addReg(DestLo, getKillRegState(Dest.isDead())) - .addReg(DesiredLo, getKillRegState(Desired.isDead()))); + BuildMI(LoadCmpBB, DL, TII->get(CMPrr)) + .addReg(DestLo, getKillRegState(Dest.isDead())) + .addReg(DesiredLo, getKillRegState(Desired.isDead())) + .add(predOps(ARMCC::AL)); BuildMI(LoadCmpBB, DL, TII->get(CMPrr)) .addReg(DestHi, getKillRegState(Dest.isDead())) @@ -959,13 +964,13 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB, unsigned STREXD = IsThumb ? ARM::t2STREXD : ARM::STREXD; MIB = BuildMI(StoreBB, DL, TII->get(STREXD), StatusReg); addExclusiveRegPair(MIB, New, 0, IsThumb, TRI); - MIB.addOperand(Addr); - AddDefaultPred(MIB); + MIB.add(Addr).add(predOps(ARMCC::AL)); unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri; - AddDefaultPred(BuildMI(StoreBB, DL, TII->get(CMPri)) - .addReg(StatusReg, RegState::Kill) - .addImm(0)); + BuildMI(StoreBB, DL, TII->get(CMPri)) + .addReg(StatusReg, RegState::Kill) + .addImm(0) + .add(predOps(ARMCC::AL)); BuildMI(StoreBB, DL, TII->get(Bcc)) .addMBB(LoadCmpBB) .addImm(ARMCC::NE) @@ -1026,7 +1031,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, // Add the default predicate in Thumb mode. if (STI->isThumb()) - MIB.addImm(ARMCC::AL).addReg(0); + MIB.add(predOps(ARMCC::AL)); } else if (RetOpcode == ARM::TCRETURNri) { BuildMI(MBB, MBBI, dl, TII.get(STI->isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)) @@ -1047,9 +1052,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, unsigned newOpc = Opcode == ARM::VMOVScc ? ARM::VMOVS : ARM::VMOVD; BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(newOpc), MI.getOperand(1).getReg()) - .addOperand(MI.getOperand(2)) - .addImm(MI.getOperand(3).getImm()) // 'pred' - .addOperand(MI.getOperand(4)); + .add(MI.getOperand(2)) + .addImm(MI.getOperand(3).getImm()) // 'pred' + .add(MI.getOperand(4)); MI.eraseFromParent(); return true; @@ -1059,10 +1064,10 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVr : ARM::MOVr; BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc), MI.getOperand(1).getReg()) - .addOperand(MI.getOperand(2)) - .addImm(MI.getOperand(3).getImm()) // 'pred' - .addOperand(MI.getOperand(4)) - .addReg(0); // 's' bit + .add(MI.getOperand(2)) + .addImm(MI.getOperand(3).getImm()) // 'pred' + .add(MI.getOperand(4)) + .add(condCodeOp()); // 's' bit MI.eraseFromParent(); return true; @@ -1070,11 +1075,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::MOVCCsi: { BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi), (MI.getOperand(1).getReg())) - .addOperand(MI.getOperand(2)) - .addImm(MI.getOperand(3).getImm()) - .addImm(MI.getOperand(4).getImm()) // 'pred' - .addOperand(MI.getOperand(5)) - .addReg(0); // 's' bit + .add(MI.getOperand(2)) + .addImm(MI.getOperand(3).getImm()) + .addImm(MI.getOperand(4).getImm()) // 'pred' + .add(MI.getOperand(5)) + .add(condCodeOp()); // 's' bit MI.eraseFromParent(); return true; @@ -1082,12 +1087,12 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::MOVCCsr: { BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsr), (MI.getOperand(1).getReg())) - .addOperand(MI.getOperand(2)) - .addOperand(MI.getOperand(3)) - .addImm(MI.getOperand(4).getImm()) - .addImm(MI.getOperand(5).getImm()) // 'pred' - .addOperand(MI.getOperand(6)) - .addReg(0); // 's' bit + .add(MI.getOperand(2)) + .add(MI.getOperand(3)) + .addImm(MI.getOperand(4).getImm()) + .addImm(MI.getOperand(5).getImm()) // 'pred' + .add(MI.getOperand(6)) + .add(condCodeOp()); // 's' bit MI.eraseFromParent(); return true; @@ -1097,9 +1102,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, unsigned NewOpc = AFI->isThumbFunction() ? ARM::t2MOVi16 : ARM::MOVi16; BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc), MI.getOperand(1).getReg()) - .addImm(MI.getOperand(2).getImm()) - .addImm(MI.getOperand(3).getImm()) // 'pred' - .addOperand(MI.getOperand(4)); + .addImm(MI.getOperand(2).getImm()) + .addImm(MI.getOperand(3).getImm()) // 'pred' + .add(MI.getOperand(4)); MI.eraseFromParent(); return true; } @@ -1108,10 +1113,10 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVi : ARM::MOVi; BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc), MI.getOperand(1).getReg()) - .addImm(MI.getOperand(2).getImm()) - .addImm(MI.getOperand(3).getImm()) // 'pred' - .addOperand(MI.getOperand(4)) - .addReg(0); // 's' bit + .addImm(MI.getOperand(2).getImm()) + .addImm(MI.getOperand(3).getImm()) // 'pred' + .add(MI.getOperand(4)) + .add(condCodeOp()); // 's' bit MI.eraseFromParent(); return true; @@ -1121,10 +1126,10 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, unsigned Opc = AFI->isThumbFunction() ? ARM::t2MVNi : ARM::MVNi; BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc), MI.getOperand(1).getReg()) - .addImm(MI.getOperand(2).getImm()) - .addImm(MI.getOperand(3).getImm()) // 'pred' - .addOperand(MI.getOperand(4)) - .addReg(0); // 's' bit + .addImm(MI.getOperand(2).getImm()) + .addImm(MI.getOperand(3).getImm()) // 'pred' + .add(MI.getOperand(4)) + .add(condCodeOp()); // 's' bit MI.eraseFromParent(); return true; @@ -1143,11 +1148,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, } BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc), MI.getOperand(1).getReg()) - .addOperand(MI.getOperand(2)) - .addImm(MI.getOperand(3).getImm()) - .addImm(MI.getOperand(4).getImm()) // 'pred' - .addOperand(MI.getOperand(5)) - .addReg(0); // 's' bit + .add(MI.getOperand(2)) + .addImm(MI.getOperand(3).getImm()) + .addImm(MI.getOperand(4).getImm()) // 'pred' + .add(MI.getOperand(5)) + .add(condCodeOp()); // 's' bit MI.eraseFromParent(); return true; } @@ -1187,10 +1192,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, "bits set."); unsigned bicOpc = AFI->isThumbFunction() ? ARM::t2BICri : ARM::BICri; - AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), - TII->get(bicOpc), ARM::R6) - .addReg(ARM::R6, RegState::Kill) - .addImm(MaxAlign-1))); + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(bicOpc), ARM::R6) + .addReg(ARM::R6, RegState::Kill) + .addImm(MaxAlign - 1) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); } } @@ -1201,24 +1207,25 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::MOVsrl_flag: case ARM::MOVsra_flag: { // These are just fancy MOVs instructions. - AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi), - MI.getOperand(0).getReg()) - .addOperand(MI.getOperand(1)) - .addImm(ARM_AM::getSORegOpc((Opcode == ARM::MOVsrl_flag ? - ARM_AM::lsr : ARM_AM::asr), - 1))) - .addReg(ARM::CPSR, RegState::Define); + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi), + MI.getOperand(0).getReg()) + .add(MI.getOperand(1)) + .addImm(ARM_AM::getSORegOpc( + (Opcode == ARM::MOVsrl_flag ? ARM_AM::lsr : ARM_AM::asr), 1)) + .add(predOps(ARMCC::AL)) + .addReg(ARM::CPSR, RegState::Define); MI.eraseFromParent(); return true; } case ARM::RRX: { // This encodes as "MOVs Rd, Rm, rrx MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),TII->get(ARM::MOVsi), - MI.getOperand(0).getReg()) - .addOperand(MI.getOperand(1)) - .addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0))) - .addReg(0); + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi), + MI.getOperand(0).getReg()) + .add(MI.getOperand(1)) + .addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0)) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); TransferImpOps(MI, MIB, MIB); MI.eraseFromParent(); return true; @@ -1241,18 +1248,18 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, 4)); if (!Thumb) MIB.addImm(0); - MIB.addImm(static_cast<unsigned>(ARMCC::AL)).addReg(0); + MIB.add(predOps(ARMCC::AL)); MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Thumb ? ARM::tBLXr : ARM::BLX)); if (Thumb) - MIB.addImm(static_cast<unsigned>(ARMCC::AL)).addReg(0); + MIB.add(predOps(ARMCC::AL)); MIB.addReg(Reg, RegState::Kill); } else { MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Thumb ? ARM::tBL : ARM::BL)); if (Thumb) - MIB.addImm(static_cast<unsigned>(ARMCC::AL)).addReg(0); + MIB.add(predOps(ARMCC::AL)); MIB.addExternalSymbol("__aeabi_read_tp", 0); } @@ -1268,15 +1275,15 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, unsigned DstReg = MI.getOperand(0).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); MachineInstrBuilder MIB1 = - AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), - TII->get(NewLdOpc), DstReg) - .addOperand(MI.getOperand(1))); + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewLdOpc), DstReg) + .add(MI.getOperand(1)) + .add(predOps(ARMCC::AL)); MIB1->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); - MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), - TII->get(ARM::tPICADD)) - .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstReg) - .addOperand(MI.getOperand(2)); + MachineInstrBuilder MIB2 = + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPICADD)) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg) + .add(MI.getOperand(2)); TransferImpOps(MI, MIB1, MIB2); MI.eraseFromParent(); return true; @@ -1319,7 +1326,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, 4)); if (IsARM) MIB.addImm(0); - AddDefaultPred(MIB); + MIB.add(predOps(ARMCC::AL)); if (IsPIC) { MachineInstrBuilder MIB = @@ -1329,7 +1336,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, .addImm(ARMPCLabelIndex); if (IsARM) - AddDefaultPred(MIB); + MIB.add(predOps(ARMCC::AL)); } MI.eraseFromParent(); @@ -1368,7 +1375,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) .addReg(DstReg).addImm(LabelId); if (isARM) { - AddDefaultPred(MIB3); + MIB3.add(predOps(ARMCC::AL)); if (Opcode == ARM::MOV_ga_pcrel_ldr) MIB3->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); } @@ -1388,9 +1395,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri), ARM::PC) .addReg(ARM::LR) - .addOperand(MI.getOperand(0)) - .addOperand(MI.getOperand(1)) - .addOperand(MI.getOperand(2)) + .add(MI.getOperand(0)) + .add(MI.getOperand(1)) + .add(MI.getOperand(2)) .addReg(ARM::CPSR, RegState::Undef); TransferImpOps(MI, MIB, MIB); MI.eraseFromParent(); @@ -1407,11 +1414,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, unsigned DstReg = MI.getOperand(OpIdx++).getReg(); // Copy the source register. - MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); // Copy the predicate operands. - MIB.addOperand(MI.getOperand(OpIdx++)); - MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); // Add the destination operands (D subregs). unsigned D0 = TRI->getSubReg(DstReg, ARM::dsub_0); @@ -1438,11 +1445,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, unsigned SrcReg = MI.getOperand(OpIdx++).getReg(); // Copy the destination register. - MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); // Copy the predicate operands. - MIB.addOperand(MI.getOperand(OpIdx++)); - MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); + MIB.add(MI.getOperand(OpIdx++)); // Add the source operands (D subregs). unsigned D0 = TRI->getSubReg(SrcReg, ARM::dsub_0); diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp index df4dcb375750..01e062bd185c 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "ARM.h" +#include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" #include "ARMCallingConv.h" #include "ARMConstantPoolValue.h" @@ -21,30 +22,61 @@ #include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "MCTargetDesc/ARMBaseInfo.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include <cassert> +#include <cstdint> +#include <utility> + using namespace llvm; namespace { @@ -54,24 +86,22 @@ namespace { enum { RegBase, FrameIndexBase - } BaseType; + } BaseType = RegBase; union { unsigned Reg; int FI; } Base; - int Offset; + int Offset = 0; // Innocuous defaults for our address. - Address() - : BaseType(RegBase), Offset(0) { - Base.Reg = 0; - } + Address() { + Base.Reg = 0; + } } Address; class ARMFastISel final : public FastISel { - /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can /// make the right decision when generating code for different targets. const ARMSubtarget *Subtarget; @@ -99,8 +129,9 @@ class ARMFastISel final : public FastISel { Context = &funcInfo.Fn->getContext(); } - // Code from FastISel.cpp. private: + // Code from FastISel.cpp. + unsigned fastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill); @@ -117,18 +148,18 @@ class ARMFastISel final : public FastISel { uint64_t Imm); // Backend specific FastISel code. - private: + bool fastSelectInstruction(const Instruction *I) override; unsigned fastMaterializeConstant(const Constant *C) override; unsigned fastMaterializeAlloca(const AllocaInst *AI) override; bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, const LoadInst *LI) override; bool fastLowerArguments() override; - private: + #include "ARMGenFastISel.inc" // Instruction selection routines. - private: + bool SelectLoad(const Instruction *I); bool SelectStore(const Instruction *I); bool SelectBranch(const Instruction *I); @@ -151,12 +182,12 @@ class ARMFastISel final : public FastISel { bool SelectShift(const Instruction *I, ARM_AM::ShiftOpc ShiftTy); // Utility routines. - private: + bool isPositionIndependent() const; bool isTypeLegal(Type *Ty, MVT &VT); bool isLoadTypeLegal(Type *Ty, MVT &VT); bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, - bool isZExt); + bool isZExt, bool isEquality); bool ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, unsigned Alignment = 0, bool isZExt = true, bool allocReg = true); @@ -179,7 +210,7 @@ class ARMFastISel final : public FastISel { const TargetLowering *getTargetLowering() { return &TLI; } // Call handling routines. - private: + CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool Return, bool isVarArg); @@ -198,7 +229,7 @@ class ARMFastISel final : public FastISel { bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call); // OptionalDef handling routines. - private: + bool isARMNEONPred(const MachineInstr *MI); bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR); const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB); @@ -256,17 +287,13 @@ ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) { // Are we NEON in ARM mode and have a predicate operand? If so, I know // we're not predicable but add it anyways. if (isARMNEONPred(MI)) - AddDefaultPred(MIB); + MIB.add(predOps(ARMCC::AL)); // Do we optionally set a predicate? Preds is size > 0 iff the predicate // defines CPSR. All other OptionalDefines in ARM are the CCR register. bool CPSR = false; - if (DefinesOptionalPredicate(MI, &CPSR)) { - if (CPSR) - AddDefaultT1CC(MIB); - else - AddDefaultCC(MIB); - } + if (DefinesOptionalPredicate(MI, &CPSR)) + MIB.add(CPSR ? t1CondCodeOp() : condCodeOp()); return MIB; } @@ -434,7 +461,6 @@ unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) { } unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) { - if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1) return 0; @@ -739,7 +765,7 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) { TmpOffset += SL->getElementOffset(Idx); } else { uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType()); - for (;;) { + while (true) { if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { // Constant-offset addressing. TmpOffset += CI->getSExtValue() * S; @@ -971,7 +997,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, // Create the base instruction, then add the operands. if (allocReg) ResultReg = createResultReg(RC); - assert (ResultReg > 255 && "Expected an allocated virtual register."); + assert(ResultReg > 255 && "Expected an allocated virtual register."); MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg); AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad, useAM3); @@ -1216,7 +1242,6 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { // behavior. if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { if (CI->hasOneUse() && (CI->getParent() == I->getParent())) { - // Get the compare predicate. // Try to take advantage of fallthrough opportunities. CmpInst::Predicate Predicate = CI->getPredicate(); @@ -1231,7 +1256,8 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { if (ARMPred == ARMCC::AL) return false; // Emit the compare. - if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) + if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(), + CI->isEquality())) return false; unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc; @@ -1318,14 +1344,16 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) { } bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, - bool isZExt) { + bool isZExt, bool isEquality) { Type *Ty = Src1Value->getType(); EVT SrcEVT = TLI.getValueType(DL, Ty, true); if (!SrcEVT.isSimple()) return false; MVT SrcVT = SrcEVT.getSimpleVT(); - bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy()); - if (isFloat && !Subtarget->hasVFP2()) + if (Ty->isFloatTy() && !Subtarget->hasVFP2()) + return false; + + if (Ty->isDoubleTy() && (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP())) return false; // Check to see if the 2nd operand is a constant that we can encode directly @@ -1364,10 +1392,18 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, // TODO: Verify compares. case MVT::f32: isICmp = false; - CmpOpc = UseImm ? ARM::VCMPEZS : ARM::VCMPES; + // Equality comparisons shouldn't raise Invalid on uordered inputs. + if (isEquality) + CmpOpc = UseImm ? ARM::VCMPZS : ARM::VCMPS; + else + CmpOpc = UseImm ? ARM::VCMPEZS : ARM::VCMPES; break; case MVT::f64: isICmp = false; + // Equality comparisons shouldn't raise Invalid on uordered inputs. + if (isEquality) + CmpOpc = UseImm ? ARM::VCMPZD : ARM::VCMPD; + else CmpOpc = UseImm ? ARM::VCMPEZD : ARM::VCMPED; break; case MVT::i1: @@ -1444,7 +1480,8 @@ bool ARMFastISel::SelectCmp(const Instruction *I) { if (ARMPred == ARMCC::AL) return false; // Emit the compare. - if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) + if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(), + CI->isEquality())) return false; // Now set a register based on the comparison. Explicitly set the predicates @@ -1466,7 +1503,7 @@ bool ARMFastISel::SelectCmp(const Instruction *I) { bool ARMFastISel::SelectFPExt(const Instruction *I) { // Make sure we have VFP and that we're extending float to double. - if (!Subtarget->hasVFP2()) return false; + if (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP()) return false; Value *V = I->getOperand(0); if (!I->getType()->isDoubleTy() || @@ -1485,7 +1522,7 @@ bool ARMFastISel::SelectFPExt(const Instruction *I) { bool ARMFastISel::SelectFPTrunc(const Instruction *I) { // Make sure we have VFP and that we're truncating double to float. - if (!Subtarget->hasVFP2()) return false; + if (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP()) return false; Value *V = I->getOperand(0); if (!(I->getType()->isFloatTy() && @@ -1536,7 +1573,8 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) { unsigned Opc; if (Ty->isFloatTy()) Opc = isSigned ? ARM::VSITOS : ARM::VUITOS; - else if (Ty->isDoubleTy()) Opc = isSigned ? ARM::VSITOD : ARM::VUITOD; + else if (Ty->isDoubleTy() && !Subtarget->isFPOnlySP()) + Opc = isSigned ? ARM::VSITOD : ARM::VUITOD; else return false; unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT)); @@ -1561,7 +1599,8 @@ bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) { unsigned Opc; Type *OpTy = I->getOperand(0)->getType(); if (OpTy->isFloatTy()) Opc = isSigned ? ARM::VTOSIZS : ARM::VTOUIZS; - else if (OpTy->isDoubleTy()) Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD; + else if (OpTy->isDoubleTy() && !Subtarget->isFPOnlySP()) + Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD; else return false; // f64->s32/u32 or f32->s32/u32 both need an intermediate f32 reg. @@ -1596,7 +1635,7 @@ bool ARMFastISel::SelectSelect(const Instruction *I) { bool UseImm = false; bool isNegativeImm = false; if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(2))) { - assert (VT == MVT::i32 && "Expecting an i32."); + assert(VT == MVT::i32 && "Expecting an i32."); Imm = (int)ConstInt->getValue().getZExtValue(); if (Imm < 0) { isNegativeImm = true; @@ -1765,8 +1804,9 @@ bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) { // if we have them. // FIXME: It'd be nice to use NEON instructions. Type *Ty = I->getType(); - bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy()); - if (isFloat && !Subtarget->hasVFP2()) + if (Ty->isFloatTy() && !Subtarget->hasVFP2()) + return false; + if (Ty->isDoubleTy() && (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP())) return false; unsigned Opc; @@ -1926,7 +1966,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, case CCValAssign::SExt: { MVT DestVT = VA.getLocVT(); Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/false); - assert (Arg != 0 && "Failed to emit a sext"); + assert(Arg != 0 && "Failed to emit a sext"); ArgVT = DestVT; break; } @@ -1935,7 +1975,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, case CCValAssign::ZExt: { MVT DestVT = VA.getLocVT(); Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true); - assert (Arg != 0 && "Failed to emit a zext"); + assert(Arg != 0 && "Failed to emit a zext"); ArgVT = DestVT; break; } @@ -2230,7 +2270,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { DbgLoc, TII.get(CallOpc)); // BL / BLX don't take a predicate, but tBL / tBLX do. if (isThumb2) - AddDefaultPred(MIB); + MIB.add(predOps(ARMCC::AL)); if (Subtarget->genLongCalls()) MIB.addReg(CalleeReg); else @@ -2311,19 +2351,19 @@ bool ARMFastISel::SelectCall(const Instruction *I, break; ISD::ArgFlagsTy Flags; - unsigned AttrInd = i - CS.arg_begin() + 1; - if (CS.paramHasAttr(AttrInd, Attribute::SExt)) + unsigned ArgIdx = i - CS.arg_begin(); + if (CS.paramHasAttr(ArgIdx, Attribute::SExt)) Flags.setSExt(); - if (CS.paramHasAttr(AttrInd, Attribute::ZExt)) + if (CS.paramHasAttr(ArgIdx, Attribute::ZExt)) Flags.setZExt(); // FIXME: Only handle *easy* calls for now. - if (CS.paramHasAttr(AttrInd, Attribute::InReg) || - CS.paramHasAttr(AttrInd, Attribute::StructRet) || - CS.paramHasAttr(AttrInd, Attribute::SwiftSelf) || - CS.paramHasAttr(AttrInd, Attribute::SwiftError) || - CS.paramHasAttr(AttrInd, Attribute::Nest) || - CS.paramHasAttr(AttrInd, Attribute::ByVal)) + if (CS.paramHasAttr(ArgIdx, Attribute::InReg) || + CS.paramHasAttr(ArgIdx, Attribute::StructRet) || + CS.paramHasAttr(ArgIdx, Attribute::SwiftSelf) || + CS.paramHasAttr(ArgIdx, Attribute::SwiftError) || + CS.paramHasAttr(ArgIdx, Attribute::Nest) || + CS.paramHasAttr(ArgIdx, Attribute::ByVal)) return false; Type *ArgTy = (*i)->getType(); @@ -2373,7 +2413,7 @@ bool ARMFastISel::SelectCall(const Instruction *I, // ARM calls don't take a predicate, but tBL / tBLX do. if(isThumb2) - AddDefaultPred(MIB); + MIB.add(predOps(ARMCC::AL)); if (UseReg) MIB.addReg(CalleeReg); else if (!IntrMemName) @@ -2418,7 +2458,7 @@ bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src, else if (Len >= 2) VT = MVT::i16; else { - assert (Len == 1 && "Expected a length of 1!"); + assert(Len == 1 && "Expected a length of 1!"); VT = MVT::i8; } } else { @@ -2433,9 +2473,9 @@ bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src, bool RV; unsigned ResultReg; RV = ARMEmitLoad(VT, ResultReg, Src); - assert (RV == true && "Should be able to handle this load."); + assert(RV && "Should be able to handle this load."); RV = ARMEmitStore(VT, ResultReg, Dest); - assert (RV == true && "Should be able to handle this store."); + assert(RV && "Should be able to handle this store."); (void)RV; unsigned Size = VT.getSizeInBits()/8; @@ -2687,9 +2727,11 @@ unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, if (setsCPSR) MIB.addReg(ARM::CPSR, RegState::Define); SrcReg = constrainOperandRegClass(TII.get(Opcode), SrcReg, 1 + setsCPSR); - AddDefaultPred(MIB.addReg(SrcReg, isKill * RegState::Kill).addImm(ImmEnc)); + MIB.addReg(SrcReg, isKill * RegState::Kill) + .addImm(ImmEnc) + .add(predOps(ARMCC::AL)); if (hasS) - AddDefaultCC(MIB); + MIB.add(condCodeOp()); // Second instruction consumes the first's result. SrcReg = ResultReg; } @@ -2779,7 +2821,6 @@ bool ARMFastISel::SelectShift(const Instruction *I, // TODO: SoftFP support. bool ARMFastISel::fastSelectInstruction(const Instruction *I) { - switch (I->getOpcode()) { case Instruction::Load: return SelectLoad(I); @@ -2849,6 +2890,7 @@ bool ARMFastISel::fastSelectInstruction(const Instruction *I) { } namespace { + // This table describes sign- and zero-extend instructions which can be // folded into a preceding load. All of these extends have an immediate // (sometimes a mask and sometimes a shift) that's applied after @@ -2865,7 +2907,8 @@ const struct FoldableLoadExtendsStruct { { { ARM::SXTB, ARM::t2SXTB }, 0, 0, MVT::i8 }, { { ARM::UXTB, ARM::t2UXTB }, 0, 1, MVT::i8 } }; -} + +} // end anonymous namespace /// \brief The specified machine instr operand is a vreg, and that /// vreg is being provided by the specified load instruction. If possible, @@ -2933,7 +2976,7 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, .addConstantPoolIndex(Idx); if (Opc == ARM::LDRcp) MIB.addImm(0); - AddDefaultPred(MIB); + MIB.add(predOps(ARMCC::AL)); // Fix the address by adding pc. unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); @@ -2944,7 +2987,7 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, .addReg(TempReg) .addImm(ARMPCLabelIndex); if (!Subtarget->isThumb()) - AddDefaultPred(MIB); + MIB.add(predOps(ARMCC::AL)); if (UseGOT_PREL && Subtarget->isThumb()) { unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT)); @@ -3010,7 +3053,6 @@ bool ARMFastISel::fastLowerArguments() { } } - static const MCPhysReg GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; @@ -3035,6 +3077,7 @@ bool ARMFastISel::fastLowerArguments() { } namespace llvm { + FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) { if (funcInfo.MF->getSubtarget<ARMSubtarget>().useFastISel()) @@ -3042,4 +3085,5 @@ namespace llvm { return nullptr; } -} + +} // end namespace llvm diff --git a/contrib/llvm/lib/Target/ARM/ARMFeatures.h b/contrib/llvm/lib/Target/ARM/ARMFeatures.h index 0c910ab6130f..8c0df4c2cbf9 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFeatures.h +++ b/contrib/llvm/lib/Target/ARM/ARMFeatures.h @@ -19,10 +19,10 @@ namespace llvm { template<typename InstrType> // could be MachineInstr or MCInst -bool IsCPSRDead(InstrType *Instr); +bool IsCPSRDead(const InstrType *Instr); template<typename InstrType> // could be MachineInstr or MCInst -inline bool isV8EligibleForIT(InstrType *Instr) { +inline bool isV8EligibleForIT(const InstrType *Instr) { switch (Instr->getOpcode()) { default: return false; diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp index c72db8aca108..37be22bed540 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -16,19 +16,49 @@ #include "ARMBaseRegisterInfo.h" #include "ARMConstantPoolValue.h" #include "ARMMachineFunctionInfo.h" +#include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "MCTargetDesc/ARMBaseInfo.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/MC/MCAsmInfo.h" +#include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <iterator> +#include <utility> +#include <vector> #define DEBUG_TYPE "arm-frame-lowering" @@ -180,6 +210,7 @@ static bool WindowsRequiresStackProbe(const MachineFunction &MF, } namespace { + struct StackAdjustingInsts { struct InstInfo { MachineBasicBlock::iterator I; @@ -196,7 +227,8 @@ struct StackAdjustingInsts { } void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) { - auto Info = find_if(Insts, [&](InstInfo &Info) { return Info.I == I; }); + auto Info = + llvm::find_if(Insts, [&](InstInfo &Info) { return Info.I == I; }); assert(Info != Insts.end() && "invalid sp adjusting instruction"); Info->SPAdjust += ExtraBytes; } @@ -219,7 +251,8 @@ struct StackAdjustingInsts { } } }; -} + +} // end anonymous namespace /// Emit an instruction sequence that will align the address in /// register Reg by zero-ing out the lower bits. For versions of the @@ -252,35 +285,40 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI, // lsr Reg, Reg, log2(Alignment) // lsl Reg, Reg, log2(Alignment) if (CanUseBFC) { - AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg) - .addReg(Reg, RegState::Kill) - .addImm(~AlignMask)); + BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg) + .addReg(Reg, RegState::Kill) + .addImm(~AlignMask) + .add(predOps(ARMCC::AL)); } else if (AlignMask <= 255) { - AddDefaultCC( - AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg) - .addReg(Reg, RegState::Kill) - .addImm(AlignMask))); + BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg) + .addReg(Reg, RegState::Kill) + .addImm(AlignMask) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); } else { assert(!MustBeSingleInstruction && "Shouldn't call emitAligningInstructions demanding a single " "instruction to be emitted for large stack alignment for a target " "without BFC."); - AddDefaultCC(AddDefaultPred( - BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg) - .addReg(Reg, RegState::Kill) - .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero)))); - AddDefaultCC(AddDefaultPred( - BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg) - .addReg(Reg, RegState::Kill) - .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero)))); + BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg) + .addReg(Reg, RegState::Kill) + .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero)) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); + BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg) + .addReg(Reg, RegState::Kill) + .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero)) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); } } else { // Since this is only reached for Thumb-2 targets, the BFC instruction // should always be available. assert(CanUseBFC); - AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg) - .addReg(Reg, RegState::Kill) - .addImm(~AlignMask)); + BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg) + .addReg(Reg, RegState::Kill) + .addImm(~AlignMask) + .add(predOps(ARMCC::AL)); } } @@ -448,9 +486,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, uint32_t NumWords = NumBytes >> 2; if (NumWords < 65536) - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4) - .addImm(NumWords) - .setMIFlags(MachineInstr::FrameSetup)); + BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4) + .addImm(NumWords) + .setMIFlags(MachineInstr::FrameSetup) + .add(predOps(ARMCC::AL)); else BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R4) .addImm(NumWords) @@ -462,10 +501,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, case CodeModel::Default: case CodeModel::Kernel: BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL)) - .addImm((unsigned)ARMCC::AL).addReg(0) - .addExternalSymbol("__chkstk") - .addReg(ARM::R4, RegState::Implicit) - .setMIFlags(MachineInstr::FrameSetup); + .add(predOps(ARMCC::AL)) + .addExternalSymbol("__chkstk") + .addReg(ARM::R4, RegState::Implicit) + .setMIFlags(MachineInstr::FrameSetup); break; case CodeModel::Large: case CodeModel::JITDefault: @@ -474,18 +513,19 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, .setMIFlags(MachineInstr::FrameSetup); BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr)) - .addImm((unsigned)ARMCC::AL).addReg(0) - .addReg(ARM::R12, RegState::Kill) - .addReg(ARM::R4, RegState::Implicit) - .setMIFlags(MachineInstr::FrameSetup); + .add(predOps(ARMCC::AL)) + .addReg(ARM::R12, RegState::Kill) + .addReg(ARM::R4, RegState::Implicit) + .setMIFlags(MachineInstr::FrameSetup); break; } - AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), - ARM::SP) - .addReg(ARM::SP, RegState::Kill) - .addReg(ARM::R4, RegState::Kill) - .setMIFlags(MachineInstr::FrameSetup))); + BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP) + .addReg(ARM::SP, RegState::Kill) + .addReg(ARM::R4, RegState::Kill) + .setMIFlags(MachineInstr::FrameSetup) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); NumBytes = 0; } @@ -657,12 +697,14 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, // -- out lower bits in r4 // mov sp, r4 // FIXME: It will be better just to find spare register here. - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4) - .addReg(ARM::SP, RegState::Kill)); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4) + .addReg(ARM::SP, RegState::Kill) + .add(predOps(ARMCC::AL)); emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign, false); - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) - .addReg(ARM::R4, RegState::Kill)); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) + .addReg(ARM::R4, RegState::Kill) + .add(predOps(ARMCC::AL)); } AFI->setShouldRestoreSPFromFP(true); @@ -675,14 +717,14 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, // FIXME: Clarify FrameSetup flags here. if (RegInfo->hasBasePointer(MF)) { if (isARM) - BuildMI(MBB, MBBI, dl, - TII.get(ARM::MOVr), RegInfo->getBaseRegister()) - .addReg(ARM::SP) - .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); + BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), RegInfo->getBaseRegister()) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); else - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), - RegInfo->getBaseRegister()) - .addReg(ARM::SP)); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), RegInfo->getBaseRegister()) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)); } // If the frame has variable sized objects then the epilogue must restore @@ -757,19 +799,21 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, "No scratch register to restore SP from FP!"); emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes, ARMCC::AL, 0, TII); - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), - ARM::SP) - .addReg(ARM::R4)); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) + .addReg(ARM::R4) + .add(predOps(ARMCC::AL)); } } else { // Thumb2 or ARM. if (isARM) BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP) - .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); + .addReg(FramePtr) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); else - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), - ARM::SP) - .addReg(FramePtr)); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) + .addReg(FramePtr) + .add(predOps(ARMCC::AL)); } } else if (NumBytes && !tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes)) @@ -829,7 +873,7 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF, // When dynamically realigning the stack, use the frame pointer for // parameters, and the stack/base pointer for locals. if (RegInfo->needsStackRealignment(MF)) { - assert (hasFP(MF) && "dynamic stack realignment without a FP!"); + assert(hasFP(MF) && "dynamic stack realignment without a FP!"); if (isFixed) { FrameReg = RegInfo->getFrameRegister(MF); Offset = FPOffset; @@ -936,18 +980,19 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, }); if (Regs.size() > 1 || StrOpc== 0) { - MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP) - .addReg(ARM::SP).setMIFlags(MIFlags)); + MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP) + .addReg(ARM::SP) + .setMIFlags(MIFlags) + .add(predOps(ARMCC::AL)); for (unsigned i = 0, e = Regs.size(); i < e; ++i) MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second)); } else if (Regs.size() == 1) { - MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc), - ARM::SP) - .addReg(Regs[0].first, getKillRegState(Regs[0].second)) - .addReg(ARM::SP).setMIFlags(MIFlags) - .addImm(-4); - AddDefaultPred(MIB); + BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP) + .addReg(Regs[0].first, getKillRegState(Regs[0].second)) + .addReg(ARM::SP) + .setMIFlags(MIFlags) + .addImm(-4) + .add(predOps(ARMCC::AL)); } Regs.clear(); @@ -1027,9 +1072,9 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, }); if (Regs.size() > 1 || LdrOpc == 0) { - MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP) - .addReg(ARM::SP)); + MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)); for (unsigned i = 0, e = Regs.size(); i < e; ++i) MIB.addReg(Regs[i], getDefRegState(true)); if (DeleteRet && MI != MBB.end()) { @@ -1053,7 +1098,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift)); } else MIB.addImm(4); - AddDefaultPred(MIB); + MIB.add(predOps(ARMCC::AL)); } Regs.clear(); @@ -1114,9 +1159,11 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, // sub r4, sp, #numregs * 8 // The immediate is <= 64, so it doesn't need any special encoding. unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri; - AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4) - .addReg(ARM::SP) - .addImm(8 * NumAlignedDPRCS2Regs))); + BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4) + .addReg(ARM::SP) + .addImm(8 * NumAlignedDPRCS2Regs) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); unsigned MaxAlign = MF.getFrameInfo().getMaxAlignment(); // We must set parameter MustBeSingleInstruction to true, since @@ -1132,10 +1179,10 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, // Leave r4 live, it is used below. Opc = isThumb ? ARM::tMOVr : ARM::MOVr; MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP) - .addReg(ARM::R4); - MIB = AddDefaultPred(MIB); + .addReg(ARM::R4) + .add(predOps(ARMCC::AL)); if (!isThumb) - AddDefaultCC(MIB); + MIB.add(condCodeOp()); // Now spill NumAlignedDPRCS2Regs registers starting from d8. // r4 holds the stack slot address. @@ -1147,11 +1194,12 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QQPRRegClass); MBB.addLiveIn(SupReg); - AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), - ARM::R4) - .addReg(ARM::R4, RegState::Kill).addImm(16) - .addReg(NextReg) - .addReg(SupReg, RegState::ImplicitKill)); + BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), ARM::R4) + .addReg(ARM::R4, RegState::Kill) + .addImm(16) + .addReg(NextReg) + .addReg(SupReg, RegState::ImplicitKill) + .add(predOps(ARMCC::AL)); NextReg += 4; NumAlignedDPRCS2Regs -= 4; } @@ -1165,9 +1213,12 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QQPRRegClass); MBB.addLiveIn(SupReg); - AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q)) - .addReg(ARM::R4).addImm(16).addReg(NextReg) - .addReg(SupReg, RegState::ImplicitKill)); + BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q)) + .addReg(ARM::R4) + .addImm(16) + .addReg(NextReg) + .addReg(SupReg, RegState::ImplicitKill) + .add(predOps(ARMCC::AL)); NextReg += 4; NumAlignedDPRCS2Regs -= 4; } @@ -1177,8 +1228,11 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QPRRegClass); MBB.addLiveIn(SupReg); - AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64)) - .addReg(ARM::R4).addImm(16).addReg(SupReg)); + BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64)) + .addReg(ARM::R4) + .addImm(16) + .addReg(SupReg) + .add(predOps(ARMCC::AL)); NextReg += 2; NumAlignedDPRCS2Regs -= 2; } @@ -1187,9 +1241,11 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, if (NumAlignedDPRCS2Regs) { MBB.addLiveIn(NextReg); // vstr.64 uses addrmode5 which has an offset scale of 4. - AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD)) - .addReg(NextReg) - .addReg(ARM::R4).addImm((NextReg-R4BaseReg)*2)); + BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD)) + .addReg(NextReg) + .addReg(ARM::R4) + .addImm((NextReg - R4BaseReg) * 2) + .add(predOps(ARMCC::AL)); } // The last spill instruction inserted should kill the scratch register r4. @@ -1254,8 +1310,11 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB, assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1"); unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri; - AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4) - .addFrameIndex(D8SpillFI).addImm(0))); + BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4) + .addFrameIndex(D8SpillFI) + .addImm(0) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); // Now restore NumAlignedDPRCS2Regs registers starting from d8. unsigned NextReg = ARM::D8; @@ -1264,10 +1323,12 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB, if (NumAlignedDPRCS2Regs >= 6) { unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QQPRRegClass); - AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg) - .addReg(ARM::R4, RegState::Define) - .addReg(ARM::R4, RegState::Kill).addImm(16) - .addReg(SupReg, RegState::ImplicitDefine)); + BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg) + .addReg(ARM::R4, RegState::Define) + .addReg(ARM::R4, RegState::Kill) + .addImm(16) + .addReg(SupReg, RegState::ImplicitDefine) + .add(predOps(ARMCC::AL)); NextReg += 4; NumAlignedDPRCS2Regs -= 4; } @@ -1280,9 +1341,11 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB, if (NumAlignedDPRCS2Regs >= 4) { unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QQPRRegClass); - AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg) - .addReg(ARM::R4).addImm(16) - .addReg(SupReg, RegState::ImplicitDefine)); + BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg) + .addReg(ARM::R4) + .addImm(16) + .addReg(SupReg, RegState::ImplicitDefine) + .add(predOps(ARMCC::AL)); NextReg += 4; NumAlignedDPRCS2Regs -= 4; } @@ -1291,16 +1354,20 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB, if (NumAlignedDPRCS2Regs >= 2) { unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QPRRegClass); - AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg) - .addReg(ARM::R4).addImm(16)); + BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg) + .addReg(ARM::R4) + .addImm(16) + .add(predOps(ARMCC::AL)); NextReg += 2; NumAlignedDPRCS2Regs -= 2; } // Finally, use a vanilla vldr.64 for the remaining odd register. if (NumAlignedDPRCS2Regs) - AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg) - .addReg(ARM::R4).addImm(2*(NextReg-R4BaseReg))); + BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg) + .addReg(ARM::R4) + .addImm(2 * (NextReg - R4BaseReg)) + .add(predOps(ARMCC::AL)); // Last store kills r4. std::prev(MI)->addRegisterKilled(ARM::R4, TRI); @@ -1633,13 +1700,14 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, // worth the effort and added fragility? unsigned EstimatedStackSize = MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills); - if (hasFP(MF)) { + bool HasFP = hasFP(MF); + if (HasFP) { if (AFI->hasStackFrame()) EstimatedStackSize += 4; } else { // If FP is not used, SP will be used to access arguments, so count the // size of arguments into the estimation. - EstimatedStackSize += MF.getInfo<ARMFunctionInfo>()->getArgumentStackSize(); + EstimatedStackSize += AFI->getArgumentStackSize(); } EstimatedStackSize += 16; // For possible paddings. @@ -1650,7 +1718,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) { AFI->setHasStackFrame(true); - if (hasFP(MF)) { + if (HasFP) { SavedRegs.set(FramePtr); // If the frame pointer is required by the ABI, also spill LR so that we // emit a complete frame record. @@ -1658,11 +1726,11 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, SavedRegs.set(ARM::LR); LRSpilled = true; NumGPRSpills++; - auto LRPos = find(UnspilledCS1GPRs, ARM::LR); + auto LRPos = llvm::find(UnspilledCS1GPRs, ARM::LR); if (LRPos != UnspilledCS1GPRs.end()) UnspilledCS1GPRs.erase(LRPos); } - auto FPPos = find(UnspilledCS1GPRs, FramePtr); + auto FPPos = llvm::find(UnspilledCS1GPRs, FramePtr); if (FPPos != UnspilledCS1GPRs.end()) UnspilledCS1GPRs.erase(FPPos); NumGPRSpills++; @@ -1721,7 +1789,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, } // r7 can be used if it is not being used as the frame pointer. - if (!hasFP(MF)) { + if (!HasFP) { if (SavedRegs.test(ARM::R7)) { --RegDeficit; DEBUG(dbgs() << "%R7 is saved low register, RegDeficit = " @@ -1773,7 +1841,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, NumGPRSpills++; CS1Spilled = true; ExtraCSSpill = true; - UnspilledCS1GPRs.erase(find(UnspilledCS1GPRs, Reg)); + UnspilledCS1GPRs.erase(llvm::find(UnspilledCS1GPRs, Reg)); if (Reg == ARM::LR) LRSpilled = true; } @@ -1786,7 +1854,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, SavedRegs.set(ARM::LR); NumGPRSpills++; SmallVectorImpl<unsigned>::iterator LRPos; - LRPos = find(UnspilledCS1GPRs, (unsigned)ARM::LR); + LRPos = llvm::find(UnspilledCS1GPRs, (unsigned)ARM::LR); if (LRPos != UnspilledCS1GPRs.end()) UnspilledCS1GPRs.erase(LRPos); @@ -2081,12 +2149,17 @@ void ARMFrameLowering::adjustForSegmentedStacks( // SR1: Scratch Register #1 // push {SR0, SR1} if (Thumb) { - AddDefaultPred(BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH))) - .addReg(ScratchReg0).addReg(ScratchReg1); + BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH)) + .add(predOps(ARMCC::AL)) + .addReg(ScratchReg0) + .addReg(ScratchReg1); } else { - AddDefaultPred(BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD)) - .addReg(ARM::SP, RegState::Define).addReg(ARM::SP)) - .addReg(ScratchReg0).addReg(ScratchReg1); + BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD)) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)) + .addReg(ScratchReg0) + .addReg(ScratchReg1); } // Emit the relevant DWARF information about the change in stack pointer as @@ -2106,21 +2179,29 @@ void ARMFrameLowering::adjustForSegmentedStacks( // mov SR1, sp if (Thumb) { - AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1) - .addReg(ARM::SP)); + BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)); } else if (CompareStackPointer) { - AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1) - .addReg(ARM::SP)).addReg(0); + BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); } // sub SR1, sp, #StackSize if (!CompareStackPointer && Thumb) { - AddDefaultPred( - AddDefaultCC(BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)) - .addReg(ScratchReg1).addImm(AlignedStackSize)); + BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1) + .add(condCodeOp()) + .addReg(ScratchReg1) + .addImm(AlignedStackSize) + .add(predOps(ARMCC::AL)); } else if (!CompareStackPointer) { - AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1) - .addReg(ARM::SP).addImm(AlignedStackSize)).addReg(0); + BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1) + .addReg(ARM::SP) + .addImm(AlignedStackSize) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); } if (Thumb && ST->isThumb1Only()) { @@ -2131,21 +2212,25 @@ void ARMFrameLowering::adjustForSegmentedStacks( unsigned CPI = MCP->getConstantPoolIndex(NewCPV, 4); // ldr SR0, [pc, offset(STACK_LIMIT)] - AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0) - .addConstantPoolIndex(CPI)); + BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0) + .addConstantPoolIndex(CPI) + .add(predOps(ARMCC::AL)); // ldr SR0, [SR0] - AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0) - .addReg(ScratchReg0).addImm(0)); + BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0) + .addReg(ScratchReg0) + .addImm(0) + .add(predOps(ARMCC::AL)); } else { // Get TLS base address from the coprocessor // mrc p15, #0, SR0, c13, c0, #3 - AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::MRC), ScratchReg0) - .addImm(15) - .addImm(0) - .addImm(13) - .addImm(0) - .addImm(3)); + BuildMI(McrMBB, DL, TII.get(ARM::MRC), ScratchReg0) + .addImm(15) + .addImm(0) + .addImm(13) + .addImm(0) + .addImm(3) + .add(predOps(ARMCC::AL)); // Use the last tls slot on android and a private field of the TCP on linux. assert(ST->isTargetAndroid() || ST->isTargetLinux()); @@ -2153,16 +2238,19 @@ void ARMFrameLowering::adjustForSegmentedStacks( // Get the stack limit from the right offset // ldr SR0, [sr0, #4 * TlsOffset] - AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::LDRi12), ScratchReg0) - .addReg(ScratchReg0).addImm(4 * TlsOffset)); + BuildMI(GetMBB, DL, TII.get(ARM::LDRi12), ScratchReg0) + .addReg(ScratchReg0) + .addImm(4 * TlsOffset) + .add(predOps(ARMCC::AL)); } // Compare stack limit with stack size requested. // cmp SR0, SR1 Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr; - AddDefaultPred(BuildMI(GetMBB, DL, TII.get(Opcode)) - .addReg(ScratchReg0) - .addReg(ScratchReg1)); + BuildMI(GetMBB, DL, TII.get(Opcode)) + .addReg(ScratchReg0) + .addReg(ScratchReg1) + .add(predOps(ARMCC::AL)); // This jump is taken if StackLimit < SP - stack required. Opcode = Thumb ? ARM::tBcc : ARM::Bcc; @@ -2178,32 +2266,40 @@ void ARMFrameLowering::adjustForSegmentedStacks( // Pass first argument for the __morestack by Scratch Register #0. // The amount size of stack required if (Thumb) { - AddDefaultPred(AddDefaultCC(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), - ScratchReg0)).addImm(AlignedStackSize)); + BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0) + .add(condCodeOp()) + .addImm(AlignedStackSize) + .add(predOps(ARMCC::AL)); } else { - AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0) - .addImm(AlignedStackSize)).addReg(0); + BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0) + .addImm(AlignedStackSize) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); } // Pass second argument for the __morestack by Scratch Register #1. // The amount size of stack consumed to save function arguments. if (Thumb) { - AddDefaultPred( - AddDefaultCC(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)) - .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))); + BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1) + .add(condCodeOp()) + .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())) + .add(predOps(ARMCC::AL)); } else { - AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1) - .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))) - .addReg(0); + BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1) + .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); } // push {lr} - Save return address of this function. if (Thumb) { - AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH))) + BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH)) + .add(predOps(ARMCC::AL)) .addReg(ARM::LR); } else { - AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD)) - .addReg(ARM::SP, RegState::Define) - .addReg(ARM::SP)) + BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD)) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)) .addReg(ARM::LR); } @@ -2220,7 +2316,8 @@ void ARMFrameLowering::adjustForSegmentedStacks( // Call __morestack(). if (Thumb) { - AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tBL))) + BuildMI(AllocMBB, DL, TII.get(ARM::tBL)) + .add(predOps(ARMCC::AL)) .addExternalSymbol("__morestack"); } else { BuildMI(AllocMBB, DL, TII.get(ARM::BL)) @@ -2230,22 +2327,26 @@ void ARMFrameLowering::adjustForSegmentedStacks( // pop {lr} - Restore return address of this original function. if (Thumb) { if (ST->isThumb1Only()) { - AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))) - .addReg(ScratchReg0); - AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR) - .addReg(ScratchReg0)); + BuildMI(AllocMBB, DL, TII.get(ARM::tPOP)) + .add(predOps(ARMCC::AL)) + .addReg(ScratchReg0); + BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR) + .addReg(ScratchReg0) + .add(predOps(ARMCC::AL)); } else { - AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST)) - .addReg(ARM::LR, RegState::Define) - .addReg(ARM::SP, RegState::Define) - .addReg(ARM::SP) - .addImm(4)); + BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST)) + .addReg(ARM::LR, RegState::Define) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::SP) + .addImm(4) + .add(predOps(ARMCC::AL)); } } else { - AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD)) - .addReg(ARM::SP, RegState::Define) - .addReg(ARM::SP)) - .addReg(ARM::LR); + BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD)) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)) + .addReg(ARM::LR); } // Restore SR0 and SR1 in case of __morestack() was called. @@ -2253,15 +2354,17 @@ void ARMFrameLowering::adjustForSegmentedStacks( // scratch registers from here. // pop {SR0, SR1} if (Thumb) { - AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))) - .addReg(ScratchReg0) - .addReg(ScratchReg1); + BuildMI(AllocMBB, DL, TII.get(ARM::tPOP)) + .add(predOps(ARMCC::AL)) + .addReg(ScratchReg0) + .addReg(ScratchReg1); } else { - AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD)) - .addReg(ARM::SP, RegState::Define) - .addReg(ARM::SP)) - .addReg(ScratchReg0) - .addReg(ScratchReg1); + BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD)) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)) + .addReg(ScratchReg0) + .addReg(ScratchReg1); } // Update the CFA offset now that we've popped @@ -2271,20 +2374,22 @@ void ARMFrameLowering::adjustForSegmentedStacks( // bx lr - Return from this function. Opcode = Thumb ? ARM::tBX_RET : ARM::BX_RET; - AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(Opcode))); + BuildMI(AllocMBB, DL, TII.get(Opcode)).add(predOps(ARMCC::AL)); // Restore SR0 and SR1 in case of __morestack() was not called. // pop {SR0, SR1} if (Thumb) { - AddDefaultPred(BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP))) - .addReg(ScratchReg0) - .addReg(ScratchReg1); + BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP)) + .add(predOps(ARMCC::AL)) + .addReg(ScratchReg0) + .addReg(ScratchReg1); } else { - AddDefaultPred(BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD)) - .addReg(ARM::SP, RegState::Define) - .addReg(ARM::SP)) - .addReg(ScratchReg0) - .addReg(ScratchReg1); + BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD)) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)) + .addReg(ScratchReg0) + .addReg(ScratchReg1); } // Update the CFA offset now that we've popped diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index c3e9591d5c70..b07b4e1f5cfb 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -244,11 +244,8 @@ private: bool tryInlineAsm(SDNode *N); - void SelectConcatVector(SDNode *N); void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI); - bool trySMLAWSMULW(SDNode *N); - void SelectCMP_SWAP(SDNode *N); /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for @@ -2559,141 +2556,6 @@ bool ARMDAGToDAGISel::tryABSOp(SDNode *N){ return false; } -static bool SearchSignedMulShort(SDValue SignExt, unsigned *Opc, SDValue &Src1, - bool Accumulate) { - // For SM*WB, we need to some form of sext. - // For SM*WT, we need to search for (sra X, 16) - // Src1 then gets set to X. - if ((SignExt.getOpcode() == ISD::SIGN_EXTEND || - SignExt.getOpcode() == ISD::SIGN_EXTEND_INREG || - SignExt.getOpcode() == ISD::AssertSext) && - SignExt.getValueType() == MVT::i32) { - - *Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB; - Src1 = SignExt.getOperand(0); - return true; - } - - if (SignExt.getOpcode() != ISD::SRA) - return false; - - ConstantSDNode *SRASrc1 = dyn_cast<ConstantSDNode>(SignExt.getOperand(1)); - if (!SRASrc1 || SRASrc1->getZExtValue() != 16) - return false; - - SDValue Op0 = SignExt.getOperand(0); - - // The sign extend operand for SM*WB could be generated by a shl and ashr. - if (Op0.getOpcode() == ISD::SHL) { - SDValue SHL = Op0; - ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1)); - if (!SHLSrc1 || SHLSrc1->getZExtValue() != 16) - return false; - - *Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB; - Src1 = Op0.getOperand(0); - return true; - } - *Opc = Accumulate ? ARM::SMLAWT : ARM::SMULWT; - Src1 = SignExt.getOperand(0); - return true; -} - -static bool SearchSignedMulLong(SDValue OR, unsigned *Opc, SDValue &Src0, - SDValue &Src1, bool Accumulate) { - // First we look for: - // (add (or (srl ?, 16), (shl ?, 16))) - if (OR.getOpcode() != ISD::OR) - return false; - - SDValue SRL = OR.getOperand(0); - SDValue SHL = OR.getOperand(1); - - if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) { - SRL = OR.getOperand(1); - SHL = OR.getOperand(0); - if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) - return false; - } - - ConstantSDNode *SRLSrc1 = dyn_cast<ConstantSDNode>(SRL.getOperand(1)); - ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1)); - if (!SRLSrc1 || !SHLSrc1 || SRLSrc1->getZExtValue() != 16 || - SHLSrc1->getZExtValue() != 16) - return false; - - // The first operands to the shifts need to be the two results from the - // same smul_lohi node. - if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) || - SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI) - return false; - - SDNode *SMULLOHI = SRL.getOperand(0).getNode(); - if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) || - SHL.getOperand(0) != SDValue(SMULLOHI, 1)) - return false; - - // Now we have: - // (add (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16))) - // For SMLAW[B|T] smul_lohi will take a 32-bit and a 16-bit arguments. - // For SMLAWB the 16-bit value will signed extended somehow. - // For SMLAWT only the SRA is required. - - // Check both sides of SMUL_LOHI - if (SearchSignedMulShort(SMULLOHI->getOperand(0), Opc, Src1, Accumulate)) { - Src0 = SMULLOHI->getOperand(1); - } else if (SearchSignedMulShort(SMULLOHI->getOperand(1), Opc, Src1, - Accumulate)) { - Src0 = SMULLOHI->getOperand(0); - } else { - return false; - } - return true; -} - -bool ARMDAGToDAGISel::trySMLAWSMULW(SDNode *N) { - if (!Subtarget->hasV6Ops() || - (Subtarget->isThumb() && !Subtarget->hasThumb2())) - return false; - - SDLoc dl(N); - SDValue Src0 = N->getOperand(0); - SDValue Src1 = N->getOperand(1); - SDValue A, B; - unsigned Opc = 0; - - if (N->getOpcode() == ISD::ADD) { - if (Src0.getOpcode() != ISD::OR && Src1.getOpcode() != ISD::OR) - return false; - - SDValue Acc; - if (SearchSignedMulLong(Src0, &Opc, A, B, true)) { - Acc = Src1; - } else if (SearchSignedMulLong(Src1, &Opc, A, B, true)) { - Acc = Src0; - } else { - return false; - } - if (Opc == 0) - return false; - - SDValue Ops[] = { A, B, Acc, getAL(CurDAG, dl), - CurDAG->getRegister(0, MVT::i32) }; - CurDAG->SelectNodeTo(N, Opc, MVT::i32, MVT::Other, Ops); - return true; - } else if (N->getOpcode() == ISD::OR && - SearchSignedMulLong(SDValue(N, 0), &Opc, A, B, false)) { - if (Opc == 0) - return false; - - SDValue Ops[] = { A, B, getAL(CurDAG, dl), - CurDAG->getRegister(0, MVT::i32)}; - CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); - return true; - } - return false; -} - /// We've got special pseudo-instructions for these void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) { unsigned Opcode; @@ -2722,15 +2584,6 @@ void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) { CurDAG->RemoveDeadNode(N); } -void ARMDAGToDAGISel::SelectConcatVector(SDNode *N) { - // The only time a CONCAT_VECTORS operation can have legal types is when - // two 64-bit vectors are concatenated to a 128-bit vector. - EVT VT = N->getValueType(0); - if (!VT.is128BitVector() || N->getNumOperands() != 2) - llvm_unreachable("unexpected CONCAT_VECTORS"); - ReplaceNode(N, createDRegPairNode(VT, N->getOperand(0), N->getOperand(1))); -} - static Optional<std::pair<unsigned, unsigned>> getContiguousRangeOfSetBits(const APInt &A) { unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1; @@ -2822,11 +2675,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) { switch (N->getOpcode()) { default: break; - case ISD::ADD: - case ISD::OR: - if (trySMLAWSMULW(N)) - return; - break; case ISD::WRITE_REGISTER: if (tryWriteRegister(N)) return; @@ -3042,49 +2890,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) { break; } - case ARMISD::VMOVRRD: - ReplaceNode(N, CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32, - N->getOperand(0), getAL(CurDAG, dl), - CurDAG->getRegister(0, MVT::i32))); - return; - case ISD::UMUL_LOHI: { - if (Subtarget->isThumb1Only()) - break; - if (Subtarget->isThumb()) { - SDValue Ops[] = { N->getOperand(0), N->getOperand(1), - getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) }; - ReplaceNode( - N, CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops)); - return; - } else { - SDValue Ops[] = { N->getOperand(0), N->getOperand(1), - getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), - CurDAG->getRegister(0, MVT::i32) }; - ReplaceNode(N, CurDAG->getMachineNode( - Subtarget->hasV6Ops() ? ARM::UMULL : ARM::UMULLv5, dl, - MVT::i32, MVT::i32, Ops)); - return; - } - } - case ISD::SMUL_LOHI: { - if (Subtarget->isThumb1Only()) - break; - if (Subtarget->isThumb()) { - SDValue Ops[] = { N->getOperand(0), N->getOperand(1), - getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) }; - ReplaceNode( - N, CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops)); - return; - } else { - SDValue Ops[] = { N->getOperand(0), N->getOperand(1), - getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), - CurDAG->getRegister(0, MVT::i32) }; - ReplaceNode(N, CurDAG->getMachineNode( - Subtarget->hasV6Ops() ? ARM::SMULL : ARM::SMULLv5, dl, - MVT::i32, MVT::i32, Ops)); - return; - } - } case ARMISD::UMAAL: { unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL; SDValue Ops[] = { N->getOperand(0), N->getOperand(1), @@ -3095,38 +2900,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) { return; } case ARMISD::UMLAL:{ - // UMAAL is similar to UMLAL but it adds two 32-bit values to the - // 64-bit multiplication result. - if (Subtarget->hasV6Ops() && Subtarget->hasDSP() && - N->getOperand(2).getOpcode() == ARMISD::ADDC && - N->getOperand(3).getOpcode() == ARMISD::ADDE) { - - SDValue Addc = N->getOperand(2); - SDValue Adde = N->getOperand(3); - - if (Adde.getOperand(2).getNode() == Addc.getNode()) { - - ConstantSDNode *Op0 = dyn_cast<ConstantSDNode>(Adde.getOperand(0)); - ConstantSDNode *Op1 = dyn_cast<ConstantSDNode>(Adde.getOperand(1)); - - if (Op0 && Op1 && Op0->getZExtValue() == 0 && Op1->getZExtValue() == 0) - { - // Select UMAAL instead: UMAAL RdLo, RdHi, Rn, Rm - // RdLo = one operand to be added, lower 32-bits of res - // RdHi = other operand to be added, upper 32-bits of res - // Rn = first multiply operand - // Rm = second multiply operand - SDValue Ops[] = { N->getOperand(0), N->getOperand(1), - Addc.getOperand(0), Addc.getOperand(1), - getAL(CurDAG, dl), - CurDAG->getRegister(0, MVT::i32) }; - unsigned opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL; - CurDAG->SelectNodeTo(N, opc, MVT::i32, MVT::i32, Ops); - return; - } - } - } - if (Subtarget->isThumb()) { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3), getAL(CurDAG, dl), @@ -3277,26 +3050,23 @@ void ARMDAGToDAGISel::Select(SDNode *N) { int64_t Addend = -C->getSExtValue(); SDNode *Add = nullptr; - // In T2 mode, ADDS can be better than CMN if the immediate fits in a + // ADDS can be better than CMN if the immediate fits in a // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3. // Outside that range we can just use a CMN which is 32-bit but has a // 12-bit immediate range. - if (Subtarget->isThumb2() && Addend < 1<<8) { - SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32), - getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), - CurDAG->getRegister(0, MVT::i32) }; - Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops); - } else if (!Subtarget->isThumb2() && Addend < 1<<8) { - // FIXME: Add T1 tADDi8 code. - SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X, - CurDAG->getTargetConstant(Addend, dl, MVT::i32), - getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; - Add = CurDAG->getMachineNode(ARM::tADDi8, dl, MVT::i32, Ops); - } else if (!Subtarget->isThumb2() && Addend < 1<<3) { - SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X, - CurDAG->getTargetConstant(Addend, dl, MVT::i32), - getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; - Add = CurDAG->getMachineNode(ARM::tADDi3, dl, MVT::i32, Ops); + if (Addend < 1<<8) { + if (Subtarget->isThumb2()) { + SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32), + getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), + CurDAG->getRegister(0, MVT::i32) }; + Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops); + } else { + unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8; + SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X, + CurDAG->getTargetConstant(Addend, dl, MVT::i32), + getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; + Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); + } } if (Add) { SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)}; @@ -4013,10 +3783,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) { return; } - case ISD::CONCAT_VECTORS: - SelectConcatVector(N); - return; - case ISD::ATOMIC_CMP_SWAP: SelectCMP_SWAP(N); return; @@ -4123,11 +3889,10 @@ static inline int getMClassRegisterSYSmValueMask(StringRef RegString) { // The flags here are common to those allowed for apsr in the A class cores and // those allowed for the special registers in the M class cores. Returns a // value representing which flags were present, -1 if invalid. -static inline int getMClassFlagsMask(StringRef Flags, bool hasDSP) { - if (Flags.empty()) - return 0x2 | (int)hasDSP; - +static inline int getMClassFlagsMask(StringRef Flags) { return StringSwitch<int>(Flags) + .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is + // correct when flags are not permitted .Case("g", 0x1) .Case("nzcvq", 0x2) .Case("nzcvqg", 0x3) @@ -4170,7 +3935,7 @@ static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead, } // We know we are now handling a write so need to get the mask for the flags. - int Mask = getMClassFlagsMask(Flags, Subtarget->hasDSP()); + int Mask = getMClassFlagsMask(Flags); // Only apsr, iapsr, eapsr, xpsr can have flags. The other register values // shouldn't have flags present. @@ -4185,10 +3950,7 @@ static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead, // The register was valid so need to put the mask in the correct place // (the flags need to be in bits 11-10) and combine with the SYSmvalue to // construct the operand for the instruction node. - if (SYSmvalue < 0x4) - return SYSmvalue | Mask << 10; - - return SYSmvalue; + return SYSmvalue | Mask << 10; } static int getARClassRegisterMask(StringRef Reg, StringRef Flags) { @@ -4201,7 +3963,7 @@ static int getARClassRegisterMask(StringRef Reg, StringRef Flags) { // The flags permitted for apsr are the same flags that are allowed in // M class registers. We get the flag value and then shift the flags into // the correct place to combine with the mask. - Mask = getMClassFlagsMask(Flags, true); + Mask = getMClassFlagsMask(Flags); if (Mask == -1) return -1; return Mask << 2; diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp index 0f84a2359160..e697c8ca5339 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -12,47 +12,101 @@ // //===----------------------------------------------------------------------===// -#include "ARMISelLowering.h" +#include "ARMBaseInstrInfo.h" +#include "ARMBaseRegisterInfo.h" #include "ARMCallingConv.h" #include "ARMConstantPoolValue.h" +#include "ARMISelLowering.h" #include "ARMMachineFunctionInfo.h" #include "ARMPerfectShuffle.h" +#include "ARMRegisterInfo.h" +#include "ARMSelectionDAGInfo.h" #include "ARMSubtarget.h" -#include "ARMTargetMachine.h" -#include "ARMTargetObjectFile.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "MCTargetDesc/ARMBaseInfo.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" -#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Type.h" -#include "llvm/MC/MCSectionMachO.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSchedule.h" +#include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <cstdlib> +#include <iterator> +#include <limits> +#include <tuple> +#include <string> #include <utility> +#include <vector> + using namespace llvm; #define DEBUG_TYPE "arm-isel" @@ -82,21 +136,6 @@ static cl::opt<unsigned> ConstpoolPromotionMaxTotal( cl::desc("Maximum size of ALL constants to promote into a constant pool"), cl::init(128)); -namespace { - class ARMCCState : public CCState { - public: - ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF, - SmallVectorImpl<CCValAssign> &locs, LLVMContext &C, - ParmContext PC) - : CCState(CC, isVarArg, MF, locs, C) { - assert(((PC == Call) || (PC == Prologue)) && - "ARMCCState users must specify whether their context is call" - "or prologue generation."); - CallOrPrologue = PC; - } - }; -} - // The APCS parameter registers. static const MCPhysReg GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 @@ -685,10 +724,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, } } - // ARM and Thumb2 support UMLAL/SMLAL. - if (!Subtarget->isThumb1Only()) - setTargetDAGCombine(ISD::ADDC); - if (Subtarget->isFPOnlySP()) { // When targeting a floating-point unit with only single-precision // operations, f64 is legal for the few double-precision instructions which @@ -787,13 +822,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SRL, MVT::i64, Custom); setOperationAction(ISD::SRA, MVT::i64, Custom); - if (!Subtarget->isThumb1Only()) { - // FIXME: We should do this for Thumb1 as well. - setOperationAction(ISD::ADDC, MVT::i32, Custom); - setOperationAction(ISD::ADDE, MVT::i32, Custom); - setOperationAction(ISD::SUBC, MVT::i32, Custom); - setOperationAction(ISD::SUBE, MVT::i32, Custom); - } + setOperationAction(ISD::ADDC, MVT::i32, Custom); + setOperationAction(ISD::ADDE, MVT::i32, Custom); + setOperationAction(ISD::SUBC, MVT::i32, Custom); + setOperationAction(ISD::SUBE, MVT::i32, Custom); if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); @@ -1305,6 +1337,12 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::UMAAL: return "ARMISD::UMAAL"; case ARMISD::UMLAL: return "ARMISD::UMLAL"; case ARMISD::SMLAL: return "ARMISD::SMLAL"; + case ARMISD::SMLALBB: return "ARMISD::SMLALBB"; + case ARMISD::SMLALBT: return "ARMISD::SMLALBT"; + case ARMISD::SMLALTB: return "ARMISD::SMLALTB"; + case ARMISD::SMLALTT: return "ARMISD::SMLALTT"; + case ARMISD::SMULWB: return "ARMISD::SMULWB"; + case ARMISD::SMULWT: return "ARMISD::SMULWT"; case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; case ARMISD::BFI: return "ARMISD::BFI"; case ARMISD::VORRIMM: return "ARMISD::VORRIMM"; @@ -1414,6 +1452,40 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { // Lowering Code //===----------------------------------------------------------------------===// +static bool isSRL16(const SDValue &Op) { + if (Op.getOpcode() != ISD::SRL) + return false; + if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1))) + return Const->getZExtValue() == 16; + return false; +} + +static bool isSRA16(const SDValue &Op) { + if (Op.getOpcode() != ISD::SRA) + return false; + if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1))) + return Const->getZExtValue() == 16; + return false; +} + +static bool isSHL16(const SDValue &Op) { + if (Op.getOpcode() != ISD::SHL) + return false; + if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1))) + return Const->getZExtValue() == 16; + return false; +} + +// Check for a signed 16-bit value. We special case SRA because it makes it +// more simple when also looking for SRAs that aren't sign extending a +// smaller value. Without the check, we'd need to take extra care with +// checking order for some operations. +static bool isS16(const SDValue &Op, SelectionDAG &DAG) { + if (isSRA16(Op)) + return isSHL16(Op.getOperand(0)); + return DAG.ComputeNumSignBits(Op) == 17; +} + /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) { switch (CC) { @@ -1433,22 +1505,34 @@ static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) { /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC. static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, - ARMCC::CondCodes &CondCode2) { + ARMCC::CondCodes &CondCode2, bool &InvalidOnQNaN) { CondCode2 = ARMCC::AL; + InvalidOnQNaN = true; switch (CC) { default: llvm_unreachable("Unknown FP condition!"); case ISD::SETEQ: - case ISD::SETOEQ: CondCode = ARMCC::EQ; break; + case ISD::SETOEQ: + CondCode = ARMCC::EQ; + InvalidOnQNaN = false; + break; case ISD::SETGT: case ISD::SETOGT: CondCode = ARMCC::GT; break; case ISD::SETGE: case ISD::SETOGE: CondCode = ARMCC::GE; break; case ISD::SETOLT: CondCode = ARMCC::MI; break; case ISD::SETOLE: CondCode = ARMCC::LS; break; - case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break; + case ISD::SETONE: + CondCode = ARMCC::MI; + CondCode2 = ARMCC::GT; + InvalidOnQNaN = false; + break; case ISD::SETO: CondCode = ARMCC::VC; break; case ISD::SETUO: CondCode = ARMCC::VS; break; - case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break; + case ISD::SETUEQ: + CondCode = ARMCC::EQ; + CondCode2 = ARMCC::VS; + InvalidOnQNaN = false; + break; case ISD::SETUGT: CondCode = ARMCC::HI; break; case ISD::SETUGE: CondCode = ARMCC::PL; break; case ISD::SETLT: @@ -1456,7 +1540,10 @@ static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, case ISD::SETLE: case ISD::SETULE: CondCode = ARMCC::LE; break; case ISD::SETNE: - case ISD::SETUNE: CondCode = ARMCC::NE; break; + case ISD::SETUNE: + CondCode = ARMCC::NE; + InvalidOnQNaN = false; + break; } } @@ -1549,8 +1636,8 @@ SDValue ARMTargetLowering::LowerCallResult( // Assign locations to each value returned by this call. SmallVector<CCValAssign, 16> RVLocs; - ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, - *DAG.getContext(), Call); + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, + *DAG.getContext()); CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg)); // Copy all of the result registers out of their specified physreg. @@ -1710,8 +1797,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; - ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, - *DAG.getContext(), Call); + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, + *DAG.getContext()); CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg)); // Get a count of how many bytes are to be pushed on the stack. @@ -2088,10 +2175,6 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, /// this. void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size, unsigned Align) const { - assert((State->getCallOrPrologue() == Prologue || - State->getCallOrPrologue() == Call) && - "unhandled ParmContext"); - // Byval (as with any stack) slots are always at least 4 byte aligned. Align = std::max(Align, 4U); @@ -2148,7 +2231,7 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, MachineFrameInfo &MFI, const MachineRegisterInfo *MRI, const TargetInstrInfo *TII) { unsigned Bytes = Arg.getValueSizeInBits() / 8; - int FI = INT_MAX; + int FI = std::numeric_limits<int>::max(); if (Arg.getOpcode() == ISD::CopyFromReg) { unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); if (!TargetRegisterInfo::isVirtualRegister(VR)) @@ -2178,7 +2261,7 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, } else return false; - assert(FI != INT_MAX); + assert(FI != std::numeric_limits<int>::max()); if (!MFI.isFixedObjectIndex(FI)) return false; return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI); @@ -2260,7 +2343,7 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // Check if stack adjustment is needed. For now, do not do this if any // argument is passed on the stack. SmallVector<CCValAssign, 16> ArgLocs; - ARMCCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C, Call); + CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C); CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg)); if (CCInfo.getNextStackOffset()) { // Check if the arguments are already laid out in the right way as @@ -2362,8 +2445,8 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, SmallVector<CCValAssign, 16> RVLocs; // CCState - Info about the registers and stack slots. - ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, - *DAG.getContext(), Call); + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, + *DAG.getContext()); // Analyze outgoing return values. CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg)); @@ -2790,9 +2873,9 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, // FIXME: is there useful debug info available here? TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(dl).setChain(Chain) - .setCallee(CallingConv::C, Type::getInt32Ty(*DAG.getContext()), - DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args)); + CLI.setDebugLoc(dl).setChain(Chain).setLibCallee( + CallingConv::C, Type::getInt32Ty(*DAG.getContext()), + DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args)); std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); return CallResult.first; @@ -2935,7 +3018,7 @@ static bool isSimpleType(Type *T) { } static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG, - EVT PtrVT, SDLoc dl) { + EVT PtrVT, const SDLoc &dl) { // If we're creating a pool entry for a constant global with unnamed address, // and the global is small enough, we can emit it inline into the constant pool // to save ourselves an indirection. @@ -2980,7 +3063,8 @@ static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG, unsigned RequiredPadding = 4 - (Size % 4); bool PaddingPossible = RequiredPadding == 4 || (CDAInit && CDAInit->isString()); - if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize) + if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize || + Size == 0) return SDValue(); unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding); @@ -3080,15 +3164,22 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, return Result; } else if (Subtarget->isRWPI() && !IsRO) { // SB-relative. - ARMConstantPoolValue *CPV = - ARMConstantPoolConstant::Create(GV, ARMCP::SBREL); - SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); - CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - SDValue G = DAG.getLoad( - PtrVT, dl, DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); + SDValue RelAddr; + if (Subtarget->useMovt(DAG.getMachineFunction())) { + ++NumMovwMovt; + SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL); + RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G); + } else { // use literal pool for address constant + ARMConstantPoolValue *CPV = + ARMConstantPoolConstant::Create(GV, ARMCP::SBREL); + SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + RelAddr = DAG.getLoad( + PtrVT, dl, DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); + } SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT); - SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, G); + SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr); return Result; } @@ -3462,8 +3553,8 @@ SDValue ARMTargetLowering::LowerFormalArguments( // Assign locations to all of the incoming arguments. SmallVector<CCValAssign, 16> ArgLocs; - ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, - *DAG.getContext(), Prologue); + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, + *DAG.getContext()); CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg)); SmallVector<SDValue, 16> ArgValues; @@ -3595,7 +3686,6 @@ SDValue ARMTargetLowering::LowerFormalArguments( InVals.push_back(ArgValue); } else { // VA.isRegLoc() - // sanity check assert(VA.isMemLoc()); assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered"); @@ -3734,13 +3824,15 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, /// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, - SelectionDAG &DAG, const SDLoc &dl) const { + SelectionDAG &DAG, const SDLoc &dl, + bool InvalidOnQNaN) const { assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64); SDValue Cmp; + SDValue C = DAG.getConstant(InvalidOnQNaN, dl, MVT::i32); if (!isFloatingPointZero(RHS)) - Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS); + Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS, C); else - Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS); + Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS, C); return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp); } @@ -3757,10 +3849,12 @@ ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const { Cmp = Cmp.getOperand(0); Opc = Cmp.getOpcode(); if (Opc == ARMISD::CMPFP) - Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); + Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0), + Cmp.getOperand(1), Cmp.getOperand(2)); else { assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT"); - Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0)); + Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0), + Cmp.getOperand(1)); } return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp); } @@ -3808,7 +3902,6 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG, return std::make_pair(Value, OverflowCmp); } - SDValue ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { // Let legalize expand this if it isn't a legal type yet. @@ -3832,7 +3925,6 @@ ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow); } - SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Cond = Op.getOperand(0); SDValue SelectTrue = Op.getOperand(1); @@ -4025,7 +4117,6 @@ static bool isUpperSaturate(const SDValue LHS, const SDValue RHS, // Additionally, the variable is returned in parameter V and the constant in K. static bool isSaturatingConditional(const SDValue &Op, SDValue &V, uint64_t &K) { - SDValue LHS1 = Op.getOperand(0); SDValue RHS1 = Op.getOperand(1); SDValue TrueVal1 = Op.getOperand(2); @@ -4046,10 +4137,10 @@ static bool isSaturatingConditional(const SDValue &Op, SDValue &V, // in each conditional SDValue *K1 = isa<ConstantSDNode>(LHS1) ? &LHS1 : isa<ConstantSDNode>(RHS1) ? &RHS1 - : NULL; + : nullptr; SDValue *K2 = isa<ConstantSDNode>(LHS2) ? &LHS2 : isa<ConstantSDNode>(RHS2) ? &RHS2 - : NULL; + : nullptr; SDValue K2Tmp = isa<ConstantSDNode>(TrueVal2) ? TrueVal2 : FalseVal2; SDValue V1Tmp = (K1 && *K1 == LHS1) ? RHS1 : LHS1; SDValue V2Tmp = (K2 && *K2 == LHS2) ? RHS2 : LHS2; @@ -4073,13 +4164,15 @@ static bool isSaturatingConditional(const SDValue &Op, SDValue &V, const SDValue *LowerCheckOp = isLowerSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1) ? &Op - : isLowerSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2) ? &Op2 - : NULL; + : isLowerSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2) + ? &Op2 + : nullptr; const SDValue *UpperCheckOp = isUpperSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1) ? &Op - : isUpperSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2) ? &Op2 - : NULL; + : isUpperSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2) + ? &Op2 + : nullptr; if (!UpperCheckOp || !LowerCheckOp || LowerCheckOp == UpperCheckOp) return false; @@ -4104,7 +4197,6 @@ static bool isSaturatingConditional(const SDValue &Op, SDValue &V, } SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { - EVT VT = Op.getValueType(); SDLoc dl(Op); @@ -4162,7 +4254,8 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { } ARMCC::CondCodes CondCode, CondCode2; - FPCCToARMCC(CC, CondCode, CondCode2); + bool InvalidOnQNaN; + FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN); // Try to generate VMAXNM/VMINNM on ARMv8. if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 || @@ -4181,13 +4274,13 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { } SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); - SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); + SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG); if (CondCode2 != ARMCC::AL) { SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32); // FIXME: Needs another CMP because flag can have but one use. - SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); + SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN); Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG); } return Result; @@ -4348,10 +4441,11 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { } ARMCC::CondCodes CondCode, CondCode2; - FPCCToARMCC(CC, CondCode, CondCode2); + bool InvalidOnQNaN; + FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN); SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); - SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); + SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp }; @@ -4853,9 +4947,10 @@ SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3) // so that the shift + and get folded into a bitfield extract. SDLoc dl(Op); - SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32, - DAG.getConstant(Intrinsic::arm_get_fpscr, dl, - MVT::i32)); + SDValue Ops[] = { DAG.getEntryNode(), + DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32) }; + + SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, MVT::i32, Ops); SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR, DAG.getConstant(1U << 22, dl, MVT::i32)); SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds, @@ -5584,7 +5679,6 @@ static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) { return true; } - static bool isVEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseVEXT, unsigned &Imm) { unsigned NumElts = VT.getVectorNumElements(); @@ -6027,10 +6121,10 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, } if (ValueCounts.size() != 1) usesOnlyOneValue = false; - if (!Value.getNode() && ValueCounts.size() > 0) + if (!Value.getNode() && !ValueCounts.empty()) Value = ValueCounts.begin()->first; - if (ValueCounts.size() == 0) + if (ValueCounts.empty()) return DAG.getUNDEF(VT); // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR. @@ -6182,8 +6276,8 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, struct ShuffleSourceInfo { SDValue Vec; - unsigned MinElt; - unsigned MaxElt; + unsigned MinElt = std::numeric_limits<unsigned>::max(); + unsigned MaxElt = 0; // We may insert some combination of BITCASTs and VEXT nodes to force Vec to // be compatible with the shuffle we intend to construct. As a result @@ -6192,13 +6286,12 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, // Code should guarantee that element i in Vec starts at element "WindowBase // + i * WindowScale in ShuffleVec". - int WindowBase; - int WindowScale; + int WindowBase = 0; + int WindowScale = 1; + + ShuffleSourceInfo(SDValue Vec) : Vec(Vec), ShuffleVec(Vec) {} bool operator ==(SDValue OtherVec) { return Vec == OtherVec; } - ShuffleSourceInfo(SDValue Vec) - : Vec(Vec), MinElt(UINT_MAX), MaxElt(0), ShuffleVec(Vec), WindowBase(0), - WindowScale(1) {} }; // First gather all vectors used as an immediate source for this BUILD_VECTOR @@ -6220,7 +6313,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, // Add this element source to the list if it's not already there. SDValue SourceVec = V.getOperand(0); - auto Source = find(Sources, SourceVec); + auto Source = llvm::find(Sources, SourceVec); if (Source == Sources.end()) Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec)); @@ -6336,7 +6429,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, if (Entry.isUndef()) continue; - auto Src = find(Sources, Entry.getOperand(0)); + auto Src = llvm::find(Sources, Entry.getOperand(0)); int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue(); // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit @@ -6633,7 +6726,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { EVT SubVT = SubV1.getValueType(); // We expect these to have been canonicalized to -1. - assert(all_of(ShuffleMask, [&](int i) { + assert(llvm::all_of(ShuffleMask, [&](int i) { return i < (int)VT.getVectorNumElements(); }) && "Unexpected shuffle index into UNDEF operand!"); @@ -6896,8 +6989,19 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) { N->getValueType(0), N->getOpcode()); - if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) - return SkipLoadExtensionForVMULL(LD, DAG); + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { + assert((ISD::isSEXTLoad(LD) || ISD::isZEXTLoad(LD)) && + "Expected extending load"); + + SDValue newLoad = SkipLoadExtensionForVMULL(LD, DAG); + DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), newLoad.getValue(1)); + unsigned Opcode = ISD::isSEXTLoad(LD) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + SDValue extLoad = + DAG.getNode(Opcode, SDLoc(newLoad), LD->getValueType(0), newLoad); + DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 0), extLoad); + + return newLoad; + } // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will // have been legalized as a BITCAST from v4i32. @@ -7258,9 +7362,9 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { ArgListEntry Entry; Entry.Node = SRet; Entry.Ty = RetTy->getPointerTo(); - Entry.isSExt = false; - Entry.isZExt = false; - Entry.isSRet = true; + Entry.IsSExt = false; + Entry.IsZExt = false; + Entry.IsSRet = true; Args.push_back(Entry); RetTy = Type::getVoidTy(*DAG.getContext()); } @@ -7268,8 +7372,8 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { ArgListEntry Entry; Entry.Node = Arg; Entry.Ty = ArgTy; - Entry.isSExt = false; - Entry.isZExt = false; + Entry.IsSExt = false; + Entry.IsZExt = false; Args.push_back(Entry); const char *LibcallName = @@ -7480,12 +7584,12 @@ static SDValue LowerFPOWI(SDValue Op, const ARMSubtarget &Subtarget, Entry.Node = Val; Entry.Ty = Val.getValueType().getTypeForEVT(*DAG.getContext()); - Entry.isZExt = true; + Entry.IsZExt = true; Args.push_back(Entry); Entry.Node = Exponent; Entry.Ty = Exponent.getValueType().getTypeForEVT(*DAG.getContext()); - Entry.isZExt = true; + Entry.IsZExt = true; Args.push_back(Entry); Type *LCRTy = Val.getValueType().getTypeForEVT(*DAG.getContext()); @@ -7702,24 +7806,27 @@ void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI, // add r5, pc // str r5, [$jbuf, #+4] ; &jbuf[1] unsigned NewVReg1 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1) - .addConstantPoolIndex(CPI) - .addMemOperand(CPMMO)); + BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1) + .addConstantPoolIndex(CPI) + .addMemOperand(CPMMO) + .add(predOps(ARMCC::AL)); // Set the low bit because of thumb mode. unsigned NewVReg2 = MRI->createVirtualRegister(TRC); - AddDefaultCC( - AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2) - .addReg(NewVReg1, RegState::Kill) - .addImm(0x01))); + BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2) + .addReg(NewVReg1, RegState::Kill) + .addImm(0x01) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); unsigned NewVReg3 = MRI->createVirtualRegister(TRC); BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3) .addReg(NewVReg2, RegState::Kill) .addImm(PCLabelId); - AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12)) - .addReg(NewVReg3, RegState::Kill) - .addFrameIndex(FI) - .addImm(36) // &jbuf[1] :: pc - .addMemOperand(FIMMOSt)); + BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12)) + .addReg(NewVReg3, RegState::Kill) + .addFrameIndex(FI) + .addImm(36) // &jbuf[1] :: pc + .addMemOperand(FIMMOSt) + .add(predOps(ARMCC::AL)); } else if (isThumb) { // Incoming value: jbuf // ldr.n r1, LCPI1_4 @@ -7729,51 +7836,58 @@ void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI, // add r2, $jbuf, #+4 ; &jbuf[1] // str r1, [r2] unsigned NewVReg1 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1) - .addConstantPoolIndex(CPI) - .addMemOperand(CPMMO)); + BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1) + .addConstantPoolIndex(CPI) + .addMemOperand(CPMMO) + .add(predOps(ARMCC::AL)); unsigned NewVReg2 = MRI->createVirtualRegister(TRC); BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2) .addReg(NewVReg1, RegState::Kill) .addImm(PCLabelId); // Set the low bit because of thumb mode. unsigned NewVReg3 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3) - .addReg(ARM::CPSR, RegState::Define) - .addImm(1)); + BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3) + .addReg(ARM::CPSR, RegState::Define) + .addImm(1) + .add(predOps(ARMCC::AL)); unsigned NewVReg4 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4) - .addReg(ARM::CPSR, RegState::Define) - .addReg(NewVReg2, RegState::Kill) - .addReg(NewVReg3, RegState::Kill)); + BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4) + .addReg(ARM::CPSR, RegState::Define) + .addReg(NewVReg2, RegState::Kill) + .addReg(NewVReg3, RegState::Kill) + .add(predOps(ARMCC::AL)); unsigned NewVReg5 = MRI->createVirtualRegister(TRC); BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5) .addFrameIndex(FI) .addImm(36); // &jbuf[1] :: pc - AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi)) - .addReg(NewVReg4, RegState::Kill) - .addReg(NewVReg5, RegState::Kill) - .addImm(0) - .addMemOperand(FIMMOSt)); + BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi)) + .addReg(NewVReg4, RegState::Kill) + .addReg(NewVReg5, RegState::Kill) + .addImm(0) + .addMemOperand(FIMMOSt) + .add(predOps(ARMCC::AL)); } else { // Incoming value: jbuf // ldr r1, LCPI1_1 // add r1, pc, r1 // str r1, [$jbuf, #+4] ; &jbuf[1] unsigned NewVReg1 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1) - .addConstantPoolIndex(CPI) - .addImm(0) - .addMemOperand(CPMMO)); + BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1) + .addConstantPoolIndex(CPI) + .addImm(0) + .addMemOperand(CPMMO) + .add(predOps(ARMCC::AL)); unsigned NewVReg2 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2) - .addReg(NewVReg1, RegState::Kill) - .addImm(PCLabelId)); - AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12)) - .addReg(NewVReg2, RegState::Kill) - .addFrameIndex(FI) - .addImm(36) // &jbuf[1] :: pc - .addMemOperand(FIMMOSt)); + BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2) + .addReg(NewVReg1, RegState::Kill) + .addImm(PCLabelId) + .add(predOps(ARMCC::AL)); + BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12)) + .addReg(NewVReg2, RegState::Kill) + .addFrameIndex(FI) + .addImm(36) // &jbuf[1] :: pc + .addMemOperand(FIMMOSt) + .add(predOps(ARMCC::AL)); } } @@ -7791,7 +7905,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, // Get a mapping of the call site numbers to all of the landing pads they're // associated with. - DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2> > CallSiteNumToLPad; + DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2>> CallSiteNumToLPad; unsigned MaxCSNum = 0; for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E; ++BB) { @@ -7886,31 +8000,36 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, unsigned NumLPads = LPadList.size(); if (Subtarget->isThumb2()) { unsigned NewVReg1 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1) - .addFrameIndex(FI) - .addImm(4) - .addMemOperand(FIMMOLd)); + BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1) + .addFrameIndex(FI) + .addImm(4) + .addMemOperand(FIMMOLd) + .add(predOps(ARMCC::AL)); if (NumLPads < 256) { - AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri)) - .addReg(NewVReg1) - .addImm(LPadList.size())); + BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri)) + .addReg(NewVReg1) + .addImm(LPadList.size()) + .add(predOps(ARMCC::AL)); } else { unsigned VReg1 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1) - .addImm(NumLPads & 0xFFFF)); + BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1) + .addImm(NumLPads & 0xFFFF) + .add(predOps(ARMCC::AL)); unsigned VReg2 = VReg1; if ((NumLPads & 0xFFFF0000) != 0) { VReg2 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2) - .addReg(VReg1) - .addImm(NumLPads >> 16)); + BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2) + .addReg(VReg1) + .addImm(NumLPads >> 16) + .add(predOps(ARMCC::AL)); } - AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr)) - .addReg(NewVReg1) - .addReg(VReg2)); + BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr)) + .addReg(NewVReg1) + .addReg(VReg2) + .add(predOps(ARMCC::AL)); } BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc)) @@ -7919,16 +8038,17 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, .addReg(ARM::CPSR); unsigned NewVReg3 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg3) - .addJumpTableIndex(MJTI)); + BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT), NewVReg3) + .addJumpTableIndex(MJTI) + .add(predOps(ARMCC::AL)); unsigned NewVReg4 = MRI->createVirtualRegister(TRC); - AddDefaultCC( - AddDefaultPred( - BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4) + BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4) .addReg(NewVReg3, RegState::Kill) .addReg(NewVReg1) - .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2)))); + .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2)) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT)) .addReg(NewVReg4, RegState::Kill) @@ -7936,15 +8056,17 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, .addJumpTableIndex(MJTI); } else if (Subtarget->isThumb()) { unsigned NewVReg1 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1) - .addFrameIndex(FI) - .addImm(1) - .addMemOperand(FIMMOLd)); + BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1) + .addFrameIndex(FI) + .addImm(1) + .addMemOperand(FIMMOLd) + .add(predOps(ARMCC::AL)); if (NumLPads < 256) { - AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8)) - .addReg(NewVReg1) - .addImm(NumLPads)); + BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8)) + .addReg(NewVReg1) + .addImm(NumLPads) + .add(predOps(ARMCC::AL)); } else { MachineConstantPool *ConstantPool = MF->getConstantPool(); Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); @@ -7957,12 +8079,14 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); unsigned VReg1 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci)) - .addReg(VReg1, RegState::Define) - .addConstantPoolIndex(Idx)); - AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr)) - .addReg(NewVReg1) - .addReg(VReg1)); + BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci)) + .addReg(VReg1, RegState::Define) + .addConstantPoolIndex(Idx) + .add(predOps(ARMCC::AL)); + BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr)) + .addReg(NewVReg1) + .addReg(VReg1) + .add(predOps(ARMCC::AL)); } BuildMI(DispatchBB, dl, TII->get(ARM::tBcc)) @@ -7971,37 +8095,42 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, .addReg(ARM::CPSR); unsigned NewVReg2 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2) - .addReg(ARM::CPSR, RegState::Define) - .addReg(NewVReg1) - .addImm(2)); + BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2) + .addReg(ARM::CPSR, RegState::Define) + .addReg(NewVReg1) + .addImm(2) + .add(predOps(ARMCC::AL)); unsigned NewVReg3 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3) - .addJumpTableIndex(MJTI)); + BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3) + .addJumpTableIndex(MJTI) + .add(predOps(ARMCC::AL)); unsigned NewVReg4 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4) - .addReg(ARM::CPSR, RegState::Define) - .addReg(NewVReg2, RegState::Kill) - .addReg(NewVReg3)); + BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4) + .addReg(ARM::CPSR, RegState::Define) + .addReg(NewVReg2, RegState::Kill) + .addReg(NewVReg3) + .add(predOps(ARMCC::AL)); MachineMemOperand *JTMMOLd = MF->getMachineMemOperand( MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4); unsigned NewVReg5 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5) - .addReg(NewVReg4, RegState::Kill) - .addImm(0) - .addMemOperand(JTMMOLd)); + BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5) + .addReg(NewVReg4, RegState::Kill) + .addImm(0) + .addMemOperand(JTMMOLd) + .add(predOps(ARMCC::AL)); unsigned NewVReg6 = NewVReg5; if (IsPositionIndependent) { NewVReg6 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6) - .addReg(ARM::CPSR, RegState::Define) - .addReg(NewVReg5, RegState::Kill) - .addReg(NewVReg3)); + BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6) + .addReg(ARM::CPSR, RegState::Define) + .addReg(NewVReg5, RegState::Kill) + .addReg(NewVReg3) + .add(predOps(ARMCC::AL)); } BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr)) @@ -8009,31 +8138,36 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, .addJumpTableIndex(MJTI); } else { unsigned NewVReg1 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1) - .addFrameIndex(FI) - .addImm(4) - .addMemOperand(FIMMOLd)); + BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1) + .addFrameIndex(FI) + .addImm(4) + .addMemOperand(FIMMOLd) + .add(predOps(ARMCC::AL)); if (NumLPads < 256) { - AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPri)) - .addReg(NewVReg1) - .addImm(NumLPads)); + BuildMI(DispatchBB, dl, TII->get(ARM::CMPri)) + .addReg(NewVReg1) + .addImm(NumLPads) + .add(predOps(ARMCC::AL)); } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) { unsigned VReg1 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1) - .addImm(NumLPads & 0xFFFF)); + BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1) + .addImm(NumLPads & 0xFFFF) + .add(predOps(ARMCC::AL)); unsigned VReg2 = VReg1; if ((NumLPads & 0xFFFF0000) != 0) { VReg2 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2) - .addReg(VReg1) - .addImm(NumLPads >> 16)); + BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2) + .addReg(VReg1) + .addImm(NumLPads >> 16) + .add(predOps(ARMCC::AL)); } - AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr)) - .addReg(NewVReg1) - .addReg(VReg2)); + BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr)) + .addReg(NewVReg1) + .addReg(VReg2) + .add(predOps(ARMCC::AL)); } else { MachineConstantPool *ConstantPool = MF->getConstantPool(); Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); @@ -8046,13 +8180,15 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); unsigned VReg1 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp)) - .addReg(VReg1, RegState::Define) - .addConstantPoolIndex(Idx) - .addImm(0)); - AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr)) - .addReg(NewVReg1) - .addReg(VReg1, RegState::Kill)); + BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp)) + .addReg(VReg1, RegState::Define) + .addConstantPoolIndex(Idx) + .addImm(0) + .add(predOps(ARMCC::AL)); + BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr)) + .addReg(NewVReg1) + .addReg(VReg1, RegState::Kill) + .add(predOps(ARMCC::AL)); } BuildMI(DispatchBB, dl, TII->get(ARM::Bcc)) @@ -8061,23 +8197,25 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, .addReg(ARM::CPSR); unsigned NewVReg3 = MRI->createVirtualRegister(TRC); - AddDefaultCC( - AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3) - .addReg(NewVReg1) - .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2)))); + BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3) + .addReg(NewVReg1) + .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2)) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); unsigned NewVReg4 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4) - .addJumpTableIndex(MJTI)); + BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4) + .addJumpTableIndex(MJTI) + .add(predOps(ARMCC::AL)); MachineMemOperand *JTMMOLd = MF->getMachineMemOperand( MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4); unsigned NewVReg5 = MRI->createVirtualRegister(TRC); - AddDefaultPred( - BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5) - .addReg(NewVReg3, RegState::Kill) - .addReg(NewVReg4) - .addImm(0) - .addMemOperand(JTMMOLd)); + BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5) + .addReg(NewVReg3, RegState::Kill) + .addReg(NewVReg4) + .addImm(0) + .addMemOperand(JTMMOLd) + .add(predOps(ARMCC::AL)); if (IsPositionIndependent) { BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd)) @@ -8222,26 +8360,35 @@ static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos, unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2); assert(LdOpc != 0 && "Should have a load opcode"); if (LdSize >= 8) { - AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) - .addReg(AddrOut, RegState::Define).addReg(AddrIn) - .addImm(0)); + BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) + .addReg(AddrOut, RegState::Define) + .addReg(AddrIn) + .addImm(0) + .add(predOps(ARMCC::AL)); } else if (IsThumb1) { // load + update AddrIn - AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) - .addReg(AddrIn).addImm(0)); - MachineInstrBuilder MIB = - BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut); - MIB = AddDefaultT1CC(MIB); - MIB.addReg(AddrIn).addImm(LdSize); - AddDefaultPred(MIB); + BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) + .addReg(AddrIn) + .addImm(0) + .add(predOps(ARMCC::AL)); + BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut) + .add(t1CondCodeOp()) + .addReg(AddrIn) + .addImm(LdSize) + .add(predOps(ARMCC::AL)); } else if (IsThumb2) { - AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) - .addReg(AddrOut, RegState::Define).addReg(AddrIn) - .addImm(LdSize)); + BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) + .addReg(AddrOut, RegState::Define) + .addReg(AddrIn) + .addImm(LdSize) + .add(predOps(ARMCC::AL)); } else { // arm - AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) - .addReg(AddrOut, RegState::Define).addReg(AddrIn) - .addReg(0).addImm(LdSize)); + BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) + .addReg(AddrOut, RegState::Define) + .addReg(AddrIn) + .addReg(0) + .addImm(LdSize) + .add(predOps(ARMCC::AL)); } } @@ -8254,24 +8401,36 @@ static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos, unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2); assert(StOpc != 0 && "Should have a store opcode"); if (StSize >= 8) { - AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut) - .addReg(AddrIn).addImm(0).addReg(Data)); + BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut) + .addReg(AddrIn) + .addImm(0) + .addReg(Data) + .add(predOps(ARMCC::AL)); } else if (IsThumb1) { // store + update AddrIn - AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc)).addReg(Data) - .addReg(AddrIn).addImm(0)); - MachineInstrBuilder MIB = - BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut); - MIB = AddDefaultT1CC(MIB); - MIB.addReg(AddrIn).addImm(StSize); - AddDefaultPred(MIB); + BuildMI(*BB, Pos, dl, TII->get(StOpc)) + .addReg(Data) + .addReg(AddrIn) + .addImm(0) + .add(predOps(ARMCC::AL)); + BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut) + .add(t1CondCodeOp()) + .addReg(AddrIn) + .addImm(StSize) + .add(predOps(ARMCC::AL)); } else if (IsThumb2) { - AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut) - .addReg(Data).addReg(AddrIn).addImm(StSize)); + BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut) + .addReg(Data) + .addReg(AddrIn) + .addImm(StSize) + .add(predOps(ARMCC::AL)); } else { // arm - AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut) - .addReg(Data).addReg(AddrIn).addReg(0) - .addImm(StSize)); + BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut) + .addReg(Data) + .addReg(AddrIn) + .addReg(0) + .addImm(StSize) + .add(predOps(ARMCC::AL)); } } @@ -8402,16 +8561,15 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI, unsigned Vtmp = varEnd; if ((LoopSize & 0xFFFF0000) != 0) Vtmp = MRI.createVirtualRegister(TRC); - AddDefaultPred(BuildMI(BB, dl, - TII->get(IsThumb ? ARM::t2MOVi16 : ARM::MOVi16), - Vtmp).addImm(LoopSize & 0xFFFF)); + BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVi16 : ARM::MOVi16), Vtmp) + .addImm(LoopSize & 0xFFFF) + .add(predOps(ARMCC::AL)); if ((LoopSize & 0xFFFF0000) != 0) - AddDefaultPred(BuildMI(BB, dl, - TII->get(IsThumb ? ARM::t2MOVTi16 : ARM::MOVTi16), - varEnd) - .addReg(Vtmp) - .addImm(LoopSize >> 16)); + BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVTi16 : ARM::MOVTi16), varEnd) + .addReg(Vtmp) + .addImm(LoopSize >> 16) + .add(predOps(ARMCC::AL)); } else { MachineConstantPool *ConstantPool = MF->getConstantPool(); Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); @@ -8424,11 +8582,16 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI, unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); if (IsThumb) - AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci)).addReg( - varEnd, RegState::Define).addConstantPoolIndex(Idx)); + BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci)) + .addReg(varEnd, RegState::Define) + .addConstantPoolIndex(Idx) + .add(predOps(ARMCC::AL)); else - AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp)).addReg( - varEnd, RegState::Define).addConstantPoolIndex(Idx).addImm(0)); + BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp)) + .addReg(varEnd, RegState::Define) + .addConstantPoolIndex(Idx) + .addImm(0) + .add(predOps(ARMCC::AL)); } BB->addSuccessor(loopMBB); @@ -8465,16 +8628,19 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI, // Decrement loop variable by UnitSize. if (IsThumb1) { - MachineInstrBuilder MIB = - BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop); - MIB = AddDefaultT1CC(MIB); - MIB.addReg(varPhi).addImm(UnitSize); - AddDefaultPred(MIB); + BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop) + .add(t1CondCodeOp()) + .addReg(varPhi) + .addImm(UnitSize) + .add(predOps(ARMCC::AL)); } else { MachineInstrBuilder MIB = BuildMI(*BB, BB->end(), dl, TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop); - AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize))); + MIB.addReg(varPhi) + .addImm(UnitSize) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); MIB->getOperand(5).setReg(ARM::CPSR); MIB->getOperand(5).setIsDef(true); } @@ -8545,11 +8711,12 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI, case CodeModel::Default: case CodeModel::Kernel: BuildMI(*MBB, MI, DL, TII.get(ARM::tBL)) - .addImm((unsigned)ARMCC::AL).addReg(0) - .addExternalSymbol("__chkstk") - .addReg(ARM::R4, RegState::Implicit | RegState::Kill) - .addReg(ARM::R4, RegState::Implicit | RegState::Define) - .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead); + .add(predOps(ARMCC::AL)) + .addExternalSymbol("__chkstk") + .addReg(ARM::R4, RegState::Implicit | RegState::Kill) + .addReg(ARM::R4, RegState::Implicit | RegState::Define) + .addReg(ARM::R12, + RegState::Implicit | RegState::Define | RegState::Dead); break; case CodeModel::Large: case CodeModel::JITDefault: { @@ -8559,20 +8726,22 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI, BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg) .addExternalSymbol("__chkstk"); BuildMI(*MBB, MI, DL, TII.get(ARM::tBLXr)) - .addImm((unsigned)ARMCC::AL).addReg(0) - .addReg(Reg, RegState::Kill) - .addReg(ARM::R4, RegState::Implicit | RegState::Kill) - .addReg(ARM::R4, RegState::Implicit | RegState::Define) - .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead); + .add(predOps(ARMCC::AL)) + .addReg(Reg, RegState::Kill) + .addReg(ARM::R4, RegState::Implicit | RegState::Kill) + .addReg(ARM::R4, RegState::Implicit | RegState::Define) + .addReg(ARM::R12, + RegState::Implicit | RegState::Define | RegState::Dead); break; } } - AddDefaultCC(AddDefaultPred(BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), - ARM::SP) - .addReg(ARM::SP, RegState::Kill) - .addReg(ARM::R4, RegState::Kill) - .setMIFlags(MachineInstr::FrameSetup))); + BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), ARM::SP) + .addReg(ARM::SP, RegState::Kill) + .addReg(ARM::R4, RegState::Kill) + .setMIFlags(MachineInstr::FrameSetup) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); MI.eraseFromParent(); return MBB; @@ -8597,9 +8766,10 @@ ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI, MF->push_back(TrapBB); MBB->addSuccessor(TrapBB); - AddDefaultPred(BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8)) - .addReg(MI.getOperand(0).getReg()) - .addImm(0)); + BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8)) + .addReg(MI.getOperand(0).getReg()) + .addImm(0) + .add(predOps(ARMCC::AL)); BuildMI(*MBB, MI, DL, TII->get(ARM::t2Bcc)) .addMBB(TrapBB) .addImm(ARMCC::EQ) @@ -8617,18 +8787,18 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, bool isThumb2 = Subtarget->isThumb2(); switch (MI.getOpcode()) { default: { - MI.dump(); + MI.print(errs()); llvm_unreachable("Unexpected instr type to insert"); } // Thumb1 post-indexed loads are really just single-register LDMs. case ARM::tLDR_postidx: { BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD)) - .addOperand(MI.getOperand(1)) // Rn_wb - .addOperand(MI.getOperand(2)) // Rn - .addOperand(MI.getOperand(3)) // PredImm - .addOperand(MI.getOperand(4)) // PredReg - .addOperand(MI.getOperand(0)); // Rt + .add(MI.getOperand(1)) // Rn_wb + .add(MI.getOperand(2)) // Rn + .add(MI.getOperand(3)) // PredImm + .add(MI.getOperand(4)) // PredReg + .add(MI.getOperand(0)); // Rt MI.eraseFromParent(); return BB; } @@ -8659,12 +8829,12 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineMemOperand *MMO = *MI.memoperands_begin(); BuildMI(*BB, MI, dl, TII->get(NewOpc)) - .addOperand(MI.getOperand(0)) // Rn_wb - .addOperand(MI.getOperand(1)) // Rt - .addOperand(MI.getOperand(2)) // Rn - .addImm(Offset) // offset (skip GPR==zero_reg) - .addOperand(MI.getOperand(5)) // pred - .addOperand(MI.getOperand(6)) + .add(MI.getOperand(0)) // Rn_wb + .add(MI.getOperand(1)) // Rt + .add(MI.getOperand(2)) // Rn + .addImm(Offset) // offset (skip GPR==zero_reg) + .add(MI.getOperand(5)) // pred + .add(MI.getOperand(6)) .addMemOperand(MMO); MI.eraseFromParent(); return BB; @@ -8681,7 +8851,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, } MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc)); for (unsigned i = 0; i < MI.getNumOperands(); ++i) - MIB.addOperand(MI.getOperand(i)); + MIB.add(MI.getOperand(i)); MI.eraseFromParent(); return BB; } @@ -8754,18 +8924,20 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, unsigned LHS1 = MI.getOperand(1).getReg(); unsigned LHS2 = MI.getOperand(2).getReg(); if (RHSisZero) { - AddDefaultPred(BuildMI(BB, dl, - TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) - .addReg(LHS1).addImm(0)); + BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) + .addReg(LHS1) + .addImm(0) + .add(predOps(ARMCC::AL)); BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) .addReg(LHS2).addImm(0) .addImm(ARMCC::EQ).addReg(ARM::CPSR); } else { unsigned RHS1 = MI.getOperand(3).getReg(); unsigned RHS2 = MI.getOperand(4).getReg(); - AddDefaultPred(BuildMI(BB, dl, - TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) - .addReg(LHS1).addReg(RHS1)); + BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) + .addReg(LHS1) + .addReg(RHS1) + .add(predOps(ARMCC::AL)); BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) .addReg(LHS2).addReg(RHS2) .addImm(ARMCC::EQ).addReg(ARM::CPSR); @@ -8779,7 +8951,9 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR); if (isThumb2) - AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2B)).addMBB(exitMBB)); + BuildMI(BB, dl, TII->get(ARM::t2B)) + .addMBB(exitMBB) + .add(predOps(ARMCC::AL)); else BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB); @@ -8842,9 +9016,10 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, RSBBB->addSuccessor(SinkBB); // insert a cmp at the end of BB - AddDefaultPred(BuildMI(BB, dl, - TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) - .addReg(ABSSrcReg).addImm(0)); + BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) + .addReg(ABSSrcReg) + .addImm(0) + .add(predOps(ARMCC::AL)); // insert a bcc with opposite CC to ARMCC::MI at the end of BB BuildMI(BB, dl, @@ -8855,9 +9030,11 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, // Note: BCC and rsbri will be converted into predicated rsbmi // by if-conversion pass BuildMI(*RSBBB, RSBBB->begin(), dl, - TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg) - .addReg(ABSSrcReg, ABSSrcKIll ? RegState::Kill : 0) - .addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); + TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg) + .addReg(ABSSrcReg, ABSSrcKIll ? RegState::Kill : 0) + .addImm(0) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); // insert PHI in SinkBB, // reuse ABSDstReg to not change uses of ABS instruction @@ -8927,19 +9104,45 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, // Rename pseudo opcodes. unsigned NewOpc = convertAddSubFlagsOpcode(MI.getOpcode()); + unsigned ccOutIdx; if (NewOpc) { const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo(); MCID = &TII->get(NewOpc); - assert(MCID->getNumOperands() == MI.getDesc().getNumOperands() + 1 && - "converted opcode should be the same except for cc_out"); + assert(MCID->getNumOperands() == + MI.getDesc().getNumOperands() + 5 - MI.getDesc().getSize() + && "converted opcode should be the same except for cc_out" + " (and, on Thumb1, pred)"); MI.setDesc(*MCID); // Add the optional cc_out operand MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/true)); - } - unsigned ccOutIdx = MCID->getNumOperands() - 1; + + // On Thumb1, move all input operands to the end, then add the predicate + if (Subtarget->isThumb1Only()) { + for (unsigned c = MCID->getNumOperands() - 4; c--;) { + MI.addOperand(MI.getOperand(1)); + MI.RemoveOperand(1); + } + + // Restore the ties + for (unsigned i = MI.getNumOperands(); i--;) { + const MachineOperand& op = MI.getOperand(i); + if (op.isReg() && op.isUse()) { + int DefIdx = MCID->getOperandConstraint(i, MCOI::TIED_TO); + if (DefIdx != -1) + MI.tieOperands(DefIdx, i); + } + } + + MI.addOperand(MachineOperand::CreateImm(ARMCC::AL)); + MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/false)); + ccOutIdx = 1; + } else + ccOutIdx = MCID->getNumOperands() - 1; + } else + ccOutIdx = MCID->getNumOperands() - 1; // Any ARM instruction that sets the 's' bit should specify an optional // "cc_out" operand in the last operand position. @@ -8970,7 +9173,9 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, if (deadCPSR) { assert(!MI.getOperand(ccOutIdx).getReg() && "expect uninitialized optional cc_out operand"); - return; + // Thumb1 instructions must have the S bit even if the CPSR is dead. + if (!Subtarget->isThumb1Only()) + return; } // If this instruction was defined with an optional CPSR def and its dag node @@ -9032,7 +9237,7 @@ static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes, SDLoc dl(N); EVT VT = N->getValueType(0); CC = N->getOperand(0); - if (CC.getValueType() != MVT::i1) + if (CC.getValueType() != MVT::i1 || CC.getOpcode() != ISD::SETCC) return false; Invert = !AllOnes; if (AllOnes) @@ -9308,10 +9513,90 @@ static SDValue findMUL_LOHI(SDValue V) { return SDValue(); } -static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, +static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + + if (Subtarget->isThumb()) { + if (!Subtarget->hasDSP()) + return SDValue(); + } else if (!Subtarget->hasV5TEOps()) + return SDValue(); + + // SMLALBB, SMLALBT, SMLALTB, SMLALTT multiply two 16-bit values and + // accumulates the product into a 64-bit value. The 16-bit values will + // be sign extended somehow or SRA'd into 32-bit values + // (addc (adde (mul 16bit, 16bit), lo), hi) + SDValue Mul = AddcNode->getOperand(0); + SDValue Lo = AddcNode->getOperand(1); + if (Mul.getOpcode() != ISD::MUL) { + Lo = AddcNode->getOperand(0); + Mul = AddcNode->getOperand(1); + if (Mul.getOpcode() != ISD::MUL) + return SDValue(); + } + + SDValue SRA = AddeNode->getOperand(0); + SDValue Hi = AddeNode->getOperand(1); + if (SRA.getOpcode() != ISD::SRA) { + SRA = AddeNode->getOperand(1); + Hi = AddeNode->getOperand(0); + if (SRA.getOpcode() != ISD::SRA) + return SDValue(); + } + if (auto Const = dyn_cast<ConstantSDNode>(SRA.getOperand(1))) { + if (Const->getZExtValue() != 31) + return SDValue(); + } else + return SDValue(); + + if (SRA.getOperand(0) != Mul) + return SDValue(); + + SelectionDAG &DAG = DCI.DAG; + SDLoc dl(AddcNode); + unsigned Opcode = 0; + SDValue Op0; + SDValue Op1; + + if (isS16(Mul.getOperand(0), DAG) && isS16(Mul.getOperand(1), DAG)) { + Opcode = ARMISD::SMLALBB; + Op0 = Mul.getOperand(0); + Op1 = Mul.getOperand(1); + } else if (isS16(Mul.getOperand(0), DAG) && isSRA16(Mul.getOperand(1))) { + Opcode = ARMISD::SMLALBT; + Op0 = Mul.getOperand(0); + Op1 = Mul.getOperand(1).getOperand(0); + } else if (isSRA16(Mul.getOperand(0)) && isS16(Mul.getOperand(1), DAG)) { + Opcode = ARMISD::SMLALTB; + Op0 = Mul.getOperand(0).getOperand(0); + Op1 = Mul.getOperand(1); + } else if (isSRA16(Mul.getOperand(0)) && isSRA16(Mul.getOperand(1))) { + Opcode = ARMISD::SMLALTT; + Op0 = Mul->getOperand(0).getOperand(0); + Op1 = Mul->getOperand(1).getOperand(0); + } + + if (!Op0 || !Op1) + return SDValue(); + + SDValue SMLAL = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32), + Op0, Op1, Lo, Hi); + // Replace the ADDs' nodes uses by the MLA node's values. + SDValue HiMLALResult(SMLAL.getNode(), 1); + SDValue LoMLALResult(SMLAL.getNode(), 0); + + DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult); + DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult); + + // Return original node to notify the driver to stop replacing. + SDValue resNode(AddcNode, 0); + return resNode; +} + +static SDValue AddCombineTo64bitMLAL(SDNode *AddeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { - // Look for multiply add opportunities. // The pattern is a ISD::UMUL_LOHI followed by two add nodes, where // each add nodes consumes a value from ISD::UMUL_LOHI and there is @@ -9326,7 +9611,17 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, // \ / // ADDC <- hiAdd // - assert(AddcNode->getOpcode() == ISD::ADDC && "Expect an ADDC"); + assert(AddeNode->getOpcode() == ARMISD::ADDE && "Expect an ADDE"); + + assert(AddeNode->getNumOperands() == 3 && + AddeNode->getOperand(2).getValueType() == MVT::i32 && + "ADDE node has the wrong inputs"); + + // Check that we have a glued ADDC node. + SDNode* AddcNode = AddeNode->getOperand(2).getNode(); + if (AddcNode->getOpcode() != ARMISD::ADDC) + return SDValue(); + SDValue AddcOp0 = AddcNode->getOperand(0); SDValue AddcOp1 = AddcNode->getOperand(1); @@ -9338,29 +9633,13 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, AddcNode->getValueType(0) == MVT::i32 && "Expect ADDC with two result values. First: i32"); - // Check that we have a glued ADDC node. - if (AddcNode->getValueType(1) != MVT::Glue) - return SDValue(); - - // Check that the ADDC adds the low result of the S/UMUL_LOHI. + // Check that the ADDC adds the low result of the S/UMUL_LOHI. If not, it + // maybe a SMLAL which multiplies two 16-bit values. if (AddcOp0->getOpcode() != ISD::UMUL_LOHI && AddcOp0->getOpcode() != ISD::SMUL_LOHI && AddcOp1->getOpcode() != ISD::UMUL_LOHI && AddcOp1->getOpcode() != ISD::SMUL_LOHI) - return SDValue(); - - // Look for the glued ADDE. - SDNode* AddeNode = AddcNode->getGluedUser(); - if (!AddeNode) - return SDValue(); - - // Make sure it is really an ADDE. - if (AddeNode->getOpcode() != ISD::ADDE) - return SDValue(); - - assert(AddeNode->getNumOperands() == 3 && - AddeNode->getOperand(2).getValueType() == MVT::Glue && - "ADDE node has the wrong inputs"); + return AddCombineTo64BitSMLAL16(AddcNode, AddeNode, DCI, Subtarget); // Check for the triangle shape. SDValue AddeOp0 = AddeNode->getOperand(0); @@ -9435,38 +9714,25 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult); // Return original node to notify the driver to stop replacing. - SDValue resNode(AddcNode, 0); - return resNode; + return SDValue(AddeNode, 0); } -static SDValue AddCombineTo64bitUMAAL(SDNode *AddcNode, +static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { // UMAAL is similar to UMLAL except that it adds two unsigned values. // While trying to combine for the other MLAL nodes, first search for the - // chance to use UMAAL. Check if Addc uses another addc node which can first - // be combined into a UMLAL. The other pattern is AddcNode being combined - // into an UMLAL and then using another addc is handled in ISelDAGToDAG. - - if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP() || - (Subtarget->isThumb() && !Subtarget->hasThumb2())) - return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget); - - SDNode *PrevAddc = nullptr; - if (AddcNode->getOperand(0).getOpcode() == ISD::ADDC) - PrevAddc = AddcNode->getOperand(0).getNode(); - else if (AddcNode->getOperand(1).getOpcode() == ISD::ADDC) - PrevAddc = AddcNode->getOperand(1).getNode(); - - // If there's no addc chains, just return a search for any MLAL. - if (PrevAddc == nullptr) - return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget); - - // Try to convert the addc operand to an MLAL and if that fails try to - // combine AddcNode. - SDValue MLAL = AddCombineTo64bitMLAL(PrevAddc, DCI, Subtarget); - if (MLAL != SDValue(PrevAddc, 0)) - return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget); + // chance to use UMAAL. Check if Addc uses a node which has already + // been combined into a UMLAL. The other pattern is UMLAL using Addc/Adde + // as the addend, and it's handled in PerformUMLALCombine. + + if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP()) + return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget); + + // Check that we have a glued ADDC node. + SDNode* AddcNode = AddeNode->getOperand(2).getNode(); + if (AddcNode->getOpcode() != ARMISD::ADDC) + return SDValue(); // Find the converted UMAAL or quit if it doesn't exist. SDNode *UmlalNode = nullptr; @@ -9478,29 +9744,18 @@ static SDValue AddCombineTo64bitUMAAL(SDNode *AddcNode, UmlalNode = AddcNode->getOperand(1).getNode(); AddHi = AddcNode->getOperand(0); } else { - return SDValue(); + return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget); } // The ADDC should be glued to an ADDE node, which uses the same UMLAL as // the ADDC as well as Zero. - auto *Zero = dyn_cast<ConstantSDNode>(UmlalNode->getOperand(3)); - - if (!Zero || Zero->getZExtValue() != 0) + if (!isNullConstant(UmlalNode->getOperand(3))) return SDValue(); - // Check that we have a glued ADDC node. - if (AddcNode->getValueType(1) != MVT::Glue) - return SDValue(); - - // Look for the glued ADDE. - SDNode* AddeNode = AddcNode->getGluedUser(); - if (!AddeNode) - return SDValue(); - - if ((AddeNode->getOperand(0).getNode() == Zero && + if ((isNullConstant(AddeNode->getOperand(0)) && AddeNode->getOperand(1).getNode() == UmlalNode) || (AddeNode->getOperand(0).getNode() == UmlalNode && - AddeNode->getOperand(1).getNode() == Zero)) { + isNullConstant(AddeNode->getOperand(1)))) { SelectionDAG &DAG = DCI.DAG; SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1), @@ -9513,19 +9768,84 @@ static SDValue AddCombineTo64bitUMAAL(SDNode *AddcNode, DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), SDValue(UMAAL.getNode(), 0)); // Return original node to notify the driver to stop replacing. - return SDValue(AddcNode, 0); + return SDValue(AddeNode, 0); } return SDValue(); } -/// PerformADDCCombine - Target-specific dag combine transform from -/// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL or -/// ISD::ADDC, ISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL -static SDValue PerformADDCCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - const ARMSubtarget *Subtarget) { +static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG, + const ARMSubtarget *Subtarget) { + if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP()) + return SDValue(); + + // Check that we have a pair of ADDC and ADDE as operands. + // Both addends of the ADDE must be zero. + SDNode* AddcNode = N->getOperand(2).getNode(); + SDNode* AddeNode = N->getOperand(3).getNode(); + if ((AddcNode->getOpcode() == ARMISD::ADDC) && + (AddeNode->getOpcode() == ARMISD::ADDE) && + isNullConstant(AddeNode->getOperand(0)) && + isNullConstant(AddeNode->getOperand(1)) && + (AddeNode->getOperand(2).getNode() == AddcNode)) + return DAG.getNode(ARMISD::UMAAL, SDLoc(N), + DAG.getVTList(MVT::i32, MVT::i32), + {N->getOperand(0), N->getOperand(1), + AddcNode->getOperand(0), AddcNode->getOperand(1)}); + else + return SDValue(); +} + +static SDValue PerformAddcSubcCombine(SDNode *N, SelectionDAG &DAG, + const ARMSubtarget *Subtarget) { + if (Subtarget->isThumb1Only()) { + SDValue RHS = N->getOperand(1); + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) { + int32_t imm = C->getSExtValue(); + if (imm < 0 && imm > INT_MIN) { + SDLoc DL(N); + RHS = DAG.getConstant(-imm, DL, MVT::i32); + unsigned Opcode = (N->getOpcode() == ARMISD::ADDC) ? ARMISD::SUBC + : ARMISD::ADDC; + return DAG.getNode(Opcode, DL, N->getVTList(), N->getOperand(0), RHS); + } + } + } + return SDValue(); +} - if (Subtarget->isThumb1Only()) return SDValue(); +static SDValue PerformAddeSubeCombine(SDNode *N, SelectionDAG &DAG, + const ARMSubtarget *Subtarget) { + if (Subtarget->isThumb1Only()) { + SDValue RHS = N->getOperand(1); + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) { + int64_t imm = C->getSExtValue(); + if (imm < 0) { + SDLoc DL(N); + + // The with-carry-in form matches bitwise not instead of the negation. + // Effectively, the inverse interpretation of the carry flag already + // accounts for part of the negation. + RHS = DAG.getConstant(~imm, DL, MVT::i32); + + unsigned Opcode = (N->getOpcode() == ARMISD::ADDE) ? ARMISD::SUBE + : ARMISD::ADDE; + return DAG.getNode(Opcode, DL, N->getVTList(), + N->getOperand(0), RHS, N->getOperand(2)); + } + } + } + return SDValue(); +} + +/// PerformADDECombine - Target-specific dag combine transform from +/// ARMISD::ADDC, ARMISD::ADDE, and ISD::MUL_LOHI to MLAL or +/// ARMISD::ADDC, ARMISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL +static SDValue PerformADDECombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + // Only ARM and Thumb2 support UMLAL/SMLAL. + if (Subtarget->isThumb1Only()) + return PerformAddeSubeCombine(N, DCI.DAG, Subtarget); // Only perform the checks after legalize when the pattern is available. if (DCI.isBeforeLegalize()) return SDValue(); @@ -9722,7 +10042,6 @@ static SDValue PerformMULCombine(SDNode *N, static SDValue PerformANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { - // Attempt to use immediate-form VBIC BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); SDLoc dl(N); @@ -9761,6 +10080,67 @@ static SDValue PerformANDCombine(SDNode *N, return SDValue(); } +// Try combining OR nodes to SMULWB, SMULWT. +static SDValue PerformORCombineToSMULWBT(SDNode *OR, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + if (!Subtarget->hasV6Ops() || + (Subtarget->isThumb() && + (!Subtarget->hasThumb2() || !Subtarget->hasDSP()))) + return SDValue(); + + SDValue SRL = OR->getOperand(0); + SDValue SHL = OR->getOperand(1); + + if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) { + SRL = OR->getOperand(1); + SHL = OR->getOperand(0); + } + if (!isSRL16(SRL) || !isSHL16(SHL)) + return SDValue(); + + // The first operands to the shifts need to be the two results from the + // same smul_lohi node. + if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) || + SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI) + return SDValue(); + + SDNode *SMULLOHI = SRL.getOperand(0).getNode(); + if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) || + SHL.getOperand(0) != SDValue(SMULLOHI, 1)) + return SDValue(); + + // Now we have: + // (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16))) + // For SMUL[B|T] smul_lohi will take a 32-bit and a 16-bit arguments. + // For SMUWB the 16-bit value will signed extended somehow. + // For SMULWT only the SRA is required. + // Check both sides of SMUL_LOHI + SDValue OpS16 = SMULLOHI->getOperand(0); + SDValue OpS32 = SMULLOHI->getOperand(1); + + SelectionDAG &DAG = DCI.DAG; + if (!isS16(OpS16, DAG) && !isSRA16(OpS16)) { + OpS16 = OpS32; + OpS32 = SMULLOHI->getOperand(0); + } + + SDLoc dl(OR); + unsigned Opcode = 0; + if (isS16(OpS16, DAG)) + Opcode = ARMISD::SMULWB; + else if (isSRA16(OpS16)) { + Opcode = ARMISD::SMULWT; + OpS16 = OpS16->getOperand(0); + } + else + return SDValue(); + + SDValue Res = DAG.getNode(Opcode, dl, MVT::i32, OpS32, OpS16); + DAG.ReplaceAllUsesOfValueWith(SDValue(OR, 0), Res); + return SDValue(OR, 0); +} + /// PerformORCombine - Target-specific dag combine xforms for ISD::OR static SDValue PerformORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, @@ -9798,6 +10178,8 @@ static SDValue PerformORCombine(SDNode *N, // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c)) if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI)) return Result; + if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget)) + return Result; } // The code below optimizes (or (and X, Y), Z). @@ -9906,7 +10288,7 @@ static SDValue PerformORCombine(SDNode *N, (Mask == ~Mask2)) { // The pack halfword instruction works better for masks that fit it, // so use that when it's available. - if (Subtarget->hasT2ExtractPack() && + if (Subtarget->hasDSP() && (Mask == 0xffff || Mask == 0xffff0000)) return SDValue(); // 2a @@ -9922,7 +10304,7 @@ static SDValue PerformORCombine(SDNode *N, (~Mask == Mask2)) { // The pack halfword instruction works better for masks that fit it, // so use that when it's available. - if (Subtarget->hasT2ExtractPack() && + if (Subtarget->hasDSP() && (Mask2 == 0xffff || Mask2 == 0xffff0000)) return SDValue(); // 2b @@ -11324,8 +11706,8 @@ static void computeKnownBits(SelectionDAG &DAG, SDValue Op, APInt &KnownZero, if (Op.getOpcode() == ARMISD::CMOV) { APInt KZ2(KnownZero.getBitWidth(), 0); APInt KO2(KnownOne.getBitWidth(), 0); - computeKnownBits(DAG, Op.getOperand(1), KnownZero, KnownOne); - computeKnownBits(DAG, Op.getOperand(2), KZ2, KO2); + computeKnownBits(DAG, Op.getOperand(0), KnownZero, KnownOne); + computeKnownBits(DAG, Op.getOperand(1), KZ2, KO2); KnownZero &= KZ2; KnownOne &= KO2; @@ -11555,13 +11937,17 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { switch (N->getOpcode()) { default: break; - case ISD::ADDC: return PerformADDCCombine(N, DCI, Subtarget); + case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget); + case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget); case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget); case ISD::SUB: return PerformSUBCombine(N, DCI); case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget); case ISD::OR: return PerformORCombine(N, DCI, Subtarget); case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget); case ISD::AND: return PerformANDCombine(N, DCI, Subtarget); + case ARMISD::ADDC: + case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI.DAG, Subtarget); + case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI.DAG, Subtarget); case ARMISD::BFI: return PerformBFICombine(N, DCI); case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget); case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG); @@ -11593,6 +11979,56 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, return PerformVLDCombine(N, DCI); case ARMISD::BUILD_VECTOR: return PerformARMBUILD_VECTORCombine(N, DCI); + case ARMISD::SMULWB: { + unsigned BitWidth = N->getValueType(0).getSizeInBits(); + APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16); + if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)) + return SDValue(); + break; + } + case ARMISD::SMULWT: { + unsigned BitWidth = N->getValueType(0).getSizeInBits(); + APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16); + if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)) + return SDValue(); + break; + } + case ARMISD::SMLALBB: { + unsigned BitWidth = N->getValueType(0).getSizeInBits(); + APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16); + if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) || + (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))) + return SDValue(); + break; + } + case ARMISD::SMLALBT: { + unsigned LowWidth = N->getOperand(0).getValueType().getSizeInBits(); + APInt LowMask = APInt::getLowBitsSet(LowWidth, 16); + unsigned HighWidth = N->getOperand(1).getValueType().getSizeInBits(); + APInt HighMask = APInt::getHighBitsSet(HighWidth, 16); + if ((SimplifyDemandedBits(N->getOperand(0), LowMask, DCI)) || + (SimplifyDemandedBits(N->getOperand(1), HighMask, DCI))) + return SDValue(); + break; + } + case ARMISD::SMLALTB: { + unsigned HighWidth = N->getOperand(0).getValueType().getSizeInBits(); + APInt HighMask = APInt::getHighBitsSet(HighWidth, 16); + unsigned LowWidth = N->getOperand(1).getValueType().getSizeInBits(); + APInt LowMask = APInt::getLowBitsSet(LowWidth, 16); + if ((SimplifyDemandedBits(N->getOperand(0), HighMask, DCI)) || + (SimplifyDemandedBits(N->getOperand(1), LowMask, DCI))) + return SDValue(); + break; + } + case ARMISD::SMLALTT: { + unsigned BitWidth = N->getValueType(0).getSizeInBits(); + APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16); + if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) || + (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))) + return SDValue(); + break; + } case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { @@ -12180,6 +12616,7 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero, APInt &KnownOne, + const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { unsigned BitWidth = KnownOne.getBitWidth(); @@ -12588,8 +13025,8 @@ static TargetLowering::ArgListTy getDivRemArgList( Type *ArgTy = ArgVT.getTypeForEVT(*Context); Entry.Node = N->getOperand(i); Entry.Ty = ArgTy; - Entry.isSExt = isSigned; - Entry.isZExt = !isSigned; + Entry.IsSExt = isSigned; + Entry.IsZExt = !isSigned; Args.push_back(Entry); } if (Subtarget->isTargetWindows() && Args.size() >= 2) @@ -12861,7 +13298,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, return true; } case Intrinsic::arm_stlexd: - case Intrinsic::arm_strexd: { + case Intrinsic::arm_strexd: Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i64; Info.ptrVal = I.getArgOperand(2); @@ -12871,9 +13308,9 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.readMem = false; Info.writeMem = true; return true; - } + case Intrinsic::arm_ldaexd: - case Intrinsic::arm_ldrexd: { + case Intrinsic::arm_ldrexd: Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i64; Info.ptrVal = I.getArgOperand(0); @@ -12883,7 +13320,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.readMem = true; Info.writeMem = false; return true; - } + default: break; } @@ -12921,7 +13358,7 @@ Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder, // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get // here. if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) { - Function *MCR = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_mcr); + Function *MCR = Intrinsic::getDeclaration(M, Intrinsic::arm_mcr); Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0), Builder.getInt32(0), Builder.getInt32(7), Builder.getInt32(10), Builder.getInt32(5)}; @@ -12932,7 +13369,7 @@ Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder, llvm_unreachable("makeDMB on a target so old that it has no barriers"); } } else { - Function *DMB = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_dmb); + Function *DMB = Intrinsic::getDeclaration(M, Intrinsic::arm_dmb); // Only a full system barrier exists in the M-class architectures. Domain = Subtarget->isMClass() ? ARM_MB::SY : Domain; Constant *CDomain = Builder.getInt32(Domain); @@ -13089,7 +13526,7 @@ Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, if (ValTy->getPrimitiveSizeInBits() == 64) { Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd; - Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int); + Function *Ldrex = Intrinsic::getDeclaration(M, Int); Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext())); Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi"); @@ -13106,7 +13543,7 @@ Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, Type *Tys[] = { Addr->getType() }; Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex; - Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int, Tys); + Function *Ldrex = Intrinsic::getDeclaration(M, Int, Tys); return Builder.CreateTruncOrBitCast( Builder.CreateCall(Ldrex, Addr), @@ -13118,7 +13555,7 @@ void ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance( if (!Subtarget->hasV7Ops()) return; Module *M = Builder.GetInsertBlock()->getParent()->getParent(); - Builder.CreateCall(llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_clrex)); + Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::arm_clrex)); } Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val, @@ -13154,6 +13591,39 @@ Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val, Addr}); } +/// A helper function for determining the number of interleaved accesses we +/// will generate when lowering accesses of the given type. +unsigned +ARMTargetLowering::getNumInterleavedAccesses(VectorType *VecTy, + const DataLayout &DL) const { + return (DL.getTypeSizeInBits(VecTy) + 127) / 128; +} + +bool ARMTargetLowering::isLegalInterleavedAccessType( + VectorType *VecTy, const DataLayout &DL) const { + + unsigned VecSize = DL.getTypeSizeInBits(VecTy); + unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType()); + + // Ensure the vector doesn't have f16 elements. Even though we could do an + // i16 vldN, we can't hold the f16 vectors and will end up converting via + // f32. + if (VecTy->getElementType()->isHalfTy()) + return false; + + // Ensure the number of vector elements is greater than 1. + if (VecTy->getNumElements() < 2) + return false; + + // Ensure the element type is legal. + if (ElSize != 8 && ElSize != 16 && ElSize != 32) + return false; + + // Ensure the total vector size is 64 or a multiple of 128. Types larger than + // 128 will be split into multiple interleaved accesses. + return VecSize == 64 || VecSize % 128 == 0; +} + /// \brief Lower an interleaved load into a vldN intrinsic. /// /// E.g. Lower an interleaved load (Factor = 2): @@ -13178,64 +13648,97 @@ bool ARMTargetLowering::lowerInterleavedLoad( Type *EltTy = VecTy->getVectorElementType(); const DataLayout &DL = LI->getModule()->getDataLayout(); - unsigned VecSize = DL.getTypeSizeInBits(VecTy); - bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64; - // Skip if we do not have NEON and skip illegal vector types and vector types - // with i64/f64 elements (vldN doesn't support i64/f64 elements). - if (!Subtarget->hasNEON() || (VecSize != 64 && VecSize != 128) || EltIs64Bits) + // Skip if we do not have NEON and skip illegal vector types. We can + // "legalize" wide vector types into multiple interleaved accesses as long as + // the vector types are divisible by 128. + if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(VecTy, DL)) return false; + unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL); + // A pointer vector can not be the return type of the ldN intrinsics. Need to // load integer vectors first and then convert to pointer vectors. if (EltTy->isPointerTy()) VecTy = VectorType::get(DL.getIntPtrType(EltTy), VecTy->getVectorNumElements()); + IRBuilder<> Builder(LI); + + // The base address of the load. + Value *BaseAddr = LI->getPointerOperand(); + + if (NumLoads > 1) { + // If we're going to generate more than one load, reset the sub-vector type + // to something legal. + VecTy = VectorType::get(VecTy->getVectorElementType(), + VecTy->getVectorNumElements() / NumLoads); + + // We will compute the pointer operand of each load from the original base + // address using GEPs. Cast the base address to a pointer to the scalar + // element type. + BaseAddr = Builder.CreateBitCast( + BaseAddr, VecTy->getVectorElementType()->getPointerTo( + LI->getPointerAddressSpace())); + } + + assert(isTypeLegal(EVT::getEVT(VecTy)) && "Illegal vldN vector type!"); + + Type *Int8Ptr = Builder.getInt8PtrTy(LI->getPointerAddressSpace()); + Type *Tys[] = {VecTy, Int8Ptr}; static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2, Intrinsic::arm_neon_vld3, Intrinsic::arm_neon_vld4}; + Function *VldnFunc = + Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys); - IRBuilder<> Builder(LI); - SmallVector<Value *, 2> Ops; + // Holds sub-vectors extracted from the load intrinsic return values. The + // sub-vectors are associated with the shufflevector instructions they will + // replace. + DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs; - Type *Int8Ptr = Builder.getInt8PtrTy(LI->getPointerAddressSpace()); - Ops.push_back(Builder.CreateBitCast(LI->getPointerOperand(), Int8Ptr)); - Ops.push_back(Builder.getInt32(LI->getAlignment())); + for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) { - Type *Tys[] = { VecTy, Int8Ptr }; - Function *VldnFunc = - Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys); - CallInst *VldN = Builder.CreateCall(VldnFunc, Ops, "vldN"); + // If we're generating more than one load, compute the base address of + // subsequent loads as an offset from the previous. + if (LoadCount > 0) + BaseAddr = Builder.CreateConstGEP1_32( + BaseAddr, VecTy->getVectorNumElements() * Factor); - // Replace uses of each shufflevector with the corresponding vector loaded - // by ldN. - for (unsigned i = 0; i < Shuffles.size(); i++) { - ShuffleVectorInst *SV = Shuffles[i]; - unsigned Index = Indices[i]; + SmallVector<Value *, 2> Ops; + Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr)); + Ops.push_back(Builder.getInt32(LI->getAlignment())); - Value *SubVec = Builder.CreateExtractValue(VldN, Index); + CallInst *VldN = Builder.CreateCall(VldnFunc, Ops, "vldN"); - // Convert the integer vector to pointer vector if the element is pointer. - if (EltTy->isPointerTy()) - SubVec = Builder.CreateIntToPtr(SubVec, SV->getType()); + // Replace uses of each shufflevector with the corresponding vector loaded + // by ldN. + for (unsigned i = 0; i < Shuffles.size(); i++) { + ShuffleVectorInst *SV = Shuffles[i]; + unsigned Index = Indices[i]; - SV->replaceAllUsesWith(SubVec); - } + Value *SubVec = Builder.CreateExtractValue(VldN, Index); - return true; -} + // Convert the integer vector to pointer vector if the element is pointer. + if (EltTy->isPointerTy()) + SubVec = Builder.CreateIntToPtr(SubVec, SV->getType()); -/// \brief Get a mask consisting of sequential integers starting from \p Start. -/// -/// I.e. <Start, Start + 1, ..., Start + NumElts - 1> -static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start, - unsigned NumElts) { - SmallVector<Constant *, 16> Mask; - for (unsigned i = 0; i < NumElts; i++) - Mask.push_back(Builder.getInt32(Start + i)); + SubVecs[SV].push_back(SubVec); + } + } + + // Replace uses of the shufflevector instructions with the sub-vectors + // returned by the load intrinsic. If a shufflevector instruction is + // associated with more than one sub-vector, those sub-vectors will be + // concatenated into a single wide vector. + for (ShuffleVectorInst *SVI : Shuffles) { + auto &SubVec = SubVecs[SVI]; + auto *WideVec = + SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0]; + SVI->replaceAllUsesWith(WideVec); + } - return ConstantVector::get(Mask); + return true; } /// \brief Lower an interleaved store into a vstN intrinsic. @@ -13279,15 +13782,15 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI, VectorType *SubVecTy = VectorType::get(EltTy, LaneLen); const DataLayout &DL = SI->getModule()->getDataLayout(); - unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy); - bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64; - // Skip if we do not have NEON and skip illegal vector types and vector types - // with i64/f64 elements (vstN doesn't support i64/f64 elements). - if (!Subtarget->hasNEON() || (SubVecSize != 64 && SubVecSize != 128) || - EltIs64Bits) + // Skip if we do not have NEON and skip illegal vector types. We can + // "legalize" wide vector types into multiple interleaved accesses as long as + // the vector types are divisible by 128. + if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(SubVecTy, DL)) return false; + unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL); + Value *Op0 = SVI->getOperand(0); Value *Op1 = SVI->getOperand(1); IRBuilder<> Builder(SI); @@ -13306,44 +13809,75 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI, SubVecTy = VectorType::get(IntTy, LaneLen); } + // The base address of the store. + Value *BaseAddr = SI->getPointerOperand(); + + if (NumStores > 1) { + // If we're going to generate more than one store, reset the lane length + // and sub-vector type to something legal. + LaneLen /= NumStores; + SubVecTy = VectorType::get(SubVecTy->getVectorElementType(), LaneLen); + + // We will compute the pointer operand of each store from the original base + // address using GEPs. Cast the base address to a pointer to the scalar + // element type. + BaseAddr = Builder.CreateBitCast( + BaseAddr, SubVecTy->getVectorElementType()->getPointerTo( + SI->getPointerAddressSpace())); + } + + assert(isTypeLegal(EVT::getEVT(SubVecTy)) && "Illegal vstN vector type!"); + + auto Mask = SVI->getShuffleMask(); + + Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace()); + Type *Tys[] = {Int8Ptr, SubVecTy}; static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2, Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4}; - SmallVector<Value *, 6> Ops; - Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace()); - Ops.push_back(Builder.CreateBitCast(SI->getPointerOperand(), Int8Ptr)); + for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) { - Type *Tys[] = { Int8Ptr, SubVecTy }; - Function *VstNFunc = Intrinsic::getDeclaration( - SI->getModule(), StoreInts[Factor - 2], Tys); + // If we generating more than one store, we compute the base address of + // subsequent stores as an offset from the previous. + if (StoreCount > 0) + BaseAddr = Builder.CreateConstGEP1_32(BaseAddr, LaneLen * Factor); - // Split the shufflevector operands into sub vectors for the new vstN call. - auto Mask = SVI->getShuffleMask(); - for (unsigned i = 0; i < Factor; i++) { - if (Mask[i] >= 0) { - Ops.push_back(Builder.CreateShuffleVector( - Op0, Op1, getSequentialMask(Builder, Mask[i], LaneLen))); - } else { - unsigned StartMask = 0; - for (unsigned j = 1; j < LaneLen; j++) { - if (Mask[j*Factor + i] >= 0) { - StartMask = Mask[j*Factor + i] - j; - break; + SmallVector<Value *, 6> Ops; + Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr)); + + Function *VstNFunc = + Intrinsic::getDeclaration(SI->getModule(), StoreInts[Factor - 2], Tys); + + // Split the shufflevector operands into sub vectors for the new vstN call. + for (unsigned i = 0; i < Factor; i++) { + unsigned IdxI = StoreCount * LaneLen * Factor + i; + if (Mask[IdxI] >= 0) { + Ops.push_back(Builder.CreateShuffleVector( + Op0, Op1, createSequentialMask(Builder, Mask[IdxI], LaneLen, 0))); + } else { + unsigned StartMask = 0; + for (unsigned j = 1; j < LaneLen; j++) { + unsigned IdxJ = StoreCount * LaneLen * Factor + j; + if (Mask[IdxJ * Factor + IdxI] >= 0) { + StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ; + break; + } } + // Note: If all elements in a chunk are undefs, StartMask=0! + // Note: Filling undef gaps with random elements is ok, since + // those elements were being written anyway (with undefs). + // In the case of all undefs we're defaulting to using elems from 0 + // Note: StartMask cannot be negative, it's checked in + // isReInterleaveMask + Ops.push_back(Builder.CreateShuffleVector( + Op0, Op1, createSequentialMask(Builder, StartMask, LaneLen, 0))); } - // Note: If all elements in a chunk are undefs, StartMask=0! - // Note: Filling undef gaps with random elements is ok, since - // those elements were being written anyway (with undefs). - // In the case of all undefs we're defaulting to using elems from 0 - // Note: StartMask cannot be negative, it's checked in isReInterleaveMask - Ops.push_back(Builder.CreateShuffleVector( - Op0, Op1, getSequentialMask(Builder, StartMask, LaneLen))); } - } - Ops.push_back(Builder.getInt32(SI->getAlignment())); - Builder.CreateCall(VstNFunc, Ops); + Ops.push_back(Builder.getInt32(SI->getAlignment())); + Builder.CreateCall(VstNFunc, Ops); + } return true; } diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h index 84c6eb845bb8..70a0b1380ec9 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h @@ -175,9 +175,15 @@ class InstrItineraryData; VMULLs, // ...signed VMULLu, // ...unsigned + SMULWB, // Signed multiply word by half word, bottom + SMULWT, // Signed multiply word by half word, top UMLAL, // 64bit Unsigned Accumulate Multiply SMLAL, // 64bit Signed Accumulate Multiply UMAAL, // 64-bit Unsigned Accumulate Accumulate Multiply + SMLALBB, // 64-bit signed accumulate multiply bottom, bottom 16 + SMLALBT, // 64-bit signed accumulate multiply bottom, top 16 + SMLALTB, // 64-bit signed accumulate multiply top, bottom 16 + SMLALTT, // 64-bit signed accumulate multiply top, top 16 // Operands of the standard BUILD_VECTOR node are not legalized, which // is fine if BUILD_VECTORs are always lowered to shuffles or other @@ -346,6 +352,7 @@ class InstrItineraryData; void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero, APInt &KnownOne, + const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override; @@ -500,9 +507,18 @@ class InstrItineraryData; bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const override; + bool canMergeStoresTo(EVT MemVT) const override { + // Do not merge to larger than i32. + return (MemVT.getSizeInBits() <= 32); + } + bool isCheapToSpeculateCttz() const override; bool isCheapToSpeculateCtlz() const override; + bool convertSetCCLogicToBitwiseLogic(EVT VT) const override { + return VT.isScalarInteger(); + } + bool supportSwiftError() const override { return true; } @@ -514,6 +530,17 @@ class InstrItineraryData; CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const; CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const; + /// Returns true if \p VecTy is a legal interleaved access type. This + /// function checks the vector element type and the overall width of the + /// vector. + bool isLegalInterleavedAccessType(VectorType *VecTy, + const DataLayout &DL) const; + + /// Returns the number of interleaved accesses that will be generated when + /// lowering accesses of the given type. + unsigned getNumInterleavedAccesses(VectorType *VecTy, + const DataLayout &DL) const; + protected: std::pair<const TargetRegisterClass *, uint8_t> findRepresentativeClass(const TargetRegisterInfo *TRI, @@ -698,7 +725,7 @@ class InstrItineraryData; SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &ARMcc, SelectionDAG &DAG, const SDLoc &dl) const; SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, - const SDLoc &dl) const; + const SDLoc &dl, bool InvalidOnQNaN) const; SDValue duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const; SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td index 488439fc24e0..1bbe7f0d275e 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td @@ -184,7 +184,7 @@ def s_cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 CPSR))> { // ARM special operands for disassembly only. // -def SetEndAsmOperand : ImmAsmOperand { +def SetEndAsmOperand : ImmAsmOperand<0,1> { let Name = "SetEndImm"; let ParserMethod = "parseSetEndImm"; } @@ -221,25 +221,25 @@ def banked_reg : Operand<i32> { // 16 imm6<5:4> = '01', 16 - <imm> is encoded in imm6<3:0> // 32 imm6<5> = '1', 32 - <imm> is encoded in imm6<4:0> // 64 64 - <imm> is encoded in imm6<5:0> -def shr_imm8_asm_operand : ImmAsmOperand { let Name = "ShrImm8"; } +def shr_imm8_asm_operand : ImmAsmOperand<1,8> { let Name = "ShrImm8"; } def shr_imm8 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm <= 8; }]> { let EncoderMethod = "getShiftRight8Imm"; let DecoderMethod = "DecodeShiftRight8Imm"; let ParserMatchClass = shr_imm8_asm_operand; } -def shr_imm16_asm_operand : ImmAsmOperand { let Name = "ShrImm16"; } +def shr_imm16_asm_operand : ImmAsmOperand<1,16> { let Name = "ShrImm16"; } def shr_imm16 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm <= 16; }]> { let EncoderMethod = "getShiftRight16Imm"; let DecoderMethod = "DecodeShiftRight16Imm"; let ParserMatchClass = shr_imm16_asm_operand; } -def shr_imm32_asm_operand : ImmAsmOperand { let Name = "ShrImm32"; } +def shr_imm32_asm_operand : ImmAsmOperand<1,32> { let Name = "ShrImm32"; } def shr_imm32 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm <= 32; }]> { let EncoderMethod = "getShiftRight32Imm"; let DecoderMethod = "DecodeShiftRight32Imm"; let ParserMatchClass = shr_imm32_asm_operand; } -def shr_imm64_asm_operand : ImmAsmOperand { let Name = "ShrImm64"; } +def shr_imm64_asm_operand : ImmAsmOperand<1,64> { let Name = "ShrImm64"; } def shr_imm64 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm <= 64; }]> { let EncoderMethod = "getShiftRight64Imm"; let DecoderMethod = "DecodeShiftRight64Imm"; @@ -261,10 +261,19 @@ def const_pool_asm_imm : Operand<i32> { // Note: When EmitPriority == 1, the alias will be used for printing class ARMInstAlias<string Asm, dag Result, bit EmitPriority = 0> : InstAlias<Asm, Result, EmitPriority>, Requires<[IsARM]>; +class ARMInstSubst<string Asm, dag Result, bit EmitPriority = 0> + : InstAlias<Asm, Result, EmitPriority>, + Requires<[IsARM,UseNegativeImmediates]>; class tInstAlias<string Asm, dag Result, bit EmitPriority = 0> : InstAlias<Asm, Result, EmitPriority>, Requires<[IsThumb]>; +class tInstSubst<string Asm, dag Result, bit EmitPriority = 0> + : InstAlias<Asm, Result, EmitPriority>, + Requires<[IsThumb,UseNegativeImmediates]>; class t2InstAlias<string Asm, dag Result, bit EmitPriority = 0> : InstAlias<Asm, Result, EmitPriority>, Requires<[IsThumb2]>; +class t2InstSubst<string Asm, dag Result, bit EmitPriority = 0> + : InstAlias<Asm, Result, EmitPriority>, + Requires<[IsThumb2,UseNegativeImmediates]>; class VFP2InstAlias<string Asm, dag Result, bit EmitPriority = 0> : InstAlias<Asm, Result, EmitPriority>, Requires<[HasVFP2]>; class VFP2DPInstAlias<string Asm, dag Result, bit EmitPriority = 0> @@ -948,7 +957,7 @@ class ADivA1I<bits<3> opcod, dag oops, dag iops, } // PKH instructions -def PKHLSLAsmOperand : ImmAsmOperand { +def PKHLSLAsmOperand : ImmAsmOperand<0,31> { let Name = "PKHLSLImm"; let ParserMethod = "parsePKHLSLImm"; } @@ -1013,9 +1022,6 @@ class Thumb2DSPPat<dag pattern, dag result> : Pat<pattern, result> { class Thumb2DSPMulPat<dag pattern, dag result> : Pat<pattern, result> { list<Predicate> Predicates = [IsThumb2, UseMulOps, HasDSP]; } -class Thumb2ExtractPat<dag pattern, dag result> : Pat<pattern, result> { - list<Predicate> Predicates = [IsThumb2, HasT2ExtractPack]; -} //===----------------------------------------------------------------------===// // Thumb Instruction Format Definitions. // diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp index 27b64322dfa9..3b3606ef462a 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp @@ -129,8 +129,9 @@ void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI) const { MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand( MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, 4); MIB.addMemOperand(MMO); - MIB = BuildMI(MBB, MI, DL, get(ARM::LDRi12), Reg); - MIB.addReg(Reg, RegState::Kill).addImm(0); - MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); - AddDefaultPred(MIB); + BuildMI(MBB, MI, DL, get(ARM::LDRi12), Reg) + .addReg(Reg, RegState::Kill) + .addImm(0) + .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()) + .add(predOps(ARMCC::AL)); } diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td index c47393990e97..cc0e7d4d9c35 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -51,6 +51,8 @@ def SDT_ARMAnd : SDTypeProfile<1, 2, SDTCisVT<2, i32>]>; def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>; +def SDT_ARMFCmp : SDTypeProfile<0, 3, [SDTCisSameAs<0, 1>, + SDTCisVT<2, i32>]>; def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisPtrTy<1>, SDTCisVT<2, i32>]>; @@ -90,6 +92,13 @@ def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3, SDTCisVT<1, i32>, SDTCisVT<4, i32>]>; +def SDT_LongMac : SDTypeProfile<2, 4, [SDTCisVT<0, i32>, + SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, + SDTCisSameAs<0, 4>, + SDTCisSameAs<0, 5>]>; + // Node definitions. def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>; def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>; @@ -181,6 +190,13 @@ def ARMmemcopy : SDNode<"ARMISD::MEMCPY", SDT_ARMMEMCPY, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore, SDNPMayLoad]>; +def ARMsmulwb : SDNode<"ARMISD::SMULWB", SDTIntBinOp, []>; +def ARMsmulwt : SDNode<"ARMISD::SMULWT", SDTIntBinOp, []>; +def ARMsmlalbb : SDNode<"ARMISD::SMLALBB", SDT_LongMac, []>; +def ARMsmlalbt : SDNode<"ARMISD::SMLALBT", SDT_LongMac, []>; +def ARMsmlaltb : SDNode<"ARMISD::SMLALTB", SDT_LongMac, []>; +def ARMsmlaltt : SDNode<"ARMISD::SMLALTT", SDT_LongMac, []>; + //===----------------------------------------------------------------------===// // ARM Instruction Predicate Definitions. // @@ -247,9 +263,6 @@ def HasDivide : Predicate<"Subtarget->hasDivide()">, AssemblerPredicate<"FeatureHWDiv", "divide in THUMB">; def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">, AssemblerPredicate<"FeatureHWDivARM", "divide in ARM">; -def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">, - AssemblerPredicate<"FeatureT2XtPk", - "pack/extract">; def HasDSP : Predicate<"Subtarget->hasDSP()">, AssemblerPredicate<"FeatureDSP", "dsp">; def HasDB : Predicate<"Subtarget->hasDataBarrier()">, @@ -298,6 +311,11 @@ def UseNaClTrap : Predicate<"Subtarget->useNaClTrap()">, AssemblerPredicate<"FeatureNaClTrap", "NaCl">; def DontUseNaClTrap : Predicate<"!Subtarget->useNaClTrap()">; +def UseNegativeImmediates : + Predicate<"false">, + AssemblerPredicate<"!FeatureNoNegativeImmediates", + "NegativeImmediates">; + // FIXME: Eventually this will be just "hasV6T2Ops". def UseMovt : Predicate<"Subtarget->useMovt(*MF)">; def DontUseMovt : Predicate<"!Subtarget->useMovt(*MF)">; @@ -423,7 +441,16 @@ def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{ // // Immediate operands with a shared generic asm render method. -class ImmAsmOperand : AsmOperandClass { let RenderMethod = "addImmOperands"; } +class ImmAsmOperand<int Low, int High> : AsmOperandClass { + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isImmediate<" # Low # "," # High # ">"; + let DiagnosticType = "ImmRange" # Low # "_" # High; +} + +class ImmAsmOperandMinusOne<int Low, int High> : AsmOperandClass { + let PredicateMethod = "isImmediate<" # Low # "," # High # ">"; + let DiagnosticType = "ImmRange" # Low # "_" # High; +} // Operands that are part of a memory addressing mode. class MemOperand : Operand<i32> { let OperandType = "OPERAND_MEMORY"; } @@ -645,35 +672,45 @@ def arm_i32imm : PatLeaf<(imm), [{ }]>; /// imm0_1 predicate - Immediate in the range [0,1]. -def Imm0_1AsmOperand: ImmAsmOperand { let Name = "Imm0_1"; } +def Imm0_1AsmOperand: ImmAsmOperand<0,1> { let Name = "Imm0_1"; } def imm0_1 : Operand<i32> { let ParserMatchClass = Imm0_1AsmOperand; } /// imm0_3 predicate - Immediate in the range [0,3]. -def Imm0_3AsmOperand: ImmAsmOperand { let Name = "Imm0_3"; } +def Imm0_3AsmOperand: ImmAsmOperand<0,3> { let Name = "Imm0_3"; } def imm0_3 : Operand<i32> { let ParserMatchClass = Imm0_3AsmOperand; } /// imm0_7 predicate - Immediate in the range [0,7]. -def Imm0_7AsmOperand: ImmAsmOperand { let Name = "Imm0_7"; } +def Imm0_7AsmOperand: ImmAsmOperand<0,7> { + let Name = "Imm0_7"; +} def imm0_7 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 8; }]> { let ParserMatchClass = Imm0_7AsmOperand; } +/// imm8_255 predicate - Immediate in the range [8,255]. +def Imm8_255AsmOperand: ImmAsmOperand<8,255> { let Name = "Imm8_255"; } +def imm8_255 : Operand<i32>, ImmLeaf<i32, [{ + return Imm >= 8 && Imm < 256; +}]> { + let ParserMatchClass = Imm8_255AsmOperand; +} + /// imm8 predicate - Immediate is exactly 8. -def Imm8AsmOperand: ImmAsmOperand { let Name = "Imm8"; } +def Imm8AsmOperand: ImmAsmOperand<8,8> { let Name = "Imm8"; } def imm8 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 8; }]> { let ParserMatchClass = Imm8AsmOperand; } /// imm16 predicate - Immediate is exactly 16. -def Imm16AsmOperand: ImmAsmOperand { let Name = "Imm16"; } +def Imm16AsmOperand: ImmAsmOperand<16,16> { let Name = "Imm16"; } def imm16 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 16; }]> { let ParserMatchClass = Imm16AsmOperand; } /// imm32 predicate - Immediate is exactly 32. -def Imm32AsmOperand: ImmAsmOperand { let Name = "Imm32"; } +def Imm32AsmOperand: ImmAsmOperand<32,32> { let Name = "Imm32"; } def imm32 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 32; }]> { let ParserMatchClass = Imm32AsmOperand; } @@ -681,25 +718,25 @@ def imm32 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 32; }]> { def imm8_or_16 : ImmLeaf<i32, [{ return Imm == 8 || Imm == 16;}]>; /// imm1_7 predicate - Immediate in the range [1,7]. -def Imm1_7AsmOperand: ImmAsmOperand { let Name = "Imm1_7"; } +def Imm1_7AsmOperand: ImmAsmOperand<1,7> { let Name = "Imm1_7"; } def imm1_7 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 8; }]> { let ParserMatchClass = Imm1_7AsmOperand; } /// imm1_15 predicate - Immediate in the range [1,15]. -def Imm1_15AsmOperand: ImmAsmOperand { let Name = "Imm1_15"; } +def Imm1_15AsmOperand: ImmAsmOperand<1,15> { let Name = "Imm1_15"; } def imm1_15 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 16; }]> { let ParserMatchClass = Imm1_15AsmOperand; } /// imm1_31 predicate - Immediate in the range [1,31]. -def Imm1_31AsmOperand: ImmAsmOperand { let Name = "Imm1_31"; } +def Imm1_31AsmOperand: ImmAsmOperand<1,31> { let Name = "Imm1_31"; } def imm1_31 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 32; }]> { let ParserMatchClass = Imm1_31AsmOperand; } /// imm0_15 predicate - Immediate in the range [0,15]. -def Imm0_15AsmOperand: ImmAsmOperand { +def Imm0_15AsmOperand: ImmAsmOperand<0,15> { let Name = "Imm0_15"; let DiagnosticType = "ImmRange0_15"; } @@ -710,7 +747,7 @@ def imm0_15 : Operand<i32>, ImmLeaf<i32, [{ } /// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31]. -def Imm0_31AsmOperand: ImmAsmOperand { let Name = "Imm0_31"; } +def Imm0_31AsmOperand: ImmAsmOperand<0,31> { let Name = "Imm0_31"; } def imm0_31 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 32; }]> { @@ -718,15 +755,15 @@ def imm0_31 : Operand<i32>, ImmLeaf<i32, [{ } /// imm0_32 predicate - True if the 32-bit immediate is in the range [0,32]. -def Imm0_32AsmOperand: ImmAsmOperand { let Name = "Imm0_32"; } +def Imm0_32AsmOperand: ImmAsmOperand<0,32> { let Name = "Imm0_32"; } def imm0_32 : Operand<i32>, ImmLeaf<i32, [{ - return Imm >= 0 && Imm < 32; + return Imm >= 0 && Imm < 33; }]> { let ParserMatchClass = Imm0_32AsmOperand; } /// imm0_63 predicate - True if the 32-bit immediate is in the range [0,63]. -def Imm0_63AsmOperand: ImmAsmOperand { let Name = "Imm0_63"; } +def Imm0_63AsmOperand: ImmAsmOperand<0,63> { let Name = "Imm0_63"; } def imm0_63 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 64; }]> { @@ -734,7 +771,7 @@ def imm0_63 : Operand<i32>, ImmLeaf<i32, [{ } /// imm0_239 predicate - Immediate in the range [0,239]. -def Imm0_239AsmOperand : ImmAsmOperand { +def Imm0_239AsmOperand : ImmAsmOperand<0,239> { let Name = "Imm0_239"; let DiagnosticType = "ImmRange0_239"; } @@ -743,13 +780,13 @@ def imm0_239 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 240; }]> { } /// imm0_255 predicate - Immediate in the range [0,255]. -def Imm0_255AsmOperand : ImmAsmOperand { let Name = "Imm0_255"; } +def Imm0_255AsmOperand : ImmAsmOperand<0,255> { let Name = "Imm0_255"; } def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> { let ParserMatchClass = Imm0_255AsmOperand; } -/// imm0_65535 - An immediate is in the range [0.65535]. -def Imm0_65535AsmOperand: ImmAsmOperand { let Name = "Imm0_65535"; } +/// imm0_65535 - An immediate is in the range [0,65535]. +def Imm0_65535AsmOperand: ImmAsmOperand<0,65535> { let Name = "Imm0_65535"; } def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 65536; }]> { @@ -767,19 +804,23 @@ def imm0_65535_neg : Operand<i32>, ImmLeaf<i32, [{ // FIXME: This really needs a Thumb version separate from the ARM version. // While the range is the same, and can thus use the same match class, // the encoding is different so it should have a different encoder method. -def Imm0_65535ExprAsmOperand: ImmAsmOperand { let Name = "Imm0_65535Expr"; } +def Imm0_65535ExprAsmOperand: AsmOperandClass { + let Name = "Imm0_65535Expr"; + let RenderMethod = "addImmOperands"; +} + def imm0_65535_expr : Operand<i32> { let EncoderMethod = "getHiLo16ImmOpValue"; let ParserMatchClass = Imm0_65535ExprAsmOperand; } -def Imm256_65535ExprAsmOperand: ImmAsmOperand { let Name = "Imm256_65535Expr"; } +def Imm256_65535ExprAsmOperand: ImmAsmOperand<256,65535> { let Name = "Imm256_65535Expr"; } def imm256_65535_expr : Operand<i32> { let ParserMatchClass = Imm256_65535ExprAsmOperand; } /// imm24b - True if the 32-bit immediate is encodable in 24 bits. -def Imm24bitAsmOperand: ImmAsmOperand { let Name = "Imm24bit"; } +def Imm24bitAsmOperand: ImmAsmOperand<0,0xffffff> { let Name = "Imm24bit"; } def imm24b : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm <= 0xffffff; }]> { @@ -808,7 +849,9 @@ def imm1_32_XFORM: SDNodeXForm<imm, [{ return CurDAG->getTargetConstant((int)N->getZExtValue() - 1, SDLoc(N), MVT::i32); }]>; -def Imm1_32AsmOperand: AsmOperandClass { let Name = "Imm1_32"; } +def Imm1_32AsmOperand: ImmAsmOperandMinusOne<1,32> { + let Name = "Imm1_32"; +} def imm1_32 : Operand<i32>, PatLeaf<(imm), [{ uint64_t Imm = N->getZExtValue(); return Imm > 0 && Imm <= 32; @@ -822,7 +865,7 @@ def imm1_16_XFORM: SDNodeXForm<imm, [{ return CurDAG->getTargetConstant((int)N->getZExtValue() - 1, SDLoc(N), MVT::i32); }]>; -def Imm1_16AsmOperand: AsmOperandClass { let Name = "Imm1_16"; } +def Imm1_16AsmOperand: ImmAsmOperandMinusOne<1,16> { let Name = "Imm1_16"; } def imm1_16 : Operand<i32>, PatLeaf<(imm), [{ return Imm > 0 && Imm <= 16; }], imm1_16_XFORM> { let PrintMethod = "printImmPlusOneOperand"; @@ -3850,6 +3893,7 @@ def MVNi : AsI1<0b1111, (outs GPR:$Rd), (ins mod_imm:$imm), DPFrm, let Inst{11-0} = imm; } +let AddedComplexity = 1 in def : ARMPat<(and GPR:$src, mod_imm_not:$imm), (BICri GPR:$src, mod_imm_not:$imm)>; @@ -3899,7 +3943,8 @@ def MUL : AsMul1I32<0b0000000, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm), IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm", [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))]>, - Requires<[IsARM, HasV6]> { + Requires<[IsARM, HasV6]>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]> { let Inst{15-12} = 0b0000; let Unpredictable{15-12} = 0b1111; } @@ -3910,14 +3955,16 @@ def MULv5: ARMPseudoExpand<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, 4, IIC_iMUL32, [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))], (MUL GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s)>, - Requires<[IsARM, NoV6, UseMulOps]>; + Requires<[IsARM, NoV6, UseMulOps]>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]>; } def MLA : AsMul1I32<0b0000001, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra), IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra", [(set GPRnopc:$Rd, (add (mul GPRnopc:$Rn, GPRnopc:$Rm), GPRnopc:$Ra))]>, - Requires<[IsARM, HasV6, UseMulOps]> { + Requires<[IsARM, HasV6, UseMulOps]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]> { bits<4> Ra; let Inst{15-12} = Ra; } @@ -3928,12 +3975,14 @@ def MLAv5: ARMPseudoExpand<(outs GPRnopc:$Rd), pred:$p, cc_out:$s), 4, IIC_iMAC32, [(set GPRnopc:$Rd, (add (mul GPRnopc:$Rn, GPRnopc:$Rm), GPRnopc:$Ra))], (MLA GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra, pred:$p, cc_out:$s)>, - Requires<[IsARM, NoV6]>; + Requires<[IsARM, NoV6]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; def MLS : AMul1I<0b0000011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), IIC_iMAC32, "mls", "\t$Rd, $Rn, $Rm, $Ra", [(set GPR:$Rd, (sub GPR:$Ra, (mul GPR:$Rn, GPR:$Rm)))]>, - Requires<[IsARM, HasV6T2, UseMulOps]> { + Requires<[IsARM, HasV6T2, UseMulOps]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]> { bits<4> Rd; bits<4> Rm; bits<4> Rn; @@ -3949,26 +3998,38 @@ let hasSideEffects = 0 in { let isCommutable = 1 in { def SMULL : AsMul1I64<0b0000110, (outs GPR:$RdLo, GPR:$RdHi), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64, - "smull", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - Requires<[IsARM, HasV6]>; + "smull", "\t$RdLo, $RdHi, $Rn, $Rm", + [(set GPR:$RdLo, GPR:$RdHi, + (smullohi GPR:$Rn, GPR:$Rm))]>, + Requires<[IsARM, HasV6]>, + Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]>; def UMULL : AsMul1I64<0b0000100, (outs GPR:$RdLo, GPR:$RdHi), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64, - "umull", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - Requires<[IsARM, HasV6]>; + "umull", "\t$RdLo, $RdHi, $Rn, $Rm", + [(set GPR:$RdLo, GPR:$RdHi, + (umullohi GPR:$Rn, GPR:$Rm))]>, + Requires<[IsARM, HasV6]>, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL]>; let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in { def SMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), - 4, IIC_iMUL64, [], + 4, IIC_iMUL64, + [(set GPR:$RdLo, GPR:$RdHi, + (smullohi GPR:$Rn, GPR:$Rm))], (SMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, - Requires<[IsARM, NoV6]>; + Requires<[IsARM, NoV6]>, + Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]>; def UMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), - 4, IIC_iMUL64, [], + 4, IIC_iMUL64, + [(set GPR:$RdLo, GPR:$RdHi, + (umullohi GPR:$Rn, GPR:$Rm))], (UMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, - Requires<[IsARM, NoV6]>; + Requires<[IsARM, NoV6]>, + Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]>; } } @@ -3976,17 +4037,20 @@ def UMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), def SMLAL : AsMla1I64<0b0000111, (outs GPR:$RdLo, GPR:$RdHi), (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), IIC_iMAC64, "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>; + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; def UMLAL : AsMla1I64<0b0000101, (outs GPR:$RdLo, GPR:$RdHi), (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), IIC_iMAC64, "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>; + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi), (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), IIC_iMAC64, "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]> { + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]> { bits<4> RdLo; bits<4> RdHi; bits<4> Rm; @@ -4004,13 +4068,15 @@ def SMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), 4, IIC_iMAC64, [], (SMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, pred:$p, cc_out:$s)>, - Requires<[IsARM, NoV6]>; + Requires<[IsARM, NoV6]>, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; def UMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, pred:$p, cc_out:$s), 4, IIC_iMAC64, [], (UMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, pred:$p, cc_out:$s)>, - Requires<[IsARM, NoV6]>; + Requires<[IsARM, NoV6]>, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; } } // hasSideEffects @@ -4019,13 +4085,15 @@ def UMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), def SMMUL : AMul2I <0b0111010, 0b0001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL32, "smmul", "\t$Rd, $Rn, $Rm", [(set GPR:$Rd, (mulhs GPR:$Rn, GPR:$Rm))]>, - Requires<[IsARM, HasV6]> { + Requires<[IsARM, HasV6]>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]> { let Inst{15-12} = 0b1111; } def SMMULR : AMul2I <0b0111010, 0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL32, "smmulr", "\t$Rd, $Rn, $Rm", []>, - Requires<[IsARM, HasV6]> { + Requires<[IsARM, HasV6]>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]> { let Inst{15-12} = 0b1111; } @@ -4033,57 +4101,67 @@ def SMMLA : AMul2Ia <0b0111010, 0b0001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), IIC_iMAC32, "smmla", "\t$Rd, $Rn, $Rm, $Ra", [(set GPR:$Rd, (add (mulhs GPR:$Rn, GPR:$Rm), GPR:$Ra))]>, - Requires<[IsARM, HasV6, UseMulOps]>; + Requires<[IsARM, HasV6, UseMulOps]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), IIC_iMAC32, "smmlar", "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsARM, HasV6]>; + Requires<[IsARM, HasV6]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; def SMMLS : AMul2Ia <0b0111010, 0b1101, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), IIC_iMAC32, "smmls", "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsARM, HasV6, UseMulOps]>; + Requires<[IsARM, HasV6, UseMulOps]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), IIC_iMAC32, "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsARM, HasV6]>; + Requires<[IsARM, HasV6]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; multiclass AI_smul<string opc> { def BB : AMulxyI<0b0001011, 0b00, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm", [(set GPR:$Rd, (mul (sext_inreg GPR:$Rn, i16), (sext_inreg GPR:$Rm, i16)))]>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE]>, + Sched<[WriteMUL16, ReadMUL, ReadMUL]>; def BT : AMulxyI<0b0001011, 0b10, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm", [(set GPR:$Rd, (mul (sext_inreg GPR:$Rn, i16), (sra GPR:$Rm, (i32 16))))]>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE]>, + Sched<[WriteMUL16, ReadMUL, ReadMUL]>; def TB : AMulxyI<0b0001011, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm", [(set GPR:$Rd, (mul (sra GPR:$Rn, (i32 16)), (sext_inreg GPR:$Rm, i16)))]>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE]>, + Sched<[WriteMUL16, ReadMUL, ReadMUL]>; def TT : AMulxyI<0b0001011, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm", [(set GPR:$Rd, (mul (sra GPR:$Rn, (i32 16)), (sra GPR:$Rm, (i32 16))))]>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE]>, + Sched<[WriteMUL16, ReadMUL, ReadMUL]>; def WB : AMulxyI<0b0001001, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm", - []>, - Requires<[IsARM, HasV5TE]>; + [(set GPR:$Rd, (ARMsmulwb GPR:$Rn, GPR:$Rm))]>, + Requires<[IsARM, HasV5TE]>, + Sched<[WriteMUL16, ReadMUL, ReadMUL]>; def WT : AMulxyI<0b0001001, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm", - []>, - Requires<[IsARM, HasV5TE]>; + [(set GPR:$Rd, (ARMsmulwt GPR:$Rn, GPR:$Rm))]>, + Requires<[IsARM, HasV5TE]>, + Sched<[WriteMUL16, ReadMUL, ReadMUL]>; } @@ -4095,7 +4173,8 @@ multiclass AI_smla<string opc> { [(set GPRnopc:$Rd, (add GPR:$Ra, (mul (sext_inreg GPRnopc:$Rn, i16), (sext_inreg GPRnopc:$Rm, i16))))]>, - Requires<[IsARM, HasV5TE, UseMulOps]>; + Requires<[IsARM, HasV5TE, UseMulOps]>, + Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; def BT : AMulxyIa<0b0001000, 0b10, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), @@ -4103,7 +4182,8 @@ multiclass AI_smla<string opc> { [(set GPRnopc:$Rd, (add GPR:$Ra, (mul (sext_inreg GPRnopc:$Rn, i16), (sra GPRnopc:$Rm, (i32 16)))))]>, - Requires<[IsARM, HasV5TE, UseMulOps]>; + Requires<[IsARM, HasV5TE, UseMulOps]>, + Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; def TB : AMulxyIa<0b0001000, 0b01, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), @@ -4111,7 +4191,8 @@ multiclass AI_smla<string opc> { [(set GPRnopc:$Rd, (add GPR:$Ra, (mul (sra GPRnopc:$Rn, (i32 16)), (sext_inreg GPRnopc:$Rm, i16))))]>, - Requires<[IsARM, HasV5TE, UseMulOps]>; + Requires<[IsARM, HasV5TE, UseMulOps]>, + Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; def TT : AMulxyIa<0b0001000, 0b11, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), @@ -4119,19 +4200,24 @@ multiclass AI_smla<string opc> { [(set GPRnopc:$Rd, (add GPR:$Ra, (mul (sra GPRnopc:$Rn, (i32 16)), (sra GPRnopc:$Rm, (i32 16)))))]>, - Requires<[IsARM, HasV5TE, UseMulOps]>; + Requires<[IsARM, HasV5TE, UseMulOps]>, + Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; def WB : AMulxyIa<0b0001001, 0b00, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), IIC_iMAC16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra", - []>, - Requires<[IsARM, HasV5TE, UseMulOps]>; + [(set GPRnopc:$Rd, + (add GPR:$Ra, (ARMsmulwb GPRnopc:$Rn, GPRnopc:$Rm)))]>, + Requires<[IsARM, HasV5TE, UseMulOps]>, + Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; def WT : AMulxyIa<0b0001001, 0b10, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra", - []>, - Requires<[IsARM, HasV5TE, UseMulOps]>; + [(set GPRnopc:$Rd, + (add GPR:$Ra, (ARMsmulwt GPRnopc:$Rn, GPRnopc:$Rm)))]>, + Requires<[IsARM, HasV5TE, UseMulOps]>, + Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; } } @@ -4139,25 +4225,28 @@ defm SMUL : AI_smul<"smul">; defm SMLA : AI_smla<"smla">; // Halfword multiply accumulate long: SMLAL<x><y>. -def SMLALBB : AMulxyI64<0b0001010, 0b00, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), - (ins GPRnopc:$Rn, GPRnopc:$Rm), - IIC_iMAC64, "smlalbb", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - Requires<[IsARM, HasV5TE]>; - -def SMLALBT : AMulxyI64<0b0001010, 0b10, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), - (ins GPRnopc:$Rn, GPRnopc:$Rm), - IIC_iMAC64, "smlalbt", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - Requires<[IsARM, HasV5TE]>; - -def SMLALTB : AMulxyI64<0b0001010, 0b01, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), - (ins GPRnopc:$Rn, GPRnopc:$Rm), - IIC_iMAC64, "smlaltb", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - Requires<[IsARM, HasV5TE]>; - -def SMLALTT : AMulxyI64<0b0001010, 0b11, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), - (ins GPRnopc:$Rn, GPRnopc:$Rm), - IIC_iMAC64, "smlaltt", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - Requires<[IsARM, HasV5TE]>; +class SMLAL<bits<2> opc1, string asm> + : AMulxyI64<0b0001010, opc1, + (outs GPRnopc:$RdLo, GPRnopc:$RdHi), + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi), + IIC_iMAC64, asm, "\t$RdLo, $RdHi, $Rn, $Rm", []>, + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, + Requires<[IsARM, HasV5TE]>, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; + +def SMLALBB : SMLAL<0b00, "smlalbb">; +def SMLALBT : SMLAL<0b10, "smlalbt">; +def SMLALTB : SMLAL<0b01, "smlaltb">; +def SMLALTT : SMLAL<0b11, "smlaltt">; + +def : ARMV5TEPat<(ARMsmlalbb GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), + (SMLALBB $Rn, $Rm, $RLo, $RHi)>; +def : ARMV5TEPat<(ARMsmlalbt GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), + (SMLALBT $Rn, $Rm, $RLo, $RHi)>; +def : ARMV5TEPat<(ARMsmlaltb GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), + (SMLALTB $Rn, $Rm, $RLo, $RHi)>; +def : ARMV5TEPat<(ARMsmlaltt GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), + (SMLALTT $Rn, $Rm, $RLo, $RHi)>; // Helper class for AI_smld. class AMulDualIbase<bit long, bit sub, bit swap, dag oops, dag iops, @@ -4203,19 +4292,23 @@ multiclass AI_smld<bit sub, string opc> { def D : AMulDualIa<0, sub, 0, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), - NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm, $Ra">; + NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm, $Ra">, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; def DX: AMulDualIa<0, sub, 1, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), - NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm, $Ra">; + NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm, $Ra">, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; def LD: AMulDualI64<1, sub, 0, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), (ins GPRnopc:$Rn, GPRnopc:$Rm), NoItinerary, - !strconcat(opc, "ld"), "\t$RdLo, $RdHi, $Rn, $Rm">; + !strconcat(opc, "ld"), "\t$RdLo, $RdHi, $Rn, $Rm">, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; def LDX : AMulDualI64<1, sub, 1, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), (ins GPRnopc:$Rn, GPRnopc:$Rm), NoItinerary, - !strconcat(opc, "ldx"),"\t$RdLo, $RdHi, $Rn, $Rm">; + !strconcat(opc, "ldx"),"\t$RdLo, $RdHi, $Rn, $Rm">, + Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]>; } @@ -4225,9 +4318,11 @@ defm SMLS : AI_smld<1, "smls">; multiclass AI_sdml<bit sub, string opc> { def D:AMulDualI<0, sub, 0, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm), - NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm">; + NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm">, + Sched<[WriteMUL32, ReadMUL, ReadMUL]>; def DX:AMulDualI<0, sub, 1, (outs GPRnopc:$Rd),(ins GPRnopc:$Rn, GPRnopc:$Rm), - NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm">; + NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm">, + Sched<[WriteMUL32, ReadMUL, ReadMUL]>; } defm SMUA : AI_sdml<0, "smua">; @@ -4239,12 +4334,14 @@ defm SMUS : AI_sdml<1, "smus">; def SDIV : ADivA1I<0b001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV, "sdiv", "\t$Rd, $Rn, $Rm", [(set GPR:$Rd, (sdiv GPR:$Rn, GPR:$Rm))]>, - Requires<[IsARM, HasDivideInARM]>; + Requires<[IsARM, HasDivideInARM]>, + Sched<[WriteDIV]>; def UDIV : ADivA1I<0b011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV, "udiv", "\t$Rd, $Rn, $Rm", [(set GPR:$Rd, (udiv GPR:$Rn, GPR:$Rm))]>, - Requires<[IsARM, HasDivideInARM]>; + Requires<[IsARM, HasDivideInARM]>, + Sched<[WriteDIV]>; //===----------------------------------------------------------------------===// // Misc. Arithmetic Instructions. @@ -4831,14 +4928,15 @@ let AddedComplexity = 8 in { def : ARMPat<(atomic_store_release_32 addr_offset_none:$addr, GPR:$val), (STL GPR:$val, addr_offset_none:$addr)>; } -// SWP/SWPB are deprecated in V6/V7. +// SWP/SWPB are deprecated in V6/V7 and optional in v7VE. +// FIXME Use InstAlias to generate LDREX/STREX pairs instead. let mayLoad = 1, mayStore = 1 in { def SWP : AIswp<0, (outs GPRnopc:$Rt), (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swp", []>, - Requires<[PreV8]>; + Requires<[IsARM,PreV8]>; def SWPB: AIswp<1, (outs GPRnopc:$Rt), (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swpb", []>, - Requires<[PreV8]>; + Requires<[IsARM,PreV8]>; } //===----------------------------------------------------------------------===// @@ -4850,7 +4948,7 @@ def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, NoItinerary, "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", [(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn, imm:$CRm, imm:$opc2)]>, - Requires<[PreV8]> { + Requires<[IsARM,PreV8]> { bits<4> opc1; bits<4> CRn; bits<4> CRd; @@ -4872,7 +4970,7 @@ def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, NoItinerary, "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", [(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn, imm:$CRm, imm:$opc2)]>, - Requires<[PreV8]> { + Requires<[IsARM,PreV8]> { let Inst{31-28} = 0b1111; bits<4> opc1; bits<4> CRn; @@ -5048,13 +5146,13 @@ multiclass LdSt2Cop<bit load, bit Dbit, string asm, list<dag> pattern> { defm LDC : LdStCop <1, 0, "ldc", [(int_arm_ldc imm:$cop, imm:$CRd, addrmode5:$addr)]>; defm LDCL : LdStCop <1, 1, "ldcl", [(int_arm_ldcl imm:$cop, imm:$CRd, addrmode5:$addr)]>; -defm LDC2 : LdSt2Cop<1, 0, "ldc2", [(int_arm_ldc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8]>; -defm LDC2L : LdSt2Cop<1, 1, "ldc2l", [(int_arm_ldc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8]>; +defm LDC2 : LdSt2Cop<1, 0, "ldc2", [(int_arm_ldc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>; +defm LDC2L : LdSt2Cop<1, 1, "ldc2l", [(int_arm_ldc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>; defm STC : LdStCop <0, 0, "stc", [(int_arm_stc imm:$cop, imm:$CRd, addrmode5:$addr)]>; defm STCL : LdStCop <0, 1, "stcl", [(int_arm_stcl imm:$cop, imm:$CRd, addrmode5:$addr)]>; -defm STC2 : LdSt2Cop<0, 0, "stc2", [(int_arm_stc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8]>; -defm STC2L : LdSt2Cop<0, 1, "stc2l", [(int_arm_stc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8]>; +defm STC2 : LdSt2Cop<0, 0, "stc2", [(int_arm_stc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>; +defm STC2L : LdSt2Cop<0, 1, "stc2l", [(int_arm_stc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>; //===----------------------------------------------------------------------===// // Move between coprocessor and ARM core register. @@ -5132,7 +5230,7 @@ def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */, c_imm:$CRm, imm0_7:$opc2), [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, imm:$CRm, imm:$opc2)]>, - Requires<[PreV8]>; + Requires<[IsARM,PreV8]>; def : ARMInstAlias<"mcr2 $cop, $opc1, $Rt, $CRn, $CRm", (MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, c_imm:$CRm, 0)>; @@ -5140,7 +5238,7 @@ def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */, (outs GPRwithAPSR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), []>, - Requires<[PreV8]>; + Requires<[IsARM,PreV8]>; def : ARMInstAlias<"mrc2 $cop, $opc1, $Rt, $CRn, $CRm", (MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, 0)>; @@ -5183,7 +5281,7 @@ class MovRRCopro2<string opc, bit direction, dag oops, dag iops, list<dag> pattern = []> : ABXI<0b1100, oops, iops, NoItinerary, !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern>, - Requires<[PreV8]> { + Requires<[IsARM,PreV8]> { let Inst{31-28} = 0b1111; let Inst{23-21} = 0b010; let Inst{20} = direction; @@ -5525,20 +5623,26 @@ def : ARMPat<(extloadi16 addrmodepc:$addr), (PICLDRH addrmodepc:$addr)>; // smul* and smla* def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b), - (SMULBB GPR:$a, GPR:$b)>; + (SMULBB GPR:$a, GPR:$b)>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]>; def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, (i32 16))), - (SMULBT GPR:$a, GPR:$b)>; + (SMULBT GPR:$a, GPR:$b)>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]>; def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b), - (SMULTB GPR:$a, GPR:$b)>; + (SMULTB GPR:$a, GPR:$b)>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]>; def : ARMV5MOPat<(add GPR:$acc, (mul sext_16_node:$a, sext_16_node:$b)), - (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; + (SMLABB GPR:$a, GPR:$b, GPR:$acc)>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]>; def : ARMV5MOPat<(add GPR:$acc, (mul sext_16_node:$a, (sra GPR:$b, (i32 16)))), - (SMLABT GPR:$a, GPR:$b, GPR:$acc)>; + (SMLABT GPR:$a, GPR:$b, GPR:$acc)>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]>; def : ARMV5MOPat<(add GPR:$acc, (mul (sra GPR:$a, (i32 16)), sext_16_node:$b)), - (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; + (SMLATB GPR:$a, GPR:$b, GPR:$acc)>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]>; // Pre-v7 uses MCR for synchronization barriers. def : ARMPat<(ARMMemBarrierMCR GPR:$zero), (MCR 15, 0, GPR:$zero, 7, 10, 5)>, @@ -5717,33 +5821,49 @@ def : MnemonicAlias<"usubaddx", "usax">; // "mov Rd, mod_imm_not" can be handled via "mvn" in assembly, just like // for isel. -def : ARMInstAlias<"mov${s}${p} $Rd, $imm", +def : ARMInstSubst<"mov${s}${p} $Rd, $imm", (MVNi rGPR:$Rd, mod_imm_not:$imm, pred:$p, cc_out:$s)>; -def : ARMInstAlias<"mvn${s}${p} $Rd, $imm", +def : ARMInstSubst<"mvn${s}${p} $Rd, $imm", (MOVi rGPR:$Rd, mod_imm_not:$imm, pred:$p, cc_out:$s)>; // Same for AND <--> BIC -def : ARMInstAlias<"bic${s}${p} $Rd, $Rn, $imm", +def : ARMInstSubst<"bic${s}${p} $Rd, $Rn, $imm", (ANDri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm, pred:$p, cc_out:$s)>; -def : ARMInstAlias<"bic${s}${p} $Rdn, $imm", +def : ARMInstSubst<"bic${s}${p} $Rdn, $imm", (ANDri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm, pred:$p, cc_out:$s)>; -def : ARMInstAlias<"and${s}${p} $Rd, $Rn, $imm", +def : ARMInstSubst<"and${s}${p} $Rd, $Rn, $imm", (BICri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm, pred:$p, cc_out:$s)>; -def : ARMInstAlias<"and${s}${p} $Rdn, $imm", +def : ARMInstSubst<"and${s}${p} $Rdn, $imm", (BICri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm, pred:$p, cc_out:$s)>; // Likewise, "add Rd, mod_imm_neg" -> sub -def : ARMInstAlias<"add${s}${p} $Rd, $Rn, $imm", +def : ARMInstSubst<"add${s}${p} $Rd, $Rn, $imm", (SUBri GPR:$Rd, GPR:$Rn, mod_imm_neg:$imm, pred:$p, cc_out:$s)>; -def : ARMInstAlias<"add${s}${p} $Rd, $imm", +def : ARMInstSubst<"add${s}${p} $Rd, $imm", (SUBri GPR:$Rd, GPR:$Rd, mod_imm_neg:$imm, pred:$p, cc_out:$s)>; +// Likewise, "sub Rd, mod_imm_neg" -> add +def : ARMInstSubst<"sub${s}${p} $Rd, $Rn, $imm", + (ADDri GPR:$Rd, GPR:$Rn, mod_imm_neg:$imm, pred:$p, cc_out:$s)>; +def : ARMInstSubst<"sub${s}${p} $Rd, $imm", + (ADDri GPR:$Rd, GPR:$Rd, mod_imm_neg:$imm, pred:$p, cc_out:$s)>; + + +def : ARMInstSubst<"adc${s}${p} $Rd, $Rn, $imm", + (SBCri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm, pred:$p, cc_out:$s)>; +def : ARMInstSubst<"adc${s}${p} $Rdn, $imm", + (SBCri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm, pred:$p, cc_out:$s)>; +def : ARMInstSubst<"sbc${s}${p} $Rd, $Rn, $imm", + (ADCri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm, pred:$p, cc_out:$s)>; +def : ARMInstSubst<"sbc${s}${p} $Rdn, $imm", + (ADCri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm, pred:$p, cc_out:$s)>; + // Same for CMP <--> CMN via mod_imm_neg -def : ARMInstAlias<"cmp${p} $Rd, $imm", +def : ARMInstSubst<"cmp${p} $Rd, $imm", (CMNri rGPR:$Rd, mod_imm_neg:$imm, pred:$p)>; -def : ARMInstAlias<"cmn${p} $Rd, $imm", +def : ARMInstSubst<"cmn${p} $Rd, $imm", (CMPri rGPR:$Rd, mod_imm_neg:$imm, pred:$p)>; // The shifter forms of the MOV instruction are aliased to the ASR, LSL, diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td index b5fa8e999e2a..681e235d78f0 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -7139,6 +7139,17 @@ let Predicates = [IsBE] in { (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>; } +def : Pat<(v2i64 (concat_vectors DPR:$Dn, DPR:$Dm)), + (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>; +def : Pat<(v4i32 (concat_vectors DPR:$Dn, DPR:$Dm)), + (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>; +def : Pat<(v8i16 (concat_vectors DPR:$Dn, DPR:$Dm)), + (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>; +def : Pat<(v16i8 (concat_vectors DPR:$Dn, DPR:$Dm)), + (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>; +def : Pat<(v4f32 (concat_vectors DPR:$Dn, DPR:$Dm)), + (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>; + //===----------------------------------------------------------------------===// // Assembler aliases // diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td index a681f64b05e6..f2f426e86701 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td @@ -19,7 +19,7 @@ def imm_sr_XFORM: SDNodeXForm<imm, [{ unsigned Imm = N->getZExtValue(); return CurDAG->getTargetConstant((Imm == 32 ? 0 : Imm), SDLoc(N), MVT::i32); }]>; -def ThumbSRImmAsmOperand: AsmOperandClass { let Name = "ImmThumbSR"; } +def ThumbSRImmAsmOperand: ImmAsmOperand<1,32> { let Name = "ImmThumbSR"; } def imm_sr : Operand<i32>, PatLeaf<(imm), [{ uint64_t Imm = N->getZExtValue(); return Imm > 0 && Imm <= 32; @@ -28,22 +28,31 @@ def imm_sr : Operand<i32>, PatLeaf<(imm), [{ let ParserMatchClass = ThumbSRImmAsmOperand; } -def imm_comp_XFORM : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant(~((uint32_t)N->getZExtValue()), SDLoc(N), - MVT::i32); -}]>; - def imm0_7_neg : PatLeaf<(i32 imm), [{ return (uint32_t)-N->getZExtValue() < 8; }], imm_neg_XFORM>; +def ThumbModImmNeg1_7AsmOperand : AsmOperandClass { let Name = "ThumbModImmNeg1_7"; } +def mod_imm1_7_neg : Operand<i32>, PatLeaf<(imm), [{ + unsigned Value = -(unsigned)N->getZExtValue(); + return 0 < Value && Value < 8; + }], imm_neg_XFORM> { + let ParserMatchClass = ThumbModImmNeg1_7AsmOperand; +} + +def ThumbModImmNeg8_255AsmOperand : AsmOperandClass { let Name = "ThumbModImmNeg8_255"; } +def mod_imm8_255_neg : Operand<i32>, PatLeaf<(imm), [{ + unsigned Value = -(unsigned)N->getZExtValue(); + return 7 < Value && Value < 256; + }], imm_neg_XFORM> { + let ParserMatchClass = ThumbModImmNeg8_255AsmOperand; +} + + def imm0_255_comp : PatLeaf<(i32 imm), [{ return ~((uint32_t)N->getZExtValue()) < 256; }]>; -def imm8_255 : ImmLeaf<i32, [{ - return Imm >= 8 && Imm < 256; -}]>; def imm8_255_neg : PatLeaf<(i32 imm), [{ unsigned Val = -N->getZExtValue(); return Val >= 8 && Val < 256; @@ -407,9 +416,9 @@ def tSUBspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm), let DecoderMethod = "DecodeThumbAddSPImm"; } -def : tInstAlias<"add${p} sp, $imm", +def : tInstSubst<"add${p} sp, $imm", (tSUBspi SP, t_imm0_508s4_neg:$imm, pred:$p)>; -def : tInstAlias<"add${p} sp, sp, $imm", +def : tInstSubst<"add${p} sp, sp, $imm", (tSUBspi SP, t_imm0_508s4_neg:$imm, pred:$p)>; // Can optionally specify SP as a three operand instruction. @@ -910,7 +919,7 @@ let isAdd = 1 in { def tADC : // A8.6.2 T1sItDPEncode<0b0101, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iALUr, "adc", "\t$Rdn, $Rm", - [(set tGPR:$Rdn, (adde tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>; + []>, Sched<[WriteALU]>; // Add immediate def tADDi3 : // A8.6.4 T1 @@ -938,6 +947,43 @@ let isAdd = 1 in { "add", "\t$Rd, $Rn, $Rm", [(set tGPR:$Rd, (add tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>; + /// Similar to the above except these set the 's' bit so the + /// instruction modifies the CPSR register. + /// + /// These opcodes will be converted to the real non-S opcodes by + /// AdjustInstrPostInstrSelection after giving then an optional CPSR operand. + let hasPostISelHook = 1, Defs = [CPSR] in { + let isCommutable = 1 in + def tADCS : tPseudoInst<(outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), + 2, IIC_iALUr, + [(set tGPR:$Rdn, CPSR, (ARMadde tGPR:$Rn, tGPR:$Rm, + CPSR))]>, + Requires<[IsThumb1Only]>, + Sched<[WriteALU]>; + + def tADDSi3 : tPseudoInst<(outs tGPR:$Rd), (ins tGPR:$Rm, imm0_7:$imm3), + 2, IIC_iALUi, + [(set tGPR:$Rd, CPSR, (ARMaddc tGPR:$Rm, + imm0_7:$imm3))]>, + Requires<[IsThumb1Only]>, + Sched<[WriteALU]>; + + def tADDSi8 : tPseudoInst<(outs tGPR:$Rdn), (ins tGPR:$Rn, imm0_255:$imm8), + 2, IIC_iALUi, + [(set tGPR:$Rdn, CPSR, (ARMaddc tGPR:$Rn, + imm8_255:$imm8))]>, + Requires<[IsThumb1Only]>, + Sched<[WriteALU]>; + + let isCommutable = 1 in + def tADDSrr : tPseudoInst<(outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm), + 2, IIC_iALUr, + [(set tGPR:$Rd, CPSR, (ARMaddc tGPR:$Rn, + tGPR:$Rm))]>, + Requires<[IsThumb1Only]>, + Sched<[WriteALU]>; + } + let hasSideEffects = 0 in def tADDhirr : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPR:$Rm), IIC_iALUr, "add", "\t$Rdn, $Rm", []>, @@ -951,6 +997,12 @@ let isAdd = 1 in { } } +def : tInstSubst<"sub${s}${p} $rd, $rn, $imm", + (tADDi3 tGPR:$rd, s_cc_out:$s, tGPR:$rn, mod_imm1_7_neg:$imm, pred:$p)>; +def : tInstSubst<"sub${s}${p} $rdn, $imm", + (tADDi8 tGPR:$rdn, s_cc_out:$s, mod_imm8_255_neg:$imm, pred:$p)>; + + // AND register let isCommutable = 1 in def tAND : // A8.6.12 @@ -1197,7 +1249,7 @@ def tSBC : // A8.6.151 T1sItDPEncode<0b0110, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iALUr, "sbc", "\t$Rdn, $Rm", - [(set tGPR:$Rdn, (sube tGPR:$Rn, tGPR:$Rm))]>, + []>, Sched<[WriteALU]>; // Subtract immediate @@ -1218,6 +1270,14 @@ def tSUBi8 : // A8.6.210 T2 [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255_neg:$imm8))]>, Sched<[WriteALU]>; +def : tInstSubst<"add${s}${p} $rd, $rn, $imm", + (tSUBi3 tGPR:$rd, s_cc_out:$s, tGPR:$rn, mod_imm1_7_neg:$imm, pred:$p)>; + + +def : tInstSubst<"add${s}${p} $rdn, $imm", + (tSUBi8 tGPR:$rdn, s_cc_out:$s, mod_imm8_255_neg:$imm, pred:$p)>; + + // Subtract register def tSUBrr : // A8.6.212 T1sIGenEncode<0b01101, (outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm), @@ -1226,6 +1286,41 @@ def tSUBrr : // A8.6.212 [(set tGPR:$Rd, (sub tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>; +/// Similar to the above except these set the 's' bit so the +/// instruction modifies the CPSR register. +/// +/// These opcodes will be converted to the real non-S opcodes by +/// AdjustInstrPostInstrSelection after giving then an optional CPSR operand. +let hasPostISelHook = 1, Defs = [CPSR] in { + def tSBCS : tPseudoInst<(outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), + 2, IIC_iALUr, + [(set tGPR:$Rdn, CPSR, (ARMsube tGPR:$Rn, tGPR:$Rm, + CPSR))]>, + Requires<[IsThumb1Only]>, + Sched<[WriteALU]>; + + def tSUBSi3 : tPseudoInst<(outs tGPR:$Rd), (ins tGPR:$Rm, imm0_7:$imm3), + 2, IIC_iALUi, + [(set tGPR:$Rd, CPSR, (ARMsubc tGPR:$Rm, + imm0_7:$imm3))]>, + Requires<[IsThumb1Only]>, + Sched<[WriteALU]>; + + def tSUBSi8 : tPseudoInst<(outs tGPR:$Rdn), (ins tGPR:$Rn, imm0_255:$imm8), + 2, IIC_iALUi, + [(set tGPR:$Rdn, CPSR, (ARMsubc tGPR:$Rn, + imm8_255:$imm8))]>, + Requires<[IsThumb1Only]>, + Sched<[WriteALU]>; + + def tSUBSrr : tPseudoInst<(outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm), + 2, IIC_iALUr, + [(set tGPR:$Rd, CPSR, (ARMsubc tGPR:$Rn, + tGPR:$Rm))]>, + Requires<[IsThumb1Only]>, + Sched<[WriteALU]>; +} + // Sign-extend byte def tSXTB : // A8.6.222 T1pIMiscEncode<{0,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm), @@ -1386,22 +1481,6 @@ def : T1Pat<(ARMcmpZ tGPR:$Rn, imm0_255:$imm8), def : T1Pat<(ARMcmpZ tGPR:$Rn, tGPR:$Rm), (tCMPr tGPR:$Rn, tGPR:$Rm)>; -// Add with carry -def : T1Pat<(addc tGPR:$lhs, imm0_7:$rhs), - (tADDi3 tGPR:$lhs, imm0_7:$rhs)>; -def : T1Pat<(addc tGPR:$lhs, imm8_255:$rhs), - (tADDi8 tGPR:$lhs, imm8_255:$rhs)>; -def : T1Pat<(addc tGPR:$lhs, tGPR:$rhs), - (tADDrr tGPR:$lhs, tGPR:$rhs)>; - -// Subtract with carry -def : T1Pat<(addc tGPR:$lhs, imm0_7_neg:$rhs), - (tSUBi3 tGPR:$lhs, imm0_7_neg:$rhs)>; -def : T1Pat<(addc tGPR:$lhs, imm8_255_neg:$rhs), - (tSUBi8 tGPR:$lhs, imm8_255_neg:$rhs)>; -def : T1Pat<(subc tGPR:$lhs, tGPR:$rhs), - (tSUBrr tGPR:$lhs, tGPR:$rhs)>; - // Bswap 16 with load/store def : T1Pat<(srl (bswap (extloadi16 t_addrmode_is2:$addr)), (i32 16)), (tREV16 (tLDRHi t_addrmode_is2:$addr))>; @@ -1477,7 +1556,7 @@ def : T1Pat<(extloadi16 t_addrmode_rr:$addr), (tLDRHr t_addrmode_rr:$addr)>; // post-inc LDR -> LDM r0!, {r1}. The way operands are layed out in LDMs is // different to how ISel expects them for a post-inc load, so use a pseudo // and expand it just after ISel. -let usesCustomInserter = 1, +let usesCustomInserter = 1, mayLoad =1, Constraints = "$Rn = $Rn_wb,@earlyclobber $Rn_wb" in def tLDR_postidx: tPseudoInst<(outs rGPR:$Rt, rGPR:$Rn_wb), (ins rGPR:$Rn, pred:$p), @@ -1547,7 +1626,7 @@ def : T1Pat<(i32 thumb_immshifted:$src), (thumb_immshifted_shamt imm:$src))>; def : T1Pat<(i32 imm0_255_comp:$src), - (tMVN (tMOVi8 (imm_comp_XFORM imm:$src)))>; + (tMVN (tMOVi8 (imm_not_XFORM imm:$src)))>; def : T1Pat<(i32 imm256_510:$src), (tADDi8 (tMOVi8 255), diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td index 603d66403e65..f5b673b78ad7 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -76,7 +76,11 @@ def t2_so_imm_notSext16_XFORM : SDNodeXForm<imm, [{ // t2_so_imm - Match a 32-bit immediate operand, which is an // 8-bit immediate rotated by an arbitrary number of bits, or an 8-bit // immediate splatted into multiple bytes of the word. -def t2_so_imm_asmoperand : ImmAsmOperand { let Name = "T2SOImm"; } +def t2_so_imm_asmoperand : AsmOperandClass { + let Name = "T2SOImm"; + let RenderMethod = "addImmOperands"; + +} def t2_so_imm : Operand<i32>, ImmLeaf<i32, [{ return ARM_AM::getT2SOImmVal(Imm) != -1; }]> { @@ -110,15 +114,14 @@ def t2_so_imm_notSext : Operand<i32>, PatLeaf<(imm), [{ // t2_so_imm_neg - Match an immediate that is a negation of a t2_so_imm. def t2_so_imm_neg_asmoperand : AsmOperandClass { let Name = "T2SOImmNeg"; } -def t2_so_imm_neg : Operand<i32>, PatLeaf<(imm), [{ - int64_t Value = -(int)N->getZExtValue(); - return Value && ARM_AM::getT2SOImmVal(Value) != -1; +def t2_so_imm_neg : Operand<i32>, ImmLeaf<i32, [{ + return Imm && ARM_AM::getT2SOImmVal(-(uint32_t)Imm) != -1; }], t2_so_imm_neg_XFORM> { let ParserMatchClass = t2_so_imm_neg_asmoperand; } -/// imm0_4095 predicate - True if the 32-bit immediate is in the range [0.4095]. -def imm0_4095_asmoperand: ImmAsmOperand { let Name = "Imm0_4095"; } +/// imm0_4095 predicate - True if the 32-bit immediate is in the range [0,4095]. +def imm0_4095_asmoperand: ImmAsmOperand<0,4095> { let Name = "Imm0_4095"; } def imm0_4095 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 4096; }]> { @@ -139,7 +142,7 @@ def imm1_255_neg : PatLeaf<(i32 imm), [{ def imm0_255_not : PatLeaf<(i32 imm), [{ return (uint32_t)(~N->getZExtValue()) < 255; -}], imm_comp_XFORM>; +}], imm_not_XFORM>; def lo5AllOne : PatLeaf<(i32 imm), [{ // Returns true if all low 5-bits are 1. @@ -538,7 +541,8 @@ class T2FourReg<dag oops, dag iops, InstrItinClass itin, class T2MulLong<bits<3> opc22_20, bits<4> opc7_4, string opc, list<dag> pattern> : T2I<(outs rGPR:$RdLo, rGPR:$RdHi), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL64, - opc, "\t$RdLo, $RdHi, $Rn, $Rm", pattern> { + opc, "\t$RdLo, $RdHi, $Rn, $Rm", pattern>, + Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]> { bits<4> RdLo; bits<4> RdHi; bits<4> Rn; @@ -556,7 +560,8 @@ class T2MlaLong<bits<3> opc22_20, bits<4> opc7_4, string opc> : T2I<(outs rGPR:$RdLo, rGPR:$RdHi), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64, opc, "\t$RdLo, $RdHi, $Rn, $Rm", []>, - RegConstraint<"$RLo = $RdLo, $RHi = $RdHi"> { + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]> { bits<4> RdLo; bits<4> RdHi; bits<4> Rn; @@ -977,7 +982,8 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc, PatFrag opnode> { def i12 : T2Ii12<(outs target:$Rt), (ins t2addrmode_imm12:$addr), iii, opc, ".w\t$Rt, $addr", - [(set target:$Rt, (opnode t2addrmode_imm12:$addr))]> { + [(set target:$Rt, (opnode t2addrmode_imm12:$addr))]>, + Sched<[WriteLd]> { bits<4> Rt; bits<17> addr; let Inst{31-25} = 0b1111100; @@ -993,7 +999,8 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc, } def i8 : T2Ii8 <(outs target:$Rt), (ins t2addrmode_negimm8:$addr), iii, opc, "\t$Rt, $addr", - [(set target:$Rt, (opnode t2addrmode_negimm8:$addr))]> { + [(set target:$Rt, (opnode t2addrmode_negimm8:$addr))]>, + Sched<[WriteLd]> { bits<4> Rt; bits<13> addr; let Inst{31-27} = 0b11111; @@ -1015,7 +1022,8 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc, } def s : T2Iso <(outs target:$Rt), (ins t2addrmode_so_reg:$addr), iis, opc, ".w\t$Rt, $addr", - [(set target:$Rt, (opnode t2addrmode_so_reg:$addr))]> { + [(set target:$Rt, (opnode t2addrmode_so_reg:$addr))]>, + Sched<[WriteLd]> { let Inst{31-27} = 0b11111; let Inst{26-25} = 0b00; let Inst{24} = signed; @@ -1039,7 +1047,8 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc, // from the PC. def pci : T2Ipc <(outs target:$Rt), (ins t2ldrlabel:$addr), iii, opc, ".w\t$Rt, $addr", - [(set target:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]> { + [(set target:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]>, + Sched<[WriteLd]> { let isReMaterializable = 1; let Inst{31-27} = 0b11111; let Inst{26-25} = 0b00; @@ -1065,7 +1074,8 @@ multiclass T2I_st<bits<2> opcod, string opc, PatFrag opnode> { def i12 : T2Ii12<(outs), (ins target:$Rt, t2addrmode_imm12:$addr), iii, opc, ".w\t$Rt, $addr", - [(opnode target:$Rt, t2addrmode_imm12:$addr)]> { + [(opnode target:$Rt, t2addrmode_imm12:$addr)]>, + Sched<[WriteST]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0001; let Inst{22-21} = opcod; @@ -1082,7 +1092,8 @@ multiclass T2I_st<bits<2> opcod, string opc, } def i8 : T2Ii8 <(outs), (ins target:$Rt, t2addrmode_negimm8:$addr), iii, opc, "\t$Rt, $addr", - [(opnode target:$Rt, t2addrmode_negimm8:$addr)]> { + [(opnode target:$Rt, t2addrmode_negimm8:$addr)]>, + Sched<[WriteST]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0000; let Inst{22-21} = opcod; @@ -1102,7 +1113,8 @@ multiclass T2I_st<bits<2> opcod, string opc, } def s : T2Iso <(outs), (ins target:$Rt, t2addrmode_so_reg:$addr), iis, opc, ".w\t$Rt, $addr", - [(opnode target:$Rt, t2addrmode_so_reg:$addr)]> { + [(opnode target:$Rt, t2addrmode_so_reg:$addr)]>, + Sched<[WriteST]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0000; let Inst{22-21} = opcod; @@ -1121,28 +1133,10 @@ multiclass T2I_st<bits<2> opcod, string opc, /// T2I_ext_rrot - A unary operation with two forms: one whose operand is a /// register and one whose operand is a register rotated by 8/16/24. -class T2I_ext_rrot<bits<3> opcod, string opc, PatFrag opnode> - : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot), IIC_iEXTr, - opc, ".w\t$Rd, $Rm$rot", - [(set rGPR:$Rd, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]>, - Requires<[IsThumb2]> { - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0100; - let Inst{22-20} = opcod; - let Inst{19-16} = 0b1111; // Rn - let Inst{15-12} = 0b1111; - let Inst{7} = 1; - - bits<2> rot; - let Inst{5-4} = rot{1-0}; // rotate -} - -// UXTB16 - Requres T2ExtractPack, does not need the .w qualifier. -class T2I_ext_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> - : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot), - IIC_iEXTr, opc, "\t$Rd, $Rm$rot", - [(set rGPR:$Rd, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]>, - Requires<[HasT2ExtractPack, IsThumb2]> { +class T2I_ext_rrot_base<bits<3> opcod, dag iops, dag oops, + string opc, string oprs, + list<dag> pattern> + : T2TwoReg<iops, oops, IIC_iEXTr, opc, oprs, pattern> { bits<2> rot; let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; @@ -1150,46 +1144,34 @@ class T2I_ext_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> let Inst{19-16} = 0b1111; // Rn let Inst{15-12} = 0b1111; let Inst{7} = 1; - let Inst{5-4} = rot; -} - -// SXTB16 - Requres T2ExtractPack, does not need the .w qualifier, no pattern -// supported yet. -class T2I_ext_rrot_sxtb16<bits<3> opcod, string opc> - : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot), IIC_iEXTr, - opc, "\t$Rd, $Rm$rot", []>, - Requires<[IsThumb2, HasT2ExtractPack]> { - bits<2> rot; - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0100; - let Inst{22-20} = opcod; - let Inst{19-16} = 0b1111; // Rn - let Inst{15-12} = 0b1111; - let Inst{7} = 1; - let Inst{5-4} = rot; -} + let Inst{5-4} = rot; // rotate +} + +class T2I_ext_rrot<bits<3> opcod, string opc> + : T2I_ext_rrot_base<opcod, + (outs rGPR:$Rd), + (ins rGPR:$Rm, rot_imm:$rot), + opc, ".w\t$Rd, $Rm$rot", []>, + Requires<[IsThumb2]>, + Sched<[WriteALU, ReadALU]>; + +// UXTB16, SXTB16 - Requires HasDSP, does not need the .w qualifier. +class T2I_ext_rrot_xtb16<bits<3> opcod, string opc> + : T2I_ext_rrot_base<opcod, + (outs rGPR:$Rd), + (ins rGPR:$Rm, rot_imm:$rot), + opc, "\t$Rd, $Rm$rot", []>, + Requires<[HasDSP, IsThumb2]>, + Sched<[WriteALU, ReadALU]>; /// T2I_exta_rrot - A binary operation with two forms: one whose operand is a /// register and one whose operand is a register rotated by 8/16/24. -class T2I_exta_rrot<bits<3> opcod, string opc, PatFrag opnode> +class T2I_exta_rrot<bits<3> opcod, string opc> : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rot_imm:$rot), - IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm$rot", - [(set rGPR:$Rd, (opnode rGPR:$Rn, (rotr rGPR:$Rm,rot_imm:$rot)))]>, - Requires<[HasT2ExtractPack, IsThumb2]> { - bits<2> rot; - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0100; - let Inst{22-20} = opcod; - let Inst{15-12} = 0b1111; - let Inst{7} = 1; - let Inst{5-4} = rot; -} - -class T2I_exta_rrot_np<bits<3> opcod, string opc> - : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm,rot_imm:$rot), IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm$rot", []>, - Requires<[HasT2ExtractPack, IsThumb2]> { + Requires<[HasDSP, IsThumb2]>, + Sched<[WriteALU, ReadALU]> { bits<2> rot; let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; @@ -1279,7 +1261,8 @@ let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { // Load doubleword def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$Rt, rGPR:$Rt2), (ins t2addrmode_imm8s4:$addr), - IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "", []>; + IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "", []>, + Sched<[WriteLd]>; } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 // zextload i1 -> zextload i8 @@ -1333,17 +1316,20 @@ let mayLoad = 1, hasSideEffects = 0 in { def t2LDR_PRE : T2Ipreldst<0, 0b10, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), (ins t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_iu, - "ldr", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>; + "ldr", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>, + Sched<[WriteLd]>; def t2LDR_POST : T2Ipostldst<0, 0b10, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iLoad_iu, - "ldr", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>; + "ldr", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>, + Sched<[WriteLd]>; def t2LDRB_PRE : T2Ipreldst<0, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), (ins t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu, - "ldrb", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>; + "ldrb", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>, + Sched<[WriteLd]>; def t2LDRB_POST : T2Ipostldst<0, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset), @@ -1353,41 +1339,45 @@ def t2LDRB_POST : T2Ipostldst<0, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), def t2LDRH_PRE : T2Ipreldst<0, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), (ins t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu, - "ldrh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>; + "ldrh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>, + Sched<[WriteLd]>; def t2LDRH_POST : T2Ipostldst<0, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, - "ldrh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>; + "ldrh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>, + Sched<[WriteLd]>; def t2LDRSB_PRE : T2Ipreldst<1, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), (ins t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu, "ldrsb", "\t$Rt, $addr!", "$addr.base = $Rn_wb", - []>; + []>, Sched<[WriteLd]>; def t2LDRSB_POST : T2Ipostldst<1, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, - "ldrsb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>; + "ldrsb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>, + Sched<[WriteLd]>; def t2LDRSH_PRE : T2Ipreldst<1, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), (ins t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu, "ldrsh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", - []>; + []>, Sched<[WriteLd]>; def t2LDRSH_POST : T2Ipostldst<1, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, - "ldrsh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>; + "ldrsh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>, + Sched<[WriteLd]>; } // mayLoad = 1, hasSideEffects = 0 // LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110). // Ref: A8.6.57 LDR (immediate, Thumb) Encoding T4 class T2IldT<bit signed, bits<2> type, string opc, InstrItinClass ii> : T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_posimm8:$addr), ii, opc, - "\t$Rt, $addr", []> { + "\t$Rt, $addr", []>, Sched<[WriteLd]> { bits<4> Rt; bits<13> addr; let Inst{31-27} = 0b11111; @@ -1431,11 +1421,14 @@ class T2Ildacq<bits<4> bits23_20, bits<2> bit54, dag oops, dag iops, } def t2LDA : T2Ildacq<0b1101, 0b10, (outs rGPR:$Rt), - (ins addr_offset_none:$addr), "lda", "\t$Rt, $addr", []>; + (ins addr_offset_none:$addr), "lda", "\t$Rt, $addr", []>, + Sched<[WriteLd]>; def t2LDAB : T2Ildacq<0b1101, 0b00, (outs rGPR:$Rt), - (ins addr_offset_none:$addr), "ldab", "\t$Rt, $addr", []>; + (ins addr_offset_none:$addr), "ldab", "\t$Rt, $addr", []>, + Sched<[WriteLd]>; def t2LDAH : T2Ildacq<0b1101, 0b01, (outs rGPR:$Rt), - (ins addr_offset_none:$addr), "ldah", "\t$Rt, $addr", []>; + (ins addr_offset_none:$addr), "ldah", "\t$Rt, $addr", []>, + Sched<[WriteLd]>; // Store defm t2STR :T2I_st<0b10,"str", IIC_iStore_i, IIC_iStore_si, GPR, store>; @@ -1448,7 +1441,8 @@ defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si, let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs), (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr), - IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>; + IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>, + Sched<[WriteST]>; // Indexed stores @@ -1457,19 +1451,22 @@ def t2STR_PRE : T2Ipreldst<0, 0b10, 0, 1, (outs GPRnopc:$Rn_wb), (ins GPRnopc:$Rt, t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iStore_iu, "str", "\t$Rt, $addr!", - "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>; + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>, + Sched<[WriteST]>; def t2STRH_PRE : T2Ipreldst<0, 0b01, 0, 1, (outs GPRnopc:$Rn_wb), (ins rGPR:$Rt, t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iStore_iu, "strh", "\t$Rt, $addr!", - "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>; + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>, + Sched<[WriteST]>; def t2STRB_PRE : T2Ipreldst<0, 0b00, 0, 1, (outs GPRnopc:$Rn_wb), (ins rGPR:$Rt, t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iStore_bh_iu, "strb", "\t$Rt, $addr!", - "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>; + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>, + Sched<[WriteST]>; } // mayStore = 1, hasSideEffects = 0 def t2STR_POST : T2Ipostldst<0, 0b10, 0, 0, (outs GPRnopc:$Rn_wb), @@ -1480,7 +1477,8 @@ def t2STR_POST : T2Ipostldst<0, 0b10, 0, 0, (outs GPRnopc:$Rn_wb), "$Rn = $Rn_wb,@earlyclobber $Rn_wb", [(set GPRnopc:$Rn_wb, (post_store GPRnopc:$Rt, addr_offset_none:$Rn, - t2am_imm8_offset:$offset))]>; + t2am_imm8_offset:$offset))]>, + Sched<[WriteST]>; def t2STRH_POST : T2Ipostldst<0, 0b01, 0, 0, (outs GPRnopc:$Rn_wb), (ins rGPR:$Rt, addr_offset_none:$Rn, @@ -1490,7 +1488,8 @@ def t2STRH_POST : T2Ipostldst<0, 0b01, 0, 0, (outs GPRnopc:$Rn_wb), "$Rn = $Rn_wb,@earlyclobber $Rn_wb", [(set GPRnopc:$Rn_wb, (post_truncsti16 rGPR:$Rt, addr_offset_none:$Rn, - t2am_imm8_offset:$offset))]>; + t2am_imm8_offset:$offset))]>, + Sched<[WriteST]>; def t2STRB_POST : T2Ipostldst<0, 0b00, 0, 0, (outs GPRnopc:$Rn_wb), (ins rGPR:$Rt, addr_offset_none:$Rn, @@ -1500,7 +1499,8 @@ def t2STRB_POST : T2Ipostldst<0, 0b00, 0, 0, (outs GPRnopc:$Rn_wb), "$Rn = $Rn_wb,@earlyclobber $Rn_wb", [(set GPRnopc:$Rn_wb, (post_truncsti8 rGPR:$Rt, addr_offset_none:$Rn, - t2am_imm8_offset:$offset))]>; + t2am_imm8_offset:$offset))]>, + Sched<[WriteST]>; // Pseudo-instructions for pattern matching the pre-indexed stores. We can't // put the patterns on the instruction definitions directly as ISel wants @@ -1513,17 +1513,20 @@ def t2STR_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb), (ins rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset, pred:$p), 4, IIC_iStore_ru, [(set GPRnopc:$Rn_wb, - (pre_store rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>; + (pre_store rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>, + Sched<[WriteST]>; def t2STRB_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb), (ins rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset, pred:$p), 4, IIC_iStore_ru, [(set GPRnopc:$Rn_wb, - (pre_truncsti8 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>; + (pre_truncsti8 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>, + Sched<[WriteST]>; def t2STRH_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb), (ins rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset, pred:$p), 4, IIC_iStore_ru, [(set GPRnopc:$Rn_wb, - (pre_truncsti16 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>; + (pre_truncsti16 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>, + Sched<[WriteST]>; } // STRT, STRBT, STRHT all have offset mode (PUW=0b110) and are for disassembly @@ -1531,7 +1534,7 @@ def t2STRH_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb), // Ref: A8.6.193 STR (immediate, Thumb) Encoding T4 class T2IstT<bits<2> type, string opc, InstrItinClass ii> : T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc, - "\t$Rt, $addr", []> { + "\t$Rt, $addr", []>, Sched<[WriteST]> { let Inst{31-27} = 0b11111; let Inst{26-25} = 0b00; let Inst{24} = 0; // not signed @@ -1557,7 +1560,8 @@ def t2STRHT : T2IstT<0b01, "strht", IIC_iStore_bh_i>; let mayLoad = 1 in def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb), (ins t2addrmode_imm8s4_pre:$addr), IIC_iLoad_d_ru, - "ldrd", "\t$Rt, $Rt2, $addr!", "$addr.base = $wb", []> { + "ldrd", "\t$Rt, $Rt2, $addr!", "$addr.base = $wb", []>, + Sched<[WriteLd]> { let DecoderMethod = "DecodeT2LDRDPreInstruction"; } @@ -1565,13 +1569,13 @@ let mayLoad = 1 in def t2LDRD_POST : T2Ii8s4post<0, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb), (ins addr_offset_none:$addr, t2am_imm8s4_offset:$imm), IIC_iLoad_d_ru, "ldrd", "\t$Rt, $Rt2, $addr$imm", - "$addr.base = $wb", []>; + "$addr.base = $wb", []>, Sched<[WriteLd]>; let mayStore = 1 in def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs GPR:$wb), (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4_pre:$addr), IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr!", - "$addr.base = $wb", []> { + "$addr.base = $wb", []>, Sched<[WriteST]> { let DecoderMethod = "DecodeT2STRDPreInstruction"; } @@ -1580,12 +1584,13 @@ def t2STRD_POST : T2Ii8s4post<0, 1, 0, (outs GPR:$wb), (ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr, t2am_imm8s4_offset:$imm), IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr$imm", - "$addr.base = $wb", []>; + "$addr.base = $wb", []>, Sched<[WriteST]>; class T2Istrrel<bits<2> bit54, dag oops, dag iops, string opc, string asm, list<dag> pattern> : Thumb2I<oops, iops, AddrModeNone, 4, NoItinerary, opc, - asm, "", pattern>, Requires<[IsThumb, HasAcquireRelease]> { + asm, "", pattern>, Requires<[IsThumb, HasAcquireRelease]>, + Sched<[WriteST]> { bits<4> Rt; bits<4> addr; @@ -1861,7 +1866,7 @@ defm t2STM : thumb2_st_mult<"stm", IIC_iStore_m, IIC_iStore_mu, 0>; // let hasSideEffects = 0 in -def t2MOVr : T2sTwoReg<(outs GPRnopc:$Rd), (ins GPR:$Rm), IIC_iMOVr, +def t2MOVr : T2sTwoReg<(outs GPRnopc:$Rd), (ins GPRnopc:$Rm), IIC_iMOVr, "mov", ".w\t$Rd, $Rm", []>, Sched<[WriteALU]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -1870,11 +1875,11 @@ def t2MOVr : T2sTwoReg<(outs GPRnopc:$Rd), (ins GPR:$Rm), IIC_iMOVr, let Inst{14-12} = 0b000; let Inst{7-4} = 0b0000; } -def : t2InstAlias<"mov${p}.w $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm, +def : t2InstAlias<"mov${p}.w $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPRnopc:$Rm, pred:$p, zero_reg)>; -def : t2InstAlias<"movs${p}.w $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm, +def : t2InstAlias<"movs${p}.w $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPRnopc:$Rm, pred:$p, CPSR)>; -def : t2InstAlias<"movs${p} $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm, +def : t2InstAlias<"movs${p} $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPRnopc:$Rm, pred:$p, CPSR)>; // AddedComplexity to ensure isel tries t2MOVi before t2MOVi16. @@ -1926,10 +1931,11 @@ def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins imm0_65535_expr:$imm), IIC_iMOVi, def : InstAlias<"mov${p} $Rd, $imm", (t2MOVi16 rGPR:$Rd, imm256_65535_expr:$imm, pred:$p), 0>, - Requires<[IsThumb, HasV8MBaseline]>; + Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteALU]>; def t2MOVi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd), - (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>; + (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>, + Sched<[WriteALU]>; let Constraints = "$src = $Rd" in { def t2MOVTi16 : T2I<(outs rGPR:$Rd), @@ -1969,31 +1975,39 @@ def : T2Pat<(or rGPR:$src, 0xffff0000), (t2MOVTi16 rGPR:$src, 0xffff)>; // Sign extenders -def t2SXTB : T2I_ext_rrot<0b100, "sxtb", - UnOpFrag<(sext_inreg node:$Src, i8)>>; -def t2SXTH : T2I_ext_rrot<0b000, "sxth", - UnOpFrag<(sext_inreg node:$Src, i16)>>; -def t2SXTB16 : T2I_ext_rrot_sxtb16<0b010, "sxtb16">; +def t2SXTB : T2I_ext_rrot<0b100, "sxtb">; +def t2SXTH : T2I_ext_rrot<0b000, "sxth">; +def t2SXTB16 : T2I_ext_rrot_xtb16<0b010, "sxtb16">; + +def t2SXTAB : T2I_exta_rrot<0b100, "sxtab">; +def t2SXTAH : T2I_exta_rrot<0b000, "sxtah">; +def t2SXTAB16 : T2I_exta_rrot<0b010, "sxtab16">; + +def : T2Pat<(sext_inreg (rotr rGPR:$Rn, rot_imm:$rot), i8), + (t2SXTB rGPR:$Rn, rot_imm:$rot)>; +def : T2Pat<(sext_inreg (rotr rGPR:$Rn, rot_imm:$rot), i16), + (t2SXTH rGPR:$Rn, rot_imm:$rot)>; +def : Thumb2DSPPat<(add rGPR:$Rn, + (sext_inreg (rotr rGPR:$Rm, rot_imm:$rot), i8)), + (t2SXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; +def : Thumb2DSPPat<(add rGPR:$Rn, + (sext_inreg (rotr rGPR:$Rm, rot_imm:$rot), i16)), + (t2SXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; -def t2SXTAB : T2I_exta_rrot<0b100, "sxtab", - BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>; -def t2SXTAH : T2I_exta_rrot<0b000, "sxtah", - BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>; -def t2SXTAB16 : T2I_exta_rrot_np<0b010, "sxtab16">; // A simple right-shift can also be used in most cases (the exception is the // SXTH operations with a rotate of 24: there the non-contiguous bits are // relevant). -def : Thumb2ExtractPat<(add rGPR:$Rn, (sext_inreg +def : Thumb2DSPPat<(add rGPR:$Rn, (sext_inreg (srl rGPR:$Rm, rot_imm:$rot), i8)), (t2SXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; -def : Thumb2ExtractPat<(add rGPR:$Rn, (sext_inreg +def : Thumb2DSPPat<(add rGPR:$Rn, (sext_inreg (srl rGPR:$Rm, imm8_or_16:$rot), i16)), (t2SXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; -def : Thumb2ExtractPat<(add rGPR:$Rn, (sext_inreg +def : Thumb2DSPPat<(add rGPR:$Rn, (sext_inreg (rotr rGPR:$Rm, (i32 24)), i16)), (t2SXTAH rGPR:$Rn, rGPR:$Rm, (i32 3))>; -def : Thumb2ExtractPat<(add rGPR:$Rn, (sext_inreg +def : Thumb2DSPPat<(add rGPR:$Rn, (sext_inreg (or (srl rGPR:$Rm, (i32 24)), (shl rGPR:$Rm, (i32 8))), i16)), (t2SXTAH rGPR:$Rn, rGPR:$Rm, (i32 3))>; @@ -2001,12 +2015,16 @@ def : Thumb2ExtractPat<(add rGPR:$Rn, (sext_inreg // Zero extenders let AddedComplexity = 16 in { -def t2UXTB : T2I_ext_rrot<0b101, "uxtb", - UnOpFrag<(and node:$Src, 0x000000FF)>>; -def t2UXTH : T2I_ext_rrot<0b001, "uxth", - UnOpFrag<(and node:$Src, 0x0000FFFF)>>; -def t2UXTB16 : T2I_ext_rrot_uxtb16<0b011, "uxtb16", - UnOpFrag<(and node:$Src, 0x00FF00FF)>>; +def t2UXTB : T2I_ext_rrot<0b101, "uxtb">; +def t2UXTH : T2I_ext_rrot<0b001, "uxth">; +def t2UXTB16 : T2I_ext_rrot_xtb16<0b011, "uxtb16">; + +def : Thumb2DSPPat<(and (rotr rGPR:$Rm, rot_imm:$rot), 0x000000FF), + (t2UXTB rGPR:$Rm, rot_imm:$rot)>; +def : Thumb2DSPPat<(and (rotr rGPR:$Rm, rot_imm:$rot), 0x0000FFFF), + (t2UXTH rGPR:$Rm, rot_imm:$rot)>; +def : Thumb2DSPPat<(and (rotr rGPR:$Rm, rot_imm:$rot), 0x00FF00FF), + (t2UXTB16 rGPR:$Rm, rot_imm:$rot)>; // FIXME: This pattern incorrectly assumes the shl operator is a rotate. // The transformation should probably be done as a combiner action @@ -2014,21 +2032,25 @@ def t2UXTB16 : T2I_ext_rrot_uxtb16<0b011, "uxtb16", // eight bits of the source into the lower eight bits of the result. //def : T2Pat<(and (shl rGPR:$Src, (i32 8)), 0xFF00FF), // (t2UXTB16 rGPR:$Src, 3)>, -// Requires<[HasT2ExtractPack, IsThumb2]>; +// Requires<[HasDSP, IsThumb2]>; def : T2Pat<(and (srl rGPR:$Src, (i32 8)), 0xFF00FF), (t2UXTB16 rGPR:$Src, 1)>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; -def t2UXTAB : T2I_exta_rrot<0b101, "uxtab", - BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>; -def t2UXTAH : T2I_exta_rrot<0b001, "uxtah", - BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>; -def t2UXTAB16 : T2I_exta_rrot_np<0b011, "uxtab16">; +def t2UXTAB : T2I_exta_rrot<0b101, "uxtab">; +def t2UXTAH : T2I_exta_rrot<0b001, "uxtah">; +def t2UXTAB16 : T2I_exta_rrot<0b011, "uxtab16">; -def : Thumb2ExtractPat<(add rGPR:$Rn, (and (srl rGPR:$Rm, rot_imm:$rot), +def : Thumb2DSPPat<(add rGPR:$Rn, (and (rotr rGPR:$Rm, rot_imm:$rot), + 0x00FF)), + (t2UXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; +def : Thumb2DSPPat<(add rGPR:$Rn, (and (rotr rGPR:$Rm, rot_imm:$rot), + 0xFFFF)), + (t2UXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; +def : Thumb2DSPPat<(add rGPR:$Rn, (and (srl rGPR:$Rm, rot_imm:$rot), 0xFF)), (t2UXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; -def : Thumb2ExtractPat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot), +def : Thumb2DSPPat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot), 0xFFFF)), (t2UXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; } @@ -2060,6 +2082,19 @@ defm t2ADC : T2I_adde_sube_irs<0b1010, "adc", ARMadde, 1>; defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc", ARMsube>; } +def : t2InstSubst<"adc${s}${p} $rd, $rn, $imm", + (t2SBCri rGPR:$rd, rGPR:$rn, t2_so_imm_not:$imm, pred:$p, s_cc_out:$s)>; +def : t2InstSubst<"sbc${s}${p} $rd, $rn, $imm", + (t2ADCri rGPR:$rd, rGPR:$rn, t2_so_imm_not:$imm, pred:$p, s_cc_out:$s)>; + +def : t2InstSubst<"add${s}${p}.w $rd, $rn, $imm", + (t2SUBri GPRnopc:$rd, GPRnopc:$rn, t2_so_imm_neg:$imm, pred:$p, s_cc_out:$s)>; +def : t2InstSubst<"addw${p} $rd, $rn, $imm", + (t2SUBri12 GPRnopc:$rd, GPR:$rn, t2_so_imm_neg:$imm, pred:$p)>; +def : t2InstSubst<"sub${s}${p}.w $rd, $rn, $imm", + (t2ADDri GPRnopc:$rd, GPRnopc:$rn, t2_so_imm_neg:$imm, pred:$p, s_cc_out:$s)>; +def : t2InstSubst<"subw${p} $rd, $rn, $imm", + (t2ADDri12 GPRnopc:$rd, GPR:$rn, t2_so_imm_neg:$imm, pred:$p)>; // RSB defm t2RSB : T2I_rbin_irs <0b1110, "rsb", sub>; @@ -2230,70 +2265,52 @@ def t2USADA8 : T2FourReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd), Requires<[IsThumb2, HasDSP]>; // Signed/Unsigned saturate. -class T2SatI<dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list<dag> pattern> - : T2I<oops, iops, itin, opc, asm, pattern> { +class T2SatI<dag iops, string opc, string asm> + : T2I<(outs rGPR:$Rd), iops, NoItinerary, opc, asm, []> { bits<4> Rd; bits<4> Rn; bits<5> sat_imm; - bits<7> sh; + bits<6> sh; - let Inst{11-8} = Rd; + let Inst{31-24} = 0b11110011; + let Inst{21} = sh{5}; + let Inst{20} = 0; let Inst{19-16} = Rn; - let Inst{4-0} = sat_imm; - let Inst{21} = sh{5}; + let Inst{15} = 0; let Inst{14-12} = sh{4-2}; - let Inst{7-6} = sh{1-0}; + let Inst{11-8} = Rd; + let Inst{7-6} = sh{1-0}; + let Inst{5} = 0; + let Inst{4-0} = sat_imm; } -def t2SSAT: T2SatI< - (outs rGPR:$Rd), - (ins imm1_32:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh), - NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", []>, - Requires<[IsThumb2]> { - let Inst{31-27} = 0b11110; - let Inst{25-22} = 0b1100; - let Inst{20} = 0; - let Inst{15} = 0; +def t2SSAT: T2SatI<(ins imm1_32:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh), + "ssat", "\t$Rd, $sat_imm, $Rn$sh">, + Requires<[IsThumb2]> { + let Inst{23-22} = 0b00; let Inst{5} = 0; } -def t2SSAT16: T2SatI< - (outs rGPR:$Rd), (ins imm1_16:$sat_imm, rGPR:$Rn), NoItinerary, - "ssat16", "\t$Rd, $sat_imm, $Rn", []>, - Requires<[IsThumb2, HasDSP]> { - let Inst{31-27} = 0b11110; - let Inst{25-22} = 0b1100; - let Inst{20} = 0; - let Inst{15} = 0; - let Inst{21} = 1; // sh = '1' - let Inst{14-12} = 0b000; // imm3 = '000' - let Inst{7-6} = 0b00; // imm2 = '00' - let Inst{5-4} = 0b00; +def t2SSAT16: T2SatI<(ins imm1_16:$sat_imm, rGPR:$Rn), + "ssat16", "\t$Rd, $sat_imm, $Rn">, + Requires<[IsThumb2, HasDSP]> { + let Inst{23-22} = 0b00; + let sh = 0b100000; + let Inst{4} = 0; } -def t2USAT: T2SatI< - (outs rGPR:$Rd), - (ins imm0_31:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh), - NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh", []>, - Requires<[IsThumb2]> { - let Inst{31-27} = 0b11110; - let Inst{25-22} = 0b1110; - let Inst{20} = 0; - let Inst{15} = 0; +def t2USAT: T2SatI<(ins imm0_31:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh), + "usat", "\t$Rd, $sat_imm, $Rn$sh">, + Requires<[IsThumb2]> { + let Inst{23-22} = 0b10; } -def t2USAT16: T2SatI<(outs rGPR:$Rd), (ins imm0_15:$sat_imm, rGPR:$Rn), - NoItinerary, - "usat16", "\t$Rd, $sat_imm, $Rn", []>, +def t2USAT16: T2SatI<(ins imm0_15:$sat_imm, rGPR:$Rn), + "usat16", "\t$Rd, $sat_imm, $Rn">, Requires<[IsThumb2, HasDSP]> { - let Inst{31-22} = 0b1111001110; - let Inst{20} = 0; - let Inst{15} = 0; - let Inst{21} = 1; // sh = '1' - let Inst{14-12} = 0b000; // imm3 = '000' - let Inst{7-6} = 0b00; // imm2 = '00' - let Inst{5-4} = 0b00; + let Inst{23-22} = 0b10; + let sh = 0b100000; + let Inst{4} = 0; } def : T2Pat<(int_arm_ssat GPR:$a, imm1_32:$pos), (t2SSAT imm1_32:$pos, GPR:$a, 0)>; @@ -2305,11 +2322,18 @@ def : T2Pat<(ARMssatnoshift GPRnopc:$Rn, imm0_31:$imm), // Shift and rotate Instructions. // -defm t2LSL : T2I_sh_ir<0b00, "lsl", imm0_31, shl>; +defm t2LSL : T2I_sh_ir<0b00, "lsl", imm1_31, shl>; defm t2LSR : T2I_sh_ir<0b01, "lsr", imm_sr, srl>; defm t2ASR : T2I_sh_ir<0b10, "asr", imm_sr, sra>; defm t2ROR : T2I_sh_ir<0b11, "ror", imm0_31, rotr>; +// LSL #0 is actually MOV, and has slightly different permitted registers to +// LSL with non-zero shift +def : t2InstAlias<"lsl${s}${p} $Rd, $Rm, #0", + (t2MOVr GPRnopc:$Rd, GPRnopc:$Rm, pred:$p, cc_out:$s)>; +def : t2InstAlias<"lsl${s}${p}.w $Rd, $Rm, #0", + (t2MOVr GPRnopc:$Rd, GPRnopc:$Rm, pred:$p, cc_out:$s)>; + // (rotr x, (and y, 0x...1f)) ==> (ROR x, y) def : T2Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)), (t2RORrr rGPR:$lhs, rGPR:$rhs)>; @@ -2547,7 +2571,8 @@ def : T2Pat<(t2_so_imm_not:$src), let isCommutable = 1 in def t2MUL: T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm", - [(set rGPR:$Rd, (mul rGPR:$Rn, rGPR:$Rm))]> { + [(set rGPR:$Rd, (mul rGPR:$Rn, rGPR:$Rm))]>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b000; @@ -2558,7 +2583,8 @@ def t2MUL: T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32, class T2FourRegMLA<bits<4> op7_4, string opc, list<dag> pattern> : T2FourReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, opc, "\t$Rd, $Rn, $Rm, $Ra", pattern>, - Requires<[IsThumb2, UseMulOps]> { + Requires<[IsThumb2, UseMulOps]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b000; @@ -2575,8 +2601,12 @@ def t2MLS: T2FourRegMLA<0b0001, "mls", // Extra precision multiplies with low / high results let hasSideEffects = 0 in { let isCommutable = 1 in { -def t2SMULL : T2MulLong<0b000, 0b0000, "smull", []>; -def t2UMULL : T2MulLong<0b010, 0b0000, "umull", []>; +def t2SMULL : T2MulLong<0b000, 0b0000, "smull", + [(set rGPR:$RdLo, rGPR:$RdHi, + (smullohi rGPR:$Rn, rGPR:$Rm))]>; +def t2UMULL : T2MulLong<0b010, 0b0000, "umull", + [(set rGPR:$RdLo, rGPR:$RdHi, + (umullohi rGPR:$Rn, rGPR:$Rm))]>; } // isCommutable // Multiply + accumulate @@ -2592,7 +2622,8 @@ class T2SMMUL<bits<4> op7_4, string opc, list<dag> pattern> : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32, opc, "\t$Rd, $Rn, $Rm", pattern>, - Requires<[IsThumb2, HasDSP]> { + Requires<[IsThumb2, HasDSP]>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b101; @@ -2607,7 +2638,8 @@ class T2FourRegSMMLA<bits<3> op22_20, bits<4> op7_4, string opc, list<dag> pattern> : T2FourReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, opc, "\t$Rd, $Rn, $Rm, $Ra", pattern>, - Requires<[IsThumb2, HasDSP, UseMulOps]> { + Requires<[IsThumb2, HasDSP, UseMulOps]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = op22_20; @@ -2624,7 +2656,8 @@ class T2ThreeRegSMUL<bits<3> op22_20, bits<2> op5_4, string opc, list<dag> pattern> : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16, opc, "\t$Rd, $Rn, $Rm", pattern>, - Requires<[IsThumb2, HasDSP]> { + Requires<[IsThumb2, HasDSP]>, + Sched<[WriteMUL16, ReadMUL, ReadMUL]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = op22_20; @@ -2645,8 +2678,10 @@ def t2SMULTB : T2ThreeRegSMUL<0b001, 0b10, "smultb", def t2SMULTT : T2ThreeRegSMUL<0b001, 0b11, "smultt", [(set rGPR:$Rd, (mul (sra rGPR:$Rn, (i32 16)), (sra rGPR:$Rm, (i32 16))))]>; -def t2SMULWB : T2ThreeRegSMUL<0b011, 0b00, "smulwb", []>; -def t2SMULWT : T2ThreeRegSMUL<0b011, 0b01, "smulwt", []>; +def t2SMULWB : T2ThreeRegSMUL<0b011, 0b00, "smulwb", + [(set rGPR:$Rd, (ARMsmulwb rGPR:$Rn, rGPR:$Rm))]>; +def t2SMULWT : T2ThreeRegSMUL<0b011, 0b01, "smulwt", + [(set rGPR:$Rd, (ARMsmulwt rGPR:$Rn, rGPR:$Rm))]>; def : Thumb2DSPPat<(mul sext_16_node:$Rm, sext_16_node:$Rn), (t2SMULBB rGPR:$Rm, rGPR:$Rn)>; @@ -2659,7 +2694,8 @@ class T2FourRegSMLA<bits<3> op22_20, bits<2> op5_4, string opc, list<dag> pattern> : T2FourReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMUL16, opc, "\t$Rd, $Rn, $Rm, $Ra", pattern>, - Requires<[IsThumb2, HasDSP, UseMulOps]> { + Requires<[IsThumb2, HasDSP, UseMulOps]>, + Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = op22_20; @@ -2680,8 +2716,10 @@ def t2SMLATB : T2FourRegSMLA<0b001, 0b10, "smlatb", def t2SMLATT : T2FourRegSMLA<0b001, 0b11, "smlatt", [(set rGPR:$Rd, (add rGPR:$Ra, (mul (sra rGPR:$Rn, (i32 16)), (sra rGPR:$Rm, (i32 16)))))]>; -def t2SMLAWB : T2FourRegSMLA<0b011, 0b00, "smlawb", []>; -def t2SMLAWT : T2FourRegSMLA<0b011, 0b01, "smlawt", []>; +def t2SMLAWB : T2FourRegSMLA<0b011, 0b00, "smlawb", + [(set rGPR:$Rd, (add rGPR:$Ra, (ARMsmulwb rGPR:$Rn, rGPR:$Rm)))]>; +def t2SMLAWT : T2FourRegSMLA<0b011, 0b01, "smlawt", + [(set rGPR:$Rd, (add rGPR:$Ra, (ARMsmulwt rGPR:$Rn, rGPR:$Rm)))]>; def : Thumb2DSPMulPat<(add rGPR:$Ra, (mul sext_16_node:$Rn, sext_16_node:$Rm)), (t2SMLABB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>; @@ -2692,25 +2730,32 @@ def : Thumb2DSPMulPat<(add rGPR:$Ra, (mul (sra rGPR:$Rn, (i32 16)), sext_16_node:$Rm)), (t2SMLATB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>; -class T2SMLAL<bits<3> op22_20, bits<4> op7_4, string opc, list<dag> pattern> - : T2FourReg_mac<1, op22_20, op7_4, - (outs rGPR:$Ra, rGPR:$Rd), - (ins rGPR:$Rn, rGPR:$Rm), - IIC_iMAC64, opc, "\t$Ra, $Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasDSP]>; - // Halfword multiple accumulate long: SMLAL<x><y> -def t2SMLALBB : T2SMLAL<0b100, 0b1000, "smlalbb", []>; -def t2SMLALBT : T2SMLAL<0b100, 0b1001, "smlalbt", []>; -def t2SMLALTB : T2SMLAL<0b100, 0b1010, "smlaltb", []>; -def t2SMLALTT : T2SMLAL<0b100, 0b1011, "smlaltt", []>; +def t2SMLALBB : T2MlaLong<0b100, 0b1000, "smlalbb">, + Requires<[IsThumb2, HasDSP]>; +def t2SMLALBT : T2MlaLong<0b100, 0b1001, "smlalbt">, + Requires<[IsThumb2, HasDSP]>; +def t2SMLALTB : T2MlaLong<0b100, 0b1010, "smlaltb">, + Requires<[IsThumb2, HasDSP]>; +def t2SMLALTT : T2MlaLong<0b100, 0b1011, "smlaltt">, + Requires<[IsThumb2, HasDSP]>; + +def : Thumb2DSPPat<(ARMsmlalbb GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), + (t2SMLALBB $Rn, $Rm, $RLo, $RHi)>; +def : Thumb2DSPPat<(ARMsmlalbt GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), + (t2SMLALBT $Rn, $Rm, $RLo, $RHi)>; +def : Thumb2DSPPat<(ARMsmlaltb GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), + (t2SMLALTB $Rn, $Rm, $RLo, $RHi)>; +def : Thumb2DSPPat<(ARMsmlaltt GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), + (t2SMLALTT $Rn, $Rm, $RLo, $RHi)>; class T2DualHalfMul<bits<3> op22_20, bits<4> op7_4, string opc> : T2ThreeReg_mac<0, op22_20, op7_4, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC32, opc, "\t$Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasDSP]> { + Requires<[IsThumb2, HasDSP]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]> { let Inst{15-12} = 0b1111; } @@ -2737,7 +2782,8 @@ class T2DualHalfMulAddLong<bits<3> op22_20, bits<4> op7_4, string opc> (outs rGPR:$Ra, rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64, opc, "\t$Ra, $Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasDSP]>; + Requires<[IsThumb2, HasDSP]>, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; def t2SMLALD : T2DualHalfMulAddLong<0b100, 0b1100, "smlald">; def t2SMLALDX : T2DualHalfMulAddLong<0b100, 0b1101, "smlaldx">; @@ -2751,7 +2797,8 @@ def t2SMLSLDX : T2DualHalfMulAddLong<0b101, 0b1101, "smlsldx">; def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV, "sdiv", "\t$Rd, $Rn, $Rm", [(set rGPR:$Rd, (sdiv rGPR:$Rn, rGPR:$Rm))]>, - Requires<[HasDivide, IsThumb, HasV8MBaseline]> { + Requires<[HasDivide, IsThumb, HasV8MBaseline]>, + Sched<[WriteDIV]> { let Inst{31-27} = 0b11111; let Inst{26-21} = 0b011100; let Inst{20} = 0b1; @@ -2762,7 +2809,8 @@ def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV, def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV, "udiv", "\t$Rd, $Rn, $Rm", [(set rGPR:$Rd, (udiv rGPR:$Rn, rGPR:$Rm))]>, - Requires<[HasDivide, IsThumb, HasV8MBaseline]> { + Requires<[HasDivide, IsThumb, HasV8MBaseline]>, + Sched<[WriteDIV]> { let Inst{31-27} = 0b11111; let Inst{26-21} = 0b011101; let Inst{20} = 0b1; @@ -2819,7 +2867,7 @@ def t2PKHBT : T2ThreeReg< [(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF), (and (shl rGPR:$Rm, pkh_lsl_amt:$sh), 0xFFFF0000)))]>, - Requires<[HasT2ExtractPack, IsThumb2]>, + Requires<[HasDSP, IsThumb2]>, Sched<[WriteALUsi, ReadALU]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -2835,10 +2883,10 @@ def t2PKHBT : T2ThreeReg< // Alternate cases for PKHBT where identities eliminate some nodes. def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (and rGPR:$src2, 0xFFFF0000)), (t2PKHBT rGPR:$src1, rGPR:$src2, 0)>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (shl rGPR:$src2, imm16_31:$sh)), (t2PKHBT rGPR:$src1, rGPR:$src2, imm16_31:$sh)>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; // Note: Shifts of 1-15 bits will be transformed to srl instead of sra and // will match the pattern below. @@ -2848,7 +2896,7 @@ def t2PKHTB : T2ThreeReg< [(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF0000), (and (sra rGPR:$Rm, pkh_asr_amt:$sh), 0xFFFF)))]>, - Requires<[HasT2ExtractPack, IsThumb2]>, + Requires<[HasDSP, IsThumb2]>, Sched<[WriteALUsi, ReadALU]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -2867,14 +2915,14 @@ def t2PKHTB : T2ThreeReg< // pkhtb src1, src2, asr (17..31). def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (srl rGPR:$src2, imm16:$sh)), (t2PKHTB rGPR:$src1, rGPR:$src2, imm16:$sh)>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (sra rGPR:$src2, imm16_31:$sh)), (t2PKHTB rGPR:$src1, rGPR:$src2, imm16_31:$sh)>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (and (srl rGPR:$src2, imm1_15:$sh), 0xFFFF)), (t2PKHTB rGPR:$src1, rGPR:$src2, imm1_15:$sh)>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; //===----------------------------------------------------------------------===// // CRC32 Instructions @@ -4216,13 +4264,13 @@ def : T2Pat<(and rGPR:$Rm, 0x000000FF), (t2UXTB rGPR:$Rm, 0)>, def : T2Pat<(and rGPR:$Rm, 0x0000FFFF), (t2UXTH rGPR:$Rm, 0)>, Requires<[IsThumb2]>; def : T2Pat<(and rGPR:$Rm, 0x00FF00FF), (t2UXTB16 rGPR:$Rm, 0)>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; def : T2Pat<(add rGPR:$Rn, (and rGPR:$Rm, 0x00FF)), (t2UXTAB rGPR:$Rn, rGPR:$Rm, 0)>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; def : T2Pat<(add rGPR:$Rn, (and rGPR:$Rm, 0xFFFF)), (t2UXTAH rGPR:$Rn, rGPR:$Rm, 0)>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; } def : T2Pat<(sext_inreg rGPR:$Src, i8), (t2SXTB rGPR:$Src, 0)>, @@ -4231,10 +4279,10 @@ def : T2Pat<(sext_inreg rGPR:$Src, i16), (t2SXTH rGPR:$Src, 0)>, Requires<[IsThumb2]>; def : T2Pat<(add rGPR:$Rn, (sext_inreg rGPR:$Rm, i8)), (t2SXTAB rGPR:$Rn, rGPR:$Rm, 0)>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; def : T2Pat<(add rGPR:$Rn, (sext_inreg rGPR:$Rm, i16)), (t2SXTAH rGPR:$Rn, rGPR:$Rm, 0)>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; // Atomic load/store patterns def : T2Pat<(atomic_load_8 t2addrmode_imm12:$addr), @@ -4325,26 +4373,26 @@ def : t2InstAlias<"add${s}${p} $Rdn, $ShiftedRm", pred:$p, cc_out:$s)>; // add w/ negative immediates is just a sub. -def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm", +def : t2InstSubst<"add${s}${p} $Rd, $Rn, $imm", (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, pred:$p, cc_out:$s)>; -def : t2InstAlias<"add${p} $Rd, $Rn, $imm", +def : t2InstSubst<"add${p} $Rd, $Rn, $imm", (t2SUBri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095_neg:$imm, pred:$p)>; -def : t2InstAlias<"add${s}${p} $Rdn, $imm", +def : t2InstSubst<"add${s}${p} $Rdn, $imm", (t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm_neg:$imm, pred:$p, cc_out:$s)>; -def : t2InstAlias<"add${p} $Rdn, $imm", +def : t2InstSubst<"add${p} $Rdn, $imm", (t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095_neg:$imm, pred:$p)>; -def : t2InstAlias<"add${s}${p}.w $Rd, $Rn, $imm", +def : t2InstSubst<"add${s}${p}.w $Rd, $Rn, $imm", (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, pred:$p, cc_out:$s)>; -def : t2InstAlias<"addw${p} $Rd, $Rn, $imm", +def : t2InstSubst<"addw${p} $Rd, $Rn, $imm", (t2SUBri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095_neg:$imm, pred:$p)>; -def : t2InstAlias<"add${s}${p}.w $Rdn, $imm", +def : t2InstSubst<"add${s}${p}.w $Rdn, $imm", (t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm_neg:$imm, pred:$p, cc_out:$s)>; -def : t2InstAlias<"addw${p} $Rdn, $imm", +def : t2InstSubst<"addw${p} $Rdn, $imm", (t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095_neg:$imm, pred:$p)>; @@ -4431,10 +4479,10 @@ def : t2InstAlias<"mvn${s}${p} $Rd, $ShiftedRm", // input operands swapped when the shift amount is zero (i.e., unspecified). def : InstAlias<"pkhbt${p} $Rd, $Rn, $Rm", (t2PKHBT rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; def : InstAlias<"pkhtb${p} $Rd, $Rn, $Rm", (t2PKHBT rGPR:$Rd, rGPR:$Rm, rGPR:$Rn, 0, pred:$p), 0>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; // PUSH/POP aliases for STM/LDM def : t2InstAlias<"push${p}.w $regs", (t2STMDB_UPD SP, pred:$p, reglist:$regs)>; @@ -4513,16 +4561,16 @@ def : t2InstAlias<"strh${p} $Rt, $addr", // Extend instruction optional rotate operand. def : InstAlias<"sxtab${p} $Rd, $Rn, $Rm", (t2SXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; def : InstAlias<"sxtah${p} $Rd, $Rn, $Rm", (t2SXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; def : InstAlias<"sxtab16${p} $Rd, $Rn, $Rm", (t2SXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; def : InstAlias<"sxtb16${p} $Rd, $Rm", (t2SXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p), 0>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; def : t2InstAlias<"sxtb${p} $Rd, $Rm", (t2SXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>; @@ -4535,16 +4583,16 @@ def : t2InstAlias<"sxth${p}.w $Rd, $Rm", def : InstAlias<"uxtab${p} $Rd, $Rn, $Rm", (t2UXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; def : InstAlias<"uxtah${p} $Rd, $Rn, $Rm", (t2UXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; def : InstAlias<"uxtab16${p} $Rd, $Rn, $Rm", (t2UXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; def : InstAlias<"uxtb16${p} $Rd, $Rm", (t2UXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p), 0>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; def : t2InstAlias<"uxtb${p} $Rd, $Rm", (t2UXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>; @@ -4560,7 +4608,7 @@ def : t2InstAlias<"uxtb${p} $Rd, $Rm$rot", (t2UXTB rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>; def : InstAlias<"uxtb16${p} $Rd, $Rm$rot", (t2UXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p), 0>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; def : t2InstAlias<"uxth${p} $Rd, $Rm$rot", (t2UXTH rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>; @@ -4568,41 +4616,41 @@ def : t2InstAlias<"sxtb${p} $Rd, $Rm$rot", (t2SXTB rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>; def : InstAlias<"sxtb16${p} $Rd, $Rm$rot", (t2SXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p), 0>, - Requires<[HasT2ExtractPack, IsThumb2]>; + Requires<[HasDSP, IsThumb2]>; def : t2InstAlias<"sxth${p} $Rd, $Rm$rot", (t2SXTH rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>; // "mov Rd, t2_so_imm_not" can be handled via "mvn" in assembly, just like // for isel. -def : t2InstAlias<"mov${p} $Rd, $imm", +def : t2InstSubst<"mov${p} $Rd, $imm", (t2MVNi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>; -def : t2InstAlias<"mvn${p} $Rd, $imm", - (t2MOVi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>; +def : t2InstSubst<"mvn${s}${p} $Rd, $imm", + (t2MOVi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, s_cc_out:$s)>; // Same for AND <--> BIC -def : t2InstAlias<"bic${s}${p} $Rd, $Rn, $imm", +def : t2InstSubst<"bic${s}${p} $Rd, $Rn, $imm", (t2ANDri rGPR:$Rd, rGPR:$Rn, t2_so_imm_not:$imm, pred:$p, cc_out:$s)>; -def : t2InstAlias<"bic${s}${p} $Rdn, $imm", +def : t2InstSubst<"bic${s}${p} $Rdn, $imm", (t2ANDri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm_not:$imm, pred:$p, cc_out:$s)>; -def : t2InstAlias<"and${s}${p} $Rd, $Rn, $imm", +def : t2InstSubst<"and${s}${p} $Rd, $Rn, $imm", (t2BICri rGPR:$Rd, rGPR:$Rn, t2_so_imm_not:$imm, pred:$p, cc_out:$s)>; -def : t2InstAlias<"and${s}${p} $Rdn, $imm", +def : t2InstSubst<"and${s}${p} $Rdn, $imm", (t2BICri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm_not:$imm, pred:$p, cc_out:$s)>; // Likewise, "add Rd, t2_so_imm_neg" -> sub -def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm", +def : t2InstSubst<"add${s}${p} $Rd, $Rn, $imm", (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, pred:$p, cc_out:$s)>; -def : t2InstAlias<"add${s}${p} $Rd, $imm", +def : t2InstSubst<"add${s}${p} $Rd, $imm", (t2SUBri GPRnopc:$Rd, GPRnopc:$Rd, t2_so_imm_neg:$imm, pred:$p, cc_out:$s)>; // Same for CMP <--> CMN via t2_so_imm_neg -def : t2InstAlias<"cmp${p} $Rd, $imm", +def : t2InstSubst<"cmp${p} $Rd, $imm", (t2CMNri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>; -def : t2InstAlias<"cmn${p} $Rd, $imm", +def : t2InstSubst<"cmn${p} $Rd, $imm", (t2CMPri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>; @@ -4616,6 +4664,8 @@ def : t2InstAlias<"neg${s}${p} $Rd, $Rm", // MOV so_reg assembler pseudos. InstAlias isn't expressive enough for // these, unfortunately. +// FIXME: LSL #0 in the shift should allow SP to be used as either the +// source or destination (but not both). def t2MOVsi: t2AsmPseudo<"mov${p} $Rd, $shift", (ins rGPR:$Rd, t2_so_reg:$shift, pred:$p)>; def t2MOVSsi: t2AsmPseudo<"movs${p} $Rd, $shift", diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td index e99048645685..0f225156d4ca 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td @@ -11,14 +11,17 @@ // //===----------------------------------------------------------------------===// -def SDT_CMPFP0 : SDTypeProfile<0, 1, [SDTCisFP<0>]>; +def SDT_CMPFP0 : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisVT<1, i32>]>; def SDT_VMOVDRR : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; +def SDT_VMOVRRD : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, + SDTCisVT<2, f64>]>; def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInGlue, SDNPOutGlue]>; -def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutGlue]>; +def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMFCmp, [SDNPOutGlue]>; def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>; def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>; +def arm_fmrrd : SDNode<"ARMISD::VMOVRRD", SDT_VMOVRRD>; //===----------------------------------------------------------------------===// // Operand Definitions. @@ -336,13 +339,15 @@ let TwoOperandAliasConstraint = "$Dn = $Dd" in def VADDD : ADbI<0b11100, 0b11, 0, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), IIC_fpALU64, "vadd", ".f64\t$Dd, $Dn, $Dm", - [(set DPR:$Dd, (fadd DPR:$Dn, (f64 DPR:$Dm)))]>; + [(set DPR:$Dd, (fadd DPR:$Dn, (f64 DPR:$Dm)))]>, + Sched<[WriteFPALU64]>; let TwoOperandAliasConstraint = "$Sn = $Sd" in def VADDS : ASbIn<0b11100, 0b11, 0, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpALU32, "vadd", ".f32\t$Sd, $Sn, $Sm", - [(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]> { + [(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]>, + Sched<[WriteFPALU32]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -352,19 +357,22 @@ let TwoOperandAliasConstraint = "$Sn = $Sd" in def VADDH : AHbI<0b11100, 0b11, 0, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpALU16, "vadd", ".f16\t$Sd, $Sn, $Sm", - []>; + []>, + Sched<[WriteFPALU32]>; let TwoOperandAliasConstraint = "$Dn = $Dd" in def VSUBD : ADbI<0b11100, 0b11, 1, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), IIC_fpALU64, "vsub", ".f64\t$Dd, $Dn, $Dm", - [(set DPR:$Dd, (fsub DPR:$Dn, (f64 DPR:$Dm)))]>; + [(set DPR:$Dd, (fsub DPR:$Dn, (f64 DPR:$Dm)))]>, + Sched<[WriteFPALU64]>; let TwoOperandAliasConstraint = "$Sn = $Sd" in def VSUBS : ASbIn<0b11100, 0b11, 1, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm", - [(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]> { + [(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]>, + Sched<[WriteFPALU32]>{ // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -374,37 +382,43 @@ let TwoOperandAliasConstraint = "$Sn = $Sd" in def VSUBH : AHbI<0b11100, 0b11, 1, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpALU16, "vsub", ".f16\t$Sd, $Sn, $Sm", - []>; + []>, + Sched<[WriteFPALU32]>; let TwoOperandAliasConstraint = "$Dn = $Dd" in def VDIVD : ADbI<0b11101, 0b00, 0, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), IIC_fpDIV64, "vdiv", ".f64\t$Dd, $Dn, $Dm", - [(set DPR:$Dd, (fdiv DPR:$Dn, (f64 DPR:$Dm)))]>; + [(set DPR:$Dd, (fdiv DPR:$Dn, (f64 DPR:$Dm)))]>, + Sched<[WriteFPDIV64]>; let TwoOperandAliasConstraint = "$Sn = $Sd" in def VDIVS : ASbI<0b11101, 0b00, 0, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpDIV32, "vdiv", ".f32\t$Sd, $Sn, $Sm", - [(set SPR:$Sd, (fdiv SPR:$Sn, SPR:$Sm))]>; + [(set SPR:$Sd, (fdiv SPR:$Sn, SPR:$Sm))]>, + Sched<[WriteFPDIV32]>; let TwoOperandAliasConstraint = "$Sn = $Sd" in def VDIVH : AHbI<0b11101, 0b00, 0, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpDIV16, "vdiv", ".f16\t$Sd, $Sn, $Sm", - []>; + []>, + Sched<[WriteFPDIV32]>; let TwoOperandAliasConstraint = "$Dn = $Dd" in def VMULD : ADbI<0b11100, 0b10, 0, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), IIC_fpMUL64, "vmul", ".f64\t$Dd, $Dn, $Dm", - [(set DPR:$Dd, (fmul DPR:$Dn, (f64 DPR:$Dm)))]>; + [(set DPR:$Dd, (fmul DPR:$Dn, (f64 DPR:$Dm)))]>, + Sched<[WriteFPMUL64, ReadFPMUL, ReadFPMUL]>; let TwoOperandAliasConstraint = "$Sn = $Sd" in def VMULS : ASbIn<0b11100, 0b10, 0, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm", - [(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]> { + [(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]>, + Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -414,17 +428,20 @@ let TwoOperandAliasConstraint = "$Sn = $Sd" in def VMULH : AHbI<0b11100, 0b10, 0, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpMUL16, "vmul", ".f16\t$Sd, $Sn, $Sm", - []>; + []>, + Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]>; def VNMULD : ADbI<0b11100, 0b10, 1, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), IIC_fpMUL64, "vnmul", ".f64\t$Dd, $Dn, $Dm", - [(set DPR:$Dd, (fneg (fmul DPR:$Dn, (f64 DPR:$Dm))))]>; + [(set DPR:$Dd, (fneg (fmul DPR:$Dn, (f64 DPR:$Dm))))]>, + Sched<[WriteFPMUL64, ReadFPMUL, ReadFPMUL]>; def VNMULS : ASbI<0b11100, 0b10, 1, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpMUL32, "vnmul", ".f32\t$Sd, $Sn, $Sm", - [(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]> { + [(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]>, + Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -433,7 +450,8 @@ def VNMULS : ASbI<0b11100, 0b10, 1, 0, def VNMULH : AHbI<0b11100, 0b10, 1, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpMUL16, "vnmul", ".f16\t$Sd, $Sn, $Sm", - []>; + []>, + Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]>; multiclass vsel_inst<string op, bits<2> opc, int CC> { let DecoderNamespace = "VFPV8", PostEncoderMethod = "", @@ -501,12 +519,12 @@ let Defs = [FPSCR_NZCV] in { def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins DPR:$Dd, DPR:$Dm), IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm", - [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm))]>; + [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm), (i32 1))]>; def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins SPR:$Sd, SPR:$Sm), IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm", - [(arm_cmpfp SPR:$Sd, SPR:$Sm)]> { + [(arm_cmpfp SPR:$Sd, SPR:$Sm, (i32 1))]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -517,17 +535,15 @@ def VCMPEH : AHuI<0b11101, 0b11, 0b0100, 0b11, 0, IIC_fpCMP16, "vcmpe", ".f16\t$Sd, $Sm", []>; - -// FIXME: Verify encoding after integrated assembler is working. def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins DPR:$Dd, DPR:$Dm), IIC_fpCMP64, "vcmp", ".f64\t$Dd, $Dm", - [/* For disassembly only; pattern left blank */]>; + [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm), (i32 0))]>; def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins SPR:$Sd, SPR:$Sm), IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm", - [/* For disassembly only; pattern left blank */]> { + [(arm_cmpfp SPR:$Sd, SPR:$Sm, (i32 0))]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -566,7 +582,7 @@ let Defs = [FPSCR_NZCV] in { def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins DPR:$Dd), IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0", - [(arm_cmpfp0 (f64 DPR:$Dd))]> { + [(arm_cmpfp0 (f64 DPR:$Dd), (i32 1))]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; } @@ -574,7 +590,7 @@ def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0, def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins SPR:$Sd), IIC_fpCMP32, "vcmpe", ".f32\t$Sd, #0", - [(arm_cmpfp0 SPR:$Sd)]> { + [(arm_cmpfp0 SPR:$Sd, (i32 1))]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; @@ -591,11 +607,10 @@ def VCMPEZH : AHuI<0b11101, 0b11, 0b0101, 0b11, 0, let Inst{5} = 0; } -// FIXME: Verify encoding after integrated assembler is working. def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0, (outs), (ins DPR:$Dd), IIC_fpCMP64, "vcmp", ".f64\t$Dd, #0", - [/* For disassembly only; pattern left blank */]> { + [(arm_cmpfp0 (f64 DPR:$Dd), (i32 0))]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; } @@ -603,7 +618,7 @@ def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0, def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0, (outs), (ins SPR:$Sd), IIC_fpCMP32, "vcmp", ".f32\t$Sd, #0", - [/* For disassembly only; pattern left blank */]> { + [(arm_cmpfp0 SPR:$Sd, (i32 0))]> { let Inst{3-0} = 0b0000; let Inst{5} = 0; @@ -624,7 +639,8 @@ def VCMPZH : AHuI<0b11101, 0b11, 0b0101, 0b01, 0, def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0, (outs DPR:$Dd), (ins SPR:$Sm), IIC_fpCVTDS, "vcvt", ".f64.f32\t$Dd, $Sm", - [(set DPR:$Dd, (fpextend SPR:$Sm))]> { + [(set DPR:$Dd, (fpextend SPR:$Sm))]>, + Sched<[WriteFPCVT]> { // Instruction operands. bits<5> Dd; bits<5> Sm; @@ -641,7 +657,8 @@ def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0, // Special case encoding: bits 11-8 is 0b1011. def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm, IIC_fpCVTSD, "vcvt", ".f32.f64\t$Sd, $Dm", - [(set SPR:$Sd, (fpround DPR:$Dm))]> { + [(set SPR:$Sd, (fpround DPR:$Dm))]>, + Sched<[WriteFPCVT]> { // Instruction operands. bits<5> Sd; bits<5> Dm; @@ -663,31 +680,35 @@ def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm, // Between half, single and double-precision. For disassembly only. -// FIXME: Verify encoding after integrated assembler is working. def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>, - Requires<[HasFP16]>; + Requires<[HasFP16]>, + Sched<[WriteFPCVT]>; def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>, - Requires<[HasFP16]>; + Requires<[HasFP16]>, + Sched<[WriteFPCVT]>; def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>, - Requires<[HasFP16]>; + Requires<[HasFP16]>, + Sched<[WriteFPCVT]>; def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>, - Requires<[HasFP16]>; + Requires<[HasFP16]>, + Sched<[WriteFPCVT]>; def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs DPR:$Dd), (ins SPR:$Sm), NoItinerary, "vcvtb", ".f64.f16\t$Dd, $Sm", - []>, Requires<[HasFPARMv8, HasDPVFP]> { + []>, Requires<[HasFPARMv8, HasDPVFP]>, + Sched<[WriteFPCVT]> { // Instruction operands. bits<5> Sm; @@ -946,12 +967,14 @@ defm VRINTM : vrint_inst_anpm<"m", 0b11, ffloor>; def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs DPR:$Dd), (ins DPR:$Dm), IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm", - [(set DPR:$Dd, (fsqrt (f64 DPR:$Dm)))]>; + [(set DPR:$Dd, (fsqrt (f64 DPR:$Dm)))]>, + Sched<[WriteFPSQRT64]>; def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm", - [(set SPR:$Sd, (fsqrt SPR:$Sm))]>; + [(set SPR:$Sd, (fsqrt SPR:$Sm))]>, + Sched<[WriteFPSQRT32]>; def VSQRTH : AHuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), @@ -987,7 +1010,8 @@ def VINSH : ASuInp<0b11101, 0b11, 0b0000, 0b11, 0, def VMOVRS : AVConv2I<0b11100001, 0b1010, (outs GPR:$Rt), (ins SPR:$Sn), IIC_fpMOVSI, "vmov", "\t$Rt, $Sn", - [(set GPR:$Rt, (bitconvert SPR:$Sn))]> { + [(set GPR:$Rt, (bitconvert SPR:$Sn))]>, + Sched<[WriteFPMOV]> { // Instruction operands. bits<4> Rt; bits<5> Sn; @@ -1010,7 +1034,8 @@ def VMOVSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$Sn), (ins GPR:$Rt), IIC_fpMOVIS, "vmov", "\t$Sn, $Rt", [(set SPR:$Sn, (bitconvert GPR:$Rt))]>, - Requires<[HasVFP2, UseVMOVSR]> { + Requires<[HasVFP2, UseVMOVSR]>, + Sched<[WriteFPMOV]> { // Instruction operands. bits<5> Sn; bits<4> Rt; @@ -1032,7 +1057,8 @@ let hasSideEffects = 0 in { def VMOVRRD : AVConv3I<0b11000101, 0b1011, (outs GPR:$Rt, GPR:$Rt2), (ins DPR:$Dm), IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $Dm", - [/* FIXME: Can't write pattern for multiple result instr*/]> { + [(set GPR:$Rt, GPR:$Rt2, (arm_fmrrd DPR:$Dm))]>, + Sched<[WriteFPMOV]> { // Instruction operands. bits<5> Dm; bits<4> Rt; @@ -1059,7 +1085,8 @@ def VMOVRRD : AVConv3I<0b11000101, 0b1011, def VMOVRRS : AVConv3I<0b11000101, 0b1010, (outs GPR:$Rt, GPR:$Rt2), (ins SPR:$src1, SPR:$src2), IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $src1, $src2", - [/* For disassembly only; pattern left blank */]> { + [/* For disassembly only; pattern left blank */]>, + Sched<[WriteFPMOV]> { bits<5> src1; bits<4> Rt; bits<4> Rt2; @@ -1085,7 +1112,8 @@ def VMOVRRS : AVConv3I<0b11000101, 0b1010, def VMOVDRR : AVConv5I<0b11000100, 0b1011, (outs DPR:$Dm), (ins GPR:$Rt, GPR:$Rt2), IIC_fpMOVID, "vmov", "\t$Dm, $Rt, $Rt2", - [(set DPR:$Dm, (arm_fmdrr GPR:$Rt, GPR:$Rt2))]> { + [(set DPR:$Dm, (arm_fmdrr GPR:$Rt, GPR:$Rt2))]>, + Sched<[WriteFPMOV]> { // Instruction operands. bits<5> Dm; bits<4> Rt; @@ -1128,7 +1156,8 @@ let hasSideEffects = 0 in def VMOVSRR : AVConv5I<0b11000100, 0b1010, (outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2), IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2", - [/* For disassembly only; pattern left blank */]> { + [/* For disassembly only; pattern left blank */]>, + Sched<[WriteFPMOV]> { // Instruction operands. bits<5> dst1; bits<4> src1; @@ -1154,7 +1183,8 @@ def VMOVRH : AVConv2I<0b11100001, 0b1001, (outs GPR:$Rt), (ins SPR:$Sn), IIC_fpMOVSI, "vmov", ".f16\t$Rt, $Sn", []>, - Requires<[HasFullFP16]> { + Requires<[HasFullFP16]>, + Sched<[WriteFPMOV]> { // Instruction operands. bits<4> Rt; bits<5> Sn; @@ -1173,7 +1203,8 @@ def VMOVHR : AVConv4I<0b11100000, 0b1001, (outs SPR:$Sn), (ins GPR:$Rt), IIC_fpMOVIS, "vmov", ".f16\t$Sn, $Rt", []>, - Requires<[HasFullFP16]> { + Requires<[HasFullFP16]>, + Sched<[WriteFPMOV]> { // Instruction operands. bits<5> Sn; bits<4> Rt; @@ -1254,7 +1285,8 @@ class AVConv1IHs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, def VSITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011, (outs DPR:$Dd), (ins SPR:$Sm), IIC_fpCVTID, "vcvt", ".f64.s32\t$Dd, $Sm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 1; // s32 } @@ -1269,7 +1301,8 @@ let Predicates=[HasVFP2, HasDPVFP] in { def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010, (outs SPR:$Sd),(ins SPR:$Sm), IIC_fpCVTIS, "vcvt", ".f32.s32\t$Sd, $Sm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 1; // s32 // Some single precision VFP instructions may be executed on both NEON and @@ -1286,14 +1319,16 @@ def : VFPNoNEONPat<(f32 (sint_to_fp (i32 (alignedload32 addrmode5:$a)))), def VSITOH : AVConv1IHs_Encode<0b11101, 0b11, 0b1000, 0b1001, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTIH, "vcvt", ".f16.s32\t$Sd, $Sm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 1; // s32 } def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011, (outs DPR:$Dd), (ins SPR:$Sm), IIC_fpCVTID, "vcvt", ".f64.u32\t$Dd, $Sm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 0; // u32 } @@ -1308,7 +1343,8 @@ let Predicates=[HasVFP2, HasDPVFP] in { def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTIS, "vcvt", ".f32.u32\t$Sd, $Sm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 0; // u32 // Some single precision VFP instructions may be executed on both NEON and @@ -1325,7 +1361,8 @@ def : VFPNoNEONPat<(f32 (uint_to_fp (i32 (alignedload32 addrmode5:$a)))), def VUITOH : AVConv1IHs_Encode<0b11101, 0b11, 0b1000, 0b1001, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTIH, "vcvt", ".f16.u32\t$Sd, $Sm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 0; // u32 } @@ -1390,7 +1427,8 @@ class AVConv1IsH_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, def VTOSIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011, (outs SPR:$Sd), (ins DPR:$Dm), IIC_fpCVTDI, "vcvt", ".s32.f64\t$Sd, $Dm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 1; // Z bit } @@ -1405,7 +1443,8 @@ let Predicates=[HasVFP2, HasDPVFP] in { def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTSI, "vcvt", ".s32.f32\t$Sd, $Sm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 1; // Z bit // Some single precision VFP instructions may be executed on both NEON and @@ -1423,14 +1462,16 @@ def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_sint (f32 SPR:$a))), def VTOSIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTHI, "vcvt", ".s32.f16\t$Sd, $Sm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 1; // Z bit } def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011, (outs SPR:$Sd), (ins DPR:$Dm), IIC_fpCVTDI, "vcvt", ".u32.f64\t$Sd, $Dm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 1; // Z bit } @@ -1445,7 +1486,8 @@ let Predicates=[HasVFP2, HasDPVFP] in { def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTSI, "vcvt", ".u32.f32\t$Sd, $Sm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 1; // Z bit // Some single precision VFP instructions may be executed on both NEON and @@ -1463,52 +1505,58 @@ def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_uint (f32 SPR:$a))), def VTOUIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTHI, "vcvt", ".u32.f16\t$Sd, $Sm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 1; // Z bit } // And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR. let Uses = [FPSCR] in { -// FIXME: Verify encoding after integrated assembler is working. def VTOSIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011, (outs SPR:$Sd), (ins DPR:$Dm), IIC_fpCVTDI, "vcvtr", ".s32.f64\t$Sd, $Dm", - [(set SPR:$Sd, (int_arm_vcvtr (f64 DPR:$Dm)))]>{ + [(set SPR:$Sd, (int_arm_vcvtr (f64 DPR:$Dm)))]>, + Sched<[WriteFPCVT]> { let Inst{7} = 0; // Z bit } def VTOSIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTSI, "vcvtr", ".s32.f32\t$Sd, $Sm", - [(set SPR:$Sd, (int_arm_vcvtr SPR:$Sm))]> { + [(set SPR:$Sd, (int_arm_vcvtr SPR:$Sm))]>, + Sched<[WriteFPCVT]> { let Inst{7} = 0; // Z bit } def VTOSIRH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTHI, "vcvtr", ".s32.f16\t$Sd, $Sm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 0; // Z bit } def VTOUIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011, (outs SPR:$Sd), (ins DPR:$Dm), IIC_fpCVTDI, "vcvtr", ".u32.f64\t$Sd, $Dm", - [(set SPR:$Sd, (int_arm_vcvtru(f64 DPR:$Dm)))]>{ + [(set SPR:$Sd, (int_arm_vcvtru(f64 DPR:$Dm)))]>, + Sched<[WriteFPCVT]> { let Inst{7} = 0; // Z bit } def VTOUIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTSI, "vcvtr", ".u32.f32\t$Sd, $Sm", - [(set SPR:$Sd, (int_arm_vcvtru SPR:$Sm))]> { + [(set SPR:$Sd, (int_arm_vcvtru SPR:$Sm))]>, + Sched<[WriteFPCVT]> { let Inst{7} = 0; // Z bit } def VTOUIRH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTHI, "vcvtr", ".u32.f16\t$Sd, $Sm", - []> { + []>, + Sched<[WriteFPCVT]> { let Inst{7} = 0; // Z bit } } @@ -1528,8 +1576,7 @@ let Constraints = "$a = $dst" in { class AVConv1XInsS_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern>, - Sched<[WriteCvtFP]> { + : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> { bits<5> dst; // if dp_operation then UInt(D:Vd) else UInt(Vd:D); let Inst{22} = dst{0}; @@ -1540,8 +1587,7 @@ class AVConv1XInsS_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern>, - Sched<[WriteCvtFP]> { + : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> { bits<5> dst; // if dp_operation then UInt(D:Vd) else UInt(Vd:D); let Inst{22} = dst{4}; @@ -1553,26 +1599,31 @@ class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, def VTOSHH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1001, 0, (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), IIC_fpCVTHI, "vcvt", ".s16.f16\t$dst, $a, $fbits", []>, - Requires<[HasFullFP16]>; + Requires<[HasFullFP16]>, + Sched<[WriteFPCVT]>; def VTOUHH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1001, 0, (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), IIC_fpCVTHI, "vcvt", ".u16.f16\t$dst, $a, $fbits", []>, - Requires<[HasFullFP16]>; + Requires<[HasFullFP16]>, + Sched<[WriteFPCVT]>; def VTOSLH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1001, 1, (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), IIC_fpCVTHI, "vcvt", ".s32.f16\t$dst, $a, $fbits", []>, - Requires<[HasFullFP16]>; + Requires<[HasFullFP16]>, + Sched<[WriteFPCVT]>; def VTOULH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1001, 1, (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), IIC_fpCVTHI, "vcvt", ".u32.f16\t$dst, $a, $fbits", []>, - Requires<[HasFullFP16]>; + Requires<[HasFullFP16]>, + Sched<[WriteFPCVT]>; def VTOSHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), - IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", []> { + IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -1604,45 +1655,54 @@ def VTOULS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 1, def VTOSHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 0, (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits), - IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits", []>; + IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]>; def VTOUHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 0, (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits), - IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits", []>; + IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]>; def VTOSLD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 1, (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits), - IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits", []>; + IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]>; def VTOULD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 1, (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits), - IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", []>; + IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]>; // Fixed-Point to FP: def VSHTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1001, 0, (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), IIC_fpCVTIH, "vcvt", ".f16.s16\t$dst, $a, $fbits", []>, - Requires<[HasFullFP16]>; + Requires<[HasFullFP16]>, + Sched<[WriteFPCVT]>; def VUHTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1001, 0, (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), IIC_fpCVTIH, "vcvt", ".f16.u16\t$dst, $a, $fbits", []>, - Requires<[HasFullFP16]>; + Requires<[HasFullFP16]>, + Sched<[WriteFPCVT]>; def VSLTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1001, 1, (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), IIC_fpCVTIH, "vcvt", ".f16.s32\t$dst, $a, $fbits", []>, - Requires<[HasFullFP16]>; + Requires<[HasFullFP16]>, + Sched<[WriteFPCVT]>; def VULTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1001, 1, (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), IIC_fpCVTIH, "vcvt", ".f16.u32\t$dst, $a, $fbits", []>, - Requires<[HasFullFP16]>; + Requires<[HasFullFP16]>, + Sched<[WriteFPCVT]>; def VSHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), - IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", []> { + IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -1650,7 +1710,8 @@ def VSHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 0, def VUHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), - IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", []> { + IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -1658,7 +1719,8 @@ def VUHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 0, def VSLTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 1, (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), - IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", []> { + IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -1666,7 +1728,8 @@ def VSLTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 1, def VULTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 1, (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), - IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", []> { + IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -1674,19 +1737,23 @@ def VULTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 1, def VSHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 0, (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits), - IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits", []>; + IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]>; def VUHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 0, (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits), - IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits", []>; + IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]>; def VSLTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 1, (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits), - IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits", []>; + IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]>; def VULTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 1, (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits), - IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", []>; + IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", []>, + Sched<[WriteFPCVT]>; } // End of 'let Constraints = "$a = $dst" in' @@ -1700,7 +1767,8 @@ def VMLAD : ADbI<0b11100, 0b00, 0, 0, [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>, + Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def VMLAS : ASbIn<0b11100, 0b00, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1708,7 +1776,8 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0, [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -1734,7 +1803,8 @@ def VMLSD : ADbI<0b11100, 0b00, 1, 0, [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>, + Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def VMLSS : ASbIn<0b11100, 0b00, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1742,7 +1812,8 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0, [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -1768,7 +1839,8 @@ def VNMLAD : ADbI<0b11100, 0b01, 1, 0, [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>, + Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def VNMLAS : ASbI<0b11100, 0b01, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1776,7 +1848,8 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0, [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -1802,14 +1875,16 @@ def VNMLSD : ADbI<0b11100, 0b01, 0, 0, [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>, + Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def VNMLSS : ASbI<0b11100, 0b01, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm", [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -1838,7 +1913,8 @@ def VFMAD : ADbI<0b11101, 0b10, 0, 0, [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; + Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>, + Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def VFMAS : ASbIn<0b11101, 0b10, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1846,7 +1922,8 @@ def VFMAS : ASbIn<0b11101, 0b10, 0, 0, [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> { + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines. } @@ -1856,7 +1933,8 @@ def VFMAH : AHbI<0b11101, 0b10, 0, 0, IIC_fpFMAC16, "vfma", ".f16\t$Sd, $Sn, $Sm", []>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasFullFP16,UseFusedMAC]>; + Requires<[HasFullFP16,UseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VFMAD DPR:$dstin, DPR:$a, DPR:$b)>, diff --git a/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp index 2bdbe4fca3de..8d224d6a70fa 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp @@ -54,9 +54,21 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, DstSize <= SrcSize)) && "Copy with different width?!"); - assert(RegBank->getID() == ARM::GPRRegBankID && "Unsupported reg bank"); + assert((RegBank->getID() == ARM::GPRRegBankID || + RegBank->getID() == ARM::FPRRegBankID) && + "Unsupported reg bank"); + const TargetRegisterClass *RC = &ARM::GPRRegClass; + if (RegBank->getID() == ARM::FPRRegBankID) { + if (DstSize == 32) + RC = &ARM::SPRRegClass; + else if (DstSize == 64) + RC = &ARM::DPRRegClass; + else + llvm_unreachable("Unsupported destination size"); + } + // No need to constrain SrcReg. It will get constrained when // we hit another of its uses or its defs. // Copies do not have constraints. @@ -68,6 +80,146 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, return true; } +static bool selectFAdd(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, + MachineRegisterInfo &MRI) { + assert(TII.getSubtarget().hasVFP2() && "Can't select fp add without vfp"); + + LLT Ty = MRI.getType(MIB->getOperand(0).getReg()); + unsigned ValSize = Ty.getSizeInBits(); + + if (ValSize == 32) { + if (TII.getSubtarget().useNEONForSinglePrecisionFP()) + return false; + MIB->setDesc(TII.get(ARM::VADDS)); + } else { + assert(ValSize == 64 && "Unsupported size for floating point value"); + if (TII.getSubtarget().isFPOnlySP()) + return false; + MIB->setDesc(TII.get(ARM::VADDD)); + } + MIB.add(predOps(ARMCC::AL)); + + return true; +} + +static bool selectSequence(MachineInstrBuilder &MIB, + const ARMBaseInstrInfo &TII, + MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI) { + assert(TII.getSubtarget().hasVFP2() && "Can't select sequence without VFP"); + + // We only support G_SEQUENCE as a way to stick together two scalar GPRs + // into one DPR. + unsigned VReg0 = MIB->getOperand(0).getReg(); + (void)VReg0; + assert(MRI.getType(VReg0).getSizeInBits() == 64 && + RBI.getRegBank(VReg0, MRI, TRI)->getID() == ARM::FPRRegBankID && + "Unsupported operand for G_SEQUENCE"); + unsigned VReg1 = MIB->getOperand(1).getReg(); + (void)VReg1; + assert(MRI.getType(VReg1).getSizeInBits() == 32 && + RBI.getRegBank(VReg1, MRI, TRI)->getID() == ARM::GPRRegBankID && + "Unsupported operand for G_SEQUENCE"); + unsigned VReg2 = MIB->getOperand(3).getReg(); + (void)VReg2; + assert(MRI.getType(VReg2).getSizeInBits() == 32 && + RBI.getRegBank(VReg2, MRI, TRI)->getID() == ARM::GPRRegBankID && + "Unsupported operand for G_SEQUENCE"); + + // Remove the operands corresponding to the offsets. + MIB->RemoveOperand(4); + MIB->RemoveOperand(2); + + MIB->setDesc(TII.get(ARM::VMOVDRR)); + MIB.add(predOps(ARMCC::AL)); + + return true; +} + +static bool selectExtract(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, + MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI) { + assert(TII.getSubtarget().hasVFP2() && "Can't select extract without VFP"); + + // We only support G_EXTRACT as a way to break up one DPR into two GPRs. + unsigned VReg0 = MIB->getOperand(0).getReg(); + (void)VReg0; + assert(MRI.getType(VReg0).getSizeInBits() == 32 && + RBI.getRegBank(VReg0, MRI, TRI)->getID() == ARM::GPRRegBankID && + "Unsupported operand for G_EXTRACT"); + unsigned VReg1 = MIB->getOperand(1).getReg(); + (void)VReg1; + assert(MRI.getType(VReg1).getSizeInBits() == 64 && + RBI.getRegBank(VReg1, MRI, TRI)->getID() == ARM::FPRRegBankID && + "Unsupported operand for G_EXTRACT"); + assert(MIB->getOperand(2).getImm() % 32 == 0 && + "Unsupported operand for G_EXTRACT"); + + // Remove the operands corresponding to the offsets. + MIB->getOperand(2).setImm(MIB->getOperand(2).getImm() / 32); + + MIB->setDesc(TII.get(ARM::VGETLNi32)); + MIB.add(predOps(ARMCC::AL)); + + return true; +} + +/// Select the opcode for simple extensions (that translate to a single SXT/UXT +/// instruction). Extension operations more complicated than that should not +/// invoke this. Returns the original opcode if it doesn't know how to select a +/// better one. +static unsigned selectSimpleExtOpc(unsigned Opc, unsigned Size) { + using namespace TargetOpcode; + + if (Size != 8 && Size != 16) + return Opc; + + if (Opc == G_SEXT) + return Size == 8 ? ARM::SXTB : ARM::SXTH; + + if (Opc == G_ZEXT) + return Size == 8 ? ARM::UXTB : ARM::UXTH; + + return Opc; +} + +/// Select the opcode for simple loads and stores. For types smaller than 32 +/// bits, the value will be zero extended. Returns the original opcode if it +/// doesn't know how to select a better one. +static unsigned selectLoadStoreOpCode(unsigned Opc, unsigned RegBank, + unsigned Size) { + bool isStore = Opc == TargetOpcode::G_STORE; + + if (RegBank == ARM::GPRRegBankID) { + switch (Size) { + case 1: + case 8: + return isStore ? ARM::STRBi12 : ARM::LDRBi12; + case 16: + return isStore ? ARM::STRH : ARM::LDRH; + case 32: + return isStore ? ARM::STRi12 : ARM::LDRi12; + default: + return Opc; + } + } + + if (RegBank == ARM::FPRRegBankID) { + switch (Size) { + case 32: + return isStore ? ARM::VSTRS : ARM::VLDRS; + case 64: + return isStore ? ARM::VSTRD : ARM::VLDRD; + default: + return Opc; + } + } + + return Opc; +} + bool ARMInstructionSelector::select(MachineInstr &I) const { assert(I.getParent() && "Instruction should be in a basic block!"); assert(I.getParent()->getParent() && "Instruction should be in a function!"); @@ -84,23 +236,129 @@ bool ARMInstructionSelector::select(MachineInstr &I) const { } MachineInstrBuilder MIB{MF, I}; + bool isSExt = false; using namespace TargetOpcode; switch (I.getOpcode()) { + case G_SEXT: + isSExt = true; + LLVM_FALLTHROUGH; + case G_ZEXT: { + LLT DstTy = MRI.getType(I.getOperand(0).getReg()); + // FIXME: Smaller destination sizes coming soon! + if (DstTy.getSizeInBits() != 32) { + DEBUG(dbgs() << "Unsupported destination size for extension"); + return false; + } + + LLT SrcTy = MRI.getType(I.getOperand(1).getReg()); + unsigned SrcSize = SrcTy.getSizeInBits(); + switch (SrcSize) { + case 1: { + // ZExt boils down to & 0x1; for SExt we also subtract that from 0 + I.setDesc(TII.get(ARM::ANDri)); + MIB.addImm(1).add(predOps(ARMCC::AL)).add(condCodeOp()); + + if (isSExt) { + unsigned SExtResult = I.getOperand(0).getReg(); + + // Use a new virtual register for the result of the AND + unsigned AndResult = MRI.createVirtualRegister(&ARM::GPRRegClass); + I.getOperand(0).setReg(AndResult); + + auto InsertBefore = std::next(I.getIterator()); + auto SubI = + BuildMI(MBB, InsertBefore, I.getDebugLoc(), TII.get(ARM::RSBri)) + .addDef(SExtResult) + .addUse(AndResult) + .addImm(0) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); + if (!constrainSelectedInstRegOperands(*SubI, TII, TRI, RBI)) + return false; + } + break; + } + case 8: + case 16: { + unsigned NewOpc = selectSimpleExtOpc(I.getOpcode(), SrcSize); + if (NewOpc == I.getOpcode()) + return false; + I.setDesc(TII.get(NewOpc)); + MIB.addImm(0).add(predOps(ARMCC::AL)); + break; + } + default: + DEBUG(dbgs() << "Unsupported source size for extension"); + return false; + } + break; + } case G_ADD: + case G_GEP: I.setDesc(TII.get(ARM::ADDrr)); - AddDefaultCC(AddDefaultPred(MIB)); + MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); + break; + case G_FADD: + if (!selectFAdd(MIB, TII, MRI)) + return false; break; case G_FRAME_INDEX: // Add 0 to the given frame index and hope it will eventually be folded into // the user(s). I.setDesc(TII.get(ARM::ADDri)); - AddDefaultCC(AddDefaultPred(MIB.addImm(0))); + MIB.addImm(0).add(predOps(ARMCC::AL)).add(condCodeOp()); break; - case G_LOAD: - I.setDesc(TII.get(ARM::LDRi12)); - AddDefaultPred(MIB.addImm(0)); + case G_CONSTANT: { + unsigned Reg = I.getOperand(0).getReg(); + if (MRI.getType(Reg).getSizeInBits() != 32) + return false; + + assert(RBI.getRegBank(Reg, MRI, TRI)->getID() == ARM::GPRRegBankID && + "Expected constant to live in a GPR"); + I.setDesc(TII.get(ARM::MOVi)); + MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); + break; + } + case G_STORE: + case G_LOAD: { + const auto &MemOp = **I.memoperands_begin(); + if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) { + DEBUG(dbgs() << "Atomic load/store not supported yet\n"); + return false; + } + + unsigned Reg = I.getOperand(0).getReg(); + unsigned RegBank = RBI.getRegBank(Reg, MRI, TRI)->getID(); + + LLT ValTy = MRI.getType(Reg); + const auto ValSize = ValTy.getSizeInBits(); + + assert((ValSize != 64 || TII.getSubtarget().hasVFP2()) && + "Don't know how to load/store 64-bit value without VFP"); + + const auto NewOpc = selectLoadStoreOpCode(I.getOpcode(), RegBank, ValSize); + if (NewOpc == G_LOAD || NewOpc == G_STORE) + return false; + + I.setDesc(TII.get(NewOpc)); + + if (NewOpc == ARM::LDRH || NewOpc == ARM::STRH) + // LDRH has a funny addressing mode (there's already a FIXME for it). + MIB.addReg(0); + MIB.addImm(0).add(predOps(ARMCC::AL)); + break; + } + case G_SEQUENCE: { + if (!selectSequence(MIB, TII, MRI, TRI, RBI)) + return false; break; + } + case G_EXTRACT: { + if (!selectExtract(MIB, TII, MRI, TRI, RBI)) + return false; + break; + } default: return false; } diff --git a/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.h b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.h index 5072cdd60ce4..530141d92c2c 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.h +++ b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.h @@ -1,4 +1,4 @@ -//===- ARMInstructionSelector ------------------------------------*- C++ -*-==// +//===- ARMInstructionSelector -----------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -6,8 +6,10 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// /// \file /// This file declares the targeting of the InstructionSelector class for ARM. +// //===----------------------------------------------------------------------===// #ifndef LLVM_LIB_TARGET_ARM_ARMINSTRUCTIONSELECTOR_H @@ -16,9 +18,9 @@ #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" namespace llvm { + class ARMBaseInstrInfo; class ARMBaseRegisterInfo; -class ARMBaseTargetMachine; class ARMRegisterBankInfo; class ARMSubtarget; @@ -27,7 +29,7 @@ public: ARMInstructionSelector(const ARMSubtarget &STI, const ARMRegisterBankInfo &RBI); - virtual bool select(MachineInstr &I) const override; + bool select(MachineInstr &I) const override; private: const ARMBaseInstrInfo &TII; @@ -35,5 +37,6 @@ private: const ARMRegisterBankInfo &RBI; }; -} // End llvm namespace. -#endif +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_ARM_ARMINSTRUCTIONSELECTOR_H diff --git a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp index 255ea4bc7198..994bbd673dd8 100644 --- a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "ARMLegalizerInfo.h" +#include "ARMSubtarget.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Type.h" @@ -23,22 +24,53 @@ using namespace llvm; #error "You shouldn't build this" #endif -ARMLegalizerInfo::ARMLegalizerInfo() { +ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { using namespace TargetOpcode; const LLT p0 = LLT::pointer(0, 32); + const LLT s1 = LLT::scalar(1); const LLT s8 = LLT::scalar(8); const LLT s16 = LLT::scalar(16); const LLT s32 = LLT::scalar(32); + const LLT s64 = LLT::scalar(64); setAction({G_FRAME_INDEX, p0}, Legal); - setAction({G_LOAD, s32}, Legal); - setAction({G_LOAD, 1, p0}, Legal); + for (unsigned Op : {G_LOAD, G_STORE}) { + for (auto Ty : {s1, s8, s16, s32, p0}) + setAction({Op, Ty}, Legal); + setAction({Op, 1, p0}, Legal); + } - for (auto Ty : {s8, s16, s32}) + for (auto Ty : {s1, s8, s16, s32}) setAction({G_ADD, Ty}, Legal); + for (unsigned Op : {G_SEXT, G_ZEXT}) { + setAction({Op, s32}, Legal); + for (auto Ty : {s1, s8, s16}) + setAction({Op, 1, Ty}, Legal); + } + + setAction({G_GEP, p0}, Legal); + setAction({G_GEP, 1, s32}, Legal); + + setAction({G_CONSTANT, s32}, Legal); + + if (!ST.useSoftFloat() && ST.hasVFP2()) { + setAction({G_FADD, s32}, Legal); + setAction({G_FADD, s64}, Legal); + + setAction({G_LOAD, s64}, Legal); + setAction({G_STORE, s64}, Legal); + } else { + for (auto Ty : {s32, s64}) + setAction({G_FADD, Ty}, Libcall); + } + + for (unsigned Op : {G_FREM, G_FPOW}) + for (auto Ty : {s32, s64}) + setAction({Op, Ty}, Libcall); + computeTables(); } diff --git a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h index ca3eea81271b..0b8a608a6bde 100644 --- a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h @@ -18,12 +18,12 @@ namespace llvm { -class LLVMContext; +class ARMSubtarget; /// This class provides the information for the target register banks. class ARMLegalizerInfo : public LegalizerInfo { public: - ARMLegalizerInfo(); + ARMLegalizerInfo(const ARMSubtarget &ST); }; } // End llvm namespace. #endif diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 48ab491b5be9..72fcf7cd6a4f 100644 --- a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -517,8 +517,12 @@ void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, if (InsertSub) { // An instruction above couldn't be updated, so insert a sub. - AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true) - .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg); + BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base) + .add(t1CondCodeOp(true)) + .addReg(Base) + .addImm(WordOffset * 4) + .addImm(Pred) + .addReg(PredReg); return; } @@ -534,9 +538,12 @@ void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, // information and *always* have to reset at the end of a block. // See PR21029. if (MBBI != MBB.end()) --MBBI; - AddDefaultT1CC( - BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true) - .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg); + BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base) + .add(t1CondCodeOp(true)) + .addReg(Base) + .addImm(WordOffset * 4) + .addImm(Pred) + .addReg(PredReg); } } @@ -602,13 +609,12 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti( // Exception: If the base register is in the input reglist, Thumb1 LDM is // non-writeback. // It's also not possible to merge an STR of the base register in Thumb1. - if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) { + if (isThumb1 && ContainsReg(Regs, Base)) { assert(Base != ARM::SP && "Thumb1 does not allow SP in register list"); - if (Opcode == ARM::tLDRi) { + if (Opcode == ARM::tLDRi) Writeback = false; - } else if (Opcode == ARM::tSTRi) { + else if (Opcode == ARM::tSTRi) return nullptr; - } } ARM_AM::AMSubMode Mode = ARM_AM::ia; @@ -700,8 +706,8 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti( .addReg(Base, getKillRegState(KillOldBase)); } else BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase) - .addReg(Base, getKillRegState(KillOldBase)) - .addImm(Pred).addReg(PredReg); + .addReg(Base, getKillRegState(KillOldBase)) + .add(predOps(Pred, PredReg)); // The following ADDS/SUBS becomes an update. Base = NewBase; @@ -710,17 +716,21 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti( if (BaseOpc == ARM::tADDrSPi) { assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4"); BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase) - .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4) - .addImm(Pred).addReg(PredReg); + .addReg(Base, getKillRegState(KillOldBase)) + .addImm(Offset / 4) + .add(predOps(Pred, PredReg)); } else - AddDefaultT1CC( - BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true) - .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset) - .addImm(Pred).addReg(PredReg); + BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase) + .add(t1CondCodeOp(true)) + .addReg(Base, getKillRegState(KillOldBase)) + .addImm(Offset) + .add(predOps(Pred, PredReg)); } else { BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase) - .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset) - .addImm(Pred).addReg(PredReg).addReg(0); + .addReg(Base, getKillRegState(KillOldBase)) + .addImm(Offset) + .add(predOps(Pred, PredReg)) + .add(condCodeOp()); } Base = NewBase; BaseKill = true; // New base is always killed straight away. @@ -1259,7 +1269,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) { // Transfer the rest of operands. for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum) - MIB.addOperand(MI->getOperand(OpNum)); + MIB.add(MI->getOperand(OpNum)); // Transfer memoperands. MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); @@ -1392,14 +1402,19 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) { } else { int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg()) - .addReg(Base, RegState::Define) - .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg); + .addReg(Base, RegState::Define) + .addReg(Base) + .addReg(0) + .addImm(Imm) + .add(predOps(Pred, PredReg)); } } else { // t2LDR_PRE, t2LDR_POST BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg()) - .addReg(Base, RegState::Define) - .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); + .addReg(Base, RegState::Define) + .addReg(Base) + .addImm(Offset) + .add(predOps(Pred, PredReg)); } } else { MachineOperand &MO = MI->getOperand(0); @@ -1410,13 +1425,18 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) { int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); // STR_PRE, STR_POST BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base) - .addReg(MO.getReg(), getKillRegState(MO.isKill())) - .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg); + .addReg(MO.getReg(), getKillRegState(MO.isKill())) + .addReg(Base) + .addReg(0) + .addImm(Imm) + .add(predOps(Pred, PredReg)); } else { // t2STR_PRE, t2STR_POST BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base) - .addReg(MO.getReg(), getKillRegState(MO.isKill())) - .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); + .addReg(MO.getReg(), getKillRegState(MO.isKill())) + .addReg(Base) + .addImm(Offset) + .add(predOps(Pred, PredReg)); } } MBB.erase(MBBI); @@ -1462,12 +1482,10 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const { DebugLoc DL = MI.getDebugLoc(); MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc)); if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) { - MIB.addOperand(Reg0Op).addOperand(Reg1Op) - .addReg(BaseOp.getReg(), RegState::Define); + MIB.add(Reg0Op).add(Reg1Op).addReg(BaseOp.getReg(), RegState::Define); } else { assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST); - MIB.addReg(BaseOp.getReg(), RegState::Define) - .addOperand(Reg0Op).addOperand(Reg1Op); + MIB.addReg(BaseOp.getReg(), RegState::Define).add(Reg0Op).add(Reg1Op); } MIB.addReg(BaseOp.getReg(), RegState::Kill) .addImm(Offset).addImm(Pred).addReg(PredReg); @@ -1477,7 +1495,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const { // Transfer implicit operands. for (const MachineOperand &MO : MI.implicit_operands()) - MIB.addOperand(MO); + MIB.add(MO); MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); MBB.erase(MBBI); @@ -1891,8 +1909,9 @@ bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) { for (auto Use : Prev->uses()) if (Use.isKill()) { - AddDefaultPred(BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX)) - .addReg(Use.getReg(), RegState::Kill)) + BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX)) + .addReg(Use.getReg(), RegState::Kill) + .add(predOps(ARMCC::AL)) .copyImplicitOps(*MBBI); MBB.erase(MBBI); MBB.erase(Prev); @@ -1942,6 +1961,7 @@ namespace { static char ID; ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {} + AliasAnalysis *AA; const DataLayout *TD; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; @@ -1955,6 +1975,11 @@ namespace { return ARM_PREALLOC_LOAD_STORE_OPT_NAME; } + virtual void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<AAResultsWrapperPass>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + private: bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl, unsigned &NewOpc, unsigned &EvenReg, @@ -1984,6 +2009,7 @@ bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { TRI = STI->getRegisterInfo(); MRI = &Fn.getRegInfo(); MF = &Fn; + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); bool Modified = false; for (MachineBasicBlock &MFI : Fn) @@ -1997,28 +2023,19 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, MachineBasicBlock::iterator E, SmallPtrSetImpl<MachineInstr*> &MemOps, SmallSet<unsigned, 4> &MemRegs, - const TargetRegisterInfo *TRI) { + const TargetRegisterInfo *TRI, + AliasAnalysis *AA) { // Are there stores / loads / calls between them? - // FIXME: This is overly conservative. We should make use of alias information - // some day. SmallSet<unsigned, 4> AddedRegPressure; while (++I != E) { if (I->isDebugValue() || MemOps.count(&*I)) continue; if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects()) return false; - if (isLd && I->mayStore()) - return false; - if (!isLd) { - if (I->mayLoad()) - return false; - // It's not safe to move the first 'str' down. - // str r1, [r0] - // strh r5, [r0] - // str r4, [r0, #+4] - if (I->mayStore()) - return false; - } + if (I->mayStore() || (!isLd && I->mayLoad())) + for (MachineInstr *MemOp : MemOps) + if (I->mayAlias(AA, *MemOp, /*UseTBAA*/ false)) + return false; for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) { MachineOperand &MO = I->getOperand(j); if (!MO.isReg()) @@ -2142,33 +2159,40 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, unsigned LastBytes = 0; unsigned NumMove = 0; for (int i = Ops.size() - 1; i >= 0; --i) { + // Make sure each operation has the same kind. MachineInstr *Op = Ops[i]; - unsigned Loc = MI2LocMap[Op]; - if (Loc <= FirstLoc) { - FirstLoc = Loc; - FirstOp = Op; - } - if (Loc >= LastLoc) { - LastLoc = Loc; - LastOp = Op; - } - unsigned LSMOpcode = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia); if (LastOpcode && LSMOpcode != LastOpcode) break; + // Check that we have a continuous set of offsets. int Offset = getMemoryOpOffset(*Op); unsigned Bytes = getLSMultipleTransferSize(Op); if (LastBytes) { if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes)) break; } + + // Don't try to reschedule too many instructions. + if (NumMove == 8) // FIXME: Tune this limit. + break; + + // Found a mergable instruction; save information about it. + ++NumMove; LastOffset = Offset; LastBytes = Bytes; LastOpcode = LSMOpcode; - if (++NumMove == 8) // FIXME: Tune this limit. - break; + + unsigned Loc = MI2LocMap[Op]; + if (Loc <= FirstLoc) { + FirstLoc = Loc; + FirstOp = Op; + } + if (Loc >= LastLoc) { + LastLoc = Loc; + LastOp = Op; + } } if (NumMove <= 1) @@ -2176,7 +2200,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, else { SmallPtrSet<MachineInstr*, 4> MemOps; SmallSet<unsigned, 4> MemRegs; - for (int i = NumMove-1; i >= 0; --i) { + for (size_t i = Ops.size() - NumMove, e = Ops.size(); i != e; ++i) { MemOps.insert(Ops[i]); MemRegs.insert(Ops[i]->getOperand(0).getReg()); } @@ -2186,7 +2210,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this. if (DoMove) DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp, - MemOps, MemRegs, TRI); + MemOps, MemRegs, TRI, AA); if (!DoMove) { for (unsigned i = 0; i != NumMove; ++i) Ops.pop_back(); diff --git a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp index 07044b9697b6..0fd98268723a 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp @@ -14,23 +14,36 @@ #include "ARM.h" #include "ARMAsmPrinter.h" +#include "ARMBaseInstrInfo.h" +#include "ARMMachineFunctionInfo.h" +#include "ARMSubtarget.h" +#include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMMCExpr.h" +#include "llvm/ADT/APFloat.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/Mangler.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCStreamer.h" -using namespace llvm; +#include "llvm/Support/ErrorHandling.h" +#include <cassert> +#include <cstdint> +using namespace llvm; MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol) { + MCSymbolRefExpr::VariantKind SymbolVariant = MCSymbolRefExpr::VK_None; + if (MO.getTargetFlags() & ARMII::MO_SBREL) + SymbolVariant = MCSymbolRefExpr::VK_ARM_SBREL; + const MCExpr *Expr = - MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, OutContext); + MCSymbolRefExpr::create(Symbol, SymbolVariant, OutContext); switch (MO.getTargetFlags() & ARMII::MO_OPTION_MASK) { default: llvm_unreachable("Unknown target flag on symbol operand"); @@ -38,12 +51,12 @@ MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO, break; case ARMII::MO_LO16: Expr = - MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, OutContext); + MCSymbolRefExpr::create(Symbol, SymbolVariant, OutContext); Expr = ARMMCExpr::createLower16(Expr, OutContext); break; case ARMII::MO_HI16: Expr = - MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, OutContext); + MCSymbolRefExpr::create(Symbol, SymbolVariant, OutContext); Expr = ARMMCExpr::createUpper16(Expr, OutContext); break; } @@ -75,11 +88,10 @@ bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO, MCOp = MCOperand::createExpr(MCSymbolRefExpr::create( MO.getMBB()->getSymbol(), OutContext)); break; - case MachineOperand::MO_GlobalAddress: { + case MachineOperand::MO_GlobalAddress: MCOp = GetSymbolRef(MO, GetARMGVSymbol(MO.getGlobal(), MO.getTargetFlags())); break; - } case MachineOperand::MO_ExternalSymbol: MCOp = GetSymbolRef(MO, GetExternalSymbolSymbol(MO.getSymbolName())); diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp index 50d8f0941460..e25d36b57616 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "ARMMachineFunctionInfo.h" +#include "ARMSubtarget.h" using namespace llvm; @@ -15,10 +16,4 @@ void ARMFunctionInfo::anchor() {} ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF) : isThumb(MF.getSubtarget<ARMSubtarget>().isThumb()), - hasThumb2(MF.getSubtarget<ARMSubtarget>().hasThumb2()), - StByValParamsPadding(0), ArgRegsSaveSize(0), ReturnRegsCount(0), - HasStackFrame(false), RestoreSPFromFP(false), LRSpilledForFarJump(false), - FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), - GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), PICLabelUId(0), - VarArgsFrameIndex(0), HasITBlocks(false), ArgumentStackSize(0), - IsSplitCSR(false), PromotedGlobalsIncrease(0) {} + hasThumb2(MF.getSubtarget<ARMSubtarget>().hasThumb2()) {} diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h index 8c485e89bf54..816116772995 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -14,11 +14,11 @@ #ifndef LLVM_LIB_TARGET_ARM_ARMMACHINEFUNCTIONINFO_H #define LLVM_LIB_TARGET_ARM_ARMMACHINEFUNCTIONINFO_H -#include "ARMSubtarget.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include <utility> namespace llvm { @@ -29,42 +29,42 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// isThumb - True if this function is compiled under Thumb mode. /// Used to initialized Align, so must precede it. - bool isThumb; + bool isThumb = false; /// hasThumb2 - True if the target architecture supports Thumb2. Do not use /// to determine if function is compiled under Thumb mode, for that use /// 'isThumb'. - bool hasThumb2; + bool hasThumb2 = false; /// StByValParamsPadding - For parameter that is split between /// GPRs and memory; while recovering GPRs part, when /// StackAlignment > 4, and GPRs-part-size mod StackAlignment != 0, /// we need to insert gap before parameter start address. It allows to /// "attach" GPR-part to the part that was passed via stack. - unsigned StByValParamsPadding; + unsigned StByValParamsPadding = 0; /// VarArgsRegSaveSize - Size of the register save area for vararg functions. /// - unsigned ArgRegsSaveSize; + unsigned ArgRegsSaveSize = 0; /// ReturnRegsCount - Number of registers used up in the return. - unsigned ReturnRegsCount; + unsigned ReturnRegsCount = 0; /// HasStackFrame - True if this function has a stack frame. Set by /// determineCalleeSaves(). - bool HasStackFrame; + bool HasStackFrame = false; /// RestoreSPFromFP - True if epilogue should restore SP from FP. Set by /// emitPrologue. - bool RestoreSPFromFP; + bool RestoreSPFromFP = false; /// LRSpilledForFarJump - True if the LR register has been for spilled to /// enable far jump. - bool LRSpilledForFarJump; + bool LRSpilledForFarJump = false; /// FramePtrSpillOffset - If HasStackFrame, this records the frame pointer /// spill stack offset. - unsigned FramePtrSpillOffset; + unsigned FramePtrSpillOffset = 0; /// GPRCS1Offset, GPRCS2Offset, DPRCSOffset - Starting offset of callee saved /// register spills areas. For Mac OS X: @@ -77,16 +77,16 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// /// Also see AlignedDPRCSRegs below. Not all D-regs need to go in area 3. /// Some may be spilled after the stack has been realigned. - unsigned GPRCS1Offset; - unsigned GPRCS2Offset; - unsigned DPRCSOffset; + unsigned GPRCS1Offset = 0; + unsigned GPRCS2Offset = 0; + unsigned DPRCSOffset = 0; /// GPRCS1Size, GPRCS2Size, DPRCSSize - Sizes of callee saved register spills /// areas. - unsigned GPRCS1Size; - unsigned GPRCS2Size; - unsigned DPRCSAlignGapSize; - unsigned DPRCSSize; + unsigned GPRCS1Size = 0; + unsigned GPRCS2Size = 0; + unsigned DPRCSAlignGapSize = 0; + unsigned DPRCSSize = 0; /// NumAlignedDPRCS2Regs - The number of callee-saved DPRs that are saved in /// the aligned portion of the stack frame. This is always a contiguous @@ -95,15 +95,15 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// We do not keep track of the frame indices used for these registers - they /// behave like any other frame index in the aligned stack frame. These /// registers also aren't included in DPRCSSize above. - unsigned NumAlignedDPRCS2Regs; + unsigned NumAlignedDPRCS2Regs = 0; - unsigned PICLabelUId; + unsigned PICLabelUId = 0; /// VarArgsFrameIndex - FrameIndex for start of varargs area. - int VarArgsFrameIndex; + int VarArgsFrameIndex = 0; /// HasITBlocks - True if IT blocks have been inserted. - bool HasITBlocks; + bool HasITBlocks = false; /// CPEClones - Track constant pool entries clones created by Constant Island /// pass. @@ -111,7 +111,7 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// ArgumentStackSize - amount of bytes on stack consumed by the arguments /// being passed on the stack - unsigned ArgumentStackSize; + unsigned ArgumentStackSize = 0; /// CoalescedWeights - mapping of basic blocks to the rolling counter of /// coalesced weights. @@ -119,26 +119,16 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// True if this function has a subset of CSRs that is handled explicitly via /// copies. - bool IsSplitCSR; + bool IsSplitCSR = false; /// Globals that have had their storage promoted into the constant pool. SmallPtrSet<const GlobalVariable*,2> PromotedGlobals; /// The amount the literal pool has been increasedby due to promoted globals. - int PromotedGlobalsIncrease; + int PromotedGlobalsIncrease = 0; public: - ARMFunctionInfo() : - isThumb(false), - hasThumb2(false), - ArgRegsSaveSize(0), ReturnRegsCount(0), HasStackFrame(false), - RestoreSPFromFP(false), - LRSpilledForFarJump(false), - FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), - GPRCS1Size(0), GPRCS2Size(0), DPRCSAlignGapSize(0), DPRCSSize(0), - NumAlignedDPRCS2Regs(0), PICLabelUId(0), - VarArgsFrameIndex(0), HasITBlocks(false), IsSplitCSR(false), - PromotedGlobalsIncrease(0) {} + ARMFunctionInfo() = default; explicit ARMFunctionInfo(MachineFunction &MF); @@ -250,6 +240,7 @@ public: PromotedGlobalsIncrease = Sz; } }; -} // End llvm namespace -#endif +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_ARM_ARMMACHINEFUNCTIONINFO_H diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp index 324087d670b5..08f3da738868 100644 --- a/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp @@ -13,11 +13,15 @@ #include "ARMRegisterBankInfo.h" #include "ARMInstrInfo.h" // For the register classes +#include "ARMSubtarget.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetRegisterInfo.h" +#define GET_TARGET_REGBANK_IMPL +#include "ARMGenRegisterBank.inc" + using namespace llvm; #ifndef LLVM_BUILD_GLOBAL_ISEL @@ -29,44 +33,109 @@ using namespace llvm; // into an ARMGenRegisterBankInfo.def (similar to AArch64). namespace llvm { namespace ARM { -const uint32_t GPRCoverageData[] = { - // Classes 0-31 - (1u << ARM::GPRRegClassID) | (1u << ARM::GPRwithAPSRRegClassID) | - (1u << ARM::GPRnopcRegClassID) | (1u << ARM::rGPRRegClassID) | - (1u << ARM::hGPRRegClassID) | (1u << ARM::tGPRRegClassID) | - (1u << ARM::GPRnopc_and_hGPRRegClassID) | - (1u << ARM::hGPR_and_rGPRRegClassID) | (1u << ARM::tcGPRRegClassID) | - (1u << ARM::tGPR_and_tcGPRRegClassID) | (1u << ARM::GPRspRegClassID) | - (1u << ARM::hGPR_and_tcGPRRegClassID), - // Classes 32-63 - 0, - // Classes 64-96 - 0, - // FIXME: Some of the entries below this point can be safely removed once - // this is tablegenerated. It's only needed because of the hardcoded - // register class limit. - // Classes 97-128 - 0, - // Classes 129-160 - 0, - // Classes 161-192 - 0, - // Classes 193-224 - 0, +enum PartialMappingIdx { + PMI_GPR, + PMI_SPR, + PMI_DPR, + PMI_Min = PMI_GPR, +}; + +RegisterBankInfo::PartialMapping PartMappings[]{ + // GPR Partial Mapping + {0, 32, GPRRegBank}, + // SPR Partial Mapping + {0, 32, FPRRegBank}, + // DPR Partial Mapping + {0, 64, FPRRegBank}, }; -RegisterBank GPRRegBank(ARM::GPRRegBankID, "GPRB", 32, ARM::GPRCoverageData); -RegisterBank *RegBanks[] = {&GPRRegBank}; +#ifndef NDEBUG +static bool checkPartMapping(const RegisterBankInfo::PartialMapping &PM, + unsigned Start, unsigned Length, + unsigned RegBankID) { + return PM.StartIdx == Start && PM.Length == Length && + PM.RegBank->getID() == RegBankID; +} + +static void checkPartialMappings() { + assert( + checkPartMapping(PartMappings[PMI_GPR - PMI_Min], 0, 32, GPRRegBankID) && + "Wrong mapping for GPR"); + assert( + checkPartMapping(PartMappings[PMI_SPR - PMI_Min], 0, 32, FPRRegBankID) && + "Wrong mapping for SPR"); + assert( + checkPartMapping(PartMappings[PMI_DPR - PMI_Min], 0, 64, FPRRegBankID) && + "Wrong mapping for DPR"); +} +#endif -RegisterBankInfo::PartialMapping GPRPartialMapping{0, 32, GPRRegBank}; +enum ValueMappingIdx { + InvalidIdx = 0, + GPR3OpsIdx = 1, + SPR3OpsIdx = 4, + DPR3OpsIdx = 7, +}; RegisterBankInfo::ValueMapping ValueMappings[] = { - {&GPRPartialMapping, 1}, {&GPRPartialMapping, 1}, {&GPRPartialMapping, 1}}; + // invalid + {nullptr, 0}, + // 3 ops in GPRs + {&PartMappings[PMI_GPR - PMI_Min], 1}, + {&PartMappings[PMI_GPR - PMI_Min], 1}, + {&PartMappings[PMI_GPR - PMI_Min], 1}, + // 3 ops in SPRs + {&PartMappings[PMI_SPR - PMI_Min], 1}, + {&PartMappings[PMI_SPR - PMI_Min], 1}, + {&PartMappings[PMI_SPR - PMI_Min], 1}, + // 3 ops in DPRs + {&PartMappings[PMI_DPR - PMI_Min], 1}, + {&PartMappings[PMI_DPR - PMI_Min], 1}, + {&PartMappings[PMI_DPR - PMI_Min], 1}}; + +#ifndef NDEBUG +static bool checkValueMapping(const RegisterBankInfo::ValueMapping &VM, + RegisterBankInfo::PartialMapping *BreakDown) { + return VM.NumBreakDowns == 1 && VM.BreakDown == BreakDown; +} + +static void checkValueMappings() { + assert(checkValueMapping(ValueMappings[GPR3OpsIdx], + &PartMappings[PMI_GPR - PMI_Min]) && + "Wrong value mapping for 3 GPR ops instruction"); + assert(checkValueMapping(ValueMappings[GPR3OpsIdx + 1], + &PartMappings[PMI_GPR - PMI_Min]) && + "Wrong value mapping for 3 GPR ops instruction"); + assert(checkValueMapping(ValueMappings[GPR3OpsIdx + 2], + &PartMappings[PMI_GPR - PMI_Min]) && + "Wrong value mapping for 3 GPR ops instruction"); + + assert(checkValueMapping(ValueMappings[SPR3OpsIdx], + &PartMappings[PMI_SPR - PMI_Min]) && + "Wrong value mapping for 3 SPR ops instruction"); + assert(checkValueMapping(ValueMappings[SPR3OpsIdx + 1], + &PartMappings[PMI_SPR - PMI_Min]) && + "Wrong value mapping for 3 SPR ops instruction"); + assert(checkValueMapping(ValueMappings[SPR3OpsIdx + 2], + &PartMappings[PMI_SPR - PMI_Min]) && + "Wrong value mapping for 3 SPR ops instruction"); + + assert(checkValueMapping(ValueMappings[DPR3OpsIdx], + &PartMappings[PMI_DPR - PMI_Min]) && + "Wrong value mapping for 3 DPR ops instruction"); + assert(checkValueMapping(ValueMappings[DPR3OpsIdx + 1], + &PartMappings[PMI_DPR - PMI_Min]) && + "Wrong value mapping for 3 DPR ops instruction"); + assert(checkValueMapping(ValueMappings[DPR3OpsIdx + 2], + &PartMappings[PMI_DPR - PMI_Min]) && + "Wrong value mapping for 3 DPR ops instruction"); +} +#endif } // end namespace arm } // end namespace llvm ARMRegisterBankInfo::ARMRegisterBankInfo(const TargetRegisterInfo &TRI) - : RegisterBankInfo(ARM::RegBanks, ARM::NumRegisterBanks) { + : ARMGenRegisterBankInfo() { static bool AlreadyInit = false; // We have only one set of register banks, whatever the subtarget // is. Therefore, the initialization of the RegBanks table should be @@ -97,6 +166,11 @@ ARMRegisterBankInfo::ARMRegisterBankInfo(const TargetRegisterInfo &TRI) assert(RBGPR.covers(*TRI.getRegClass(ARM::tGPR_and_tcGPRRegClassID)) && "Subclass not added?"); assert(RBGPR.getSize() == 32 && "GPRs should hold up to 32-bit"); + +#ifndef NDEBUG + ARM::checkPartialMappings(); + ARM::checkValueMappings(); +#endif } const RegisterBank &ARMRegisterBankInfo::getRegBankFromRegClass( @@ -105,8 +179,16 @@ const RegisterBank &ARMRegisterBankInfo::getRegBankFromRegClass( switch (RC.getID()) { case GPRRegClassID: + case GPRnopcRegClassID: + case GPRspRegClassID: case tGPR_and_tcGPRRegClassID: + case tGPRRegClassID: return getRegBank(ARM::GPRRegBankID); + case SPR_8RegClassID: + case SPRRegClassID: + case DPR_8RegClassID: + case DPRRegClassID: + return getRegBank(ARM::FPRRegBankID); default: llvm_unreachable("Unsupported register kind"); } @@ -128,23 +210,83 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { using namespace TargetOpcode; + const MachineFunction &MF = *MI.getParent()->getParent(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + unsigned NumOperands = MI.getNumOperands(); - const ValueMapping *OperandsMapping = &ARM::ValueMappings[0]; + const ValueMapping *OperandsMapping = &ARM::ValueMappings[ARM::GPR3OpsIdx]; switch (Opc) { case G_ADD: - case G_LOAD: + case G_SEXT: + case G_ZEXT: + case G_GEP: // FIXME: We're abusing the fact that everything lives in a GPR for now; in // the real world we would use different mappings. - OperandsMapping = &ARM::ValueMappings[0]; + OperandsMapping = &ARM::ValueMappings[ARM::GPR3OpsIdx]; + break; + case G_LOAD: + case G_STORE: + OperandsMapping = + Ty.getSizeInBits() == 64 + ? getOperandsMapping({&ARM::ValueMappings[ARM::DPR3OpsIdx], + &ARM::ValueMappings[ARM::GPR3OpsIdx]}) + : &ARM::ValueMappings[ARM::GPR3OpsIdx]; + break; + case G_FADD: + assert((Ty.getSizeInBits() == 32 || Ty.getSizeInBits() == 64) && + "Unsupported size for G_FADD"); + OperandsMapping = Ty.getSizeInBits() == 64 + ? &ARM::ValueMappings[ARM::DPR3OpsIdx] + : &ARM::ValueMappings[ARM::SPR3OpsIdx]; break; + case G_CONSTANT: case G_FRAME_INDEX: - OperandsMapping = getOperandsMapping({&ARM::ValueMappings[0], nullptr}); + OperandsMapping = + getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr}); break; + case G_SEQUENCE: { + // We only support G_SEQUENCE for creating a double precision floating point + // value out of two GPRs. + LLT Ty1 = MRI.getType(MI.getOperand(1).getReg()); + LLT Ty2 = MRI.getType(MI.getOperand(3).getReg()); + if (Ty.getSizeInBits() != 64 || Ty1.getSizeInBits() != 32 || + Ty2.getSizeInBits() != 32) + return InstructionMapping{}; + OperandsMapping = + getOperandsMapping({&ARM::ValueMappings[ARM::DPR3OpsIdx], + &ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr, + &ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr}); + break; + } + case G_EXTRACT: { + // We only support G_EXTRACT for splitting a double precision floating point + // value into two GPRs. + LLT Ty1 = MRI.getType(MI.getOperand(1).getReg()); + if (Ty.getSizeInBits() != 32 || Ty1.getSizeInBits() != 64 || + MI.getOperand(2).getImm() % 32 != 0) + return InstructionMapping{}; + OperandsMapping = getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], + &ARM::ValueMappings[ARM::DPR3OpsIdx], + nullptr, nullptr}); + break; + } default: return InstructionMapping{}; } +#ifndef NDEBUG + for (unsigned i = 0; i < NumOperands; i++) { + for (const auto &Mapping : OperandsMapping[i]) { + assert( + (Mapping.RegBank->getID() != ARM::FPRRegBankID || + MF.getSubtarget<ARMSubtarget>().hasVFP2()) && + "Trying to use floating point register bank on target without vfp"); + } + } +#endif + return InstructionMapping{DefaultMappingID, /*Cost=*/1, OperandsMapping, NumOperands}; } diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.h b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.h index 773920ee57a7..5222c1e6389f 100644 --- a/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.h @@ -16,19 +16,20 @@ #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#define GET_REGBANK_DECLARATIONS +#include "ARMGenRegisterBank.inc" + namespace llvm { class TargetRegisterInfo; -namespace ARM { -enum { - GPRRegBankID = 0, // General purpose registers - NumRegisterBanks, +class ARMGenRegisterBankInfo : public RegisterBankInfo { +#define GET_TARGET_REGBANK_CLASS +#include "ARMGenRegisterBank.inc" }; -} // end namespace ARM /// This class provides the information for the target register banks. -class ARMRegisterBankInfo final : public RegisterBankInfo { +class ARMRegisterBankInfo final : public ARMGenRegisterBankInfo { public: ARMRegisterBankInfo(const TargetRegisterInfo &TRI); diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterBanks.td b/contrib/llvm/lib/Target/ARM/ARMRegisterBanks.td new file mode 100644 index 000000000000..7cd2d60d36a4 --- /dev/null +++ b/contrib/llvm/lib/Target/ARM/ARMRegisterBanks.td @@ -0,0 +1,14 @@ +//=- ARMRegisterBank.td - Describe the AArch64 Banks ---------*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +def GPRRegBank : RegisterBank<"GPRB", [GPR, GPRwithAPSR]>; +def FPRRegBank : RegisterBank<"FPRB", [SPR, DPR]>; diff --git a/contrib/llvm/lib/Target/ARM/ARMSchedule.td b/contrib/llvm/lib/Target/ARM/ARMSchedule.td index b7d2d34614df..87eb4c2b9074 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSchedule.td +++ b/contrib/llvm/lib/Target/ARM/ARMSchedule.td @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// Instruction scheduling annotations for out-of-order CPUs. +// Instruction scheduling annotations for in-order and out-of-order CPUs. // These annotations are independent of the itinerary class defined below. // Here we define the subtarget independent read/write per-operand resources. // The subtarget schedule definitions will then map these to the subtarget's @@ -54,6 +54,9 @@ // } // def : ReadAdvance<ReadAdvanceALUsr, 3>; +//===----------------------------------------------------------------------===// +// Sched definitions for integer pipeline instructions +// // Basic ALU operation. def WriteALU : SchedWrite; def ReadALU : SchedRead; @@ -69,24 +72,65 @@ def WriteCMP : SchedWrite; def WriteCMPsi : SchedWrite; def WriteCMPsr : SchedWrite; -// Division. -def WriteDiv : SchedWrite; +// Multiplys. +def WriteMUL16 : SchedWrite; // 16-bit multiply. +def WriteMUL32 : SchedWrite; // 32-bit multiply. +def WriteMUL64Lo : SchedWrite; // 64-bit result. Low reg. +def WriteMUL64Hi : SchedWrite; // 64-bit result. High reg. +def ReadMUL : SchedRead; + +// Multiply-accumulates. +def WriteMAC16 : SchedWrite; // 16-bit mac. +def WriteMAC32 : SchedWrite; // 32-bit mac. +def WriteMAC64Lo : SchedWrite; // 64-bit mac. Low reg. +def WriteMAC64Hi : SchedWrite; // 64-bit mac. High reg. +def ReadMAC : SchedRead; + +// Divisions. +def WriteDIV : SchedWrite; -// Loads. +// Loads/Stores. def WriteLd : SchedWrite; def WritePreLd : SchedWrite; +def WriteST : SchedWrite; // Branches. def WriteBr : SchedWrite; def WriteBrL : SchedWrite; def WriteBrTbl : SchedWrite; -// Fixpoint conversions. -def WriteCvtFP : SchedWrite; - // Noop. def WriteNoop : SchedWrite; +//===----------------------------------------------------------------------===// +// Sched definitions for floating-point and neon instructions +// +// Floating point conversions +def WriteFPCVT : SchedWrite; +def WriteFPMOV : SchedWrite; // FP -> GPR and vice-versa + +// ALU operations (32/64-bit) +def WriteFPALU32 : SchedWrite; +def WriteFPALU64 : SchedWrite; + +// Multiplication +def WriteFPMUL32 : SchedWrite; +def WriteFPMUL64 : SchedWrite; +def ReadFPMUL : SchedRead; // multiplier read +def ReadFPMAC : SchedRead; // accumulator read + +// Multiply-accumulate +def WriteFPMAC32 : SchedWrite; +def WriteFPMAC64 : SchedWrite; + +// Division +def WriteFPDIV32 : SchedWrite; +def WriteFPDIV64 : SchedWrite; + +// Square-root +def WriteFPSQRT32 : SchedWrite; +def WriteFPSQRT64 : SchedWrite; + // Define TII for use in SchedVariant Predicates. def : PredicateProlog<[{ const ARMBaseInstrInfo *TII = diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td b/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td index 519e595bd184..8fb8a2a3b6d2 100644 --- a/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td +++ b/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td @@ -1944,6 +1944,16 @@ def A9WriteMHi : SchedWriteRes<[A9UnitMul]> { let Latency = 5; def A9WriteM16 : SchedWriteRes<[A9UnitMul]> { let Latency = 3; } def A9WriteM16Hi : SchedWriteRes<[A9UnitMul]> { let Latency = 4; let NumMicroOps = 0; } +def : SchedAlias<WriteMUL16, A9WriteM16>; +def : SchedAlias<WriteMUL32, A9WriteM>; +def : SchedAlias<WriteMUL64Lo, A9WriteM>; +def : SchedAlias<WriteMUL64Hi, A9WriteMHi>; +def : SchedAlias<WriteMAC16, A9WriteM16>; +def : SchedAlias<WriteMAC32, A9WriteM>; +def : SchedAlias<WriteMAC64Lo, A9WriteM>; +def : SchedAlias<WriteMAC64Hi, A9WriteMHi>; +def : ReadAdvance<ReadMUL, 0>; +def : ReadAdvance<ReadMAC, 0>; // Floating-point // Only one FP or AGU instruction may issue per cycle. We model this @@ -1953,6 +1963,7 @@ def A9WriteFMov : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 1; } def A9WriteFMulS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 5; } def A9WriteFMulD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 6; } def A9WriteFMAS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 8; } + def A9WriteFMAD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; } def A9WriteFDivS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 15; } def A9WriteFDivD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 25; } @@ -1992,6 +2003,7 @@ def A9WriteAdr : SchedWriteRes<[A9UnitAGU]> { let NumMicroOps = 0; } // Load Integer. def A9WriteL : SchedWriteRes<[A9UnitLS]> { let Latency = 3; } +def : SchedAlias<WriteLd, A9WriteL>; // Load the upper 32-bits using the same micro-op. def A9WriteLHi : SchedWriteRes<[]> { let Latency = 3; let NumMicroOps = 0; } @@ -2471,6 +2483,34 @@ def : SchedAlias<WriteALUsr, A9WriteALUsr>; def : SchedAlias<WriteALUSsr, A9WriteALUsr>; def : SchedAlias<ReadALU, A9ReadALU>; def : SchedAlias<ReadALUsr, A9ReadALU>; +def : SchedAlias<WriteST, A9WriteS>; + +// ===---------------------------------------------------------------------===// +// Floating-point. Map target defined SchedReadWrite to processor specific ones +// +def : WriteRes<WriteFPCVT, [A9UnitFP, A9UnitAGU]> { let Latency = 4; } +def : SchedAlias<WriteFPMOV, A9WriteFMov>; + +def : SchedAlias<WriteFPALU32, A9WriteF>; +def : SchedAlias<WriteFPALU64, A9WriteF>; + +def : SchedAlias<WriteFPMUL32, A9WriteFMulS>; +def : SchedAlias<WriteFPMUL64, A9WriteFMulD>; + +def : SchedAlias<WriteFPMAC32, A9WriteFMAS>; +def : SchedAlias<WriteFPMAC64, A9WriteFMAD>; + +def : SchedAlias<WriteFPDIV32, A9WriteFDivS>; +def : SchedAlias<WriteFPDIV64, A9WriteFDivD>; +def : SchedAlias<WriteFPSQRT32, A9WriteFSqrtS>; +def : SchedAlias<WriteFPSQRT64, A9WriteFSqrtD>; + +def : ReadAdvance<ReadFPMUL, 0>; +def : ReadAdvance<ReadFPMAC, 0>; + +// ===---------------------------------------------------------------------===// +// Subtarget-specific overrides. Map opcodes to list of SchedReadWrite types. +// def : InstRW< [WriteALU], (instregex "ANDri", "ORRri", "EORri", "BICri", "ANDrr", "ORRrr", "EORrr", "BICrr")>; @@ -2518,12 +2558,11 @@ def : InstRW<[A9WriteLb], "LDRH", "LDRSH", "LDRSB")>; def : InstRW<[A9WriteLbsi], (instregex "LDRrs")>; -def : WriteRes<WriteDiv, []> { let Latency = 0; } +def : WriteRes<WriteDIV, []> { let Latency = 0; } def : WriteRes<WriteBr, [A9UnitB]>; def : WriteRes<WriteBrL, [A9UnitB]>; def : WriteRes<WriteBrTbl, [A9UnitB]>; def : WriteRes<WritePreLd, []>; -def : SchedAlias<WriteCvtFP, A9WriteF>; def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; } } // SchedModel = CortexA9Model diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleR52.td b/contrib/llvm/lib/Target/ARM/ARMScheduleR52.td index 1b40742a093b..537e5da9669f 100644 --- a/contrib/llvm/lib/Target/ARM/ARMScheduleR52.td +++ b/contrib/llvm/lib/Target/ARM/ARMScheduleR52.td @@ -70,15 +70,13 @@ def : WriteRes<WriteCMP, [R52UnitALU]> { let Latency = 0; } def : WriteRes<WriteCMPsi, [R52UnitALU]> { let Latency = 0; } def : WriteRes<WriteCMPsr, [R52UnitALU]> { let Latency = 0; } +// Multiply - aliased to sub-target specific later + // Div - may stall 0-9 cycles depending on input (i.e. WRI+(0-9)/2) -def : WriteRes<WriteDiv, [R52UnitDiv]> { - let Latency = 8; let ResourceCycles = [8]; // not pipelined +def : WriteRes<WriteDIV, [R52UnitDiv]> { + let Latency = 8; let ResourceCycles = [8]; // non-pipelined } -// Loads -def : WriteRes<WriteLd, [R52UnitLd]> { let Latency = 4; } -def : WriteRes<WritePreLd, [R52UnitLd]> { let Latency = 4; } - // Branches - LR written in Late EX2 def : WriteRes<WriteBr, [R52UnitB]> { let Latency = 0; } def : WriteRes<WriteBrL, [R52UnitB]> { let Latency = 0; } @@ -86,11 +84,44 @@ def : WriteRes<WriteBrTbl, [R52UnitALU]> { let Latency = 0; } // Misc def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; } -def : WriteRes<WriteCvtFP, [R52UnitALU]> { let Latency = 3; } +// Integer pipeline by-passes def : ReadAdvance<ReadALU, 1>; // Operand needed in EX1 stage def : ReadAdvance<ReadALUsr, 0>; // Shift operands needed in ISS +def : ReadAdvance<ReadMUL, 0>; +def : ReadAdvance<ReadMAC, 0>; + +// Floating-point. Map target-defined SchedReadWrites to subtarget +def : WriteRes<WriteFPMUL32, [R52UnitFPMUL]> { let Latency = 6; } + +def : WriteRes<WriteFPMUL64, [R52UnitFPMUL, R52UnitFPMUL]> { + let Latency = 6; +} + +def : WriteRes<WriteFPMAC32, [R52UnitFPMUL, R52UnitFPALU]> { + let Latency = 11; // as it is internally two insns (MUL then ADD) +} +def : WriteRes<WriteFPMAC64, [R52UnitFPMUL, R52UnitFPMUL, + R52UnitFPALU, R52UnitFPALU]> { + let Latency = 11; +} + +def : WriteRes<WriteFPDIV32, [R52UnitDiv]> { + let Latency = 7; // FP div takes fixed #cycles + let ResourceCycles = [7]; // is not pipelined +} + +def : WriteRes<WriteFPDIV64, [R52UnitDiv]> { + let Latency = 17; + let ResourceCycles = [17]; +} + +def : WriteRes<WriteFPSQRT32, [R52UnitDiv]> { let Latency = 7; } +def : WriteRes<WriteFPSQRT64, [R52UnitDiv]> { let Latency = 17; } + +def : ReadAdvance<ReadFPMUL, 1>; // mul operand read in F1 +def : ReadAdvance<ReadFPMAC, 1>; // fp-mac operand read in F1 //===----------------------------------------------------------------------===// // Subtarget-specific SchedReadWrites. @@ -106,6 +137,9 @@ def : ReadAdvance<R52Read_F2, 2>; // Cortex-R52 specific SchedWrites for use with InstRW def R52WriteMAC : SchedWriteRes<[R52UnitMAC]> { let Latency = 4; } +def R52WriteMACHi : SchedWriteRes<[R52UnitMAC]> { + let Latency = 4; let NumMicroOps = 0; +} def R52WriteDIV : SchedWriteRes<[R52UnitDiv]> { let Latency = 8; let ResourceCycles = [8]; // not pipelined } @@ -120,6 +154,19 @@ def R52WriteALU_WRI : SchedWriteRes<[R52UnitALU]> { let Latency = 4; } def R52WriteNoRSRC_EX2 : SchedWriteRes<[]> { let Latency = 3; } def R52WriteNoRSRC_WRI : SchedWriteRes<[]> { let Latency = 4; } +// Alias generics to sub-target specific +def : SchedAlias<WriteMUL16, R52WriteMAC>; +def : SchedAlias<WriteMUL32, R52WriteMAC>; +def : SchedAlias<WriteMUL64Lo, R52WriteMAC>; +def : SchedAlias<WriteMUL64Hi, R52WriteMACHi>; +def : SchedAlias<WriteMAC16, R52WriteMAC>; +def : SchedAlias<WriteMAC32, R52WriteMAC>; +def : SchedAlias<WriteMAC64Lo, R52WriteMAC>; +def : SchedAlias<WriteMAC64Hi, R52WriteMACHi>; +def : SchedAlias<WritePreLd, R52WriteLd>; +def : SchedAlias<WriteLd, R52WriteLd>; +def : SchedAlias<WriteST, R52WriteST>; + def R52WriteFPALU_F3 : SchedWriteRes<[R52UnitFPALU]> { let Latency = 4; } def R52Write2FPALU_F3 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> { let Latency = 4; @@ -147,19 +194,17 @@ def R52Write2FPMAC_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL, def R52WriteFPLd_F4 : SchedWriteRes<[R52UnitLd]> { let Latency = 5; } def R52WriteFPST_F4 : SchedWriteRes<[R52UnitLd]> { let Latency = 5; } -def R52WriteFPDIV_SP : SchedWriteRes<[R52UnitFPDIV]> { - let Latency = 7; // FP div takes fixed #cycles - let ResourceCycles = [7]; // is not pipelined - } -def R52WriteFPDIV_DP : SchedWriteRes<[R52UnitFPDIV]> { - let Latency = 17; - let ResourceCycles = [17]; -} - - //===----------------------------------------------------------------------===// -// Subtarget-specific - map operands to SchedReadWrites +// Floating-point. Map target defined SchedReadWrites to processor specific ones +// +def : SchedAlias<WriteFPCVT, R52WriteFPALU_F5>; +def : SchedAlias<WriteFPMOV, R52WriteFPALU_F3>; +def : SchedAlias<WriteFPALU32, R52WriteFPALU_F5>; +def : SchedAlias<WriteFPALU64, R52WriteFPALU_F5>; +//===----------------------------------------------------------------------===// +// Subtarget-specific overrides. Map opcodes to list of SchedReadWrites types. +// def : InstRW<[WriteALU], (instrs COPY)>; def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS], @@ -235,7 +280,7 @@ def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS, R52Read_ISS], "t2SMLSLD", "t2SMLSLDX", "t2UMAAL")>; def : InstRW <[R52WriteDIV, R52Read_ISS, R52Read_ISS], - (instregex "SDIV", "UDIV", "t2SDIV", "t2UDIV")>; + (instregex "t2SDIV", "t2UDIV")>; // Loads (except POST) with SHL > 2, or ror, require 2 extra cycles. // However, that's non-trivial to specify, so we keep it uniform @@ -294,15 +339,6 @@ def : InstRW<[R52WriteCC, R52Read_ISS], (instregex "TST")>; def : InstRW<[R52WriteLd], (instregex "MRS", "MRSbanked")>; def : InstRW<[R52WriteLd, R52Read_EX1], (instregex "MSR", "MSRbanked")>; -//def : InstRW<[R52WriteLd, R52Read_ISS], (instregex "^LDRB?(_PRE_IMM|_POST_IMM)", "LDRrs")>; -//def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS], (instregex "^LDRB?_PRE_REG", "LDRB?rr")>; -//def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS], (instregex "^LDRB?_POST_REG")>; - -//def : InstRW<[R52WriteST, R52Read_ISS], (instregex "STRi12", "PICSTR")>; -//def : InstRW<[R52WriteST, R52WriteAdr, R52Read_ISS, R52Read_EX2], (instregex "t2STRB?_PRE_REG", "STRB?_PRE_REG")>; -//def : InstRW<[R52WriteST, R52WriteAdr, R52Read_ISS, R52Read_EX2], (instregex "t2STRB?_POST_REG", "STRB?_POST_REG")>; - - // Integer Load, Multiple. foreach Lat = 3-25 in { def R52WriteILDM#Lat#Cy : SchedWriteRes<[R52UnitLd]> { @@ -492,12 +528,6 @@ def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1], (instregex "(VACGE|VAC def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(D|S|H|fd|hd)")>; def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(fq|hq)")>; -def : InstRW<[R52WriteFPDIV_SP, R52Read_F0, R52Read_F0], (instregex "VDIV(S|H)")>; -def : InstRW<[R52WriteFPDIV_DP, R52Read_F0, R52Read_F0], (instregex "VDIVD")>; - -def : InstRW<[R52WriteFPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], - (instregex "(VFMA|VFMS|VFNMA|VFNMS)(D|H|S)")>; - def : InstRW<[R52WriteFPLd_F4, R52Read_ISS, R52Read_F1], (instregex "VLDR")>; def : InstRW<[R52WriteFPST_F4, R52Read_ISS, R52Read_F1], (instregex "VSTR")>; @@ -687,16 +717,19 @@ def R52WriteVLD2Mem : SchedWriteRes<[R52UnitLd]> { let Latency = 6; let NumMicroOps = 3; let ResourceCycles = [2]; + let SingleIssue = 1; } def R52WriteVLD3Mem : SchedWriteRes<[R52UnitLd]> { let Latency = 7; let NumMicroOps = 5; let ResourceCycles = [3]; + let SingleIssue = 1; } def R52WriteVLD4Mem : SchedWriteRes<[R52UnitLd]> { let Latency = 8; let NumMicroOps = 7; let ResourceCycles = [4]; + let SingleIssue = 1; } def R52WriteVST1Mem : SchedWriteRes<[R52UnitLd]> { let Latency = 5; @@ -777,9 +810,8 @@ def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2], (instregex "(VHADD|VHS def : InstRW<[R52WriteVLDM], (instregex "VLDM[SD](IA|DB)$")>; def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VMAX", "VMIN", "VPMAX", "VPMIN")>; -def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VMOV", "VORR", "VORN", "VREV")>; +def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VORR", "VORN", "VREV")>; def : InstRW<[R52WriteNoRSRC_WRI], (instregex "VMRS")>; -def : InstRW<[R52WriteFPMUL_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VMUL", "VNMUL", "VMLA")>; def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VNEG")>; def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADDi")>; def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADAL", "VPADDL")>; diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td b/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td index ea2bf4b578f0..dc041c6c6006 100644 --- a/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td +++ b/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td @@ -133,6 +133,8 @@ let SchedModel = SwiftModel in { def : SchedAlias<WriteALUSsr, SwiftWriteALUSsr>; def : ReadAdvance<ReadALU, 0>; def : SchedAlias<ReadALUsr, SwiftReadAdvanceALUsr>; + def : SchedAlias<WriteLd, SwiftWriteP2ThreeCycle>; + def : SchedAlias<WriteST, SwiftWriteP2>; def SwiftChooseShiftKindP01OneOrTwoCycle : SchedWriteVariant<[ @@ -166,10 +168,10 @@ let SchedModel = SwiftModel in { def : InstRW<[SwiftWriteP01OneCycle2x_load], (instregex "MOV_ga_pcrel_ldr", "t2MOV_ga_pcrel_ldr")>; - def SwiftWriteP0TwoCyleTwoUops : WriteSequence<[SwiftWriteP0OneCycle], 2>; + def SwiftWriteP0TwoCycleTwoUops : WriteSequence<[SwiftWriteP0OneCycle], 2>; def SwiftPredP0OneOrTwoCycle : SchedWriteVariant<[ - SchedVar<IsPredicatedPred, [ SwiftWriteP0TwoCyleTwoUops ]>, + SchedVar<IsPredicatedPred, [ SwiftWriteP0TwoCycleTwoUops ]>, SchedVar<NoSchedPred, [ SwiftWriteP0OneCycle ]> ]>; @@ -282,6 +284,18 @@ let SchedModel = SwiftModel in { let ResourceCycles = [2, 3]; } + // Aliasing sub-target specific WriteRes to generic ones + def : SchedAlias<WriteMUL16, SwiftWriteP0FourCycle>; + def : SchedAlias<WriteMUL32, SwiftWriteP0FourCycle>; + def : SchedAlias<WriteMUL64Lo, SwiftP0P0P01FiveCycle>; + def : SchedAlias<WriteMUL64Hi, SwiftWrite5Cycle>; + def : SchedAlias<WriteMAC16, SwiftPredP0P01FourFiveCycle>; + def : SchedAlias<WriteMAC32, SwiftPredP0P01FourFiveCycle>; + def : SchedAlias<WriteMAC64Lo, SwiftWrite5Cycle>; + def : SchedAlias<WriteMAC64Hi, Swift2P03P01FiveCycle>; + def : ReadAdvance<ReadMUL, 0>; + def : SchedAlias<ReadMAC, SwiftReadAdvanceFourCyclesPred>; + // 4.2.15 Integer Multiply Accumulate, Long // 4.2.16 Integer Multiply Accumulate, Dual // 4.2.17 Integer Multiply Accumulate Accumulate, Long @@ -300,7 +314,7 @@ let SchedModel = SwiftModel in { let ResourceCycles = [1, 14]; } // 4.2.18 Integer Divide - def : WriteRes<WriteDiv, [SwiftUnitDiv]>; // Workaround. + def : WriteRes<WriteDIV, [SwiftUnitDiv]>; // Workaround. def : InstRW <[SwiftDiv], (instregex "SDIV", "UDIV", "t2SDIV", "t2UDIV")>; @@ -310,7 +324,7 @@ let SchedModel = SwiftModel in { let Latency = 3; let NumMicroOps = 2; } - def SwiftWriteP2P01FourCyle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> { + def SwiftWriteP2P01FourCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> { let Latency = 4; let NumMicroOps = 2; } @@ -343,7 +357,7 @@ let SchedModel = SwiftModel in { "tLDR(r|i|spi|pci|pciASM)")>; def : InstRW<[SwiftWriteP2ThreeCycle], (instregex "LDRH$", "PICLDR$", "PICLDR(H|B)$", "LDRcp$")>; - def : InstRW<[SwiftWriteP2P01FourCyle], + def : InstRW<[SwiftWriteP2P01FourCycle], (instregex "PICLDRS(H|B)$", "t2LDRS(H|B)(i|r|p|s)", "LDRS(H|B)$", "t2LDRpci_pic", "tLDRS(B|H)")>; def : InstRW<[SwiftWriteP2P01ThreeCycle, SwiftWrBackOne], @@ -597,8 +611,6 @@ let SchedModel = SwiftModel in { def : InstRW<[SwiftWriteP1FourCycle], (instregex "VMUL(S|v|p|f|s)", "VNMULS", "VQDMULH", "VQRDMULH", "VMULL", "VQDMULL")>; - def : InstRW<[SwiftWriteP1SixCycle], - (instregex "VMULD", "VNMULD")>; def : InstRW<[SwiftWriteP1FourCycle], (instregex "VMLA", "VMLS", "VNMLA", "VNMLS", "VFMA(S|D)", "VFMS(S|D)", "VFNMA", "VFNMS", "VMLAL", "VMLSL","VQDMLAL", "VQDMLSL")>; @@ -607,8 +619,6 @@ let SchedModel = SwiftModel in { // 4.2.36 Advanced SIMD and VFP, Convert def : InstRW<[SwiftWriteP1FourCycle], (instregex "VCVT", "V(S|U)IT", "VTO(S|U)")>; - // Fixpoint conversions. - def : WriteRes<WriteCvtFP, [SwiftUnitP1]> { let Latency = 4; } // 4.2.37 Advanced SIMD and VFP, Move def : InstRW<[SwiftWriteP0TwoCycle], @@ -1036,6 +1046,30 @@ let SchedModel = SwiftModel in { def : InstRW<[SwiftDiv17], (instregex "VDIVS", "VSQRTS")>; def : InstRW<[SwiftDiv32], (instregex "VDIVD", "VSQRTD")>; + // ===---------------------------------------------------------------------===// + // Floating-point. Map target defined SchedReadWrite to processor specific ones + // + def : SchedAlias<WriteFPCVT, SwiftWriteP1FourCycle>; + def : SchedAlias<WriteFPMOV, SwiftWriteP2ThreeCycle>; + + def : SchedAlias<WriteFPALU32, SwiftWriteP0FourCycle>; + def : SchedAlias<WriteFPALU64, SwiftWriteP0SixCycle>; + + def : SchedAlias<WriteFPMUL32, SwiftWriteP1FourCycle>; + def : SchedAlias<WriteFPMUL64, SwiftWriteP1SixCycle>; + + def : SchedAlias<WriteFPMAC32, SwiftWriteP1FourCycle>; + def : SchedAlias<WriteFPMAC64, SwiftWriteP1FourCycle>; + + def : SchedAlias<WriteFPDIV32, SwiftDiv17>; + def : SchedAlias<WriteFPSQRT32, SwiftDiv17>; + + def : SchedAlias<WriteFPDIV64, SwiftDiv32>; + def : SchedAlias<WriteFPSQRT64, SwiftDiv32>; + + def : ReadAdvance<ReadFPMUL, 0>; + def : ReadAdvance<ReadFPMAC, 0>; + // Not specified. def : InstRW<[SwiftWriteP01OneCycle2x], (instregex "ABS")>; // Preload. diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp index 3b99762f7157..33dcf9b8fef0 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -95,7 +95,7 @@ SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall( Entry.Node = Src; Entry.Ty = Type::getInt32Ty(*DAG.getContext()); - Entry.isSExt = false; + Entry.IsSExt = false; Args.push_back(Entry); } else { Entry.Node = Src; @@ -114,11 +114,11 @@ SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall( TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl) .setChain(Chain) - .setCallee( - TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant], - TLI->getPointerTy(DAG.getDataLayout())), - std::move(Args)) + .setLibCallee( + TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant], + TLI->getPointerTy(DAG.getDataLayout())), + std::move(Args)) .setDiscardResult(); std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); @@ -198,17 +198,18 @@ SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy( return Chain; // Issue loads / stores for the trailing (1 - 3) bytes. + auto getRemainingValueType = [](unsigned BytesLeft) { + return (BytesLeft >= 2) ? MVT::i16 : MVT::i8; + }; + auto getRemainingSize = [](unsigned BytesLeft) { + return (BytesLeft >= 2) ? 2 : 1; + }; + unsigned BytesLeftSave = BytesLeft; i = 0; while (BytesLeft) { - if (BytesLeft >= 2) { - VT = MVT::i16; - VTSize = 2; - } else { - VT = MVT::i8; - VTSize = 1; - } - + VT = getRemainingValueType(BytesLeft); + VTSize = getRemainingSize(BytesLeft); Loads[i] = DAG.getLoad(VT, dl, Chain, DAG.getNode(ISD::ADD, dl, MVT::i32, Src, DAG.getConstant(SrcOff, dl, MVT::i32)), @@ -224,14 +225,8 @@ SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy( i = 0; BytesLeft = BytesLeftSave; while (BytesLeft) { - if (BytesLeft >= 2) { - VT = MVT::i16; - VTSize = 2; - } else { - VT = MVT::i8; - VTSize = 1; - } - + VT = getRemainingValueType(BytesLeft); + VTSize = getRemainingSize(BytesLeft); TFOps[i] = DAG.getStore(Chain, dl, Loads[i], DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, DAG.getConstant(DstOff, dl, MVT::i32)), diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp index e2df0bddd0d1..b8a708a20a95 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -13,25 +13,27 @@ #include "ARMSubtarget.h" #include "ARMFrameLowering.h" -#include "ARMISelLowering.h" #include "ARMInstrInfo.h" -#include "ARMMachineFunctionInfo.h" -#include "ARMSelectionDAGInfo.h" #include "ARMSubtarget.h" #include "ARMTargetMachine.h" +#include "MCTargetDesc/ARMMCTargetDesc.h" #include "Thumb1FrameLowering.h" #include "Thumb1InstrInfo.h" #include "Thumb2InstrInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/IR/Attributes.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCTargetOptions.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/TargetParser.h" +#include <cassert> +#include <string> using namespace llvm; @@ -104,7 +106,7 @@ ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU, : !isThumb() ? (ARMBaseInstrInfo *)new ARMInstrInfo(*this) : (ARMBaseInstrInfo *)new Thumb2InstrInfo(*this)), - TLInfo(TM, *this), GISel() {} + TLInfo(TM, *this) {} const CallLowering *ARMSubtarget::getCallLowering() const { assert(GISel && "Access to GlobalISel APIs not set"); @@ -148,11 +150,11 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { if (isTargetDarwin()) { StringRef ArchName = TargetTriple.getArchName(); - unsigned ArchKind = llvm::ARM::parseArch(ArchName); - if (ArchKind == llvm::ARM::AK_ARMV7S) + unsigned ArchKind = ARM::parseArch(ArchName); + if (ArchKind == ARM::AK_ARMV7S) // Default to the Swift CPU when targeting armv7s/thumbv7s. CPUString = "swift"; - else if (ArchKind == llvm::ARM::AK_ARMV7K) + else if (ArchKind == ARM::AK_ARMV7K) // Default to the Cortex-a7 CPU when targeting armv7k/thumbv7k. // ARMv7k does not use SjLj exception handling. CPUString = "cortex-a7"; @@ -200,12 +202,12 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { // support in the assembler and linker to be used. This would need to be // fixed to fully support tail calls in Thumb1. // - // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take - // LR. This means if we need to reload LR, it takes an extra instructions, - // which outweighs the value of the tail call; but here we don't know yet - // whether LR is going to be used. Probably the right approach is to - // generate the tail call here and turn it back into CALL/RET in - // emitEpilogue if LR is used. + // For ARMv8-M, we /do/ implement tail calls. Doing this is tricky for v8-M + // baseline, since the LDM/POP instruction on Thumb doesn't take LR. This + // means if we need to reload LR, it takes extra instructions, which outweighs + // the value of the tail call; but here we don't know yet whether LR is going + // to be used. We generate the tail call here and turn it back into CALL/RET + // in emitEpilogue if LR is used. // Thumb1 PIC calls to external symbols use BX, so they can be tail calls, // but we need to make sure there are enough registers; the only valid @@ -274,6 +276,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { case CortexM3: case ExynosM1: case CortexR52: + case Kryo: break; case Krait: PreISelOperandLatencyAdjustment = 1; diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h index 8c8218d0f432..40993fc0aa8a 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h @@ -14,47 +14,93 @@ #ifndef LLVM_LIB_TARGET_ARM_ARMSUBTARGET_H #define LLVM_LIB_TARGET_ARM_ARMSUBTARGET_H - +#include "ARMBaseInstrInfo.h" +#include "ARMBaseRegisterInfo.h" #include "ARMFrameLowering.h" #include "ARMISelLowering.h" -#include "ARMInstrInfo.h" #include "ARMSelectionDAGInfo.h" -#include "ARMSubtarget.h" -#include "MCTargetDesc/ARMMCTargetDesc.h" -#include "Thumb1FrameLowering.h" -#include "Thumb1InstrInfo.h" -#include "Thumb2InstrInfo.h" #include "llvm/ADT/Triple.h" #include "llvm/CodeGen/GlobalISel/GISelAccessor.h" -#include "llvm/IR/DataLayout.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/MC/MCInstrItineraries.h" +#include "llvm/MC/MCSchedule.h" +#include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include <memory> #include <string> #define GET_SUBTARGETINFO_HEADER #include "ARMGenSubtargetInfo.inc" namespace llvm { + +class ARMBaseTargetMachine; class GlobalValue; class StringRef; -class TargetOptions; -class ARMBaseTargetMachine; class ARMSubtarget : public ARMGenSubtargetInfo { protected: enum ARMProcFamilyEnum { - Others, CortexA5, CortexA7, CortexA8, CortexA9, CortexA12, CortexA15, - CortexA17, CortexR4, CortexR4F, CortexR5, CortexR7, CortexR52, CortexM3, - CortexA32, CortexA35, CortexA53, CortexA57, CortexA72, CortexA73, - Krait, Swift, ExynosM1 + Others, + + CortexA12, + CortexA15, + CortexA17, + CortexA32, + CortexA35, + CortexA5, + CortexA53, + CortexA57, + CortexA7, + CortexA72, + CortexA73, + CortexA8, + CortexA9, + CortexM3, + CortexR4, + CortexR4F, + CortexR5, + CortexR52, + CortexR7, + ExynosM1, + Krait, + Kryo, + Swift }; enum ARMProcClassEnum { - None, AClass, RClass, MClass + None, + + AClass, + MClass, + RClass }; enum ARMArchEnum { - ARMv2, ARMv2a, ARMv3, ARMv3m, ARMv4, ARMv4t, ARMv5, ARMv5t, ARMv5te, - ARMv5tej, ARMv6, ARMv6k, ARMv6kz, ARMv6t2, ARMv6m, ARMv6sm, ARMv7a, ARMv7r, - ARMv7m, ARMv7em, ARMv8a, ARMv81a, ARMv82a, ARMv8mMainline, ARMv8mBaseline, + ARMv2, + ARMv2a, + ARMv3, + ARMv3m, + ARMv4, + ARMv4t, + ARMv5, + ARMv5t, + ARMv5te, + ARMv5tej, + ARMv6, + ARMv6k, + ARMv6kz, + ARMv6m, + ARMv6sm, + ARMv6t2, + ARMv7a, + ARMv7em, + ARMv7m, + ARMv7r, + ARMv7ve, + ARMv81a, + ARMv82a, + ARMv8a, + ARMv8mBaseline, + ARMv8mMainline, ARMv8r }; @@ -168,10 +214,6 @@ protected: /// HasHardwareDivideInARM - True if subtarget supports [su]div in ARM mode bool HasHardwareDivideInARM = false; - /// HasT2ExtractPack - True if subtarget supports thumb2 extract/pack - /// instructions. - bool HasT2ExtractPack = false; - /// HasDataBarrier - True if the subtarget supports DMB / DSB data barrier /// instructions. bool HasDataBarrier = false; @@ -310,6 +352,10 @@ protected: /// UseSjLjEH - If true, the target uses SjLj exception handling (e.g. iOS). bool UseSjLjEH = false; + /// Implicitly convert an instruction to a different one if its immediates + /// cannot be encoded. For example, ADD r0, r1, #FFFFFFFF -> SUB r0, r1, #1. + bool NegativeImmediates = true; + /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned stackAlignment = 4; @@ -362,6 +408,7 @@ public: unsigned getMaxInlineSizeThreshold() const { return 64; } + /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. void ParseSubtargetFeatures(StringRef CPU, StringRef FS); @@ -373,15 +420,19 @@ public: const ARMSelectionDAGInfo *getSelectionDAGInfo() const override { return &TSInfo; } + const ARMBaseInstrInfo *getInstrInfo() const override { return InstrInfo.get(); } + const ARMTargetLowering *getTargetLowering() const override { return &TLInfo; } + const ARMFrameLowering *getFrameLowering() const override { return FrameLowering.get(); } + const ARMBaseRegisterInfo *getRegisterInfo() const override { return &InstrInfo->getRegisterInfo(); } @@ -451,19 +502,21 @@ public: bool hasCRC() const { return HasCRC; } bool hasRAS() const { return HasRAS; } bool hasVirtualization() const { return HasVirtualization; } + bool useNEONForSinglePrecisionFP() const { return hasNEON() && UseNEONForSinglePrecisionFP; } bool hasDivide() const { return HasHardwareDivide; } bool hasDivideInARMMode() const { return HasHardwareDivideInARM; } - bool hasT2ExtractPack() const { return HasT2ExtractPack; } bool hasDataBarrier() const { return HasDataBarrier; } bool hasV7Clrex() const { return HasV7Clrex; } bool hasAcquireRelease() const { return HasAcquireRelease; } + bool hasAnyDataBarrier() const { return HasDataBarrier || (hasV6Ops() && !isThumb()); } + bool useMulOps() const { return UseMulOps; } bool useFPVMLx() const { return !SlowFPVMLx; } bool hasVMLxForwarding() const { return HasVMLxForwarding; } @@ -561,9 +614,10 @@ public: TargetTriple.getEnvironment() == Triple::EABIHF || isTargetWindows() || isAAPCS16_ABI(); } + bool isTargetAndroid() const { return TargetTriple.isAndroid(); } - virtual bool isXRaySupported() const override; + bool isXRaySupported() const override; bool isAPCS_ABI() const; bool isAAPCS_ABI() const; @@ -588,6 +642,7 @@ public: bool useR7AsFramePointer() const { return isTargetDarwin() || (!isTargetWindows() && isThumb()); } + /// Returns true if the frame setup is split into two separate pushes (first /// r0-r7,lr then r8-r11), principally so that the frame pointer is adjacent /// to lr. This is always required on Thumb1-only targets, as the push and @@ -656,6 +711,7 @@ public: /// True if fast-isel is used. bool useFastISel() const; }; -} // End llvm namespace -#endif // ARMSUBTARGET_H +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_ARM_ARMSUBTARGET_H diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp index 70c9567d99f8..b8dadb331ecf 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -10,30 +10,50 @@ // //===----------------------------------------------------------------------===// -#include "ARMTargetMachine.h" #include "ARM.h" #include "ARMCallLowering.h" -#include "ARMFrameLowering.h" #include "ARMInstructionSelector.h" #include "ARMLegalizerInfo.h" #include "ARMRegisterBankInfo.h" +#include "ARMSubtarget.h" +#include "ARMTargetMachine.h" #include "ARMTargetObjectFile.h" #include "ARMTargetTransformInfo.h" +#include "MCTargetDesc/ARMMCTargetDesc.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/ExecutionDepsFix.h" +#include "llvm/CodeGen/GlobalISel/CallLowering.h" +#include "llvm/CodeGen/GlobalISel/GISelAccessor.h" #include "llvm/CodeGen/GlobalISel/IRTranslator.h" #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/Legalizer.h" +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/CodeGen/GlobalISel/RegBankSelect.h" +#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/MC/MCAsmInfo.h" +#include "llvm/Pass.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetParser.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Scalar.h" +#include <cassert> +#include <memory> +#include <string> + using namespace llvm; static cl::opt<bool> @@ -57,6 +77,10 @@ static cl::opt<cl::boolOrDefault> EnableGlobalMerge("arm-global-merge", cl::Hidden, cl::desc("Enable the global merge pass")); +namespace llvm { + void initializeARMExecutionDepsFixPass(PassRegistry&); +} + extern "C" void LLVMInitializeARMTarget() { // Register the target. RegisterTargetMachine<ARMLETargetMachine> X(getTheARMLETarget()); @@ -68,14 +92,16 @@ extern "C" void LLVMInitializeARMTarget() { initializeGlobalISel(Registry); initializeARMLoadStoreOptPass(Registry); initializeARMPreAllocLoadStoreOptPass(Registry); + initializeARMConstantIslandsPass(Registry); + initializeARMExecutionDepsFixPass(Registry); } static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { if (TT.isOSBinFormatMachO()) - return make_unique<TargetLoweringObjectFileMachO>(); + return llvm::make_unique<TargetLoweringObjectFileMachO>(); if (TT.isOSWindows()) - return make_unique<TargetLoweringObjectFileCOFF>(); - return make_unique<ARMElfTargetObjectFile>(); + return llvm::make_unique<TargetLoweringObjectFileCOFF>(); + return llvm::make_unique<ARMElfTargetObjectFile>(); } static ARMBaseTargetMachine::ARMABI @@ -94,13 +120,13 @@ computeTargetABI(const Triple &TT, StringRef CPU, ARMBaseTargetMachine::ARMABI TargetABI = ARMBaseTargetMachine::ARM_ABI_UNKNOWN; - unsigned ArchKind = llvm::ARM::parseCPUArch(CPU); - StringRef ArchName = llvm::ARM::getArchName(ArchKind); + unsigned ArchKind = ARM::parseCPUArch(CPU); + StringRef ArchName = ARM::getArchName(ArchKind); // FIXME: This is duplicated code from the front end and should be unified. if (TT.isOSBinFormatMachO()) { - if (TT.getEnvironment() == llvm::Triple::EABI || - (TT.getOS() == llvm::Triple::UnknownOS && TT.isOSBinFormatMachO()) || - llvm::ARM::parseArchProfile(ArchName) == llvm::ARM::PK_M) { + if (TT.getEnvironment() == Triple::EABI || + (TT.getOS() == Triple::UnknownOS && TT.isOSBinFormatMachO()) || + ARM::parseArchProfile(ArchName) == ARM::PK_M) { TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS; } else if (TT.isWatchABI()) { TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS16; @@ -113,16 +139,16 @@ computeTargetABI(const Triple &TT, StringRef CPU, } else { // Select the default based on the platform. switch (TT.getEnvironment()) { - case llvm::Triple::Android: - case llvm::Triple::GNUEABI: - case llvm::Triple::GNUEABIHF: - case llvm::Triple::MuslEABI: - case llvm::Triple::MuslEABIHF: - case llvm::Triple::EABIHF: - case llvm::Triple::EABI: + case Triple::Android: + case Triple::GNUEABI: + case Triple::GNUEABIHF: + case Triple::MuslEABI: + case Triple::MuslEABIHF: + case Triple::EABIHF: + case Triple::EABI: TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS; break; - case llvm::Triple::GNU: + case Triple::GNU: TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS; break; default: @@ -141,7 +167,7 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU, const TargetOptions &Options, bool isLittle) { auto ABI = computeTargetABI(TT, CPU, Options); - std::string Ret = ""; + std::string Ret; if (isLittle) // Little endian. @@ -238,29 +264,35 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT, } } -ARMBaseTargetMachine::~ARMBaseTargetMachine() {} +ARMBaseTargetMachine::~ARMBaseTargetMachine() = default; #ifdef LLVM_BUILD_GLOBAL_ISEL namespace { + struct ARMGISelActualAccessor : public GISelAccessor { std::unique_ptr<CallLowering> CallLoweringInfo; std::unique_ptr<InstructionSelector> InstSelector; std::unique_ptr<LegalizerInfo> Legalizer; std::unique_ptr<RegisterBankInfo> RegBankInfo; + const CallLowering *getCallLowering() const override { return CallLoweringInfo.get(); } + const InstructionSelector *getInstructionSelector() const override { return InstSelector.get(); } + const LegalizerInfo *getLegalizerInfo() const override { return Legalizer.get(); } + const RegisterBankInfo *getRegBankInfo() const override { return RegBankInfo.get(); } }; -} // End anonymous namespace. + +} // end anonymous namespace #endif const ARMSubtarget * @@ -300,7 +332,7 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const { #else ARMGISelActualAccessor *GISel = new ARMGISelActualAccessor(); GISel->CallLoweringInfo.reset(new ARMCallLowering(*I->getTargetLowering())); - GISel->Legalizer.reset(new ARMLegalizerInfo()); + GISel->Legalizer.reset(new ARMLegalizerInfo(*I)); auto *RBI = new ARMRegisterBankInfo(*I->getRegisterInfo()); @@ -390,6 +422,7 @@ ThumbBETargetMachine::ThumbBETargetMachine(const Target &T, const Triple &TT, : ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} namespace { + /// ARM Code Generator Pass Configuration Options. class ARMPassConfig : public TargetPassConfig { public: @@ -413,7 +446,21 @@ public: void addPreSched2() override; void addPreEmitPass() override; }; -} // namespace + +class ARMExecutionDepsFix : public ExecutionDepsFix { +public: + static char ID; + ARMExecutionDepsFix() : ExecutionDepsFix(ID, ARM::DPRRegClass) {} + StringRef getPassName() const override { + return "ARM Execution Dependency Fix"; + } +}; +char ARMExecutionDepsFix::ID; + +} // end anonymous namespace + +INITIALIZE_PASS(ARMExecutionDepsFix, "arm-execution-deps-fix", + "ARM Execution Dependency Fix", false, false) TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) { return new ARMPassConfig(this, PM); @@ -508,7 +555,7 @@ void ARMPassConfig::addPreSched2() { if (EnableARMLoadStoreOpt) addPass(createARMLoadStoreOptimizationPass()); - addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass)); + addPass(new ARMExecutionDepsFix()); } // Expand some pseudo instructions into multiple instructions to allow diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h index c6b70b953162..f0ca9427d9fb 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h +++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h @@ -14,10 +14,14 @@ #ifndef LLVM_LIB_TARGET_ARM_ARMTARGETMACHINE_H #define LLVM_LIB_TARGET_ARM_ARMTARGETMACHINE_H -#include "ARMInstrInfo.h" #include "ARMSubtarget.h" -#include "llvm/IR/DataLayout.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Target/TargetMachine.h" +#include <memory> namespace llvm { @@ -32,7 +36,7 @@ public: protected: std::unique_ptr<TargetLoweringObjectFile> TLOF; - ARMSubtarget Subtarget; + ARMSubtarget Subtarget; bool isLittle; mutable StringMap<std::unique_ptr<ARMSubtarget>> SubtargetMap; @@ -62,7 +66,8 @@ public: /// class ARMTargetMachine : public ARMBaseTargetMachine { virtual void anchor(); - public: + +public: ARMTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Optional<Reloc::Model> RM, CodeModel::Model CM, @@ -73,6 +78,7 @@ class ARMTargetMachine : public ARMBaseTargetMachine { /// class ARMLETargetMachine : public ARMTargetMachine { void anchor() override; + public: ARMLETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -84,6 +90,7 @@ public: /// class ARMBETargetMachine : public ARMTargetMachine { void anchor() override; + public: ARMBETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -97,6 +104,7 @@ public: /// class ThumbTargetMachine : public ARMBaseTargetMachine { virtual void anchor(); + public: ThumbTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -108,6 +116,7 @@ public: /// class ThumbLETargetMachine : public ThumbTargetMachine { void anchor() override; + public: ThumbLETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -119,6 +128,7 @@ public: /// class ThumbBETargetMachine : public ThumbTargetMachine { void anchor() override; + public: ThumbBETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -128,4 +138,4 @@ public: } // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_ARM_ARMTARGETMACHINE_H diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp index 625c4280e1a6..94f9e8dfebbf 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp @@ -7,17 +7,20 @@ // //===----------------------------------------------------------------------===// -#include "ARMTargetObjectFile.h" +#include "ARMSubtarget.h" #include "ARMTargetMachine.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/IR/Mangler.h" +#include "ARMTargetObjectFile.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCTargetOptions.h" +#include "llvm/MC/SectionKind.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ELF.h" -#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include <cassert> + using namespace llvm; using namespace dwarf; diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h index 24e755ddac27..dbb8128269dc 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h +++ b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h @@ -11,19 +11,19 @@ #define LLVM_LIB_TARGET_ARM_ARMTARGETOBJECTFILE_H #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/MC/MCExpr.h" namespace llvm { -class MCContext; -class TargetMachine; - class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF { mutable bool genExecuteOnly = false; + protected: - const MCSection *AttributesSection; + const MCSection *AttributesSection = nullptr; + public: ARMElfTargetObjectFile() - : TargetLoweringObjectFileELF(), AttributesSection(nullptr) { + : TargetLoweringObjectFileELF() { PLTRelativeVariantKind = MCSymbolRefExpr::VK_ARM_PREL31; } @@ -47,4 +47,4 @@ public: } // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_ARM_ARMTARGETOBJECTFILE_H diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 2b6b36bc3e68..8eb9dbf5f9de 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -92,7 +92,8 @@ int ARMTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, } -int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { +int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + const Instruction *I) { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -310,7 +311,8 @@ int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, return BaseT::getVectorInstrCost(Opcode, ValTy, Index); } -int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) { +int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + const Instruction *I) { int ISD = TLI->InstructionOpcodeToISD(Opcode); // On NEON a a vector select gets lowered to vbsl. @@ -335,7 +337,7 @@ int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) { return LT.first; } - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); } int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE, @@ -504,7 +506,7 @@ int ARMTTIImpl::getArithmeticInstrCost( } int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) { + unsigned AddressSpace, const Instruction *I) { std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src); if (Src->isVectorTy() && Alignment != 16 && @@ -529,12 +531,14 @@ int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits) { unsigned NumElts = VecTy->getVectorNumElements(); - Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor); - unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy); + auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor); // vldN/vstN only support legal vector types of size 64 or 128 in bits. - if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128)) - return Factor; + // Accesses having vector types that are a multiple of 128 bits can be + // matched to more than one vldN/vstN instruction. + if (NumElts % Factor == 0 && + TLI->isLegalInterleavedAccessType(SubVecTy, DL)) + return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL); } return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index 3c83cd92a61a..7de0543dfa5e 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -33,10 +33,6 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> { const ARMSubtarget *ST; const ARMTargetLowering *TLI; - /// Estimate the overhead of scalarizing an instruction. Insert and Extract - /// are set if the result needs to be inserted and/or extracted from vectors. - unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract); - const ARMSubtarget *getST() const { return ST; } const ARMTargetLowering *getTLI() const { return TLI; } @@ -98,9 +94,11 @@ public: int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp); - int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src); + int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + const Instruction *I = nullptr); - int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy); + int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + const Instruction *I = nullptr); int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); @@ -118,7 +116,7 @@ public: ArrayRef<const Value *> Args = ArrayRef<const Value *>()); int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace); + unsigned AddressSpace, const Instruction *I = nullptr); int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, unsigned Alignment, diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index c243a2d35979..f421d3ac1693 100644 --- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -915,40 +915,37 @@ public: int Val = ARM_AM::getFP32Imm(APInt(32, CE->getValue())); return Val != -1; } - bool isFBits16() const { + + template<int64_t N, int64_t M> + bool isImmediate() const { if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); - return Value >= 0 && Value <= 16; + return Value >= N && Value <= M; } - bool isFBits32() const { + template<int64_t N, int64_t M> + bool isImmediateS4() const { if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); - return Value >= 1 && Value <= 32; + return ((Value & 3) == 0) && Value >= N && Value <= M; + } + bool isFBits16() const { + return isImmediate<0, 17>(); + } + bool isFBits32() const { + return isImmediate<1, 33>(); } bool isImm8s4() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return ((Value & 3) == 0) && Value >= -1020 && Value <= 1020; + return isImmediateS4<-1020, 1020>(); } bool isImm0_1020s4() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return ((Value & 3) == 0) && Value >= 0 && Value <= 1020; + return isImmediateS4<0, 1020>(); } bool isImm0_508s4() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return ((Value & 3) == 0) && Value >= 0 && Value <= 508; + return isImmediateS4<0, 508>(); } bool isImm0_508s4Neg() const { if (!isImm()) return false; @@ -958,27 +955,6 @@ public: // explicitly exclude zero. we want that to use the normal 0_508 version. return ((Value & 3) == 0) && Value > 0 && Value <= 508; } - bool isImm0_239() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value >= 0 && Value < 240; - } - bool isImm0_255() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value >= 0 && Value < 256; - } - bool isImm0_4095() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value >= 0 && Value < 4096; - } bool isImm0_4095Neg() const { if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); @@ -986,145 +962,17 @@ public: int64_t Value = -CE->getValue(); return Value > 0 && Value < 4096; } - bool isImm0_1() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value >= 0 && Value < 2; - } - bool isImm0_3() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value >= 0 && Value < 4; - } bool isImm0_7() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value >= 0 && Value < 8; - } - bool isImm0_15() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value >= 0 && Value < 16; - } - bool isImm0_31() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value >= 0 && Value < 32; - } - bool isImm0_63() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value >= 0 && Value < 64; - } - bool isImm8() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value == 8; - } - bool isImm16() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value == 16; - } - bool isImm32() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value == 32; - } - bool isShrImm8() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value > 0 && Value <= 8; - } - bool isShrImm16() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value > 0 && Value <= 16; - } - bool isShrImm32() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value > 0 && Value <= 32; - } - bool isShrImm64() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value > 0 && Value <= 64; - } - bool isImm1_7() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value > 0 && Value < 8; - } - bool isImm1_15() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value > 0 && Value < 16; - } - bool isImm1_31() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value > 0 && Value < 32; + return isImmediate<0, 7>(); } bool isImm1_16() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value > 0 && Value < 17; + return isImmediate<1, 16>(); } bool isImm1_32() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value > 0 && Value < 33; - } - bool isImm0_32() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value >= 0 && Value < 33; + return isImmediate<1, 32>(); } - bool isImm0_65535() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value >= 0 && Value < 65536; + bool isImm8_255() const { + return isImmediate<8, 255>(); } bool isImm256_65535Expr() const { if (!isImm()) return false; @@ -1145,32 +993,16 @@ public: return Value >= 0 && Value < 65536; } bool isImm24bit() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value >= 0 && Value <= 0xffffff; + return isImmediate<0, 0xffffff + 1>(); } bool isImmThumbSR() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value > 0 && Value < 33; + return isImmediate<1, 33>(); } bool isPKHLSLImm() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value >= 0 && Value < 32; + return isImmediate<0, 32>(); } bool isPKHASRImm() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value > 0 && Value <= 32; + return isImmediate<0, 33>(); } bool isAdrLabel() const { // If we have an immediate that's not a constant, treat it as a label @@ -1245,6 +1077,20 @@ public: return ARM_AM::getSOImmVal(Value) == -1 && ARM_AM::getSOImmVal(-Value) != -1; } + bool isThumbModImmNeg1_7() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int32_t Value = -(int32_t)CE->getValue(); + return 0 < Value && Value < 8; + } + bool isThumbModImmNeg8_255() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int32_t Value = -(int32_t)CE->getValue(); + return 7 < Value && Value < 256; + } bool isConstantPoolImm() const { return Kind == k_ConstantPoolImmediate; } bool isBitfield() const { return Kind == k_BitfieldDescriptor; } bool isPostIdxRegShifted() const { return Kind == k_PostIndexRegister; } @@ -2035,6 +1881,20 @@ public: Inst.addOperand(MCOperand::createImm(Enc)); } + void addThumbModImmNeg8_255Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + uint32_t Val = -CE->getValue(); + Inst.addOperand(MCOperand::createImm(Val)); + } + + void addThumbModImmNeg1_7Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + uint32_t Val = -CE->getValue(); + Inst.addOperand(MCOperand::createImm(Val)); + } + void addBitfieldOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // Munge the lsb/width into a bitfield mask. @@ -2141,7 +2001,7 @@ public: // The operand is actually a t2_so_imm, but we have its bitwise // negation in the assembly source, so twiddle it here. const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - Inst.addOperand(MCOperand::createImm(~CE->getValue())); + Inst.addOperand(MCOperand::createImm(~(uint32_t)CE->getValue())); } void addT2SOImmNegOperands(MCInst &Inst, unsigned N) const { @@ -2149,7 +2009,7 @@ public: // The operand is actually a t2_so_imm, but we have its // negation in the assembly source, so twiddle it here. const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - Inst.addOperand(MCOperand::createImm(-CE->getValue())); + Inst.addOperand(MCOperand::createImm(-(uint32_t)CE->getValue())); } void addImm0_4095NegOperands(MCInst &Inst, unsigned N) const { @@ -4330,7 +4190,7 @@ ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) { // If some specific flag is already set, it means that some letter is // present more than once, this is not acceptable. - if (FlagsVal == ~0U || (FlagsVal & Flag)) + if (Flag == ~0U || (FlagsVal & Flag)) return MatchOperand_NoMatch; FlagsVal |= Flag; } @@ -5484,7 +5344,8 @@ bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) { enum { COFF = (1 << MCObjectFileInfo::IsCOFF), ELF = (1 << MCObjectFileInfo::IsELF), - MACHO = (1 << MCObjectFileInfo::IsMachO) + MACHO = (1 << MCObjectFileInfo::IsMachO), + WASM = (1 << MCObjectFileInfo::IsWasm), }; static const struct PrefixEntry { const char *Spelling; @@ -5518,6 +5379,9 @@ bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) { case MCObjectFileInfo::IsCOFF: CurrentFormat = COFF; break; + case MCObjectFileInfo::IsWasm: + CurrentFormat = WASM; + break; } if (~Prefix->SupportedFormats & CurrentFormat) { @@ -6301,10 +6165,6 @@ bool ARMAsmParser::validatetLDMRegList(const MCInst &Inst, else if (ListContainsPC && ListContainsLR) return Error(Operands[ListNo + HasWritebackToken]->getStartLoc(), "PC and LR may not be in the register list simultaneously"); - else if (inITBlock() && !lastInITBlock() && ListContainsPC) - return Error(Operands[ListNo + HasWritebackToken]->getStartLoc(), - "instruction must be outside of IT block or the last " - "instruction in an IT block"); return false; } @@ -6366,6 +6226,12 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, return Warning(Loc, "predicated instructions should be in IT block"); } + // PC-setting instructions in an IT block, but not the last instruction of + // the block, are UNPREDICTABLE. + if (inExplicitITBlock() && !lastInITBlock() && isITBlockTerminator(Inst)) { + return Error(Loc, "instruction must be outside of IT block or the last instruction in an IT block"); + } + const unsigned Opcode = Inst.getOpcode(); switch (Opcode) { case ARM::LDRD: @@ -6676,6 +6542,7 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, break; } case ARM::MOVi16: + case ARM::MOVTi16: case ARM::t2MOVi16: case ARM::t2MOVTi16: { @@ -8232,7 +8099,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, case ARM::t2LSRri: case ARM::t2ASRri: { if (isARMLowRegister(Inst.getOperand(0).getReg()) && - Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() && + isARMLowRegister(Inst.getOperand(1).getReg()) && Inst.getOperand(5).getReg() == (inITBlock() ? 0 : ARM::CPSR) && !(static_cast<ARMOperand &>(*Operands[3]).isToken() && static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w")) { @@ -8307,23 +8174,38 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, isNarrow = true; MCInst TmpInst; unsigned newOpc; - switch(ARM_AM::getSORegShOp(Inst.getOperand(2).getImm())) { - default: llvm_unreachable("unexpected opcode!"); - case ARM_AM::asr: newOpc = isNarrow ? ARM::tASRri : ARM::t2ASRri; break; - case ARM_AM::lsr: newOpc = isNarrow ? ARM::tLSRri : ARM::t2LSRri; break; - case ARM_AM::lsl: newOpc = isNarrow ? ARM::tLSLri : ARM::t2LSLri; break; - case ARM_AM::ror: newOpc = ARM::t2RORri; isNarrow = false; break; - case ARM_AM::rrx: isNarrow = false; newOpc = ARM::t2RRX; break; - } + unsigned Shift = ARM_AM::getSORegShOp(Inst.getOperand(2).getImm()); unsigned Amount = ARM_AM::getSORegOffset(Inst.getOperand(2).getImm()); + bool isMov = false; + // MOV rd, rm, LSL #0 is actually a MOV instruction + if (Shift == ARM_AM::lsl && Amount == 0) { + isMov = true; + // The 16-bit encoding of MOV rd, rm, LSL #N is explicitly encoding T2 of + // MOV (register) in the ARMv8-A and ARMv8-M manuals, and immediate 0 is + // unpredictable in an IT block so the 32-bit encoding T3 has to be used + // instead. + if (inITBlock()) { + isNarrow = false; + } + newOpc = isNarrow ? ARM::tMOVSr : ARM::t2MOVr; + } else { + switch(Shift) { + default: llvm_unreachable("unexpected opcode!"); + case ARM_AM::asr: newOpc = isNarrow ? ARM::tASRri : ARM::t2ASRri; break; + case ARM_AM::lsr: newOpc = isNarrow ? ARM::tLSRri : ARM::t2LSRri; break; + case ARM_AM::lsl: newOpc = isNarrow ? ARM::tLSLri : ARM::t2LSLri; break; + case ARM_AM::ror: newOpc = ARM::t2RORri; isNarrow = false; break; + case ARM_AM::rrx: isNarrow = false; newOpc = ARM::t2RRX; break; + } + } if (Amount == 32) Amount = 0; TmpInst.setOpcode(newOpc); TmpInst.addOperand(Inst.getOperand(0)); // Rd - if (isNarrow) + if (isNarrow && !isMov) TmpInst.addOperand(MCOperand::createReg( Inst.getOpcode() == ARM::t2MOVSsi ? ARM::CPSR : 0)); TmpInst.addOperand(Inst.getOperand(1)); // Rn - if (newOpc != ARM::t2RRX) + if (newOpc != ARM::t2RRX && !isMov) TmpInst.addOperand(MCOperand::createImm(Amount)); TmpInst.addOperand(Inst.getOperand(3)); // CondCode TmpInst.addOperand(Inst.getOperand(4)); @@ -8918,6 +8800,9 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) { if (isThumbTwo() && Inst.getOperand(OpNo).getReg() == ARM::CPSR && inITBlock()) return Match_RequiresNotITBlock; + // LSL with zero immediate is not allowed in an IT block + if (Opc == ARM::tLSLri && Inst.getOperand(3).getImm() == 0 && inITBlock()) + return Match_RequiresNotITBlock; } else if (isThumbOne()) { // Some high-register supporting Thumb1 encodings only allow both registers // to be from r0-r7 when in Thumb2. @@ -8932,6 +8817,22 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) { return Match_RequiresV6; } + // Before ARMv8 the rules for when SP is allowed in t2MOVr are more complex + // than the loop below can handle, so it uses the GPRnopc register class and + // we do SP handling here. + if (Opc == ARM::t2MOVr && !hasV8Ops()) + { + // SP as both source and destination is not allowed + if (Inst.getOperand(0).getReg() == ARM::SP && + Inst.getOperand(1).getReg() == ARM::SP) + return Match_RequiresV8; + // When flags-setting SP as either source or destination is not allowed + if (Inst.getOperand(4).getReg() == ARM::CPSR && + (Inst.getOperand(0).getReg() == ARM::SP || + Inst.getOperand(1).getReg() == ARM::SP)) + return Match_RequiresV8; + } + for (unsigned I = 0; I < MCID.NumOperands; ++I) if (MCID.OpInfo[I].RegClass == ARM::rGPRRegClassID) { // rGPRRegClass excludes PC, and also excluded SP before ARMv8 @@ -8945,7 +8846,7 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) { } namespace llvm { -template <> inline bool IsCPSRDead<MCInst>(MCInst *Instr) { +template <> inline bool IsCPSRDead<MCInst>(const MCInst *Instr) { return true; // In an assembly source, no need to second-guess } } @@ -8975,6 +8876,7 @@ bool ARMAsmParser::isITBlockTerminator(MCInst &Inst) const { // operands. We only care about Thumb instructions here, as ARM instructions // obviously can't be in an IT block. switch (Inst.getOpcode()) { + case ARM::tLDMIA: case ARM::t2LDMIA: case ARM::t2LDMIA_UPD: case ARM::t2LDMDB: @@ -9088,6 +8990,13 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, MatchResult = MatchInstruction(Operands, Inst, ErrorInfo, MatchingInlineAsm, PendConditionalInstruction, Out); + SMLoc ErrorLoc; + if (ErrorInfo < Operands.size()) { + ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc(); + if (ErrorLoc == SMLoc()) + ErrorLoc = IDLoc; + } + switch (MatchResult) { case Match_Success: // Context sensitive operand constraints aren't handled by the matcher, @@ -9177,16 +9086,52 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return Error(IDLoc, "instruction variant requires ARMv8 or later"); case Match_RequiresFlagSetting: return Error(IDLoc, "no flag-preserving variant of this instruction available"); - case Match_ImmRange0_15: { - SMLoc ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc(); - if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; + case Match_ImmRange0_1: + return Error(ErrorLoc, "immediate operand must be in the range [0,1]"); + case Match_ImmRange0_3: + return Error(ErrorLoc, "immediate operand must be in the range [0,3]"); + case Match_ImmRange0_7: + return Error(ErrorLoc, "immediate operand must be in the range [0,7]"); + case Match_ImmRange0_15: return Error(ErrorLoc, "immediate operand must be in the range [0,15]"); - } - case Match_ImmRange0_239: { - SMLoc ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc(); - if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; + case Match_ImmRange0_31: + return Error(ErrorLoc, "immediate operand must be in the range [0,31]"); + case Match_ImmRange0_32: + return Error(ErrorLoc, "immediate operand must be in the range [0,32]"); + case Match_ImmRange0_63: + return Error(ErrorLoc, "immediate operand must be in the range [0,63]"); + case Match_ImmRange0_239: return Error(ErrorLoc, "immediate operand must be in the range [0,239]"); - } + case Match_ImmRange0_255: + return Error(ErrorLoc, "immediate operand must be in the range [0,255]"); + case Match_ImmRange0_4095: + return Error(ErrorLoc, "immediate operand must be in the range [0,4095]"); + case Match_ImmRange0_65535: + return Error(ErrorLoc, "immediate operand must be in the range [0,65535]"); + case Match_ImmRange1_7: + return Error(ErrorLoc, "immediate operand must be in the range [1,7]"); + case Match_ImmRange1_8: + return Error(ErrorLoc, "immediate operand must be in the range [1,8]"); + case Match_ImmRange1_15: + return Error(ErrorLoc, "immediate operand must be in the range [1,15]"); + case Match_ImmRange1_16: + return Error(ErrorLoc, "immediate operand must be in the range [1,16]"); + case Match_ImmRange1_31: + return Error(ErrorLoc, "immediate operand must be in the range [1,31]"); + case Match_ImmRange1_32: + return Error(ErrorLoc, "immediate operand must be in the range [1,32]"); + case Match_ImmRange1_64: + return Error(ErrorLoc, "immediate operand must be in the range [1,64]"); + case Match_ImmRange8_8: + return Error(ErrorLoc, "immediate operand must be 8."); + case Match_ImmRange16_16: + return Error(ErrorLoc, "immediate operand must be 16."); + case Match_ImmRange32_32: + return Error(ErrorLoc, "immediate operand must be 32."); + case Match_ImmRange256_65535: + return Error(ErrorLoc, "immediate operand must be in the range [255,65535]"); + case Match_ImmRange0_16777215: + return Error(ErrorLoc, "immediate operand must be in the range [0,0xffffff]"); case Match_AlignedMemoryRequiresNone: case Match_DupAlignedMemoryRequiresNone: case Match_AlignedMemoryRequires16: diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index ac3d8c780af2..e812d32cc76f 100644 --- a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -7,21 +7,24 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" -#include "MCTargetDesc/ARMMCExpr.h" +#include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/Debug.h" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/LEB128.h" -#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/TargetRegistry.h" +#include <algorithm> +#include <cassert> +#include <cstdint> #include <vector> using namespace llvm; @@ -31,6 +34,7 @@ using namespace llvm; typedef MCDisassembler::DecodeStatus DecodeStatus; namespace { + // Handles the condition code status of instructions in IT blocks class ITStatus { @@ -81,9 +85,7 @@ namespace { private: std::vector<unsigned char> ITStates; }; -} -namespace { /// ARM disassembler for all ARM platforms. class ARMDisassembler : public MCDisassembler { public: @@ -91,7 +93,7 @@ public: MCDisassembler(STI, Ctx) { } - ~ARMDisassembler() override {} + ~ARMDisassembler() override = default; DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, @@ -106,7 +108,7 @@ public: MCDisassembler(STI, Ctx) { } - ~ThumbDisassembler() override {} + ~ThumbDisassembler() override = default; DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, @@ -118,7 +120,8 @@ private: DecodeStatus AddThumbPredicate(MCInst&) const; void UpdateThumbVFPPredicate(MCInst&) const; }; -} + +} // end anonymous namespace static bool Check(DecodeStatus &Out, DecodeStatus In) { switch (In) { @@ -135,7 +138,6 @@ static bool Check(DecodeStatus &Out, DecodeStatus In) { llvm_unreachable("Invalid DecodeStatus!"); } - // Forward declare these because the autogenerated code will reference them. // Definitions are further down. static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo, @@ -319,7 +321,6 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); - static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeThumbBROperand(MCInst &Inst, unsigned Val, @@ -395,8 +396,9 @@ static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, unsigned Val, static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecoderForMRRC2AndMCRR2(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecoderForMRRC2AndMCRR2(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); + #include "ARMGenDisassemblerTables.inc" static MCDisassembler *createARMDisassembler(const Target &T, @@ -416,8 +418,7 @@ static DecodeStatus checkDecodedInstruction(MCInst &MI, uint64_t &Size, uint64_t Address, raw_ostream &OS, raw_ostream &CS, uint32_t Insn, - DecodeStatus Result) -{ + DecodeStatus Result) { switch (MI.getOpcode()) { case ARM::HVC: { // HVC is undefined if condition = 0xf otherwise upredictable @@ -461,65 +462,28 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, return checkDecodedInstruction(MI, Size, Address, OS, CS, Insn, Result); } - // VFP and NEON instructions, similarly, are shared between ARM - // and Thumb modes. - Result = decodeInstruction(DecoderTableVFP32, MI, Insn, Address, this, STI); - if (Result != MCDisassembler::Fail) { - Size = 4; - return Result; - } - - Result = decodeInstruction(DecoderTableVFPV832, MI, Insn, Address, this, STI); - if (Result != MCDisassembler::Fail) { - Size = 4; - return Result; - } - - Result = - decodeInstruction(DecoderTableNEONData32, MI, Insn, Address, this, STI); - if (Result != MCDisassembler::Fail) { - Size = 4; - // Add a fake predicate operand, because we share these instruction - // definitions with Thumb2 where these instructions are predicable. - if (!DecodePredicateOperand(MI, 0xE, Address, this)) - return MCDisassembler::Fail; - return Result; - } - - Result = decodeInstruction(DecoderTableNEONLoadStore32, MI, Insn, Address, - this, STI); - if (Result != MCDisassembler::Fail) { - Size = 4; - // Add a fake predicate operand, because we share these instruction - // definitions with Thumb2 where these instructions are predicable. - if (!DecodePredicateOperand(MI, 0xE, Address, this)) - return MCDisassembler::Fail; - return Result; - } - - Result = - decodeInstruction(DecoderTableNEONDup32, MI, Insn, Address, this, STI); - if (Result != MCDisassembler::Fail) { - Size = 4; - // Add a fake predicate operand, because we share these instruction - // definitions with Thumb2 where these instructions are predicable. - if (!DecodePredicateOperand(MI, 0xE, Address, this)) - return MCDisassembler::Fail; - return Result; - } + struct DecodeTable { + const uint8_t *P; + bool DecodePred; + }; - Result = - decodeInstruction(DecoderTablev8NEON32, MI, Insn, Address, this, STI); - if (Result != MCDisassembler::Fail) { - Size = 4; - return Result; - } + const DecodeTable Tables[] = { + {DecoderTableVFP32, false}, {DecoderTableVFPV832, false}, + {DecoderTableNEONData32, true}, {DecoderTableNEONLoadStore32, true}, + {DecoderTableNEONDup32, true}, {DecoderTablev8NEON32, false}, + {DecoderTablev8Crypto32, false}, + }; - Result = - decodeInstruction(DecoderTablev8Crypto32, MI, Insn, Address, this, STI); - if (Result != MCDisassembler::Fail) { - Size = 4; - return Result; + for (auto Table : Tables) { + Result = decodeInstruction(Table.P, MI, Insn, Address, this, STI); + if (Result != MCDisassembler::Fail) { + Size = 4; + // Add a fake predicate operand, because we share these instruction + // definitions with Thumb2 where these instructions are predicable. + if (Table.DecodePred && !DecodePredicateOperand(MI, 0xE, Address, this)) + return MCDisassembler::Fail; + return Result; + } } Size = 0; @@ -527,8 +491,10 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } namespace llvm { + extern const MCInstrDesc ARMInsts[]; -} + +} // end namespace llvm /// tryAddingSymbolicOperand - trys to add a symbolic operand in place of the /// immediate Value in the MCInst. The immediate Value has had any PC @@ -859,7 +825,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, return MCDisassembler::Fail; } - extern "C" void LLVMInitializeARMDisassembler() { TargetRegistry::RegisterMCDisassembler(getTheARMLETarget(), createARMDisassembler); @@ -1056,7 +1021,6 @@ static const uint16_t QPRDecoderTable[] = { ARM::Q12, ARM::Q13, ARM::Q14, ARM::Q15 }; - static DecodeStatus DecodeQPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { if (RegNo > 31 || (RegNo & 1) != 0) @@ -1676,7 +1640,7 @@ DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn, case ARM::LDRD: case ARM::LDRD_PRE: case ARM::LDRD_POST: - if (type && Rn == 15){ + if (type && Rn == 15) { if (Rt2 == 15) S = MCDisassembler::SoftFail; break; @@ -1693,7 +1657,7 @@ DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn, case ARM::LDRH: case ARM::LDRH_PRE: case ARM::LDRH_POST: - if (type && Rn == 15){ + if (type && Rn == 15) { if (Rt == 15) S = MCDisassembler::SoftFail; break; @@ -1711,7 +1675,7 @@ DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn, case ARM::LDRSB: case ARM::LDRSB_PRE: case ARM::LDRSB_POST: - if (type && Rn == 15){ + if (type && Rn == 15) { if (Rt == 15) S = MCDisassembler::SoftFail; break; @@ -2309,7 +2273,6 @@ DecodeBranchImmInstruction(MCInst &Inst, unsigned Insn, return S; } - static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3748,7 +3711,6 @@ static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val, return MCDisassembler::Success; } - static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -4073,7 +4035,7 @@ static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val, static DecodeStatus DecodeThumbBCCTargetOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder){ + uint64_t Address, const void *Decoder) { if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<9>(Val<<1) + 4, true, 2, Inst, Decoder)) Inst.addOperand(MCOperand::createImm(SignExtend32<9>(Val << 1))); @@ -4081,7 +4043,8 @@ DecodeThumbBCCTargetOperand(MCInst &Inst, unsigned Val, } static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder){ + uint64_t Address, + const void *Decoder) { // Val is passed in as S:J1:J2:imm10:imm11 // Note no trailing zero after imm11. Also the J1 and J2 values are from // the encoded instruction. So here change to I1 and I2 values via: @@ -4247,7 +4210,8 @@ static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn, } static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn, - uint64_t Address, const void *Decoder){ + uint64_t Address, + const void *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rd = fieldFromInstruction(Insn, 12, 4); @@ -4323,7 +4287,6 @@ static DecodeStatus DecodeLDRPreReg(MCInst &Inst, unsigned Insn, return S; } - static DecodeStatus DecodeSTRPreImm(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -4506,7 +4469,6 @@ static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn, return S; } - static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -4637,7 +4599,6 @@ static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn, return S; } - static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -4771,7 +4732,6 @@ static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn, return S; } - static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -5266,9 +5226,8 @@ static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecoderForMRRC2AndMCRR2(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecoderForMRRC2AndMCRR2(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { - DecodeStatus S = MCDisassembler::Success; unsigned CRm = fieldFromInstruction(Val, 0, 4); diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 3667952d44c0..57b91366a085 100644 --- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -20,7 +20,15 @@ #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <cstdint> + using namespace llvm; #define DEBUG_TYPE "asm-printer" @@ -73,7 +81,6 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, unsigned Opcode = MI->getOpcode(); switch (Opcode) { - // Check for MOVs and print canonical forms, instead. case ARM::MOVsr: { // FIXME: Thumb variants? diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index 9d80eed84dc2..86873a3a6ccb 100644 --- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -235,4 +235,4 @@ public: } // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_ARM_INSTPRINTER_ARMINSTPRINTER_H diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index a58d5b34131b..40bf545e8322 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -357,13 +357,14 @@ static uint32_t joinHalfWords(uint32_t FirstHalf, uint32_t SecondHalf, } unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, - bool IsPCRel, MCContext *Ctx, + bool IsPCRel, MCContext &Ctx, bool IsLittleEndian, bool IsResolved) const { unsigned Kind = Fixup.getKind(); switch (Kind) { default: - llvm_unreachable("Unknown fixup kind!"); + Ctx.reportError(Fixup.getLoc(), "bad relocation fixup type"); + return 0; case FK_Data_1: case FK_Data_2: case FK_Data_4: @@ -412,8 +413,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, Value = -Value; isAdd = false; } - if (Ctx && Value >= 4096) { - Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value"); + if (Value >= 4096) { + Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value"); return 0; } Value |= isAdd << 23; @@ -433,8 +434,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, Value = -Value; opc = 2; // 0b0010 } - if (Ctx && ARM_AM::getSOImmVal(Value) == -1) { - Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value"); + if (ARM_AM::getSOImmVal(Value) == -1) { + Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value"); return 0; } // Encode the immediate and shift the opcode into place. @@ -541,8 +542,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, // // Note that the halfwords are stored high first, low second; so we need // to transpose the fixup value here to map properly. - if (Ctx && Value % 4 != 0) { - Ctx->reportError(Fixup.getLoc(), "misaligned ARM call destination"); + if (Value % 4 != 0) { + Ctx.reportError(Fixup.getLoc(), "misaligned ARM call destination"); return 0; } @@ -568,10 +569,10 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, case ARM::fixup_arm_thumb_cp: // On CPUs supporting Thumb2, this will be relaxed to an ldr.w, otherwise we // could have an error on our hands. - if (Ctx && !STI->getFeatureBits()[ARM::FeatureThumb2] && IsResolved) { + if (!STI->getFeatureBits()[ARM::FeatureThumb2] && IsResolved) { const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value); if (FixupDiagnostic) { - Ctx->reportError(Fixup.getLoc(), FixupDiagnostic); + Ctx.reportError(Fixup.getLoc(), FixupDiagnostic); return 0; } } @@ -581,8 +582,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, // CB instructions can only branch to offsets in [4, 126] in multiples of 2 // so ensure that the raw value LSB is zero and it lies in [2, 130]. // An offset of 2 will be relaxed to a NOP. - if (Ctx && ((int64_t)Value < 2 || Value > 0x82 || Value & 1)) { - Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value"); + if ((int64_t)Value < 2 || Value > 0x82 || Value & 1) { + Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value"); return 0; } // Offset by 4 and don't encode the lower bit, which is always 0. @@ -592,21 +593,21 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, } case ARM::fixup_arm_thumb_br: // Offset by 4 and don't encode the lower bit, which is always 0. - if (Ctx && !STI->getFeatureBits()[ARM::FeatureThumb2] && - !STI->getFeatureBits()[ARM::HasV8MBaselineOps]) { + if (!STI->getFeatureBits()[ARM::FeatureThumb2] && + !STI->getFeatureBits()[ARM::HasV8MBaselineOps]) { const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value); if (FixupDiagnostic) { - Ctx->reportError(Fixup.getLoc(), FixupDiagnostic); + Ctx.reportError(Fixup.getLoc(), FixupDiagnostic); return 0; } } return ((Value - 4) >> 1) & 0x7ff; case ARM::fixup_arm_thumb_bcc: // Offset by 4 and don't encode the lower bit, which is always 0. - if (Ctx && !STI->getFeatureBits()[ARM::FeatureThumb2]) { + if (!STI->getFeatureBits()[ARM::FeatureThumb2]) { const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value); if (FixupDiagnostic) { - Ctx->reportError(Fixup.getLoc(), FixupDiagnostic); + Ctx.reportError(Fixup.getLoc(), FixupDiagnostic); return 0; } } @@ -620,8 +621,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, isAdd = false; } // The value has the low 4 bits encoded in [3:0] and the high 4 in [11:8]. - if (Ctx && Value >= 256) { - Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value"); + if (Value >= 256) { + Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value"); return 0; } Value = (Value & 0xf) | ((Value & 0xf0) << 4); @@ -641,8 +642,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, } // These values don't encode the low two bits since they're always zero. Value >>= 2; - if (Ctx && Value >= 256) { - Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value"); + if (Value >= 256) { + Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value"); return 0; } Value |= isAdd << 23; @@ -667,13 +668,13 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, isAdd = false; } // These values don't encode the low bit since it's always zero. - if (Ctx && (Value & 1)) { - Ctx->reportError(Fixup.getLoc(), "invalid value for this fixup"); + if (Value & 1) { + Ctx.reportError(Fixup.getLoc(), "invalid value for this fixup"); return 0; } Value >>= 1; - if (Ctx && Value >= 256) { - Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value"); + if (Value >= 256) { + Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value"); return 0; } Value |= isAdd << 23; @@ -687,8 +688,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, } case ARM::fixup_arm_mod_imm: Value = ARM_AM::getSOImmVal(Value); - if (Ctx && Value >> 12) { - Ctx->reportError(Fixup.getLoc(), "out of range immediate fixup value"); + if (Value >> 12) { + Ctx.reportError(Fixup.getLoc(), "out of range immediate fixup value"); return 0; } return Value; @@ -737,12 +738,6 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, (unsigned)Fixup.getKind() == ARM::fixup_arm_uncondbl || (unsigned)Fixup.getKind() == ARM::fixup_arm_condbl)) IsResolved = false; - - // Try to get the encoded value for the fixup as-if we're mapping it into - // the instruction. This allows adjustFixupValue() to issue a diagnostic - // if the value aren't invalid. - (void)adjustFixupValue(Fixup, Value, false, &Asm.getContext(), - IsLittleEndian, IsResolved); } /// getFixupKindNumBytes - The number of bytes the fixup may change. @@ -846,11 +841,10 @@ static unsigned getFixupKindContainerSizeBytes(unsigned Kind) { } void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, - unsigned DataSize, uint64_t Value, - bool IsPCRel) const { + unsigned DataSize, uint64_t Value, bool IsPCRel, + MCContext &Ctx) const { unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); - Value = - adjustFixupValue(Fixup, Value, IsPCRel, nullptr, IsLittleEndian, true); + Value = adjustFixupValue(Fixup, Value, IsPCRel, Ctx, IsLittleEndian, true); if (!Value) return; // Doesn't change encoding. diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h index 84caaacc47d3..2ddedb5d6105 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h @@ -46,11 +46,11 @@ public: bool &IsResolved) override; unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, bool IsPCRel, - MCContext *Ctx, bool IsLittleEndian, + MCContext &Ctx, bool IsLittleEndian, bool IsResolved) const; void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel) const override; + uint64_t Value, bool IsPCRel, MCContext &Ctx) const override; unsigned getRelaxedOpcode(unsigned Op) const; diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h index 088b4205ed62..92e553f21f14 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h @@ -291,7 +291,11 @@ namespace ARMII { /// MO_OPTION_MASK - Most flags are mutually exclusive; this mask selects /// just that part of the flag set. - MO_OPTION_MASK = 0x1f, + MO_OPTION_MASK = 0x0f, + + /// MO_SBREL - On a symbol operand, this represents a static base relative + /// relocation. Used in movw and movt instructions. + MO_SBREL = 0x10, /// MO_DLLIMPORT - On a symbol operand, this represents that the reference /// to the symbol is for an import stub. This is used for DLL import diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index 6f19754b899e..e1fa24571820 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -7,32 +7,32 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/ARMMCTargetDesc.h" #include "MCTargetDesc/ARMFixupKinds.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringSwitch.h" +#include "MCTargetDesc/ARMMCTargetDesc.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCFixup.h" #include "llvm/MC/MCValue.h" -#include "llvm/Support/Debug.h" +#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include <cstdint> using namespace llvm; namespace { + class ARMELFObjectWriter : public MCELFObjectTargetWriter { enum { DefaultEABIVersion = 0x05000000U }; - unsigned GetRelocTypeInner(const MCValue &Target, - const MCFixup &Fixup, - bool IsPCRel) const; + unsigned GetRelocTypeInner(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel, MCContext &Ctx) const; public: ARMELFObjectWriter(uint8_t OSABI); - ~ARMELFObjectWriter() override; + ~ARMELFObjectWriter() override = default; unsigned getRelocType(MCContext &Ctx, const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const override; @@ -40,15 +40,14 @@ namespace { bool needsRelocateWithSymbol(const MCSymbol &Sym, unsigned Type) const override; }; -} + +} // end anonymous namespace ARMELFObjectWriter::ARMELFObjectWriter(uint8_t OSABI) : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI, ELF::EM_ARM, /*HasRelocationAddend*/ false) {} -ARMELFObjectWriter::~ARMELFObjectWriter() {} - bool ARMELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym, unsigned Type) const { // FIXME: This is extremely conservative. This really needs to use a @@ -70,19 +69,20 @@ bool ARMELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym, unsigned ARMELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const { - return GetRelocTypeInner(Target, Fixup, IsPCRel); + return GetRelocTypeInner(Target, Fixup, IsPCRel, Ctx); } unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, const MCFixup &Fixup, - bool IsPCRel) const { + bool IsPCRel, + MCContext &Ctx) const { MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant(); unsigned Type = 0; if (IsPCRel) { switch ((unsigned)Fixup.getKind()) { default: - report_fatal_error("unsupported relocation on symbol"); + Ctx.reportFatalError(Fixup.getLoc(), "unsupported relocation on symbol"); return ELF::R_ARM_NONE; case FK_Data_4: switch (Modifier) { @@ -161,7 +161,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, } else { switch ((unsigned)Fixup.getKind()) { default: - report_fatal_error("unsupported relocation on symbol"); + Ctx.reportFatalError(Fixup.getLoc(), "unsupported relocation on symbol"); return ELF::R_ARM_NONE; case FK_Data_1: switch (Modifier) { @@ -270,10 +270,26 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, } break; case ARM::fixup_t2_movt_hi16: - Type = ELF::R_ARM_THM_MOVT_ABS; + switch (Modifier) { + default: llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_None: + Type = ELF::R_ARM_THM_MOVT_ABS; + break; + case MCSymbolRefExpr::VK_ARM_SBREL: + Type = ELF:: R_ARM_THM_MOVT_BREL; + break; + } break; case ARM::fixup_t2_movw_lo16: - Type = ELF::R_ARM_THM_MOVW_ABS_NC; + switch (Modifier) { + default: llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_None: + Type = ELF::R_ARM_THM_MOVW_ABS_NC; + break; + case MCSymbolRefExpr::VK_ARM_SBREL: + Type = ELF:: R_ARM_THM_MOVW_BREL_NC; + break; + } break; } } diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index f6bb35d2326b..6fa890ba1cd5 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -15,7 +15,11 @@ #include "ARMRegisterInfo.h" #include "ARMUnwindOpAsm.h" -#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" @@ -24,25 +28,33 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCFragment.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstPrinter.h" -#include "llvm/MC/MCObjectFileInfo.h" -#include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolELF.h" -#include "llvm/MC/MCValue.h" +#include "llvm/MC/SectionKind.h" #include "llvm/Support/ARMBuildAttributes.h" #include "llvm/Support/ARMEHABI.h" -#include "llvm/Support/TargetParser.h" -#include "llvm/Support/Debug.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/TargetParser.h" #include <algorithm> +#include <cassert> +#include <climits> +#include <cstddef> +#include <cstdint> +#include <string> using namespace llvm; @@ -101,16 +113,21 @@ ARMTargetAsmStreamer::ARMTargetAsmStreamer(MCStreamer &S, bool VerboseAsm) : ARMTargetStreamer(S), OS(OS), InstPrinter(InstPrinter), IsVerboseAsm(VerboseAsm) {} + void ARMTargetAsmStreamer::emitFnStart() { OS << "\t.fnstart\n"; } void ARMTargetAsmStreamer::emitFnEnd() { OS << "\t.fnend\n"; } void ARMTargetAsmStreamer::emitCantUnwind() { OS << "\t.cantunwind\n"; } + void ARMTargetAsmStreamer::emitPersonality(const MCSymbol *Personality) { OS << "\t.personality " << Personality->getName() << '\n'; } + void ARMTargetAsmStreamer::emitPersonalityIndex(unsigned Index) { OS << "\t.personalityindex " << Index << '\n'; } + void ARMTargetAsmStreamer::emitHandlerData() { OS << "\t.handlerdata\n"; } + void ARMTargetAsmStreamer::emitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset) { OS << "\t.setfp\t"; @@ -121,6 +138,7 @@ void ARMTargetAsmStreamer::emitSetFP(unsigned FpReg, unsigned SpReg, OS << ", #" << Offset; OS << '\n'; } + void ARMTargetAsmStreamer::emitMovSP(unsigned Reg, int64_t Offset) { assert((Reg != ARM::SP && Reg != ARM::PC) && "the operand of .movsp cannot be either sp or pc"); @@ -131,9 +149,11 @@ void ARMTargetAsmStreamer::emitMovSP(unsigned Reg, int64_t Offset) { OS << ", #" << Offset; OS << '\n'; } + void ARMTargetAsmStreamer::emitPad(int64_t Offset) { OS << "\t.pad\t#" << Offset << '\n'; } + void ARMTargetAsmStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList, bool isVector) { assert(RegList.size() && "RegList should not be empty"); @@ -151,8 +171,9 @@ void ARMTargetAsmStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList, OS << "}\n"; } -void ARMTargetAsmStreamer::switchVendor(StringRef Vendor) { -} + +void ARMTargetAsmStreamer::switchVendor(StringRef Vendor) {} + void ARMTargetAsmStreamer::emitAttribute(unsigned Attribute, unsigned Value) { OS << "\t.eabi_attribute\t" << Attribute << ", " << Twine(Value); if (IsVerboseAsm) { @@ -162,6 +183,7 @@ void ARMTargetAsmStreamer::emitAttribute(unsigned Attribute, unsigned Value) { } OS << "\n"; } + void ARMTargetAsmStreamer::emitTextAttribute(unsigned Attribute, StringRef String) { switch (Attribute) { @@ -179,6 +201,7 @@ void ARMTargetAsmStreamer::emitTextAttribute(unsigned Attribute, } OS << "\n"; } + void ARMTargetAsmStreamer::emitIntTextAttribute(unsigned Attribute, unsigned IntValue, StringRef StringValue) { @@ -194,20 +217,25 @@ void ARMTargetAsmStreamer::emitIntTextAttribute(unsigned Attribute, } OS << "\n"; } + void ARMTargetAsmStreamer::emitArch(unsigned Arch) { OS << "\t.arch\t" << ARM::getArchName(Arch) << "\n"; } + void ARMTargetAsmStreamer::emitArchExtension(unsigned ArchExt) { OS << "\t.arch_extension\t" << ARM::getArchExtName(ArchExt) << "\n"; } + void ARMTargetAsmStreamer::emitObjectArch(unsigned Arch) { OS << "\t.object_arch\t" << ARM::getArchName(Arch) << '\n'; } + void ARMTargetAsmStreamer::emitFPU(unsigned FPU) { OS << "\t.fpu\t" << ARM::getFPUName(FPU) << "\n"; } -void ARMTargetAsmStreamer::finishAttributeSection() { -} + +void ARMTargetAsmStreamer::finishAttributeSection() {} + void ARMTargetAsmStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *S) { OS << "\t.tlsdescseq\t" << S->getSymbol().getName(); @@ -274,12 +302,12 @@ private: }; StringRef CurrentVendor; - unsigned FPU; - unsigned Arch; - unsigned EmittedArch; + unsigned FPU = ARM::FK_INVALID; + unsigned Arch = ARM::AK_INVALID; + unsigned EmittedArch = ARM::AK_INVALID; SmallVector<AttributeItem, 64> Contents; - MCSection *AttributeSection; + MCSection *AttributeSection = nullptr; AttributeItem *getAttributeItem(unsigned Attribute) { for (size_t i = 0; i < Contents.size(); ++i) @@ -393,9 +421,7 @@ private: public: ARMTargetELFStreamer(MCStreamer &S) - : ARMTargetStreamer(S), CurrentVendor("aeabi"), FPU(ARM::FK_INVALID), - Arch(ARM::AK_INVALID), EmittedArch(ARM::AK_INVALID), - AttributeSection(nullptr) {} + : ARMTargetStreamer(S), CurrentVendor("aeabi") {} }; /// Extend the generic ELFStreamer class so that it can emit mapping symbols at @@ -416,12 +442,11 @@ public: ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_pwrite_stream &OS, MCCodeEmitter *Emitter, bool IsThumb) - : MCELFStreamer(Context, TAB, OS, Emitter), IsThumb(IsThumb), - MappingSymbolCounter(0), LastEMS(EMS_None) { + : MCELFStreamer(Context, TAB, OS, Emitter), IsThumb(IsThumb) { EHReset(); } - ~ARMELFStreamer() {} + ~ARMELFStreamer() override = default; void FinishImpl() override; @@ -439,20 +464,21 @@ public: void emitUnwindRaw(int64_t Offset, const SmallVectorImpl<uint8_t> &Opcodes); void ChangeSection(MCSection *Section, const MCExpr *Subsection) override { - // We have to keep track of the mapping symbol state of any sections we - // use. Each one should start off as EMS_None, which is provided as the - // default constructor by DenseMap::lookup. - LastMappingSymbols[getPreviousSection().first] = LastEMS; - LastEMS = LastMappingSymbols.lookup(Section); - + LastMappingSymbols[getPreviousSection().first] = std::move(LastEMSInfo); MCELFStreamer::ChangeSection(Section, Subsection); + auto LastMappingSymbol = LastMappingSymbols.find(Section); + if (LastMappingSymbol != LastMappingSymbols.end()) { + LastEMSInfo = std::move(LastMappingSymbol->second); + return; + } + LastEMSInfo.reset(new ElfMappingSymbolInfo(SMLoc(), nullptr, 0)); } /// This function is the one used to emit instruction data into the ELF /// streamer. We override it to add the appropriate mapping symbol if /// necessary. - void EmitInstruction(const MCInst& Inst, - const MCSubtargetInfo &STI) override { + void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, + bool) override { if (IsThumb) EmitThumbMappingSymbol(); else @@ -507,15 +533,25 @@ public: MCELFStreamer::EmitBytes(Data); } + void FlushPendingMappingSymbol() { + if (!LastEMSInfo->hasInfo()) + return; + ElfMappingSymbolInfo *EMS = LastEMSInfo.get(); + EmitMappingSymbol("$d", EMS->Loc, EMS->F, EMS->Offset); + EMS->resetInfo(); + } + /// This is one of the functions used to emit data into an ELF section, so the /// ARM streamer overrides it to add the appropriate mapping symbol ($d) if /// necessary. void EmitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override { - if (const MCSymbolRefExpr *SRE = dyn_cast_or_null<MCSymbolRefExpr>(Value)) + if (const MCSymbolRefExpr *SRE = dyn_cast_or_null<MCSymbolRefExpr>(Value)) { if (SRE->getKind() == MCSymbolRefExpr::VK_ARM_SBREL && !(Size == 4)) { getContext().reportError(Loc, "relocated expression must be 32-bit"); return; } + getOrCreateDataFragment(); + } EmitDataMappingSymbol(); MCELFStreamer::EmitValueImpl(Value, Size, Loc); @@ -548,22 +584,54 @@ private: EMS_Data }; + struct ElfMappingSymbolInfo { + explicit ElfMappingSymbolInfo(SMLoc Loc, MCFragment *F, uint64_t O) + : Loc(Loc), F(F), Offset(O), State(EMS_None) {} + void resetInfo() { + F = nullptr; + Offset = 0; + } + bool hasInfo() { return F != nullptr; } + SMLoc Loc; + MCFragment *F; + uint64_t Offset; + ElfMappingSymbol State; + }; + void EmitDataMappingSymbol() { - if (LastEMS == EMS_Data) return; + if (LastEMSInfo->State == EMS_Data) + return; + else if (LastEMSInfo->State == EMS_None) { + // This is a tentative symbol, it won't really be emitted until it's + // actually needed. + ElfMappingSymbolInfo *EMS = LastEMSInfo.get(); + auto *DF = dyn_cast_or_null<MCDataFragment>(getCurrentFragment()); + if (!DF) + return; + EMS->Loc = SMLoc(); + EMS->F = getCurrentFragment(); + EMS->Offset = DF->getContents().size(); + LastEMSInfo->State = EMS_Data; + return; + } EmitMappingSymbol("$d"); - LastEMS = EMS_Data; + LastEMSInfo->State = EMS_Data; } void EmitThumbMappingSymbol() { - if (LastEMS == EMS_Thumb) return; + if (LastEMSInfo->State == EMS_Thumb) + return; + FlushPendingMappingSymbol(); EmitMappingSymbol("$t"); - LastEMS = EMS_Thumb; + LastEMSInfo->State = EMS_Thumb; } void EmitARMMappingSymbol() { - if (LastEMS == EMS_ARM) return; + if (LastEMSInfo->State == EMS_ARM) + return; + FlushPendingMappingSymbol(); EmitMappingSymbol("$a"); - LastEMS = EMS_ARM; + LastEMSInfo->State = EMS_ARM; } void EmitMappingSymbol(StringRef Name) { @@ -576,6 +644,17 @@ private: Symbol->setExternal(false); } + void EmitMappingSymbol(StringRef Name, SMLoc Loc, MCFragment *F, + uint64_t Offset) { + auto *Symbol = cast<MCSymbolELF>(getContext().getOrCreateSymbol( + Name + "." + Twine(MappingSymbolCounter++))); + EmitLabel(Symbol, Loc, F); + Symbol->setType(ELF::STT_NOTYPE); + Symbol->setBinding(ELF::STB_LOCAL); + Symbol->setExternal(false); + Symbol->setOffset(Offset); + } + void EmitThumbFunc(MCSymbol *Func) override { getAssembler().setIsThumbFunc(Func); EmitSymbolAttribute(Func, MCSA_ELF_TypeFunction); @@ -599,10 +678,12 @@ private: void EmitFixup(const MCExpr *Expr, MCFixupKind Kind); bool IsThumb; - int64_t MappingSymbolCounter; + int64_t MappingSymbolCounter = 0; + + DenseMap<const MCSection *, std::unique_ptr<ElfMappingSymbolInfo>> + LastMappingSymbols; - DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols; - ElfMappingSymbol LastEMS; + std::unique_ptr<ElfMappingSymbolInfo> LastEMSInfo; // ARM Exception Handling Frame Information MCSymbol *ExTab; @@ -618,6 +699,7 @@ private: SmallVector<uint8_t, 64> Opcodes; UnwindOpcodeAssembler UnwindOpAsm; }; + } // end anonymous namespace ARMELFStreamer &ARMTargetELFStreamer::getStreamer() { @@ -627,33 +709,42 @@ ARMELFStreamer &ARMTargetELFStreamer::getStreamer() { void ARMTargetELFStreamer::emitFnStart() { getStreamer().emitFnStart(); } void ARMTargetELFStreamer::emitFnEnd() { getStreamer().emitFnEnd(); } void ARMTargetELFStreamer::emitCantUnwind() { getStreamer().emitCantUnwind(); } + void ARMTargetELFStreamer::emitPersonality(const MCSymbol *Personality) { getStreamer().emitPersonality(Personality); } + void ARMTargetELFStreamer::emitPersonalityIndex(unsigned Index) { getStreamer().emitPersonalityIndex(Index); } + void ARMTargetELFStreamer::emitHandlerData() { getStreamer().emitHandlerData(); } + void ARMTargetELFStreamer::emitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset) { getStreamer().emitSetFP(FpReg, SpReg, Offset); } + void ARMTargetELFStreamer::emitMovSP(unsigned Reg, int64_t Offset) { getStreamer().emitMovSP(Reg, Offset); } + void ARMTargetELFStreamer::emitPad(int64_t Offset) { getStreamer().emitPad(Offset); } + void ARMTargetELFStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList, bool isVector) { getStreamer().emitRegSave(RegList, isVector); } + void ARMTargetELFStreamer::emitUnwindRaw(int64_t Offset, const SmallVectorImpl<uint8_t> &Opcodes) { getStreamer().emitUnwindRaw(Offset, Opcodes); } + void ARMTargetELFStreamer::switchVendor(StringRef Vendor) { assert(!Vendor.empty() && "Vendor cannot be empty."); @@ -668,25 +759,31 @@ void ARMTargetELFStreamer::switchVendor(StringRef Vendor) { CurrentVendor = Vendor; } + void ARMTargetELFStreamer::emitAttribute(unsigned Attribute, unsigned Value) { setAttributeItem(Attribute, Value, /* OverwriteExisting= */ true); } + void ARMTargetELFStreamer::emitTextAttribute(unsigned Attribute, StringRef Value) { setAttributeItem(Attribute, Value, /* OverwriteExisting= */ true); } + void ARMTargetELFStreamer::emitIntTextAttribute(unsigned Attribute, unsigned IntValue, StringRef StringValue) { setAttributeItems(Attribute, IntValue, StringValue, /* OverwriteExisting= */ true); } + void ARMTargetELFStreamer::emitArch(unsigned Value) { Arch = Value; } + void ARMTargetELFStreamer::emitObjectArch(unsigned Value) { EmittedArch = Value; } + void ARMTargetELFStreamer::emitArchDefaultAttributes() { using namespace ARMBuildAttrs; @@ -786,9 +883,11 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() { break; } } + void ARMTargetELFStreamer::emitFPU(unsigned Value) { FPU = Value; } + void ARMTargetELFStreamer::emitFPUDefaultAttributes() { switch (FPU) { case ARM::FK_VFP: @@ -920,6 +1019,7 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() { break; } } + size_t ARMTargetELFStreamer::calculateContentSize() const { size_t Result = 0; for (size_t i = 0; i < Contents.size(); ++i) { @@ -944,6 +1044,7 @@ size_t ARMTargetELFStreamer::calculateContentSize() const { } return Result; } + void ARMTargetELFStreamer::finishAttributeSection() { // <format-version> // [ <section-length> "vendor-name" @@ -1093,9 +1194,9 @@ inline void ARMELFStreamer::SwitchToEHSection(StringRef Prefix, const MCSymbolELF *Group = FnSection.getGroup(); if (Group) Flags |= ELF::SHF_GROUP; - MCSectionELF *EHSection = - getContext().getELFSection(EHSecName, Type, Flags, 0, Group, - FnSection.getUniqueID(), nullptr, &FnSection); + MCSectionELF *EHSection = getContext().getELFSection( + EHSecName, Type, Flags, 0, Group, FnSection.getUniqueID(), + static_cast<const MCSymbolELF *>(&Fn)); assert(EHSection && "Failed to get the required EH section"); @@ -1114,6 +1215,7 @@ inline void ARMELFStreamer::SwitchToExIdxSection(const MCSymbol &FnStart) { ELF::SHF_ALLOC | ELF::SHF_LINK_ORDER, SectionKind::getData(), FnStart); } + void ARMELFStreamer::EmitFixup(const MCExpr *Expr, MCFixupKind Kind) { MCDataFragment *Frag = getOrCreateDataFragment(); Frag->getFixups().push_back(MCFixup::create(Frag->getContents().size(), Expr, @@ -1396,8 +1498,6 @@ MCELFStreamer *createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, if (RelaxAll) S->getAssembler().setRelaxAll(true); return S; - } - } - +} // end namespace llvm diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index 559a4f8de75f..d9df2c6da7ec 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -11,22 +11,33 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/ARMMCTargetDesc.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMFixupKinds.h" #include "MCTargetDesc/ARMMCExpr.h" #include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/Triple.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixup.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <cstdlib> using namespace llvm; @@ -36,9 +47,8 @@ STATISTIC(MCNumEmitted, "Number of MC instructions emitted."); STATISTIC(MCNumCPRelocations, "Number of constant pool relocations created."); namespace { + class ARMMCCodeEmitter : public MCCodeEmitter { - ARMMCCodeEmitter(const ARMMCCodeEmitter &) = delete; - void operator=(const ARMMCCodeEmitter &) = delete; const MCInstrInfo &MCII; const MCContext &CTX; bool IsLittleEndian; @@ -47,15 +57,18 @@ public: ARMMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx, bool IsLittle) : MCII(mcii), CTX(ctx), IsLittleEndian(IsLittle) { } - - ~ARMMCCodeEmitter() override {} + ARMMCCodeEmitter(const ARMMCCodeEmitter &) = delete; + ARMMCCodeEmitter &operator=(const ARMMCCodeEmitter &) = delete; + ~ARMMCCodeEmitter() override = default; bool isThumb(const MCSubtargetInfo &STI) const { return STI.getFeatureBits()[ARM::ModeThumb]; } + bool isThumb2(const MCSubtargetInfo &STI) const { return isThumb(STI) && STI.getFeatureBits()[ARM::FeatureThumb2]; } + bool isTargetMachO(const MCSubtargetInfo &STI) const { const Triple &TT = STI.getTargetTriple(); return TT.isOSBinFormatMachO(); @@ -200,6 +213,7 @@ public: case ARM_AM::ib: return 3; } } + /// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value. /// unsigned getShiftOp(ARM_AM::ShiftOpc ShOpc) const { @@ -273,7 +287,6 @@ public: unsigned getSOImmOpValue(const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(Op); // We expect MO to be an immediate or an expression, @@ -432,18 +445,6 @@ public: } // end anonymous namespace -MCCodeEmitter *llvm::createARMLEMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, - MCContext &Ctx) { - return new ARMMCCodeEmitter(MCII, Ctx, true); -} - -MCCodeEmitter *llvm::createARMBEMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, - MCContext &Ctx) { - return new ARMMCCodeEmitter(MCII, Ctx, false); -} - /// NEONThumb2DataIPostEncoder - Post-process encoded NEON data-processing /// instructions, and rewrite them to their Thumb2 form if we are currently in /// Thumb2 mode. @@ -550,7 +551,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, bool ARMMCCodeEmitter:: EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx, unsigned &Reg, unsigned &Imm, SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx + 1); @@ -1515,7 +1516,7 @@ getBitfieldInvertedMaskOpValue(const MCInst &MI, unsigned Op, uint32_t v = ~MO.getImm(); uint32_t lsb = countTrailingZeros(v); uint32_t msb = (32 - countLeadingZeros (v)) - 1; - assert (v != 0 && lsb < 32 && msb < 32 && "Illegal bitfield mask!"); + assert(v != 0 && lsb < 32 && msb < 32 && "Illegal bitfield mask!"); return lsb | (msb << 5); } @@ -1700,3 +1701,15 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS, } #include "ARMGenMCCodeEmitter.inc" + +MCCodeEmitter *llvm::createARMLEMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + MCContext &Ctx) { + return new ARMMCCodeEmitter(MCII, Ctx, true); +} + +MCCodeEmitter *llvm::createARMBEMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + MCContext &Ctx) { + return new ARMMCCodeEmitter(MCII, Ctx, false); +} diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 9e4d202321e6..477755157040 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -260,18 +260,37 @@ public: return false; int64_t Imm = Inst.getOperand(0).getImm(); - // FIXME: This is not right for thumb. Target = Addr+Imm+8; // In ARM mode the PC is always off by 8 bytes. return true; } }; +class ThumbMCInstrAnalysis : public ARMMCInstrAnalysis { +public: + ThumbMCInstrAnalysis(const MCInstrInfo *Info) : ARMMCInstrAnalysis(Info) {} + + bool evaluateBranch(const MCInst &Inst, uint64_t Addr, + uint64_t Size, uint64_t &Target) const override { + // We only handle PCRel branches for now. + if (Info->get(Inst.getOpcode()).OpInfo[0].OperandType!=MCOI::OPERAND_PCREL) + return false; + + int64_t Imm = Inst.getOperand(0).getImm(); + Target = Addr+Imm+4; // In Thumb mode the PC is always off by 4 bytes. + return true; + } +}; + } static MCInstrAnalysis *createARMMCInstrAnalysis(const MCInstrInfo *Info) { return new ARMMCInstrAnalysis(Info); } +static MCInstrAnalysis *createThumbMCInstrAnalysis(const MCInstrInfo *Info) { + return new ThumbMCInstrAnalysis(Info); +} + // Force static initialization. extern "C" void LLVMInitializeARMTargetMC() { for (Target *T : {&getTheARMLETarget(), &getTheARMBETarget(), @@ -289,9 +308,6 @@ extern "C" void LLVMInitializeARMTargetMC() { TargetRegistry::RegisterMCSubtargetInfo(*T, ARM_MC::createARMMCSubtargetInfo); - // Register the MC instruction analyzer. - TargetRegistry::RegisterMCInstrAnalysis(*T, createARMMCInstrAnalysis); - TargetRegistry::RegisterELFStreamer(*T, createELFStreamer); TargetRegistry::RegisterCOFFStreamer(*T, createARMWinCOFFStreamer); TargetRegistry::RegisterMachOStreamer(*T, createARMMachOStreamer); @@ -313,6 +329,12 @@ extern "C" void LLVMInitializeARMTargetMC() { TargetRegistry::RegisterMCRelocationInfo(*T, createARMMCRelocationInfo); } + // Register the MC instruction analyzer. + for (Target *T : {&getTheARMLETarget(), &getTheARMBETarget()}) + TargetRegistry::RegisterMCInstrAnalysis(*T, createARMMCInstrAnalysis); + for (Target *T : {&getTheThumbLETarget(), &getTheThumbBETarget()}) + TargetRegistry::RegisterMCInstrAnalysis(*T, createThumbMCInstrAnalysis); + // Register the MC Code Emitter for (Target *T : {&getTheARMLETarget(), &getTheThumbLETarget()}) TargetRegistry::RegisterMCCodeEmitter(*T, createARMLEMCCodeEmitter); diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp index 482bcf902518..34c770440e1b 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp @@ -1,4 +1,4 @@ -//===-- ARMMachORelocationInfo.cpp ----------------------------------------===// +//===- ARMMachORelocationInfo.cpp -----------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -7,17 +7,17 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/ARMMCTargetDesc.h" #include "ARMMCExpr.h" -#include "llvm-c/Disassembler.h" +#include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCDisassembler/MCRelocationInfo.h" +#include "llvm/MC/MCExpr.h" +#include "llvm-c/Disassembler.h" using namespace llvm; -using namespace object; namespace { + class ARMMachORelocationInfo : public MCRelocationInfo { public: ARMMachORelocationInfo(MCContext &Ctx) : MCRelocationInfo(Ctx) {} @@ -35,7 +35,8 @@ public: } } }; -} // End unnamed namespace + +} // end anonymous namespace /// createARMMachORelocationInfo - Construct an ARM Mach-O RelocationInfo. MCRelocationInfo *llvm::createARMMachORelocationInfo(MCContext &Ctx) { diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp index c0d10c896354..73e563890dd9 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp @@ -10,20 +10,21 @@ // This file implements the ARMTargetStreamer class. // //===----------------------------------------------------------------------===// -#include "llvm/ADT/MapVector.h" + #include "llvm/MC/ConstantPools.h" -#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" using namespace llvm; + // // ARMTargetStreamer Implemenation // + ARMTargetStreamer::ARMTargetStreamer(MCStreamer &S) : MCTargetStreamer(S), ConstantPools(new AssemblerConstantPools()) {} -ARMTargetStreamer::~ARMTargetStreamer() {} +ARMTargetStreamer::~ARMTargetStreamer() = default; // The constant pool handling is shared by all ARMTargetStreamer // implementations. @@ -73,5 +74,4 @@ void ARMTargetStreamer::finishAttributeSection() {} void ARMTargetStreamer::emitInst(uint32_t Inst, char Suffix) {} void ARMTargetStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) {} - void ARMTargetStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) {} diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp index 173cc93d44fb..d3ab83bbccbc 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp @@ -14,12 +14,14 @@ #include "ARMUnwindOpAsm.h" #include "llvm/Support/ARMEHABI.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LEB128.h" +#include "llvm/Support/MathExtras.h" +#include <cassert> using namespace llvm; namespace { + /// UnwindOpcodeStreamer - The simple wrapper over SmallVector to emit bytes /// with MSB to LSB per uint32_t ordering. For example, the first byte will /// be placed in Vec[3], and the following bytes will be placed in 2, 1, 0, @@ -27,20 +29,19 @@ namespace { class UnwindOpcodeStreamer { private: SmallVectorImpl<uint8_t> &Vec; - size_t Pos; + size_t Pos = 3; public: - UnwindOpcodeStreamer(SmallVectorImpl<uint8_t> &V) : Vec(V), Pos(3) { - } + UnwindOpcodeStreamer(SmallVectorImpl<uint8_t> &V) : Vec(V) {} /// Emit the byte in MSB to LSB per uint32_t order. - inline void EmitByte(uint8_t elem) { + void EmitByte(uint8_t elem) { Vec[Pos] = elem; Pos = (((Pos ^ 0x3u) + 1) ^ 0x3u); } /// Emit the size prefix. - inline void EmitSize(size_t Size) { + void EmitSize(size_t Size) { size_t SizeInWords = (Size + 3) / 4; assert(SizeInWords <= 0x100u && "Only 256 additional words are allowed for unwind opcodes"); @@ -48,19 +49,20 @@ namespace { } /// Emit the personality index prefix. - inline void EmitPersonalityIndex(unsigned PI) { + void EmitPersonalityIndex(unsigned PI) { assert(PI < ARM::EHABI::NUM_PERSONALITY_INDEX && "Invalid personality prefix"); EmitByte(ARM::EHABI::EHT_COMPACT | PI); } /// Fill the rest of bytes with FINISH opcode. - inline void FillFinishOpcode() { + void FillFinishOpcode() { while (Pos < Vec.size()) EmitByte(ARM::EHABI::UNWIND_OPCODE_FINISH); } }; -} + +} // end anonymous namespace void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) { if (RegSave == 0u) @@ -153,7 +155,6 @@ void UnwindOpcodeAssembler::EmitSPOffset(int64_t Offset) { void UnwindOpcodeAssembler::Finalize(unsigned &PersonalityIndex, SmallVectorImpl<uint8_t> &Result) { - UnwindOpcodeStreamer OpStreamer(Result); if (HasPersonality) { diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h index e0c113ecfaa3..a7bfbdf4938e 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h @@ -16,8 +16,8 @@ #define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMUNWINDOPASM_H #include "llvm/ADT/SmallVector.h" -#include "llvm/Support/ARMEHABI.h" -#include "llvm/Support/DataTypes.h" +#include <cstddef> +#include <cstdint> namespace llvm { @@ -25,13 +25,12 @@ class MCSymbol; class UnwindOpcodeAssembler { private: - llvm::SmallVector<uint8_t, 32> Ops; - llvm::SmallVector<unsigned, 8> OpBegins; - bool HasPersonality; + SmallVector<uint8_t, 32> Ops; + SmallVector<unsigned, 8> OpBegins; + bool HasPersonality = false; public: - UnwindOpcodeAssembler() - : HasPersonality(0) { + UnwindOpcodeAssembler() { OpBegins.push_back(0); } @@ -40,12 +39,12 @@ public: Ops.clear(); OpBegins.clear(); OpBegins.push_back(0); - HasPersonality = 0; + HasPersonality = false; } /// Set the personality void setPersonality(const MCSymbol *Per) { - HasPersonality = 1; + HasPersonality = true; } /// Emit unwind opcodes for .save directives @@ -88,6 +87,6 @@ private: } }; -} // namespace llvm +} // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMUNWINDOPASM_H diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp index 166c04b41a77..7ae2f864d79d 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp @@ -10,23 +10,28 @@ #include "MCTargetDesc/ARMFixupKinds.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/MCWinCOFFObjectWriter.h" #include "llvm/Support/COFF.h" -#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> using namespace llvm; namespace { + class ARMWinCOFFObjectWriter : public MCWinCOFFObjectTargetWriter { public: ARMWinCOFFObjectWriter(bool Is64Bit) : MCWinCOFFObjectTargetWriter(COFF::IMAGE_FILE_MACHINE_ARMNT) { assert(!Is64Bit && "AArch64 support not yet implemented"); } - ~ARMWinCOFFObjectWriter() override {} + + ~ARMWinCOFFObjectWriter() override = default; unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsCrossSection, @@ -35,6 +40,8 @@ public: bool recordRelocation(const MCFixup &) const override; }; +} // end anonymous namespace + unsigned ARMWinCOFFObjectWriter::getRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsCrossSection, @@ -79,13 +86,13 @@ unsigned ARMWinCOFFObjectWriter::getRelocType(const MCValue &Target, bool ARMWinCOFFObjectWriter::recordRelocation(const MCFixup &Fixup) const { return static_cast<unsigned>(Fixup.getKind()) != ARM::fixup_t2_movt_hi16; } -} namespace llvm { + MCObjectWriter *createARMWinCOFFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit) { MCWinCOFFObjectTargetWriter *MOTW = new ARMWinCOFFObjectWriter(Is64Bit); return createWinCOFFObjectWriter(MOTW, OS); } -} +} // end namespace llvm diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp index 9953c61cd89c..fc083b98395b 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -11,14 +11,36 @@ // //===----------------------------------------------------------------------===// -#include "Thumb1FrameLowering.h" +#include "ARMBaseInstrInfo.h" +#include "ARMBaseRegisterInfo.h" #include "ARMMachineFunctionInfo.h" +#include "ARMSubtarget.h" +#include "MCTargetDesc/ARMBaseInfo.h" +#include "Thumb1FrameLowering.h" +#include "Thumb1InstrInfo.h" +#include "ThumbRegisterInfo.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <cassert> +#include <iterator> +#include <vector> using namespace llvm; @@ -238,9 +260,11 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, if (HasFP) { FramePtrOffsetInBlock += MFI.getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize; - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) - .addReg(ARM::SP).addImm(FramePtrOffsetInBlock / 4) - .setMIFlags(MachineInstr::FrameSetup)); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) + .addReg(ARM::SP) + .addImm(FramePtrOffsetInBlock / 4) + .setMIFlags(MachineInstr::FrameSetup) + .add(predOps(ARMCC::AL)); if(FramePtrOffsetInBlock) { CFAOffset += FramePtrOffsetInBlock; unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa( @@ -336,14 +360,19 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, // will be allocated after this, so we can still use the base pointer // to reference locals. if (RegInfo->hasBasePointer(MF)) - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), BasePtr) - .addReg(ARM::SP)); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), BasePtr) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)); // If the frame has variable sized objects then the epilogue must restore // the sp from fp. We can assume there's an FP here since hasFP already // checks for hasVarSizedObjects. if (MFI.hasVarSizedObjects()) AFI->setShouldRestoreSPFromFP(true); + + // In some cases, virtual registers have been introduced, e.g. by uses of + // emitThumbRegPlusImmInReg. + MF.getProperties().reset(MachineFunctionProperties::Property::NoVRegs); } static bool isCSRestore(MachineInstr &MI, const MCPhysReg *CSRegs) { @@ -408,13 +437,13 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, "No scratch register to restore SP from FP!"); emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes, TII, *RegInfo); - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), - ARM::SP) - .addReg(ARM::R4)); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) + .addReg(ARM::R4) + .add(predOps(ARMCC::AL)); } else - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), - ARM::SP) - .addReg(FramePtr)); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) + .addReg(FramePtr) + .add(predOps(ARMCC::AL)); } else { if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET && &MBB.front() != &*MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) { @@ -493,12 +522,12 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, if (!DoIt || MBBI->getOpcode() == ARM::tPOP_RET) return true; MachineInstrBuilder MIB = - AddDefaultPred( - BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET))); + BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET)) + .add(predOps(ARMCC::AL)); // Copy implicit ops and popped registers, if any. for (auto MO: MBBI->operands()) if (MO.isReg() && (MO.isImplicit() || MO.isDef())) - MIB.addOperand(MO); + MIB.add(MO); MIB.addReg(ARM::PC, RegState::Define); // Erase the old instruction (tBX_RET or tPOP). MBB.erase(MBBI); @@ -566,22 +595,23 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, if (TemporaryReg) { assert(!PopReg && "Unnecessary MOV is about to be inserted"); PopReg = PopFriendly.find_first(); - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) - .addReg(TemporaryReg, RegState::Define) - .addReg(PopReg, RegState::Kill)); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) + .addReg(TemporaryReg, RegState::Define) + .addReg(PopReg, RegState::Kill) + .add(predOps(ARMCC::AL)); } if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPOP_RET) { // We couldn't use the direct restoration above, so // perform the opposite conversion: tPOP_RET to tPOP. MachineInstrBuilder MIB = - AddDefaultPred( - BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP))); + BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP)) + .add(predOps(ARMCC::AL)); bool Popped = false; for (auto MO: MBBI->operands()) if (MO.isReg() && (MO.isImplicit() || MO.isDef()) && MO.getReg() != ARM::PC) { - MIB.addOperand(MO); + MIB.add(MO); if (!MO.isImplicit()) Popped = true; } @@ -590,23 +620,27 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, MBB.erase(MIB.getInstr()); // Erase the old instruction. MBB.erase(MBBI); - MBBI = AddDefaultPred(BuildMI(MBB, MBB.end(), dl, TII.get(ARM::tBX_RET))); + MBBI = BuildMI(MBB, MBB.end(), dl, TII.get(ARM::tBX_RET)) + .add(predOps(ARMCC::AL)); } assert(PopReg && "Do not know how to get LR"); - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) + BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)) + .add(predOps(ARMCC::AL)) .addReg(PopReg, RegState::Define); emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize); - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) - .addReg(ARM::LR, RegState::Define) - .addReg(PopReg, RegState::Kill)); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) + .addReg(ARM::LR, RegState::Define) + .addReg(PopReg, RegState::Kill) + .add(predOps(ARMCC::AL)); if (TemporaryReg) - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) - .addReg(PopReg, RegState::Define) - .addReg(TemporaryReg, RegState::Kill)); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) + .addReg(PopReg, RegState::Define) + .addReg(TemporaryReg, RegState::Kill) + .add(predOps(ARMCC::AL)); return true; } @@ -667,8 +701,8 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, // Push the low registers and lr if (!LoRegsToSave.empty()) { - MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH)); - AddDefaultPred(MIB); + MachineInstrBuilder MIB = + BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL)); for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6, ARM::R7, ARM::LR}) { if (LoRegsToSave.count(Reg)) { bool isKill = !MF.getRegInfo().isLiveIn(Reg); @@ -708,8 +742,8 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd); // Create the PUSH, but don't insert it yet (the MOVs need to come first). - MachineInstrBuilder PushMIB = BuildMI(MF, DL, TII.get(ARM::tPUSH)); - AddDefaultPred(PushMIB); + MachineInstrBuilder PushMIB = + BuildMI(MF, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL)); SmallVector<unsigned, 4> RegsToPush; while (HiRegToSave != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) { @@ -719,11 +753,10 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, MBB.addLiveIn(*HiRegToSave); // Emit a MOV from the high reg to the low reg. - MachineInstrBuilder MIB = - BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)); - MIB.addReg(*CopyReg, RegState::Define); - MIB.addReg(*HiRegToSave, getKillRegState(isKill)); - AddDefaultPred(MIB); + BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) + .addReg(*CopyReg, RegState::Define) + .addReg(*HiRegToSave, getKillRegState(isKill)) + .add(predOps(ARMCC::AL)); // Record the register that must be added to the PUSH. RegsToPush.push_back(*CopyReg); @@ -735,7 +768,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, } // Add the low registers to the PUSH, in ascending order. - for (unsigned Reg : reverse(RegsToPush)) + for (unsigned Reg : llvm::reverse(RegsToPush)) PushMIB.addReg(Reg, RegState::Kill); // Insert the PUSH instruction after the MOVs. @@ -817,19 +850,18 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd); // Create the POP instruction. - MachineInstrBuilder PopMIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPOP)); - AddDefaultPred(PopMIB); + MachineInstrBuilder PopMIB = + BuildMI(MBB, MI, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL)); while (HiRegToRestore != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) { // Add the low register to the POP. PopMIB.addReg(*CopyReg, RegState::Define); // Create the MOV from low to high register. - MachineInstrBuilder MIB = - BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)); - MIB.addReg(*HiRegToRestore, RegState::Define); - MIB.addReg(*CopyReg, RegState::Kill); - AddDefaultPred(MIB); + BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) + .addReg(*HiRegToRestore, RegState::Define) + .addReg(*CopyReg, RegState::Kill) + .add(predOps(ARMCC::AL)); CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd); HiRegToRestore = @@ -837,11 +869,8 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, } } - - - - MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP)); - AddDefaultPred(MIB); + MachineInstrBuilder MIB = + BuildMI(MF, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL)); bool NeedsPop = false; for (unsigned i = CSI.size(); i != 0; --i) { @@ -859,6 +888,16 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, // ARMv4T requires BX, see emitEpilogue if (!STI.hasV5TOps()) continue; + // Tailcall optimization failed; change TCRETURN to a tBL + if (MI->getOpcode() == ARM::TCRETURNdi || + MI->getOpcode() == ARM::TCRETURNri) { + unsigned Opcode = MI->getOpcode() == ARM::TCRETURNdi + ? ARM::tBL : ARM::tBLXr; + MachineInstrBuilder BL = BuildMI(MF, DL, TII.get(Opcode)); + BL.add(predOps(ARMCC::AL)); + BL.add(MI->getOperand(0)); + MBB.insert(MI, &*BL); + } Reg = ARM::PC; (*MIB).setDesc(TII.get(ARM::tPOP_RET)); if (MI != MBB.end()) diff --git a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp index 4b4fbaab28d9..27bff4d75acf 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -50,20 +50,29 @@ void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (st.hasV6Ops() || ARM::hGPRRegClass.contains(SrcReg) || !ARM::tGPRRegClass.contains(DestReg)) - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc))); + BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)) + .add(predOps(ARMCC::AL)); else { - // FIXME: The performance consequences of this are going to be atrocious. - // Some things to try that should be better: - // * 'mov hi, $src; mov $dst, hi', with hi as either r10 or r11 - // * 'movs $dst, $src' if cpsr isn't live - // See: http://lists.llvm.org/pipermail/llvm-dev/2014-August/075998.html + // FIXME: Can also use 'mov hi, $src; mov $dst, hi', + // with hi as either r10 or r11. + + const TargetRegisterInfo *RegInfo = st.getRegisterInfo(); + if (MBB.computeRegisterLiveness(RegInfo, ARM::CPSR, I) + == MachineBasicBlock::LQR_Dead) { + BuildMI(MBB, I, DL, get(ARM::tMOVSr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)) + ->addRegisterDead(ARM::CPSR, RegInfo); + return; + } // 'MOV lo, lo' is unpredictable on < v6, so use the stack to do it - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tPUSH))) - .addReg(SrcReg, getKillRegState(KillSrc)); - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tPOP))) - .addReg(DestReg, getDefRegState(true)); + BuildMI(MBB, I, DL, get(ARM::tPUSH)) + .add(predOps(ARMCC::AL)) + .addReg(SrcReg, getKillRegState(KillSrc)); + BuildMI(MBB, I, DL, get(ARM::tPOP)) + .add(predOps(ARMCC::AL)) + .addReg(DestReg, getDefRegState(true)); } } @@ -87,9 +96,12 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineMemOperand *MMO = MF.getMachineMemOperand( MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore, MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tSTRspi)) - .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + BuildMI(MBB, I, DL, get(ARM::tSTRspi)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI) + .addImm(0) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); } } @@ -113,8 +125,11 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineMemOperand *MMO = MF.getMachineMemOperand( MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad, MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tLDRspi), DestReg) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + BuildMI(MBB, I, DL, get(ARM::tLDRspi), DestReg) + .addFrameIndex(FI) + .addImm(0) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); } } diff --git a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp index d01fc8c40ddf..04bdd91b53e6 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -9,13 +9,26 @@ #include "ARM.h" #include "ARMMachineFunctionInfo.h" +#include "ARMSubtarget.h" +#include "MCTargetDesc/ARMBaseInfo.h" #include "Thumb2InstrInfo.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" +#include <cassert> +#include <new> + using namespace llvm; #define DEBUG_TYPE "thumb2-it" @@ -24,16 +37,18 @@ STATISTIC(NumITs, "Number of IT blocks inserted"); STATISTIC(NumMovedInsts, "Number of predicated instructions moved"); namespace { + class Thumb2ITBlockPass : public MachineFunctionPass { public: static char ID; - Thumb2ITBlockPass() : MachineFunctionPass(ID) {} bool restrictIT; const Thumb2InstrInfo *TII; const TargetRegisterInfo *TRI; ARMFunctionInfo *AFI; + Thumb2ITBlockPass() : MachineFunctionPass(ID) {} + bool runOnMachineFunction(MachineFunction &Fn) override; MachineFunctionProperties getRequiredProperties() const override { @@ -52,8 +67,10 @@ namespace { SmallSet<unsigned, 4> &Uses); bool InsertITInstructions(MachineBasicBlock &MBB); }; + char Thumb2ITBlockPass::ID = 0; -} + +} // end anonymous namespace /// TrackDefUses - Tracking what registers are being defined and used by /// instructions in the IT block. This also tracks "dependencies", i.e. uses diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp index 1c731d669eda..818ba85c7d40 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -117,8 +117,9 @@ void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (!ARM::GPRRegClass.contains(DestReg, SrcReg)) return ARMBaseInstrInfo::copyPhysReg(MBB, I, DL, DestReg, SrcReg, KillSrc); - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc))); + BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)) + .add(predOps(ARMCC::AL)); } void Thumb2InstrInfo:: @@ -138,9 +139,12 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass || RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass || RC == &ARM::GPRnopcRegClass) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2STRi12)) - .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + BuildMI(MBB, I, DL, get(ARM::t2STRi12)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI) + .addImm(0) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); return; } @@ -156,8 +160,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2STRDi8)); AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); - MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO); - AddDefaultPred(MIB); + MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO).add(predOps(ARMCC::AL)); return; } @@ -180,8 +183,11 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass || RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass || RC == &ARM::GPRnopcRegClass) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2LDRi12), DestReg) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + BuildMI(MBB, I, DL, get(ARM::t2LDRi12), DestReg) + .addFrameIndex(FI) + .addImm(0) + .addMemOperand(MMO) + .add(predOps(ARMCC::AL)); return; } @@ -198,8 +204,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2LDRDi8)); AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); - MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO); - AddDefaultPred(MIB); + MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO).add(predOps(ARMCC::AL)); if (TargetRegisterInfo::isPhysicalRegister(DestReg)) MIB.addReg(DestReg, RegState::ImplicitDefine); @@ -259,10 +264,11 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, if (Fits) { if (isSub) { BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), DestReg) - .addReg(BaseReg) - .addReg(DestReg, RegState::Kill) - .addImm((unsigned)Pred).addReg(PredReg).addReg(0) - .setMIFlags(MIFlags); + .addReg(BaseReg) + .addReg(DestReg, RegState::Kill) + .add(predOps(Pred, PredReg)) + .add(condCodeOp()) + .setMIFlags(MIFlags); } else { // Here we know that DestReg is not SP but we do not // know anything about BaseReg. t2ADDrr is an invalid @@ -270,10 +276,11 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, // is fine if SP is the first argument. To be sure we // do not generate invalid encoding, put BaseReg first. BuildMI(MBB, MBBI, dl, TII.get(ARM::t2ADDrr), DestReg) - .addReg(BaseReg) - .addReg(DestReg, RegState::Kill) - .addImm((unsigned)Pred).addReg(PredReg).addReg(0) - .setMIFlags(MIFlags); + .addReg(BaseReg) + .addReg(DestReg, RegState::Kill) + .add(predOps(Pred, PredReg)) + .add(condCodeOp()) + .setMIFlags(MIFlags); } return; } @@ -284,8 +291,10 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, unsigned Opc = 0; if (DestReg == ARM::SP && BaseReg != ARM::SP) { // mov sp, rn. Note t2MOVr cannot be used. - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),DestReg) - .addReg(BaseReg).setMIFlags(MIFlags)); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg) + .addReg(BaseReg) + .setMIFlags(MIFlags) + .add(predOps(ARMCC::AL)); BaseReg = ARM::SP; continue; } @@ -296,8 +305,11 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, if (DestReg == ARM::SP && (ThisVal < ((1 << 7)-1) * 4)) { assert((ThisVal & 3) == 0 && "Stack update is not multiple of 4?"); Opc = isSub ? ARM::tSUBspi : ARM::tADDspi; - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) - .addReg(BaseReg).addImm(ThisVal/4).setMIFlags(MIFlags)); + BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) + .addReg(BaseReg) + .addImm(ThisVal / 4) + .setMIFlags(MIFlags) + .add(predOps(ARMCC::AL)); NumBytes = 0; continue; } @@ -334,12 +346,13 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, } // Build the new ADD / SUB. - MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) - .addReg(BaseReg, RegState::Kill) - .addImm(ThisVal)).setMIFlags(MIFlags); + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) + .addReg(BaseReg, RegState::Kill) + .addImm(ThisVal) + .add(predOps(ARMCC::AL)) + .setMIFlags(MIFlags); if (HasCCOut) - AddDefaultCC(MIB); + MIB.add(condCodeOp()); BaseReg = DestReg; } @@ -474,7 +487,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, do MI.RemoveOperand(FrameRegIdx+1); while (MI.getNumOperands() > FrameRegIdx+1); MachineInstrBuilder MIB(*MI.getParent()->getParent(), &MI); - AddDefaultPred(MIB); + MIB.add(predOps(ARMCC::AL)); return true; } diff --git a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp index 8208e7e24770..c90475c28db7 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -10,20 +10,38 @@ #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMSubtarget.h" -#include "MCTargetDesc/ARMAddressingModes.h" +#include "MCTargetDesc/ARMBaseInfo.h" #include "Thumb2InstrInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/IR/Function.h" // To access Function attributes +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/Function.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <functional> +#include <iterator> #include <utility> + using namespace llvm; #define DEBUG_TYPE "t2-reduce-size" @@ -40,6 +58,7 @@ static cl::opt<int> ReduceLimitLdSt("t2-reduce-limit3", cl::init(-1), cl::Hidden); namespace { + /// ReduceTable - A static table with information on mapping from wide /// opcodes to narrow struct ReduceEntry { @@ -139,11 +158,12 @@ namespace { class Thumb2SizeReduce : public MachineFunctionPass { public: static char ID; - Thumb2SizeReduce(std::function<bool(const Function &)> Ftor); const Thumb2InstrInfo *TII; const ARMSubtarget *STI; + Thumb2SizeReduce(std::function<bool(const Function &)> Ftor); + bool runOnMachineFunction(MachineFunction &MF) override; MachineFunctionProperties getRequiredProperties() const override { @@ -201,19 +221,21 @@ namespace { struct MBBInfo { // The flags leaving this block have high latency. - bool HighLatencyCPSR; + bool HighLatencyCPSR = false; // Has this block been visited yet? - bool Visited; + bool Visited = false; - MBBInfo() : HighLatencyCPSR(false), Visited(false) {} + MBBInfo() = default; }; SmallVector<MBBInfo, 8> BlockInfo; std::function<bool(const Function &)> PredicateFtor; }; + char Thumb2SizeReduce::ID = 0; -} + +} // end anonymous namespace Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor) : MachineFunctionPass(ID), PredicateFtor(std::move(Ftor)) { @@ -490,14 +512,13 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, isLdStMul = true; break; } - case ARM::t2STMIA: { + case ARM::t2STMIA: // If the base register is killed, we don't care what its value is after the // instruction, so we can use an updating STMIA. if (!MI->getOperand(0).isKill()) return false; break; - } case ARM::t2LDMIA_RET: { unsigned BaseReg = MI->getOperand(1).getReg(); if (BaseReg != ARM::SP) @@ -562,8 +583,8 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, MIB.addReg(MI->getOperand(0).getReg(), RegState::Define | RegState::Dead); if (!isLdStMul) { - MIB.addOperand(MI->getOperand(0)); - MIB.addOperand(MI->getOperand(1)); + MIB.add(MI->getOperand(0)); + MIB.add(MI->getOperand(1)); if (HasImmOffset) MIB.addImm(OffsetImm / Scale); @@ -577,7 +598,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, // Transfer the rest of operands. for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum) - MIB.addOperand(MI->getOperand(OpNum)); + MIB.add(MI->getOperand(OpNum)); // Transfer memoperands. MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); @@ -621,12 +642,13 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR) return false; - MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), - TII->get(ARM::tADDrSPi)) - .addOperand(MI->getOperand(0)) - .addOperand(MI->getOperand(1)) - .addImm(Imm / 4); // The tADDrSPi has an implied scale by four. - AddDefaultPred(MIB); + MachineInstrBuilder MIB = + BuildMI(MBB, MI, MI->getDebugLoc(), + TII->get(ARM::tADDrSPi)) + .add(MI->getOperand(0)) + .add(MI->getOperand(1)) + .addImm(Imm / 4) // The tADDrSPi has an implied scale by four. + .add(predOps(ARMCC::AL)); // Transfer MI flags. MIB.setMIFlags(MI->getFlags()); @@ -652,11 +674,10 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, if (getInstrPredicate(*MI, PredReg) == ARMCC::AL) { switch (Opc) { default: break; - case ARM::t2ADDSri: { + case ARM::t2ADDSri: if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop)) return true; LLVM_FALLTHROUGH; - } case ARM::t2ADDSrr: return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop); } @@ -698,7 +719,6 @@ bool Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry, bool LiveCPSR, bool IsSelfLoop) { - if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr)) return false; @@ -785,13 +805,9 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, // Add the 16-bit instruction. DebugLoc dl = MI->getDebugLoc(); MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID); - MIB.addOperand(MI->getOperand(0)); - if (NewMCID.hasOptionalDef()) { - if (HasCC) - AddDefaultT1CC(MIB, CCDead); - else - AddNoT1CC(MIB); - } + MIB.add(MI->getOperand(0)); + if (NewMCID.hasOptionalDef()) + MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp()); // Transfer the rest of operands. unsigned NumOps = MCID.getNumOperands(); @@ -800,7 +816,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, continue; if (SkipPred && MCID.OpInfo[i].isPredicate()) continue; - MIB.addOperand(MI->getOperand(i)); + MIB.add(MI->getOperand(i)); } // Transfer MI flags. @@ -880,13 +896,9 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, // Add the 16-bit instruction. DebugLoc dl = MI->getDebugLoc(); MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID); - MIB.addOperand(MI->getOperand(0)); - if (NewMCID.hasOptionalDef()) { - if (HasCC) - AddDefaultT1CC(MIB, CCDead); - else - AddNoT1CC(MIB); - } + MIB.add(MI->getOperand(0)); + if (NewMCID.hasOptionalDef()) + MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp()); // Transfer the rest of operands. unsigned NumOps = MCID.getNumOperands(); @@ -909,10 +921,10 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, // Skip implicit def of CPSR. Either it's modeled as an optional // def now or it's already an implicit def on the new instruction. continue; - MIB.addOperand(MO); + MIB.add(MO); } if (!MCID.isPredicable() && NewMCID.isPredicable()) - AddDefaultPred(MIB); + MIB.add(predOps(ARMCC::AL)); // Transfer MI flags. MIB.setMIFlags(MI->getFlags()); diff --git a/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp index 2efd63b84a2c..15a567523336 100644 --- a/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp @@ -93,9 +93,10 @@ static void emitThumb2LoadConstPool(MachineBasicBlock &MBB, unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4); BuildMI(MBB, MBBI, dl, TII.get(ARM::t2LDRpci)) - .addReg(DestReg, getDefRegState(true), SubIdx) - .addConstantPoolIndex(Idx).addImm((int64_t)ARMCC::AL).addReg(0) - .setMIFlags(MIFlags); + .addReg(DestReg, getDefRegState(true), SubIdx) + .addConstantPoolIndex(Idx) + .add(predOps(ARMCC::AL)) + .setMIFlags(MIFlags); } /// emitLoadConstPool - Emits a load from constpool to materialize the @@ -145,14 +146,17 @@ static void emitThumbRegPlusImmInReg( LdReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass); if (NumBytes <= 255 && NumBytes >= 0 && CanChangeCC) { - AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg)) + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg) + .add(t1CondCodeOp()) .addImm(NumBytes) .setMIFlags(MIFlags); } else if (NumBytes < 0 && NumBytes >= -255 && CanChangeCC) { - AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg)) + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg) + .add(t1CondCodeOp()) .addImm(NumBytes) .setMIFlags(MIFlags); - AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tRSB), LdReg)) + BuildMI(MBB, MBBI, dl, TII.get(ARM::tRSB), LdReg) + .add(t1CondCodeOp()) .addReg(LdReg, RegState::Kill) .setMIFlags(MIFlags); } else if (ST.genExecuteOnly()) { @@ -167,12 +171,12 @@ static void emitThumbRegPlusImmInReg( : ((isHigh || !CanChangeCC) ? ARM::tADDhirr : ARM::tADDrr); MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg); if (Opc != ARM::tADDhirr) - MIB = AddDefaultT1CC(MIB); + MIB = MIB.add(t1CondCodeOp()); if (DestReg == ARM::SP || isSub) MIB.addReg(BaseReg).addReg(LdReg, RegState::Kill); else MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill); - AddDefaultPred(MIB); + MIB.add(predOps(ARMCC::AL)); } /// emitThumbRegPlusImmediate - Emits a series of instructions to materialize @@ -307,12 +311,12 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(CopyOpc), DestReg); if (CopyNeedsCC) - MIB = AddDefaultT1CC(MIB); + MIB = MIB.add(t1CondCodeOp()); MIB.addReg(BaseReg, RegState::Kill); if (CopyOpc != ARM::tMOVr) { MIB.addImm(CopyImm); } - AddDefaultPred(MIB.setMIFlags(MIFlags)); + MIB.setMIFlags(MIFlags).add(predOps(ARMCC::AL)); BaseReg = DestReg; } @@ -324,10 +328,11 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(ExtraOpc), DestReg); if (ExtraNeedsCC) - MIB = AddDefaultT1CC(MIB); - MIB.addReg(BaseReg).addImm(ExtraImm); - MIB = AddDefaultPred(MIB); - MIB.setMIFlags(MIFlags); + MIB = MIB.add(t1CondCodeOp()); + MIB.addReg(BaseReg) + .addImm(ExtraImm) + .add(predOps(ARMCC::AL)) + .setMIFlags(MIFlags); } } @@ -460,9 +465,10 @@ bool ThumbRegisterInfo::saveScavengerRegister( // a call clobbered register that we know won't be used in Thumb1 mode. const TargetInstrInfo &TII = *STI.getInstrInfo(); DebugLoc DL; - AddDefaultPred(BuildMI(MBB, I, DL, TII.get(ARM::tMOVr)) - .addReg(ARM::R12, RegState::Define) - .addReg(Reg, RegState::Kill)); + BuildMI(MBB, I, DL, TII.get(ARM::tMOVr)) + .addReg(ARM::R12, RegState::Define) + .addReg(Reg, RegState::Kill) + .add(predOps(ARMCC::AL)); // The UseMI is where we would like to restore the register. If there's // interference with R12 before then, however, we'll need to restore it @@ -490,8 +496,10 @@ bool ThumbRegisterInfo::saveScavengerRegister( } } // Restore the register from R12 - AddDefaultPred(BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVr)). - addReg(Reg, RegState::Define).addReg(ARM::R12, RegState::Kill)); + BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVr)) + .addReg(Reg, RegState::Define) + .addReg(ARM::R12, RegState::Kill) + .add(predOps(ARMCC::AL)); return true; } @@ -621,5 +629,5 @@ void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Add predicate back if it's needed. if (MI.isPredicable()) - AddDefaultPred(MIB); + MIB.add(predOps(ARMCC::AL)); } |