Diffstat (limited to 'lib/Target/ARM')
50 files changed, 5013 insertions, 2484 deletions
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index 14825a785649..271ca44c2b69 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -30,22 +30,22 @@ class formatted_raw_ostream; namespace ARMCC { // The CondCodes constants map directly to the 4-bit encoding of the // condition field for predicated instructions. - enum CondCodes { - EQ, - NE, - HS, - LO, - MI, - PL, - VS, - VC, - HI, - LS, - GE, - LT, - GT, - LE, - AL + enum CondCodes { // Meaning (integer) Meaning (floating-point) + EQ, // Equal Equal + NE, // Not equal Not equal, or unordered + HS, // Carry set >, ==, or unordered + LO, // Carry clear Less than + MI, // Minus, negative Less than + PL, // Plus, positive or zero >, ==, or unordered + VS, // Overflow Unordered + VC, // No overflow Not unordered + HI, // Unsigned higher Greater than, or unordered + LS, // Unsigned lower or same Less than or equal + GE, // Greater than or equal Greater than or equal + LT, // Less than Less than, or unordered + GT, // Greater than Greater than + LE, // Less than or equal <, ==, or unordered + AL // Always (unconditional) Always (unconditional) }; inline static CondCodes getOppositeCondition(CondCodes CC) { @@ -90,6 +90,33 @@ inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) { } } +namespace ARM_MB { + // The Memory Barrier Option constants map directly to the 4-bit encoding of + // the option field for memory barrier operations. + enum MemBOpt { + ST = 14, + ISH = 11, + ISHST = 10, + NSH = 7, + NSHST = 6, + OSH = 3, + OSHST = 2 + }; + + inline static const char *MemBOptToString(unsigned val) { + switch (val) { + default: llvm_unreachable("Unknown memory opetion"); + case ST: return "st"; + case ISH: return "ish"; + case ISHST: return "ishst"; + case NSH: return "nsh"; + case NSHST: return "nshst"; + case OSH: return "osh"; + case OSHST: return "oshst"; + } + } +} // namespace ARM_MB + FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM, CodeGenOpt::Level OptLevel); @@ -98,6 +125,7 @@ FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM, FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false); FunctionPass *createARMExpandPseudoPass(); +FunctionPass *createARMGlobalMergePass(const TargetLowering* tli); FunctionPass *createARMConstantIslandPass(); FunctionPass *createNEONPreAllocPass(); FunctionPass *createNEONMoveFixPass(); diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index fa64d6c2a4b4..d6a8f19724dc 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -1,4 +1,4 @@ -//===- ARM.td - Describe the ARM Target Machine -----------------*- C++ -*-===// +//===- ARM.td - Describe the ARM Target Machine ------------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -20,20 +20,6 @@ include "llvm/Target/Target.td" // ARM Subtarget features. 
// -def ArchV4T : SubtargetFeature<"v4t", "ARMArchVersion", "V4T", - "ARM v4T">; -def ArchV5T : SubtargetFeature<"v5t", "ARMArchVersion", "V5T", - "ARM v5T">; -def ArchV5TE : SubtargetFeature<"v5te", "ARMArchVersion", "V5TE", - "ARM v5TE, v5TEj, v5TExp">; -def ArchV6 : SubtargetFeature<"v6", "ARMArchVersion", "V6", - "ARM v6">; -def ArchV6T2 : SubtargetFeature<"v6t2", "ARMArchVersion", "V6T2", - "ARM v6t2">; -def ArchV7A : SubtargetFeature<"v7a", "ARMArchVersion", "V7A", - "ARM v7A">; -def ArchV7M : SubtargetFeature<"v7m", "ARMArchVersion", "V7M", - "ARM v7M">; def FeatureVFP2 : SubtargetFeature<"vfp2", "ARMFPUType", "VFPv2", "Enable VFP2 instructions">; def FeatureVFP3 : SubtargetFeature<"vfp3", "ARMFPUType", "VFPv3", @@ -42,14 +28,20 @@ def FeatureNEON : SubtargetFeature<"neon", "ARMFPUType", "NEON", "Enable NEON instructions">; def FeatureThumb2 : SubtargetFeature<"thumb2", "ThumbMode", "Thumb2", "Enable Thumb2 instructions">; +def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true", + "Does not support ARM mode execution">; def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true", "Enable half-precision floating point">; def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true", "Enable divide instructions">; -def FeatureT2ExtractPack: SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true", +def FeatureT2XtPk : SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true", "Enable Thumb2 extract and pack instructions">; +def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true", + "Has data barrier (dmb / dsb) instructions">; def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true", "FP compare + branch is slow">; +def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true", + "Floating point unit supports single precision only">; // Some processors have multiply-accumulate instructions that don't // play nicely with other VFP instructions, and it's generally better @@ -57,14 +49,41 @@ def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true", // FIXME: Currently, this is only flagged for Cortex-A8. It may be true for // others as well. We should do more benchmarking and confirm one way or // the other. -def FeatureHasSlowVMLx : SubtargetFeature<"vmlx", "SlowVMLx", "true", - "Disable VFP MAC instructions">; +def FeatureHasSlowVMLx : SubtargetFeature<"vmlx", "SlowVMLx", "true", + "Disable VFP MAC instructions">; // Some processors benefit from using NEON instructions for scalar // single-precision FP operations. def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP", "true", "Use NEON for single precision FP">; +// Disable 32-bit to 16-bit narrowing for experimentation. +def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true", + "Prefer 32-bit Thumb instrs">; + + +// ARM architectures. 
+def ArchV4T : SubtargetFeature<"v4t", "ARMArchVersion", "V4T", + "ARM v4T">; +def ArchV5T : SubtargetFeature<"v5t", "ARMArchVersion", "V5T", + "ARM v5T">; +def ArchV5TE : SubtargetFeature<"v5te", "ARMArchVersion", "V5TE", + "ARM v5TE, v5TEj, v5TExp">; +def ArchV6 : SubtargetFeature<"v6", "ARMArchVersion", "V6", + "ARM v6">; +def ArchV6M : SubtargetFeature<"v6m", "ARMArchVersion", "V6M", + "ARM v6m", + [FeatureNoARM, FeatureDB]>; +def ArchV6T2 : SubtargetFeature<"v6t2", "ARMArchVersion", "V6T2", + "ARM v6t2", + [FeatureThumb2]>; +def ArchV7A : SubtargetFeature<"v7a", "ARMArchVersion", "V7A", + "ARM v7A", + [FeatureThumb2, FeatureNEON, FeatureDB]>; +def ArchV7M : SubtargetFeature<"v7m", "ARMArchVersion", "V7M", + "ARM v7M", + [FeatureThumb2, FeatureNoARM, FeatureDB, + FeatureHWDiv]>; //===----------------------------------------------------------------------===// // ARM Processors supported. @@ -122,20 +141,23 @@ def : Processor<"arm1176jzf-s", ARMV6Itineraries, [ArchV6, FeatureVFP2]>; def : Processor<"mpcorenovfp", ARMV6Itineraries, [ArchV6]>; def : Processor<"mpcore", ARMV6Itineraries, [ArchV6, FeatureVFP2]>; +// V6M Processors. +def : Processor<"cortex-m0", ARMV6Itineraries, [ArchV6M]>; + // V6T2 Processors. -def : Processor<"arm1156t2-s", ARMV6Itineraries, - [ArchV6T2, FeatureThumb2]>; -def : Processor<"arm1156t2f-s", ARMV6Itineraries, - [ArchV6T2, FeatureThumb2, FeatureVFP2]>; +def : Processor<"arm1156t2-s", ARMV6Itineraries, [ArchV6T2]>; +def : Processor<"arm1156t2f-s", ARMV6Itineraries, [ArchV6T2, FeatureVFP2]>; // V7 Processors. def : Processor<"cortex-a8", CortexA8Itineraries, - [ArchV7A, FeatureThumb2, FeatureNEON, FeatureHasSlowVMLx, - FeatureSlowFPBrcc, FeatureNEONForFP, FeatureT2ExtractPack]>; + [ArchV7A, FeatureHasSlowVMLx, + FeatureSlowFPBrcc, FeatureNEONForFP, FeatureT2XtPk]>; def : Processor<"cortex-a9", CortexA9Itineraries, - [ArchV7A, FeatureThumb2, FeatureNEON, FeatureT2ExtractPack]>; -def : ProcNoItin<"cortex-m3", [ArchV7M, FeatureThumb2, FeatureHWDiv]>; -def : ProcNoItin<"cortex-m4", [ArchV7M, FeatureThumb2, FeatureHWDiv]>; + [ArchV7A, FeatureT2XtPk]>; + +// V7M Processors. +def : ProcNoItin<"cortex-m3", [ArchV7M]>; +def : ProcNoItin<"cortex-m4", [ArchV7M, FeatureVFP2, FeatureVFPOnlySP]>; //===----------------------------------------------------------------------===// // Register File Description diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h index 92a13f1d751c..db481005b3a4 100644 --- a/lib/Target/ARM/ARMAddressingModes.h +++ b/lib/Target/ARM/ARMAddressingModes.h @@ -458,6 +458,7 @@ namespace ARM_AM { // IB - Increment before // DA - Decrement after // DB - Decrement before + // For VFP instructions, only the IA and DB modes are valid. static inline AMSubMode getAM4SubMode(unsigned Mode) { return (AMSubMode)(Mode & 0x7); @@ -477,14 +478,6 @@ namespace ARM_AM { // // The first operand is always a Reg. The second operand encodes the // operation in bit 8 and the immediate in bits 0-7. - // - // This is also used for FP load/store multiple ops. The second operand - // encodes the number of registers (or 2 times the number of registers - // for DPR ops) in bits 0-7. In addition, bits 8-10 encode one of the - // following two sub-modes: - // - // IA - Increment after - // DB - Decrement before /// getAM5Opc - This function encodes the addrmode5 opc field. static inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) { @@ -498,17 +491,6 @@ namespace ARM_AM { return ((AM5Opc >> 8) & 1) ? 
sub : add; } - /// getAM5Opc - This function encodes the addrmode5 opc field for VLDM and - /// VSTM instructions. - static inline unsigned getAM5Opc(AMSubMode SubMode, unsigned char Offset) { - assert((SubMode == ia || SubMode == db) && - "Illegal addressing mode 5 sub-mode!"); - return ((int)SubMode << 8) | Offset; - } - static inline AMSubMode getAM5SubMode(unsigned AM5Opc) { - return (AMSubMode)((AM5Opc >> 8) & 0x7); - } - //===--------------------------------------------------------------------===// // Addressing Mode #6 //===--------------------------------------------------------------------===// diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 946f4744f5bb..6cfd5961149f 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -17,7 +17,7 @@ #include "ARMBuildAttrs.h" #include "ARMAddressingModes.h" #include "ARMConstantPoolValue.h" -#include "ARMInstPrinter.h" +#include "AsmPrinter/ARMInstPrinter.h" #include "ARMMachineFunctionInfo.h" #include "ARMMCInstLower.h" #include "ARMTargetMachine.h" @@ -47,6 +47,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include <cctype> @@ -56,6 +57,15 @@ static cl::opt<bool> EnableMCInst("enable-arm-mcinst-printer", cl::Hidden, cl::desc("enable experimental asmprinter gunk in the arm backend")); +namespace llvm { + namespace ARM { + enum DW_ISA { + DW_ISA_ARM_thumb = 1, + DW_ISA_ARM_arm = 2 + }; + } +} + namespace { class ARMAsmPrinter : public AsmPrinter { @@ -80,9 +90,9 @@ namespace { virtual const char *getPassName() const { return "ARM Assembly Printer"; } - + void printInstructionThroughMCStreamer(const MachineInstr *MI); - + void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O, const char *Modifier = 0); @@ -110,8 +120,12 @@ namespace { void printAddrModePCOperand(const MachineInstr *MI, int OpNum, raw_ostream &O, const char *Modifier = 0); - void printBitfieldInvMaskImmOperand (const MachineInstr *MI, int OpNum, - raw_ostream &O); + void printBitfieldInvMaskImmOperand(const MachineInstr *MI, int OpNum, + raw_ostream &O); + void printMemBOption(const MachineInstr *MI, int OpNum, + raw_ostream &O); + void printShiftImmOperand(const MachineInstr *MI, int OpNum, + raw_ostream &O); void printThumbS4ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O); @@ -190,12 +204,32 @@ namespace { virtual void EmitInstruction(const MachineInstr *MI); bool runOnMachineFunction(MachineFunction &F); - + virtual void EmitConstantPool() {} // we emit constant pools customly! virtual void EmitFunctionEntryLabel(); void EmitStartOfAsmFile(Module &M); void EmitEndOfAsmFile(Module &M); + MachineLocation getDebugValueLocation(const MachineInstr *MI) const { + MachineLocation Location; + assert (MI->getNumOperands() == 4 && "Invalid no. of machine operands!"); + // Frame address. Currently handles register +- offset only. + if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm()) + Location.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm()); + else { + DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n"); + } + return Location; + } + + virtual unsigned getISAEncoding() { + // ARM/Darwin adds ISA to the DWARF info for each function. + if (!Subtarget->isTargetDarwin()) + return 0; + return Subtarget->isThumb() ? 
+ llvm::ARM::DW_ISA_ARM_thumb : llvm::ARM::DW_ISA_ARM_arm; + } + MCSymbol *GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2, const MachineBasicBlock *MBB) const; MCSymbol *GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const; @@ -208,7 +242,7 @@ namespace { EmitMachineConstantPoolValue(MCPV, OS); OutStreamer.EmitRawText(OS.str()); } - + void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV, raw_ostream &O) { switch (TM.getTargetData()->getTypeAllocSize(MCPV->getType())) { @@ -234,7 +268,7 @@ namespace { // FIXME: Remove this when Darwin transition to @GOT like syntax. MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); O << *Sym; - + MachineModuleInfoMachO &MMIMachO = MMI->getObjFileInfo<MachineModuleInfoMachO>(); MachineModuleInfoImpl::StubValueTy &StubSym = @@ -278,7 +312,7 @@ void ARMAsmPrinter::EmitFunctionEntryLabel() { OutStreamer.EmitRawText(OS.str()); } } - + OutStreamer.EmitLabel(CurrentFnSym); } @@ -358,7 +392,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, case MachineOperand::MO_ExternalSymbol: { bool isCallOp = Modifier && !strcmp(Modifier, "call"); O << *GetExternalSymbolSymbol(MO.getSymbolName()); - + if (isCallOp && Subtarget->isTargetELF() && TM.getRelocationModel() == Reloc::PIC_) O << "(PLT)"; @@ -438,15 +472,13 @@ void ARMAsmPrinter::printSORegOperand(const MachineInstr *MI, int Op, O << getRegisterName(MO1.getReg()); // Print the shift opc. - O << ", " - << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm())) - << " "; - + ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO3.getImm()); + O << ", " << ARM_AM::getShiftOpcStr(ShOpc); if (MO2.getReg()) { - O << getRegisterName(MO2.getReg()); + O << ' ' << getRegisterName(MO2.getReg()); assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0); - } else { - O << "#" << ARM_AM::getSORegOffset(MO3.getImm()); + } else if (ShOpc != ARM_AM::rrx) { + O << " #" << ARM_AM::getSORegOffset(MO3.getImm()); } } @@ -575,16 +607,6 @@ void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op, assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg())); - if (Modifier && strcmp(Modifier, "submode") == 0) { - ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MO2.getImm()); - O << ARM_AM::getAMSubModeStr(Mode); - return; - } else if (Modifier && strcmp(Modifier, "base") == 0) { - // Used for FSTM{D|S} and LSTM{D|S} operations. 
- O << getRegisterName(MO1.getReg()); - return; - } - O << "[" << getRegisterName(MO1.getReg()); if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) { @@ -641,6 +663,32 @@ ARMAsmPrinter::printBitfieldInvMaskImmOperand(const MachineInstr *MI, int Op, O << "#" << lsb << ", #" << width; } +void +ARMAsmPrinter::printMemBOption(const MachineInstr *MI, int OpNum, + raw_ostream &O) { + unsigned val = MI->getOperand(OpNum).getImm(); + O << ARM_MB::MemBOptToString(val); +} + +void ARMAsmPrinter::printShiftImmOperand(const MachineInstr *MI, int OpNum, + raw_ostream &O) { + unsigned ShiftOp = MI->getOperand(OpNum).getImm(); + ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp); + switch (Opc) { + case ARM_AM::no_shift: + return; + case ARM_AM::lsl: + O << ", lsl #"; + break; + case ARM_AM::asr: + O << ", asr #"; + break; + default: + assert(0 && "unexpected shift opcode for shift immediate operand"); + } + O << ARM_AM::getSORegOffset(ShiftOp); +} + //===--------------------------------------------------------------------===// void ARMAsmPrinter::printThumbS4ImmOperand(const MachineInstr *MI, int Op, @@ -737,12 +785,11 @@ void ARMAsmPrinter::printT2SOOperand(const MachineInstr *MI, int OpNum, O << getRegisterName(Reg); // Print the shift opc. - O << ", " - << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm())) - << " "; - assert(MO2.isImm() && "Not a valid t2_so_reg value!"); - O << "#" << ARM_AM::getSORegOffset(MO2.getImm()); + ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm()); + O << ", " << ARM_AM::getShiftOpcStr(ShOpc); + if (ShOpc != ARM_AM::rrx) + O << " #" << ARM_AM::getSORegOffset(MO2.getImm()); } void ARMAsmPrinter::printT2AddrModeImm12Operand(const MachineInstr *MI, @@ -916,12 +963,12 @@ void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNum, const MachineOperand &MO1 = MI->getOperand(OpNum); const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id - + unsigned JTI = MO1.getIndex(); MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm()); // Can't use EmitLabel until instprinter happens, label comes out in the wrong // order. - O << *JTISymbol << ":\n"; + O << "\n" << *JTISymbol << ":\n"; const char *JTEntryDirective = MAI->getData32bitsDirective(); @@ -958,12 +1005,12 @@ void ARMAsmPrinter::printJT2BlockOperand(const MachineInstr *MI, int OpNum, const MachineOperand &MO1 = MI->getOperand(OpNum); const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id unsigned JTI = MO1.getIndex(); - + MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm()); - + // Can't use EmitLabel until instprinter happens, label comes out in the wrong // order. 
- O << *JTISymbol << ":\n"; + O << "\n" << *JTISymbol << ":\n"; const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); @@ -980,7 +1027,7 @@ void ARMAsmPrinter::printJT2BlockOperand(const MachineInstr *MI, int OpNum, O << MAI->getData8bitsDirective(); else if (HalfWordOffset) O << MAI->getData16bitsDirective(); - + if (ByteOffset || HalfWordOffset) O << '(' << *MBB->getSymbol() << "-" << *JTISymbol << ")/2"; else @@ -1086,10 +1133,10 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { printInstructionThroughMCStreamer(MI); return; } - + if (MI->getOpcode() == ARM::CONSTPOOL_ENTRY) EmitAlignment(2); - + SmallString<128> Str; raw_svector_ostream OS(Str); if (MI->getOpcode() == ARM::DBG_VALUE) { @@ -1112,7 +1159,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { printInstruction(MI, OS); OutStreamer.EmitRawText(OS.str()); - + // Make sure the instruction that follows TBB is 2-byte aligned. // FIXME: Constant island pass should insert an "ALIGN" instruction instead. if (MI->getOpcode() == ARM::t2TBB) @@ -1129,7 +1176,7 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { // avoid out-of-range branches that are due a fundamental limitation of // the way symbol offsets are encoded with the current Darwin ARM // relocations. - const TargetLoweringObjectFileMachO &TLOFMacho = + const TargetLoweringObjectFileMachO &TLOFMacho = static_cast<const TargetLoweringObjectFileMachO &>( getObjFileLowering()); OutStreamer.SwitchSection(TLOFMacho.getTextSection()); @@ -1148,6 +1195,12 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { 16, SectionKind::getText()); OutStreamer.SwitchSection(sect); } + const MCSection *StaticInitSect = + OutContext.getMachOSection("__TEXT", "__StaticInit", + MCSectionMachO::S_REGULAR | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + SectionKind::getText()); + OutStreamer.SwitchSection(StaticInitSect); } } @@ -1173,8 +1226,8 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { OutStreamer.EmitRawText("\t.eabi_attribute " + Twine(ARMBuildAttrs::ABI_FP_exceptions) + ", 1"); } - - if (FiniteOnlyFPMath()) + + if (NoInfsFPMath && NoNaNsFPMath) OutStreamer.EmitRawText("\t.eabi_attribute " + Twine(ARMBuildAttrs::ABI_FP_number_model)+ ", 1"); else @@ -1280,7 +1333,7 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { // LPC0: // add r0, pc, r0 // This adds the address of LPC0 to r0. - + // Emit the label. // FIXME: MOVE TO SHARED PLACE. unsigned Id = (unsigned)MI->getOperand(2).getImm(); @@ -1288,8 +1341,8 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { MCSymbol *Label =OutContext.GetOrCreateSymbol(Twine(Prefix) + "PC" + Twine(getFunctionNumber()) + "_" + Twine(Id)); OutStreamer.EmitLabel(Label); - - + + // Form and emit tha dd. MCInst AddInst; AddInst.setOpcode(ARM::ADDrr); @@ -1315,7 +1368,7 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal); else EmitGlobalConstant(MCPE.Val.ConstVal); - + return; } case ARM::MOVi2pieces: { // FIXME: Remove asmstring from td file. 
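For context on the MOVi2pieces lowering in the next hunk: an ARM data-processing immediate is an 8-bit value rotated right by an even amount, and MOVi2pieces handles 32-bit constants that fail that test but split into two values that pass it. Below is a minimal standalone sketch of the predicate (an illustration of the encoding rule, not the actual ARM_AM::getSOImmVal helper):

#include <cstdint>

// True if Imm is an ARM "modified immediate": 8 bits rotated right by an
// even amount. getSOImmTwoPartFirst/Second (used in the lowering below)
// produce two values that each satisfy this when the whole constant does not.
static bool isARMModifiedImm(uint32_t Imm) {
  for (unsigned R = 0; R < 32; R += 2) {
    // Rotating left by R undoes a rotate-right by R; guard R == 0 to avoid
    // an out-of-range shift.
    uint32_t Undone = R ? ((Imm << R) | (Imm >> (32 - R))) : Imm;
    if (Undone <= 0xFF)
      return true;
  }
  return false;
}

// e.g. 0x00AB00CD fails, but splits as 0x00AB0000 (MOVi) + 0x000000CD (ORRri).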
@@ -1325,13 +1378,13 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { unsigned SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal); unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal); - + { MCInst TmpInst; TmpInst.setOpcode(ARM::MOVi); TmpInst.addOperand(MCOperand::CreateReg(DstReg)); TmpInst.addOperand(MCOperand::CreateImm(SOImmValV1)); - + // Predicate. TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm())); TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg())); @@ -1349,11 +1402,11 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { // Predicate. TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm())); TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg())); - + TmpInst.addOperand(MCOperand::CreateReg(0)); // cc_out OutStreamer.EmitInstruction(TmpInst); } - return; + return; } case ARM::MOVi32imm: { // FIXME: Remove asmstring from td file. // This is a hack that lowers as a two instruction sequence. @@ -1384,32 +1437,32 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { TmpInst.setOpcode(ARM::MOVi16); TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // dstreg TmpInst.addOperand(V1); // lower16(imm) - + // Predicate. TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm())); TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg())); - + OutStreamer.EmitInstruction(TmpInst); } - + { MCInst TmpInst; TmpInst.setOpcode(ARM::MOVTi16); TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // dstreg TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // srcreg TmpInst.addOperand(V2); // upper16(imm) - + // Predicate. TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm())); TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg())); - + OutStreamer.EmitInstruction(TmpInst); } - + return; } } - + MCInst TmpInst; MCInstLowering.Lower(MI, TmpInst); OutStreamer.EmitInstruction(TmpInst); diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 49c16f3e0720..3a8bebe0dd24 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -15,9 +15,9 @@ #include "ARM.h" #include "ARMAddressingModes.h" #include "ARMConstantPoolValue.h" -#include "ARMGenInstrInfo.inc" #include "ARMMachineFunctionInfo.h" #include "ARMRegisterInfo.h" +#include "ARMGenInstrInfo.inc" #include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/GlobalValue.h" @@ -501,7 +501,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { llvm_unreachable("Unknown or unset size field for instr!"); case TargetOpcode::IMPLICIT_DEF: case TargetOpcode::KILL: - case TargetOpcode::DBG_LABEL: + case TargetOpcode::PROLOG_LABEL: case TargetOpcode::EH_LABEL: case TargetOpcode::DBG_VALUE: return 0; @@ -573,48 +573,6 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { return 0; // Not reached } -/// Return true if the instruction is a register to register move and -/// leave the source and dest operands in the passed parameters. 
-/// -bool -ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned& SrcSubIdx, unsigned& DstSubIdx) const { - switch (MI.getOpcode()) { - default: break; - case ARM::VMOVS: - case ARM::VMOVD: - case ARM::VMOVDneon: - case ARM::VMOVQ: - case ARM::VMOVQQ : { - SrcReg = MI.getOperand(1).getReg(); - DstReg = MI.getOperand(0).getReg(); - SrcSubIdx = MI.getOperand(1).getSubReg(); - DstSubIdx = MI.getOperand(0).getSubReg(); - return true; - } - case ARM::MOVr: - case ARM::MOVr_TC: - case ARM::tMOVr: - case ARM::tMOVgpr2tgpr: - case ARM::tMOVtgpr2gpr: - case ARM::tMOVgpr2gpr: - case ARM::t2MOVr: { - assert(MI.getDesc().getNumOperands() >= 2 && - MI.getOperand(0).isReg() && - MI.getOperand(1).isReg() && - "Invalid ARM MOV instruction"); - SrcReg = MI.getOperand(1).getReg(); - DstReg = MI.getOperand(0).getReg(); - SrcSubIdx = MI.getOperand(1).getSubReg(); - DstSubIdx = MI.getOperand(0).getSubReg(); - return true; - } - } - - return false; -} - unsigned ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { @@ -763,8 +721,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Align); // tGPR is used sometimes in ARM instructions that need to avoid using - // certain registers. Just treat it as GPR here. - if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass) + // certain registers. Just treat it as GPR here. Likewise, rGPR. + if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass + || RC == ARM::rGPRRegisterClass) RC = ARM::GPRRegisterClass; switch (RC->getID()) { @@ -798,7 +757,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQ)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI) - .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)) + .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)) .addMemOperand(MMO)); } break; @@ -818,7 +777,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineInstrBuilder MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD)) .addFrameIndex(FI) - .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))) + .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))) .addMemOperand(MMO); MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); @@ -830,7 +789,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineInstrBuilder MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD)) .addFrameIndex(FI) - .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))) + .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))) .addMemOperand(MMO); MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); @@ -865,7 +824,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, // tGPR is used sometimes in ARM instructions that need to avoid using // certain registers. Just treat it as GPR here. 
- if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass) + if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass + || RC == ARM::rGPRRegisterClass) RC = ARM::GPRRegisterClass; switch (RC->getID()) { @@ -893,7 +853,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, } else { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQ), DestReg) .addFrameIndex(FI) - .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)) + .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)) .addMemOperand(MMO)); } break; @@ -910,7 +870,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineInstrBuilder MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD)) .addFrameIndex(FI) - .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))) + .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))) .addMemOperand(MMO); MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI); @@ -922,7 +882,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineInstrBuilder MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD)) .addFrameIndex(FI) - .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))) + .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))) .addMemOperand(MMO); MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI); @@ -963,6 +923,11 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) { unsigned PCLabelId = AFI->createConstPoolEntryUId(); ARMConstantPoolValue *NewCPV = 0; + // FIXME: The below assumes PIC relocation model and that the function + // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and + // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR + // instructions, so that's probably OK, but is PIC always correct when + // we get here? if (ACPV->isGlobalValue()) NewCPV = new ARMConstantPoolValue(ACPV->getGV(), PCLabelId, ARMCP::CPValue, 4); @@ -972,6 +937,9 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) { else if (ACPV->isBlockAddress()) NewCPV = new ARMConstantPoolValue(ACPV->getBlockAddress(), PCLabelId, ARMCP::CPBlockAddress, 4); + else if (ACPV->isLSDA()) + NewCPV = new ARMConstantPoolValue(MF.getFunction(), PCLabelId, + ARMCP::CPLSDA, 4); else llvm_unreachable("Unexpected ARM constantpool value type!!"); CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment()); @@ -1393,3 +1361,63 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, Offset = (isSub) ? -Offset : Offset; return Offset == 0; } + +bool ARMBaseInstrInfo:: +AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, int &CmpValue) const { + switch (MI->getOpcode()) { + default: break; + case ARM::CMPri: + case ARM::CMPzri: + case ARM::t2CMPri: + case ARM::t2CMPzri: + SrcReg = MI->getOperand(0).getReg(); + CmpValue = MI->getOperand(1).getImm(); + return true; + } + + return false; +} + +/// ConvertToSetZeroFlag - Convert the instruction to set the "zero" flag so +/// that we can remove a "comparison with zero". +bool ARMBaseInstrInfo:: +ConvertToSetZeroFlag(MachineInstr *MI, MachineInstr *CmpInstr) const { + // Conservatively refuse to convert an instruction which isn't in the same BB + // as the comparison. + if (MI->getParent() != CmpInstr->getParent()) + return false; + + // Check that CPSR isn't set between the comparison instruction and the one we + // want to change. 
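+ // For example, given "sub r0, r0, #1" later followed by "cmp r0, #0", the
+ // backward walk below must find no other CPSR def in between; the SUB can
+ // then become a CPSR-setting SUBS and the CMP is deleted.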
+ MachineBasicBlock::const_iterator I = CmpInstr, E = MI; + --I; + for (; I != E; --I) { + const MachineInstr &Instr = *I; + + for (unsigned IO = 0, EO = Instr.getNumOperands(); IO != EO; ++IO) { + const MachineOperand &MO = Instr.getOperand(IO); + if (!MO.isReg() || !MO.isDef()) continue; + + // This instruction modifies CPSR before the one we want to change. We + // can't do this transformation. + if (MO.getReg() == ARM::CPSR) + return false; + } + } + + // Set the "zero" bit in CPSR. + switch (MI->getOpcode()) { + default: break; + case ARM::ADDri: + case ARM::SUBri: + case ARM::t2ADDri: + case ARM::t2SUBri: + MI->RemoveOperand(5); + MachineInstrBuilder(MI) + .addReg(ARM::CPSR, RegState::Define | RegState::Implicit); + CmpInstr->eraseFromParent(); + return true; + } + + return false; +} diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 89a2db74a75e..b4f4a33a70ad 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -15,11 +15,12 @@ #define ARMBASEINSTRUCTIONINFO_H #include "ARM.h" -#include "ARMRegisterInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/Target/TargetInstrInfo.h" namespace llvm { + class ARMSubtarget; + class ARMBaseRegisterInfo; /// ARMII - This namespace holds all of the target specific flags that /// instruction info tracks. @@ -97,44 +98,45 @@ namespace ARMII { // Miscellaneous arithmetic instructions ArithMiscFrm = 12 << FormShift, + SatFrm = 13 << FormShift, // Extend instructions - ExtFrm = 13 << FormShift, + ExtFrm = 14 << FormShift, // VFP formats - VFPUnaryFrm = 14 << FormShift, - VFPBinaryFrm = 15 << FormShift, - VFPConv1Frm = 16 << FormShift, - VFPConv2Frm = 17 << FormShift, - VFPConv3Frm = 18 << FormShift, - VFPConv4Frm = 19 << FormShift, - VFPConv5Frm = 20 << FormShift, - VFPLdStFrm = 21 << FormShift, - VFPLdStMulFrm = 22 << FormShift, - VFPMiscFrm = 23 << FormShift, + VFPUnaryFrm = 15 << FormShift, + VFPBinaryFrm = 16 << FormShift, + VFPConv1Frm = 17 << FormShift, + VFPConv2Frm = 18 << FormShift, + VFPConv3Frm = 19 << FormShift, + VFPConv4Frm = 20 << FormShift, + VFPConv5Frm = 21 << FormShift, + VFPLdStFrm = 22 << FormShift, + VFPLdStMulFrm = 23 << FormShift, + VFPMiscFrm = 24 << FormShift, // Thumb format - ThumbFrm = 24 << FormShift, + ThumbFrm = 25 << FormShift, // Miscelleaneous format - MiscFrm = 25 << FormShift, + MiscFrm = 26 << FormShift, // NEON formats - NGetLnFrm = 26 << FormShift, - NSetLnFrm = 27 << FormShift, - NDupFrm = 28 << FormShift, - NLdStFrm = 29 << FormShift, - N1RegModImmFrm= 30 << FormShift, - N2RegFrm = 31 << FormShift, - NVCVTFrm = 32 << FormShift, - NVDupLnFrm = 33 << FormShift, - N2RegVShLFrm = 34 << FormShift, - N2RegVShRFrm = 35 << FormShift, - N3RegFrm = 36 << FormShift, - N3RegVShFrm = 37 << FormShift, - NVExtFrm = 38 << FormShift, - NVMulSLFrm = 39 << FormShift, - NVTBLFrm = 40 << FormShift, + NGetLnFrm = 27 << FormShift, + NSetLnFrm = 28 << FormShift, + NDupFrm = 29 << FormShift, + NLdStFrm = 30 << FormShift, + N1RegModImmFrm= 31 << FormShift, + N2RegFrm = 32 << FormShift, + NVCVTFrm = 33 << FormShift, + NVDupLnFrm = 34 << FormShift, + N2RegVShLFrm = 35 << FormShift, + N2RegVShRFrm = 36 << FormShift, + N3RegFrm = 37 << FormShift, + N3RegVShFrm = 38 << FormShift, + NVExtFrm = 39 << FormShift, + NVMulSLFrm = 40 << FormShift, + NVTBLFrm = 41 << FormShift, //===------------------------------------------------------------------===// // Misc flags. 
@@ -198,7 +200,7 @@ namespace ARMII { } class ARMBaseInstrInfo : public TargetInstrInfoImpl { - const ARMSubtarget& Subtarget; + const ARMSubtarget &Subtarget; protected: // Can be only subclassed. explicit ARMBaseInstrInfo(const ARMSubtarget &STI); @@ -223,7 +225,7 @@ public: virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, - bool AllowModify) const; + bool AllowModify = false) const; virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, @@ -262,12 +264,6 @@ public: /// virtual unsigned GetInstSizeInBytes(const MachineInstr* MI) const; - /// Return true if the instruction is a register to register move and return - /// the source and dest operands and their sub-register indices by reference. - virtual bool isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const; - virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; virtual unsigned isStoreToStackSlot(const MachineInstr *MI, @@ -341,6 +337,17 @@ public: unsigned NumInstrs) const { return NumInstrs && NumInstrs == 1; } + + /// AnalyzeCompare - For a comparison instruction, return the source register + /// in SrcReg and the value it compares against in CmpValue. Return true if + /// the comparison instruction can be analyzed. + virtual bool AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, + int &CmpValue) const; + + /// ConvertToSetZeroFlag - Convert the instruction to set the zero flag so + /// that we can remove a "comparison with zero". + virtual bool ConvertToSetZeroFlag(MachineInstr *Instr, + MachineInstr *CmpInstr) const; }; static inline diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 182bd9937145..eceafad63f17 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -40,13 +40,20 @@ #include "llvm/Support/CommandLine.h" namespace llvm { -cl::opt<bool> -ReuseFrameIndexVals("arm-reuse-frame-index-vals", cl::Hidden, cl::init(true), - cl::desc("Reuse repeated frame index values")); +static cl::opt<bool> +ForceAllBaseRegAlloc("arm-force-base-reg-alloc", cl::Hidden, cl::init(false), + cl::desc("Force use of virtual base registers for stack load/store")); +static cl::opt<bool> +EnableLocalStackAlloc("enable-local-stack-alloc", cl::init(true), cl::Hidden, + cl::desc("Enable pre-regalloc stack frame index allocation")); } using namespace llvm; +static cl::opt<bool> +EnableBasePointer("arm-use-base-pointer", cl::Hidden, cl::init(true), + cl::desc("Enable use of a base pointer for complex stack frames")); + unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum, bool *isSPVFP) { if (isSPVFP) @@ -143,7 +150,8 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &sti) : ARMGenRegisterInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP), TII(tii), STI(sti), - FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11) { + FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? 
ARM::R7 : ARM::R11), + BasePtr(ARM::R6) { } const unsigned* @@ -176,8 +184,11 @@ getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); Reserved.set(ARM::SP); Reserved.set(ARM::PC); - if (STI.isTargetDarwin() || hasFP(MF)) + Reserved.set(ARM::FPSCR); + if (hasFP(MF)) Reserved.set(FramePtr); + if (hasBasePointer(MF)) + Reserved.set(BasePtr); // Some targets reserve R9. if (STI.isR9Reserved()) Reserved.set(ARM::R9); @@ -191,9 +202,13 @@ bool ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF, case ARM::SP: case ARM::PC: return true; + case ARM::R6: + if (hasBasePointer(MF)) + return true; + break; case ARM::R7: case ARM::R11: - if (FramePtr == Reg && (STI.isTargetDarwin() || hasFP(MF))) + if (FramePtr == Reg && hasFP(MF)) return true; break; case ARM::R9: @@ -510,7 +525,7 @@ ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC, return std::make_pair(RC->allocation_order_begin(MF), RC->allocation_order_end(MF)); - if (!STI.isTargetDarwin() && !hasFP(MF)) { + if (!hasFP(MF)) { if (!STI.isR9Reserved()) return std::make_pair(GPREven1, GPREven1 + (sizeof(GPREven1)/sizeof(unsigned))); @@ -539,7 +554,7 @@ ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC, return std::make_pair(RC->allocation_order_begin(MF), RC->allocation_order_end(MF)); - if (!STI.isTargetDarwin() && !hasFP(MF)) { + if (!hasFP(MF)) { if (!STI.isR9Reserved()) return std::make_pair(GPROdd1, GPROdd1 + (sizeof(GPROdd1)/sizeof(unsigned))); @@ -609,30 +624,68 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg, /// or if frame pointer elimination is disabled. /// bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const { + // Mac OS X requires FP not to be clobbered for backtracing purpose. + if (STI.isTargetDarwin()) + return true; + const MachineFrameInfo *MFI = MF.getFrameInfo(); - return ((DisableFramePointerElim(MF) && MFI->adjustsStack())|| + // Always eliminate non-leaf frame pointers. + return ((DisableFramePointerElim(MF) && MFI->hasCalls()) || needsStackRealignment(MF) || MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()); } +bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + + if (!EnableBasePointer) + return false; + + if (needsStackRealignment(MF) && MFI->hasVarSizedObjects()) + return true; + + // Thumb has trouble with negative offsets from the FP. Thumb2 has a limited + // negative range for ldr/str (255), and thumb1 is positive offsets only. + // It's going to be better to use the SP or Base Pointer instead. When there + // are variable sized objects, we can't reference off of the SP, so we + // reserve a Base Pointer. + if (AFI->isThumbFunction() && MFI->hasVarSizedObjects()) { + // Conservatively estimate whether the negative offset from the frame + // pointer will be sufficient to reach. If a function has a smallish + // frame, it's less likely to have lots of spills and callee saved + // space, so it's all more likely to be within range of the frame pointer. + // If it's wrong, the scavenger will still enable access to work, it just + // won't be optimal. 
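+ // (A Thumb2 ldr/str reaches roughly -255..+4095 bytes around the FP, so a
+ // local area under 128 bytes leaves ample margin for spills and callee saves.)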
+ if (AFI->isThumb2Function() && MFI->getLocalFrameSize() < 128) + return false; + return true; + } + + return false; +} + bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - return (RealignStack && - !AFI->isThumb1OnlyFunction() && - !MFI->hasVarSizedObjects()); + // We can't realign the stack if: + // 1. Dynamic stack realignment is explicitly disabled, + // 2. This is a Thumb1 function (it's not useful, so we don't bother), or + // 3. There are VLAs in the function and the base pointer is disabled. + return (RealignStack && !AFI->isThumb1OnlyFunction() && + (!MFI->hasVarSizedObjects() || EnableBasePointer)); } bool ARMBaseRegisterInfo:: needsStackRealignment(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + const Function *F = MF.getFunction(); unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); - return (RealignStack && - !AFI->isThumb1OnlyFunction() && - (MFI->getMaxAlignment() > StackAlign) && - !MFI->hasVarSizedObjects()); + bool requiresRealignment = ((MFI->getLocalFrameMaxAlign() > StackAlign) || + F->hasFnAttr(Attribute::StackAlignment)); + + return requiresRealignment && canRealignStack(MF); } bool ARMBaseRegisterInfo:: @@ -668,6 +721,7 @@ static unsigned estimateStackSize(MachineFunction &MF) { /// instructions will require a scratch register during their expansion later. unsigned ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const { + const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned Limit = (1 << 12) - 1; for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) { for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); @@ -693,7 +747,10 @@ ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const { Limit = std::min(Limit, ((1U << 8) - 1) * 4); break; case ARMII::AddrModeT2_i12: - if (hasFP(MF)) Limit = std::min(Limit, (1U << 8) - 1); + // i12 supports only positive offset so these will be converted to + // i8 opcodes. See llvm::rewriteT2FrameIndex. + if (hasFP(MF) && AFI->hasStackFrame()) + Limit = std::min(Limit, (1U << 8) - 1); break; case ARMII::AddrMode6: // Addressing mode 6 (load/store) instructions can't encode an @@ -710,6 +767,19 @@ ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const { return Limit; } +static unsigned GetFunctionSizeInBytes(const MachineFunction &MF, + const ARMBaseInstrInfo &TII) { + unsigned FnSize = 0; + for (MachineFunction::const_iterator MBBI = MF.begin(), E = MF.end(); + MBBI != E; ++MBBI) { + const MachineBasicBlock &MBB = *MBBI; + for (MachineBasicBlock::const_iterator I = MBB.begin(),E = MBB.end(); + I != E; ++I) + FnSize += TII.GetInstSizeInBytes(I); + } + return FnSize; +} + void ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { @@ -737,6 +807,10 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, if (AFI->isThumb1OnlyFunction() && AFI->getVarArgsRegSaveSize() > 0) MF.getRegInfo().setPhysRegUsed(ARM::LR); + // Spill the BasePtr if it's used. + if (hasBasePointer(MF)) + MF.getRegInfo().setPhysRegUsed(BasePtr); + // Don't spill FP if the frame can be eliminated. This is determined // by scanning the callee-save registers to see if any is used. 
const unsigned *CSRegs = getCalleeSavedRegs(); @@ -807,7 +881,7 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, bool ForceLRSpill = false; if (!LRSpilled && AFI->isThumb1OnlyFunction()) { - unsigned FnSize = TII.GetFunctionSizeInBytes(MF); + unsigned FnSize = GetFunctionSizeInBytes(MF, TII); // Force LR to be spilled if the Thumb function size is > 2048. This enables // use of BL to implement far jump. If it turns out that it's not needed // then the branch fix up path will undo it. @@ -824,13 +898,19 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // slot of the previous FP. Also, if we have variable sized objects in the // function, stack slot references will often be negative, and some of // our instructions are positive-offset only, so conservatively consider - // that case to want a spill slot (or register) as well. + // that case to want a spill slot (or register) as well. Similarly, if + // the function adjusts the stack pointer during execution and the + // adjustments aren't already part of our stack size estimate, our offset + // calculations may be off, so be conservative. // FIXME: We could add logic to be more precise about negative offsets // and which instructions will need a scratch register for them. Is it // worth the effort and added fragility? bool BigStack = - (RS && (estimateStackSize(MF) + (hasFP(MF) ? 4:0) >= - estimateRSStackSizeLimit(MF))) || MFI->hasVarSizedObjects(); + (RS && + (estimateStackSize(MF) + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >= + estimateRSStackSizeLimit(MF))) + || MFI->hasVarSizedObjects() + || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF)); bool ExtraCSSpill = false; if (BigStack || !CanEliminateFrame || cannotEliminateFrame(MF)) { @@ -848,9 +928,7 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, ExtraCSSpill = true; } - // Darwin ABI requires FP to point to the stack slot that contains the - // previous FP. - if (STI.isTargetDarwin() || hasFP(MF)) { + if (hasFP(MF)) { MF.getRegInfo().setPhysRegUsed(FramePtr); NumGPRSpills++; } @@ -941,55 +1019,88 @@ unsigned ARMBaseRegisterInfo::getRARegister() const { return ARM::LR; } -unsigned +unsigned ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const { - if (STI.isTargetDarwin() || hasFP(MF)) + if (hasFP(MF)) return FramePtr; return ARM::SP; } +// Provide a base+offset reference to an FI slot for debug info. It's the +// same as what we use for resolving the code-gen references for now. +// FIXME: This can go wrong when references are SP-relative and simple call +// frames aren't used. 
int ARMBaseRegisterInfo::getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const { + return ResolveFrameIndexReference(MF, FI, FrameReg, 0); +} + +int +ARMBaseRegisterInfo::ResolveFrameIndexReference(const MachineFunction &MF, + int FI, + unsigned &FrameReg, + int SPAdj) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize(); + int FPOffset = Offset - AFI->getFramePtrSpillOffset(); bool isFixed = MFI->isFixedObjectIndex(FI); FrameReg = ARM::SP; + Offset += SPAdj; if (AFI->isGPRCalleeSavedArea1Frame(FI)) - Offset -= AFI->getGPRCalleeSavedArea1Offset(); + return Offset - AFI->getGPRCalleeSavedArea1Offset(); else if (AFI->isGPRCalleeSavedArea2Frame(FI)) - Offset -= AFI->getGPRCalleeSavedArea2Offset(); + return Offset - AFI->getGPRCalleeSavedArea2Offset(); else if (AFI->isDPRCalleeSavedAreaFrame(FI)) - Offset -= AFI->getDPRCalleeSavedAreaOffset(); - else if (needsStackRealignment(MF)) { - // When dynamically realigning the stack, use the frame pointer for - // parameters, and the stack pointer for locals. + return Offset - AFI->getDPRCalleeSavedAreaOffset(); + + // When dynamically realigning the stack, use the frame pointer for + // parameters, and the stack/base pointer for locals. + if (needsStackRealignment(MF)) { assert (hasFP(MF) && "dynamic stack realignment without a FP!"); if (isFixed) { FrameReg = getFrameRegister(MF); - Offset -= AFI->getFramePtrSpillOffset(); + Offset = FPOffset; + } else if (MFI->hasVarSizedObjects()) { + assert(hasBasePointer(MF) && + "VLAs and dynamic stack alignment, but missing base pointer!"); + FrameReg = BasePtr; } - } else if (hasFP(MF) && AFI->hasStackFrame()) { - if (isFixed || MFI->hasVarSizedObjects()) { - // Use frame pointer to reference fixed objects unless this is a - // frameless function. + return Offset; + } + + // If there is a frame pointer, use it when we can. + if (hasFP(MF) && AFI->hasStackFrame()) { + // Use frame pointer to reference fixed objects. Use it for locals if + // there are VLAs (and thus the SP isn't reliable as a base). + if (isFixed || (MFI->hasVarSizedObjects() && !hasBasePointer(MF))) { FrameReg = getFrameRegister(MF); - Offset -= AFI->getFramePtrSpillOffset(); + return FPOffset; + } else if (MFI->hasVarSizedObjects()) { + assert(hasBasePointer(MF) && "missing base pointer!"); + // Use the base register since we have it. + FrameReg = BasePtr; } else if (AFI->isThumb2Function()) { - // In Thumb2 mode, the negative offset is very limited. - int FPOffset = Offset - AFI->getFramePtrSpillOffset(); + // In Thumb2 mode, the negative offset is very limited. Try to avoid + // out of range references. if (FPOffset >= -255 && FPOffset < 0) { FrameReg = getFrameRegister(MF); - Offset = FPOffset; + return FPOffset; } + } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) { + // Otherwise, use SP or FP, whichever is closer to the stack slot. + FrameReg = getFrameRegister(MF); + return FPOffset; } } + // Use the base pointer if we have one. + if (hasBasePointer(MF)) + FrameReg = BasePtr; return Offset; } - int ARMBaseRegisterInfo::getFrameIndexOffset(const MachineFunction &MF, int FI) const { @@ -1024,7 +1135,8 @@ unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg, case ARM::R5: return ARM::R4; case ARM::R7: - return isReservedReg(MF, ARM::R7) ? 0 : ARM::R6; + return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6)) + ? 
0 : ARM::R6; case ARM::R9: return isReservedReg(MF, ARM::R9) ? 0 :ARM::R8; case ARM::R11: @@ -1113,7 +1225,8 @@ unsigned ARMBaseRegisterInfo::getRegisterPairOdd(unsigned Reg, case ARM::R4: return ARM::R5; case ARM::R6: - return isReservedReg(MF, ARM::R7) ? 0 : ARM::R7; + return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6)) + ? 0 : ARM::R7; case ARM::R8: return isReservedReg(MF, ARM::R9) ? 0 :ARM::R9; case ARM::R10: @@ -1220,13 +1333,18 @@ requiresFrameIndexScavenging(const MachineFunction &MF) const { return true; } +bool ARMBaseRegisterInfo:: +requiresVirtualBaseRegisters(const MachineFunction &MF) const { + return EnableLocalStackAlloc; +} + // hasReservedCallFrame - Under normal circumstances, when a frame pointer is // not required, we reserve argument space for call sites in the function // immediately on entry to the current function. This eliminates the need for // add/sub sp brackets around call sites. Returns true if the call frame is // included as part of the stack frame. bool ARMBaseRegisterInfo:: -hasReservedCallFrame(MachineFunction &MF) const { +hasReservedCallFrame(const MachineFunction &MF) const { const MachineFrameInfo *FFI = MF.getFrameInfo(); unsigned CFSize = FFI->getMaxCallFrameSize(); // It's not always a good idea to include the call frame as part of the @@ -1244,7 +1362,7 @@ hasReservedCallFrame(MachineFunction &MF) const { // is not sufficient here since we still may reference some objects via SP // even when FP is available in Thumb2 mode. bool ARMBaseRegisterInfo:: -canSimplifyCallFramePseudos(MachineFunction &MF) const { +canSimplifyCallFramePseudos(const MachineFunction &MF) const { return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects(); } @@ -1305,10 +1423,258 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MBB.erase(I); } -unsigned +int64_t ARMBaseRegisterInfo:: +getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const { + const TargetInstrDesc &Desc = MI->getDesc(); + unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); + int64_t InstrOffs = 0;; + int Scale = 1; + unsigned ImmIdx = 0; + switch (AddrMode) { + case ARMII::AddrModeT2_i8: + case ARMII::AddrModeT2_i12: + // i8 supports only negative, and i12 supports only positive, so + // based on Offset sign, consider the appropriate instruction + InstrOffs = MI->getOperand(Idx+1).getImm(); + Scale = 1; + break; + case ARMII::AddrMode5: { + // VFP address mode. + const MachineOperand &OffOp = MI->getOperand(Idx+1); + InstrOffs = ARM_AM::getAM5Offset(OffOp.getImm()); + if (ARM_AM::getAM5Op(OffOp.getImm()) == ARM_AM::sub) + InstrOffs = -InstrOffs; + Scale = 4; + break; + } + case ARMII::AddrMode2: { + ImmIdx = Idx+2; + InstrOffs = ARM_AM::getAM2Offset(MI->getOperand(ImmIdx).getImm()); + if (ARM_AM::getAM2Op(MI->getOperand(ImmIdx).getImm()) == ARM_AM::sub) + InstrOffs = -InstrOffs; + break; + } + case ARMII::AddrMode3: { + ImmIdx = Idx+2; + InstrOffs = ARM_AM::getAM3Offset(MI->getOperand(ImmIdx).getImm()); + if (ARM_AM::getAM3Op(MI->getOperand(ImmIdx).getImm()) == ARM_AM::sub) + InstrOffs = -InstrOffs; + break; + } + case ARMII::AddrModeT1_s: { + ImmIdx = Idx+1; + InstrOffs = MI->getOperand(ImmIdx).getImm(); + Scale = 4; + break; + } + default: + llvm_unreachable("Unsupported addressing mode!"); + break; + } + + return InstrOffs * Scale; +} + +/// needsFrameBaseReg - Returns true if the instruction's frame index +/// reference would be better served by a base register other than FP +/// or SP. 
Used by LocalStackFrameAllocation to determine which frame index +/// references it should create new base registers for. +bool ARMBaseRegisterInfo:: +needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { + for (unsigned i = 0; !MI->getOperand(i).isFI(); ++i) { + assert(i < MI->getNumOperands() &&"Instr doesn't have FrameIndex operand!"); + } + + // It's the load/store FI references that cause issues, as it can be difficult + // to materialize the offset if it won't fit in the literal field. Estimate + // based on the size of the local frame and some conservative assumptions + // about the rest of the stack frame (note, this is pre-regalloc, so + // we don't know everything for certain yet) whether this offset is likely + // to be out of range of the immediate. Return true if so. + + // We only generate virtual base registers for loads and stores, so + // return false for everything else. + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case ARM::LDR: case ARM::LDRH: case ARM::LDRB: + case ARM::STR: case ARM::STRH: case ARM::STRB: + case ARM::t2LDRi12: case ARM::t2LDRi8: + case ARM::t2STRi12: case ARM::t2STRi8: + case ARM::VLDRS: case ARM::VLDRD: + case ARM::VSTRS: case ARM::VSTRD: + case ARM::tSTRspi: case ARM::tLDRspi: + if (ForceAllBaseRegAlloc) + return true; + break; + default: + return false; + } + + // Without a virtual base register, if the function has variable sized + // objects, all fixed-size local references will be via the frame pointer, + // Approximate the offset and see if it's legal for the instruction. + // Note that the incoming offset is based on the SP value at function entry, + // so it'll be negative. + MachineFunction &MF = *MI->getParent()->getParent(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + + // Estimate an offset from the frame pointer. + // Conservatively assume all callee-saved registers get pushed. R4-R6 + // will be earlier than the FP, so we ignore those. + // R7, LR + int64_t FPOffset = Offset - 8; + // ARM and Thumb2 functions also need to consider R8-R11 and D8-D15 + if (!AFI->isThumbFunction() || !AFI->isThumb1OnlyFunction()) + FPOffset -= 80; + // Estimate an offset from the stack pointer. + // The incoming offset is relating to the SP at the start of the function, + // but when we access the local it'll be relative to the SP after local + // allocation, so adjust our SP-relative offset by that allocation size. + Offset = -Offset; + Offset += MFI->getLocalFrameSize(); + // Assume that we'll have at least some spill slots allocated. + // FIXME: This is a total SWAG number. We should run some statistics + // and pick a real one. + Offset += 128; // 128 bytes of spill slots + + // If there is a frame pointer, try using it. + // The FP is only available if there is no dynamic realignment. We + // don't know for sure yet whether we'll need that, so we guess based + // on whether there are any local variables that would trigger it. + unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); + if (hasFP(MF) && + !((MFI->getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) { + if (isFrameOffsetLegal(MI, FPOffset)) + return false; + } + // If we can reference via the stack pointer, try that. + // FIXME: This (and the code that resolves the references) can be improved + // to only disallow SP relative references in the live range of + // the VLA(s). In practice, it's unclear how much difference that + // would make, but it may be worth doing. 
+ if (!MFI->hasVarSizedObjects() && isFrameOffsetLegal(MI, Offset)) + return false; + + // The offset likely isn't legal; we want to allocate a virtual base register. + return true; +} + +/// materializeFrameBaseRegister - Insert defining instruction(s) for +/// BaseReg to be a pointer to FrameIdx before insertion point I. +void ARMBaseRegisterInfo:: +materializeFrameBaseRegister(MachineBasicBlock::iterator I, unsigned BaseReg, + int FrameIdx, int64_t Offset) const { + ARMFunctionInfo *AFI = + I->getParent()->getParent()->getInfo<ARMFunctionInfo>(); + unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : + (AFI->isThumb1OnlyFunction() ? ARM::tADDrSPi : ARM::t2ADDri); + + MachineInstrBuilder MIB = + BuildMI(*I->getParent(), I, I->getDebugLoc(), TII.get(ADDriOpc), BaseReg) + .addFrameIndex(FrameIdx).addImm(Offset); + if (!AFI->isThumb1OnlyFunction()) + AddDefaultCC(AddDefaultPred(MIB)); +} + +void +ARMBaseRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I, + unsigned BaseReg, int64_t Offset) const { + MachineInstr &MI = *I; + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + int Off = Offset; // ARM doesn't need the general 64-bit offsets + unsigned i = 0; + + assert(!AFI->isThumb1OnlyFunction() && + "This resolveFrameIndex does not support Thumb1!"); + + while (!MI.getOperand(i).isFI()) { + ++i; + assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); + } + bool Done = false; + if (!AFI->isThumbFunction()) + Done = rewriteARMFrameIndex(MI, i, BaseReg, Off, TII); + else { + assert(AFI->isThumb2Function()); + Done = rewriteT2FrameIndex(MI, i, BaseReg, Off, TII); + } + assert(Done && "Unable to resolve frame index!"); +} + +bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, + int64_t Offset) const { + const TargetInstrDesc &Desc = MI->getDesc(); + unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); + unsigned i = 0; + + while (!MI->getOperand(i).isFI()) { + ++i; + assert(i < MI->getNumOperands() && "Instr doesn't have FrameIndex operand!"); + } + + // AddrMode4 and AddrMode6 cannot handle any offset. + if (AddrMode == ARMII::AddrMode4 || AddrMode == ARMII::AddrMode6) + return Offset == 0; + + unsigned NumBits = 0; + unsigned Scale = 1; + bool isSigned = true; + switch (AddrMode) { + case ARMII::AddrModeT2_i8: + case ARMII::AddrModeT2_i12: + // i8 supports only negative, and i12 supports only positive, so + // based on Offset sign, consider the appropriate instruction. + Scale = 1; + if (Offset < 0) { + NumBits = 8; + Offset = -Offset; + } else { + NumBits = 12; + } + break; + case ARMII::AddrMode5: + // VFP address mode. + NumBits = 8; + Scale = 4; + break; + case ARMII::AddrMode2: + NumBits = 12; + break; + case ARMII::AddrMode3: + NumBits = 8; + break; + case ARMII::AddrModeT1_s: + NumBits = 5; + Scale = 4; + isSigned = false; + break; + default: + llvm_unreachable("Unsupported addressing mode!"); + break; + } + + Offset += getFrameIndexInstrOffset(MI, i); + // Make sure the offset is encodable for instructions that scale the + // immediate.
+ if ((Offset & (Scale-1)) != 0) + return false; + + if (isSigned && Offset < 0) + Offset = -Offset; + + unsigned Mask = (1 << NumBits) - 1; + if ((unsigned)Offset <= Mask * Scale) + return true; + + return false; +} + +void ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value, - RegScavenger *RS) const { + int SPAdj, RegScavenger *RS) const { unsigned i = 0; MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); @@ -1325,16 +1691,13 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int FrameIndex = MI.getOperand(i).getIndex(); unsigned FrameReg; - int Offset = getFrameIndexReference(MF, FrameIndex, FrameReg); - if (FrameReg != ARM::SP) - SPAdj = 0; - Offset += SPAdj; + int Offset = ResolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj); // Special handling of dbg_value instructions. if (MI.isDebugValue()) { MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/); MI.getOperand(i+1).ChangeToImmediate(Offset); - return 0; + return; } // Modify MI as necessary to handle as much of 'Offset' as possible @@ -1346,7 +1709,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Done = rewriteT2FrameIndex(MI, i, FrameReg, Offset, TII); } if (Done) - return 0; + return; // If we get here, the immediate doesn't fit into the instruction. We folded // as much as possible above, handle the rest, providing a register that is @@ -1366,10 +1729,6 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(i).ChangeToRegister(FrameReg, false, false, false); else { ScratchReg = MF.getRegInfo().createVirtualRegister(ARM::GPRRegisterClass); - if (Value) { - Value->first = FrameReg; // use the frame register as a kind indicator - Value->second = Offset; - } if (!AFI->isThumbFunction()) emitARMRegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, Pred, PredReg, TII); @@ -1379,10 +1738,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Offset, Pred, PredReg, TII); } MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true); - if (!ReuseFrameIndexVals) - ScratchReg = 0; } - return ScratchReg; } /// Move iterator past the next bunch of callee save load / store ops for @@ -1494,7 +1850,8 @@ emitPrologue(MachineFunction &MF) const { // Otherwise, if this is not Darwin, all the callee-saved registers go // into spill area 1, including the FP in R11. In either case, it is // now safe to emit this assignment. - if (STI.isTargetDarwin() || hasFP(MF)) { + bool HasFP = hasFP(MF); + if (HasFP) { unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : ARM::t2ADDri; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(ADDriOpc), FramePtr) @@ -1513,7 +1870,7 @@ emitPrologue(MachineFunction &MF) const { unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; - if (STI.isTargetDarwin() || hasFP(MF)) + if (HasFP) AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes); AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); @@ -1525,18 +1882,22 @@ emitPrologue(MachineFunction &MF) const { if (NumBytes) { // Adjust SP after all the callee-save spills. 
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes); + if (HasFP) + AFI->setShouldRestoreSPFromFP(true); } if (STI.isTargetELF() && hasFP(MF)) { MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - AFI->getFramePtrSpillOffset()); + AFI->setShouldRestoreSPFromFP(true); } AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); AFI->setDPRCalleeSavedAreaSize(DPRCSSize); - // If we need dynamic stack realignment, do it here. + // If we need dynamic stack realignment, do it here. Be paranoid and make + // sure if we also have VLAs, we have a base pointer for frame access. if (needsStackRealignment(MF)) { unsigned MaxAlign = MFI->getMaxAlignment(); assert (!AFI->isThumb1OnlyFunction()); @@ -1562,7 +1923,28 @@ emitPrologue(MachineFunction &MF) const { BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP) .addReg(ARM::R4, RegState::Kill); } + + AFI->setShouldRestoreSPFromFP(true); + } + + // If we need a base pointer, set it up here. It's whatever the value + // of the stack pointer is at this point. Any variable size objects + // will be allocated after this, so we can still use the base pointer + // to reference locals. + if (hasBasePointer(MF)) { + if (isARM) + BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), BasePtr) + .addReg(ARM::SP) + .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); + else + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), BasePtr) + .addReg(ARM::SP); } + + // If the frame has variable sized objects then the epilogue must restore + // the sp from fp. + if (!AFI->shouldRestoreSPFromFP() && MFI->hasVarSizedObjects()) + AFI->setShouldRestoreSPFromFP(true); } static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { @@ -1617,34 +1999,25 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { AFI->getGPRCalleeSavedArea2Size() + AFI->getDPRCalleeSavedAreaSize()); - // Darwin ABI requires FP to point to the stack slot that contains the - // previous FP. - bool HasFP = hasFP(MF); - if ((STI.isTargetDarwin() && NumBytes) || HasFP) { + // Reset SP based on frame pointer only if the stack frame extends beyond + // frame pointer stack slot or target is ELF and the function has FP. + if (AFI->shouldRestoreSPFromFP()) { NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; - // Reset SP based on frame pointer only if the stack frame extends beyond - // frame pointer stack slot or target is ELF and the function has FP. - if (HasFP || - AFI->getGPRCalleeSavedArea2Size() || - AFI->getDPRCalleeSavedAreaSize() || - AFI->getDPRCalleeSavedAreaOffset()) { - if (NumBytes) { - if (isARM) - emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes, - ARMCC::AL, 0, TII); - else - emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes, - ARMCC::AL, 0, TII); - } else { - // Thumb2 or ARM. - if (isARM) - BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP) - .addReg(FramePtr) - .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); - else - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP) - .addReg(FramePtr); - } + if (NumBytes) { + if (isARM) + emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes, + ARMCC::AL, 0, TII); + else + emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes, + ARMCC::AL, 0, TII); + } else { + // Thumb2 or ARM. 
+ if (isARM) + BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP) + .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); + else + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP) + .addReg(FramePtr); } } else if (NumBytes) emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); @@ -1670,7 +2043,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { // Jump to label or value in register. if (RetOpcode == ARM::TCRETURNdi) { - BuildMI(MBB, MBBI, dl, + BuildMI(MBB, MBBI, dl, TII.get(STI.isThumb() ? ARM::TAILJMPdt : ARM::TAILJMPd)). addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), JumpTarget.getTargetFlags()); @@ -1685,7 +2058,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { } else if (RetOpcode == ARM::TCRETURNriND) { BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPrND)). addReg(JumpTarget.getReg(), RegState::Kill); - } + } MachineInstr *NewMI = prior(MBBI); for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i) diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index f7ee0d5cc66d..fa2eb6c10498 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -44,7 +44,7 @@ static inline bool isARMLowRegister(unsigned Reg) { } } -struct ARMBaseRegisterInfo : public ARMGenRegisterInfo { +class ARMBaseRegisterInfo : public ARMGenRegisterInfo { protected: const ARMBaseInstrInfo &TII; const ARMSubtarget &STI; @@ -52,6 +52,11 @@ protected: /// FramePtr - ARM physical register used as frame ptr. unsigned FramePtr; + /// BasePtr - ARM physical register used as a base ptr in complex stack + /// frames. I.e., when we need a 3rd base, not just SP and FP, due to + /// variable size stack objects. + unsigned BasePtr; + // Can be only subclassed. explicit ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI); @@ -102,9 +107,18 @@ public: MachineFunction &MF) const; bool hasFP(const MachineFunction &MF) const; + bool hasBasePointer(const MachineFunction &MF) const; bool canRealignStack(const MachineFunction &MF) const; bool needsStackRealignment(const MachineFunction &MF) const; + int64_t getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const; + bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const; + void materializeFrameBaseRegister(MachineBasicBlock::iterator I, + unsigned BaseReg, int FrameIdx, + int64_t Offset) const; + void resolveFrameIndex(MachineBasicBlock::iterator I, + unsigned BaseReg, int64_t Offset) const; + bool isFrameOffsetLegal(const MachineInstr *MI, int64_t Offset) const; bool cannotEliminateFrame(const MachineFunction &MF) const; @@ -116,6 +130,8 @@ public: unsigned getFrameRegister(const MachineFunction &MF) const; int getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const; + int ResolveFrameIndexReference(const MachineFunction &MF, int FI, + unsigned &FrameReg, int SPAdj) const; int getFrameIndexOffset(const MachineFunction &MF, int FI) const; // Exception handling queries. 
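The offset-legality checks declared here reduce to one test: an offset is encodable when it is a multiple of the addressing mode's scale and, with the sign folded into the instruction's U bit, its magnitude fits the mode's immediate field. A minimal standalone sketch of that test, with illustrative names (this is not LLVM code):

#include <cstdint>

// Mirrors the NumBits/Scale check in isFrameOffsetLegal. Scale is a power
// of two (1 or 4 for the modes handled above).
static bool fitsImmediate(int64_t Offset, unsigned NumBits, unsigned Scale,
                          bool IsSigned) {
  if ((Offset & (Scale - 1)) != 0)  // must be a multiple of the scale
    return false;
  if (IsSigned && Offset < 0)
    Offset = -Offset;               // the sign is encoded separately
  uint64_t Mask = (1ULL << NumBits) - 1;
  return (uint64_t)Offset <= Mask * Scale;
}

For example, AddrMode5 (VFP) has an 8-bit immediate scaled by 4, so fitsImmediate(1020, 8, 4, true) holds while fitsImmediate(1024, 8, 4, true) does not; an unsigned mode such as AddrModeT1_s rejects any negative offset because the cast makes it exceed the mask.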
@@ -144,16 +160,17 @@ public: virtual bool requiresFrameIndexScavenging(const MachineFunction &MF) const; - virtual bool hasReservedCallFrame(MachineFunction &MF) const; - virtual bool canSimplifyCallFramePseudos(MachineFunction &MF) const; + virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const; + + virtual bool hasReservedCallFrame(const MachineFunction &MF) const; + virtual bool canSimplifyCallFramePseudos(const MachineFunction &MF) const; virtual void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - virtual unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value = NULL, - RegScavenger *RS = NULL) const; + virtual void eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS = NULL) const; virtual void emitPrologue(MachineFunction &MF) const; virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index 8fdb07f81626..293e32aa5376 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -1,4 +1,4 @@ -//===- ARMCallingConv.td - Calling Conventions for ARM ----------*- C++ -*-===// +//===- ARMCallingConv.td - Calling Conventions for ARM -----*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -68,7 +68,7 @@ def CC_ARM_AAPCS_Common : CallingConv<[ "ArgFlags.getOrigAlign() != 8", CCAssignToReg<[R0, R1, R2, R3]>>>, - CCIfType<[i32], CCIfAlign<"8", CCAssignToStack<4, 8>>>, + CCIfType<[i32], CCIfAlign<"8", CCAssignToStackWithShadow<4, 8, R3>>>, CCIfType<[i32, f32], CCAssignToStack<4, 4>>, CCIfType<[f64], CCAssignToStack<8, 8>>, CCIfType<[v2f64], CCAssignToStack<16, 8>> diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 7895cb071922..b1a702f90cfc 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -65,7 +65,7 @@ namespace { static char ID; public: ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) - : MachineFunctionPass(&ID), JTI(0), + : MachineFunctionPass(ID), JTI(0), II((const ARMInstrInfo *)tm.getInstrInfo()), TD(tm.getTargetData()), TM(tm), MCE(mce), MCPEs(0), MJTEs(0), @@ -124,6 +124,8 @@ namespace { void emitMiscArithInstruction(const MachineInstr &MI); + void emitSaturateInstruction(const MachineInstr &MI); + void emitBranchInstruction(const MachineInstr &MI); void emitInlineJumpTable(unsigned JTIndex); @@ -389,6 +391,9 @@ void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) { case ARMII::ArithMiscFrm: emitMiscArithInstruction(MI); break; + case ARMII::SatFrm: + emitSaturateInstruction(MI); + break; case ARMII::BrFrm: emitBranchInstruction(MI); break; @@ -654,6 +659,19 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) { switch (Opcode) { default: llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction"); + case ARM::BX: + case ARM::BMOVPCRX: + case ARM::BXr9: + case ARM::BMOVPCRXr9: { + // First emit mov lr, pc + unsigned Binary = 0x01a0e00f; + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + emitWordLE(Binary); + + // and then emit the branch. + emitMiscBranchInstruction(MI); + break; + } case TargetOpcode::INLINEASM: { // We allow inline assembler nodes with empty bodies - they can // implicitly define registers, which is ok for JIT. 
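For reference, the 0x01a0e00f template used for the "mov lr, pc" word above is the ARM data-processing encoding of MOV with a zero condition field, into which the predicate is OR'd at ARMII::CondShift. A hedged sketch of the field packing (the helper is illustrative, not part of the emitter):

#include <cstdint>

// Pack a register-to-register MOV in the ARM data-processing format.
static uint32_t encodeMovRegReg(unsigned Cond, unsigned Rd, unsigned Rm) {
  uint32_t Word = 0;
  Word |= (Cond & 0xF) << 28; // condition field (AL = 14)
  Word |= 0xDu << 21;         // data-processing opcode 1101 = MOV, S = 0
  Word |= (Rd & 0xF) << 12;   // destination register
  Word |= (Rm & 0xF);         // register form of operand 2
  return Word;
}

encodeMovRegReg(14 /*AL*/, 14 /*LR*/, 15 /*PC*/) yields 0xE1A0E00F, i.e. the 0x01a0e00f template with the AL predicate placed in bits 31:28.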
@@ -662,7 +680,7 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) { } break; } - case TargetOpcode::DBG_LABEL: + case TargetOpcode::PROLOG_LABEL: case TargetOpcode::EH_LABEL: MCE.emitLabel(MI.getOperand(0).getMCSymbol()); break; @@ -1209,12 +1227,58 @@ void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) { // Encode shift_imm. unsigned ShiftAmt = MI.getOperand(OpIdx).getImm(); + if (TID.Opcode == ARM::PKHTB) { + assert(ShiftAmt != 0 && "PKHTB shift_imm is 0!"); + if (ShiftAmt == 32) + ShiftAmt = 0; + } assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!"); Binary |= ShiftAmt << ARMII::ShiftShift; emitWordLE(Binary); } +void ARMCodeEmitter::emitSaturateInstruction(const MachineInstr &MI) { + const TargetInstrDesc &TID = MI.getDesc(); + + // Part of binary is determined by TableGen. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode Rd + Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift; + + // Encode saturate bit position. + unsigned Pos = MI.getOperand(1).getImm(); + if (TID.Opcode == ARM::SSAT || TID.Opcode == ARM::SSAT16) + Pos -= 1; + assert((Pos < 16 || (Pos < 32 && + TID.Opcode != ARM::SSAT16 && + TID.Opcode != ARM::USAT16)) && + "saturate bit position out of range"); + Binary |= Pos << 16; + + // Encode Rm + Binary |= getMachineOpValue(MI, 2); + + // Encode shift_imm. + if (TID.getNumOperands() == 4) { + unsigned ShiftOp = MI.getOperand(3).getImm(); + ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp); + if (Opc == ARM_AM::asr) + Binary |= (1 << 6); + unsigned ShiftAmt = MI.getOperand(3).getImm(); + if (ShiftAmt == 32 && Opc == ARM_AM::asr) + ShiftAmt = 0; + assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!"); + Binary |= ShiftAmt << ARMII::ShiftShift; + } + + emitWordLE(Binary); +} + void ARMCodeEmitter::emitBranchInstruction(const MachineInstr &MI) { const TargetInstrDesc &TID = MI.getDesc(); @@ -1485,7 +1549,7 @@ ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) { // Set addressing mode by modifying bits U(23) and P(24) const MachineOperand &MO = MI.getOperand(OpIdx++); - Binary |= getAddrModeUPBits(ARM_AM::getAM5SubMode(MO.getImm())); + Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(MO.getImm())); // Set bit W(21) if (IsUpdating) @@ -1494,7 +1558,7 @@ ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) { // First register is encoded in Dd. Binary |= encodeVFPRd(MI, OpIdx+2); - // Number of registers are encoded in offset field. + // Count the number of registers. unsigned NumRegs = 1; for (unsigned i = OpIdx+3, e = MI.getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI.getOperand(i); diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 65a3da6f1617..60e923bd2c38 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -18,9 +18,9 @@ #include "ARMAddressingModes.h" #include "ARMMachineFunctionInfo.h" #include "ARMInstrInfo.h" +#include "Thumb2InstrInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" @@ -165,7 +165,7 @@ namespace { /// HasInlineAsm - True if the function contains inline assembly. 
bool HasInlineAsm; - const TargetInstrInfo *TII; + const ARMInstrInfo *TII; const ARMSubtarget *STI; ARMFunctionInfo *AFI; bool isThumb; @@ -173,7 +173,7 @@ namespace { bool isThumb2; public: static char ID; - ARMConstantIslands() : MachineFunctionPass(&ID) {} + ARMConstantIslands() : MachineFunctionPass(ID) {} virtual bool runOnMachineFunction(MachineFunction &MF); @@ -272,7 +272,7 @@ FunctionPass *llvm::createARMConstantIslandPass() { bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { MachineConstantPool &MCP = *MF.getConstantPool(); - TII = MF.getTarget().getInstrInfo(); + TII = (const ARMInstrInfo*)MF.getTarget().getInstrInfo(); AFI = MF.getInfo<ARMFunctionInfo>(); STI = &MF.getTarget().getSubtarget<ARMSubtarget>(); @@ -323,6 +323,8 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { // constant pool users. InitialFunctionScan(MF, CPEMIs); CPEMIs.clear(); + DEBUG(dumpBBs()); + /// Remove dead constant pool entries. RemoveUnusedCPEntries(); @@ -355,7 +357,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { } // Shrink 32-bit Thumb2 branch, load, and store instructions. - if (isThumb2) + if (isThumb2 && !STI->prefers32BitThumb()) MadeChange |= OptimizeThumb2Instructions(MF); // After a while, this might be made debug-only, but it is not expensive. @@ -366,6 +368,8 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { if (isThumb && !HasFarJump && AFI->isLRSpilledForFarJump()) MadeChange |= UndoLRSpillRestore(); + DEBUG(errs() << '\n'; dumpBBs()); + BBSizes.clear(); BBOffsets.clear(); WaterList.clear(); @@ -509,6 +513,10 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF, case ARM::tBR_JTr: // A Thumb1 table jump may involve padding; for the offsets to // be right, functions containing these must be 4-byte aligned. + // tBR_JTr expands to a mov pc followed by .align 2 and then the jump + // table entries. So this code checks whether offset of tBR_JTr + 2 + // is aligned. That is held in Offset+MBBSize, which already has + // 2 added in for the size of the mov pc instruction. MF.EnsureAlignment(2U); if ((Offset+MBBSize)%4 != 0 || HasInlineAsm) // FIXME: Add a pseudo ALIGN instruction instead. @@ -768,28 +776,54 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { WaterList.insert(IP, OrigBB); NewWaterList.insert(OrigBB); - // Figure out how large the first NewMBB is. (It cannot - // contain a constpool_entry or tablejump.) - unsigned NewBBSize = 0; - for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end(); - I != E; ++I) - NewBBSize += TII->GetInstSizeInBytes(I); - unsigned OrigBBI = OrigBB->getNumber(); unsigned NewBBI = NewBB->getNumber(); - // Set the size of NewBB in BBSizes. - BBSizes[NewBBI] = NewBBSize; - // We removed instructions from UserMBB, subtract that off from its size. - // Add 2 or 4 to the block to count the unconditional branch we added to it. int delta = isThumb1 ? 2 : 4; - BBSizes[OrigBBI] -= NewBBSize - delta; + + // Figure out how large the OrigBB is. As the first half of the original + // block, it cannot contain a tablejump. The size includes + // the new jump we added. (It should be possible to do this without + // recounting everything, but it's very confusing, and this is rarely + // executed.) 
+ unsigned OrigBBSize = 0; + for (MachineBasicBlock::iterator I = OrigBB->begin(), E = OrigBB->end(); + I != E; ++I) + OrigBBSize += TII->GetInstSizeInBytes(I); + BBSizes[OrigBBI] = OrigBBSize; // ...and adjust BBOffsets for NewBB accordingly. BBOffsets[NewBBI] = BBOffsets[OrigBBI] + BBSizes[OrigBBI]; + // Figure out how large the NewMBB is. As the second half of the original + // block, it may contain a tablejump. + unsigned NewBBSize = 0; + for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end(); + I != E; ++I) + NewBBSize += TII->GetInstSizeInBytes(I); + // Set the size of NewBB in BBSizes. It does not include any padding now. + BBSizes[NewBBI] = NewBBSize; + + MachineInstr* ThumbJTMI = prior(NewBB->end()); + if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) { + // We've added another 2-byte instruction before this tablejump, which + // means we will always need padding if we didn't before, and vice versa. + + // The original offset of the jump instruction was: + unsigned OrigOffset = BBOffsets[OrigBBI] + BBSizes[OrigBBI] - delta; + if (OrigOffset%4 == 0) { + // We had padding before and now we don't. No net change in code size. + delta = 0; + } else { + // We didn't have padding before and now we do. + BBSizes[NewBBI] += 2; + delta = 4; + } + } + // All BBOffsets following these blocks must be modified. - AdjustBBOffsetsAfter(NewBB, delta); + if (delta) + AdjustBBOffsetsAfter(NewBB, delta); return NewBB; } @@ -915,6 +949,10 @@ void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB, } // Thumb1 jump tables require padding. They should be at the end; // following unconditional branches are removed by AnalyzeBranch. + // tBR_JTr expands to a mov pc followed by .align 2 and then the jump + // table entries. So this code checks whether offset of tBR_JTr + // is aligned; if it is, the offset of the jump table following the + // instruction will not be aligned, and we need padding. MachineInstr *ThumbJTMI = prior(MBB->end()); if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) { unsigned NewMIOffset = GetOffsetOf(ThumbJTMI); @@ -1143,11 +1181,13 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, MachineBasicBlock::iterator MI = UserMI; ++MI; unsigned CPUIndex = CPUserIndex+1; + unsigned NumCPUsers = CPUsers.size(); + MachineInstr *LastIT = 0; for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI); Offset < BaseInsertOffset; Offset += TII->GetInstSizeInBytes(MI), - MI = llvm::next(MI)) { - if (CPUIndex < CPUsers.size() && CPUsers[CPUIndex].MI == MI) { + MI = llvm::next(MI)) { + if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) { CPUser &U = CPUsers[CPUIndex]; if (!OffsetIsInRange(Offset, EndInsertOffset, U.MaxDisp, U.NegOk, U.IsSoImm)) { @@ -1159,9 +1199,23 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, EndInsertOffset += CPUsers[CPUIndex].CPEMI->getOperand(2).getImm(); CPUIndex++; } + + // Remember the last IT instruction. + if (MI->getOpcode() == ARM::t2IT) + LastIT = MI; } + DEBUG(errs() << "Split in middle of big block\n"); - NewMBB = SplitBlockBeforeInstr(prior(MI)); + --MI; + + // Avoid splitting an IT block. 
+ if (LastIT) { + unsigned PredReg = 0; + ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg); + if (CC != ARMCC::AL) + MI = LastIT; + } + NewMBB = SplitBlockBeforeInstr(MI); } } diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 9c62597b4323..fc2e3c3fadae 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -19,14 +19,21 @@ #include "ARMBaseInstrInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" - +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; namespace { class ARMExpandPseudo : public MachineFunctionPass { + // Constants for register spacing in NEON load/store instructions. + enum NEONRegSpacing { + SingleSpc, + EvenDblSpc, + OddDblSpc + }; + public: static char ID; - ARMExpandPseudo() : MachineFunctionPass(&ID) {} + ARMExpandPseudo() : MachineFunctionPass(ID) {} const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; @@ -41,6 +48,10 @@ namespace { void TransferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI); bool ExpandMBB(MachineBasicBlock &MBB); + void ExpandVLD(MachineBasicBlock::iterator &MBBI, unsigned Opc, + bool hasWriteBack, NEONRegSpacing RegSpc, unsigned NumRegs); + void ExpandVST(MachineBasicBlock::iterator &MBBI, unsigned Opc, + bool hasWriteBack, NEONRegSpacing RegSpc, unsigned NumRegs); }; char ARMExpandPseudo::ID = 0; } @@ -63,6 +74,129 @@ void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI, } } +/// ExpandVLD - Translate VLD pseudo instructions with Q, QQ or QQQQ register +/// operands to real VLD instructions with D register operands. +void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI, + unsigned Opc, bool hasWriteBack, + NEONRegSpacing RegSpc, unsigned NumRegs) { + MachineInstr &MI = *MBBI; + MachineBasicBlock &MBB = *MI.getParent(); + + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)); + unsigned OpIdx = 0; + + bool DstIsDead = MI.getOperand(OpIdx).isDead(); + unsigned DstReg = MI.getOperand(OpIdx++).getReg(); + unsigned D0, D1, D2, D3; + if (RegSpc == SingleSpc) { + D0 = TRI->getSubReg(DstReg, ARM::dsub_0); + D1 = TRI->getSubReg(DstReg, ARM::dsub_1); + D2 = TRI->getSubReg(DstReg, ARM::dsub_2); + D3 = TRI->getSubReg(DstReg, ARM::dsub_3); + } else if (RegSpc == EvenDblSpc) { + D0 = TRI->getSubReg(DstReg, ARM::dsub_0); + D1 = TRI->getSubReg(DstReg, ARM::dsub_2); + D2 = TRI->getSubReg(DstReg, ARM::dsub_4); + D3 = TRI->getSubReg(DstReg, ARM::dsub_6); + } else { + assert(RegSpc == OddDblSpc && "unknown register spacing for VLD"); + D0 = TRI->getSubReg(DstReg, ARM::dsub_1); + D1 = TRI->getSubReg(DstReg, ARM::dsub_3); + D2 = TRI->getSubReg(DstReg, ARM::dsub_5); + D3 = TRI->getSubReg(DstReg, ARM::dsub_7); + } + MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(D1, RegState::Define | getDeadRegState(DstIsDead)); + if (NumRegs > 2) + MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead)); + if (NumRegs > 3) + MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead)); + + if (hasWriteBack) { + bool WBIsDead = MI.getOperand(OpIdx).isDead(); + unsigned WBReg = MI.getOperand(OpIdx++).getReg(); + MIB.addReg(WBReg, RegState::Define | getDeadRegState(WBIsDead)); + } + // Copy the addrmode6 operands. 
+ bool AddrIsKill = MI.getOperand(OpIdx).isKill(); + MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(AddrIsKill)); + MIB.addImm(MI.getOperand(OpIdx++).getImm()); + if (hasWriteBack) { + // Copy the am6offset operand. + bool OffsetIsKill = MI.getOperand(OpIdx).isKill(); + MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(OffsetIsKill)); + } + + MIB = AddDefaultPred(MIB); + TransferImpOps(MI, MIB, MIB); + // For an instruction writing the odd subregs, add an implicit use of the + // super-register because the even subregs were loaded separately. + if (RegSpc == OddDblSpc) + MIB.addReg(DstReg, RegState::Implicit); + // Add an implicit def for the super-register. + MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead)); + MI.eraseFromParent(); +} + +/// ExpandVST - Translate VST pseudo instructions with Q, QQ or QQQQ register +/// operands to real VST instructions with D register operands. +void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI, + unsigned Opc, bool hasWriteBack, + NEONRegSpacing RegSpc, unsigned NumRegs) { + MachineInstr &MI = *MBBI; + MachineBasicBlock &MBB = *MI.getParent(); + + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)); + unsigned OpIdx = 0; + if (hasWriteBack) { + bool DstIsDead = MI.getOperand(OpIdx).isDead(); + unsigned DstReg = MI.getOperand(OpIdx++).getReg(); + MIB.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)); + } + // Copy the addrmode6 operands. + bool AddrIsKill = MI.getOperand(OpIdx).isKill(); + MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(AddrIsKill)); + MIB.addImm(MI.getOperand(OpIdx++).getImm()); + if (hasWriteBack) { + // Copy the am6offset operand. + bool OffsetIsKill = MI.getOperand(OpIdx).isKill(); + MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(OffsetIsKill)); + } + + bool SrcIsKill = MI.getOperand(OpIdx).isKill(); + unsigned SrcReg = MI.getOperand(OpIdx).getReg(); + unsigned D0, D1, D2, D3; + if (RegSpc == SingleSpc) { + D0 = TRI->getSubReg(SrcReg, ARM::dsub_0); + D1 = TRI->getSubReg(SrcReg, ARM::dsub_1); + D2 = TRI->getSubReg(SrcReg, ARM::dsub_2); + D3 = TRI->getSubReg(SrcReg, ARM::dsub_3); + } else if (RegSpc == EvenDblSpc) { + D0 = TRI->getSubReg(SrcReg, ARM::dsub_0); + D1 = TRI->getSubReg(SrcReg, ARM::dsub_2); + D2 = TRI->getSubReg(SrcReg, ARM::dsub_4); + D3 = TRI->getSubReg(SrcReg, ARM::dsub_6); + } else { + assert(RegSpc == OddDblSpc && "unknown register spacing for VST"); + D0 = TRI->getSubReg(SrcReg, ARM::dsub_1); + D1 = TRI->getSubReg(SrcReg, ARM::dsub_3); + D2 = TRI->getSubReg(SrcReg, ARM::dsub_5); + D3 = TRI->getSubReg(SrcReg, ARM::dsub_7); + } + + MIB.addReg(D0).addReg(D1); + if (NumRegs > 2) + MIB.addReg(D2); + if (NumRegs > 3) + MIB.addReg(D3); + MIB = AddDefaultPred(MIB); + TransferImpOps(MI, MIB, MIB); + if (SrcIsKill) + // Add an implicit kill for the super-reg. 
+ (*MIB).addRegisterKilled(SrcReg, TRI, true); + MI.eraseFromParent(); +} + bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { bool Modified = false; @@ -71,9 +205,13 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { MachineInstr &MI = *MBBI; MachineBasicBlock::iterator NMBBI = llvm::next(MBBI); + bool ModifiedOp = true; unsigned Opcode = MI.getOpcode(); switch (Opcode) { - default: break; + default: + ModifiedOp = false; + break; + case ARM::tLDRpci_pic: case ARM::t2LDRpci_pic: { unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic) @@ -92,10 +230,10 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { .addOperand(MI.getOperand(2)); TransferImpOps(MI, MIB1, MIB2); MI.eraseFromParent(); - Modified = true; break; } + case ARM::MOVi32imm: case ARM::t2MOVi32imm: { unsigned PredReg = 0; ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg); @@ -104,9 +242,13 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { const MachineOperand &MO = MI.getOperand(1); MachineInstrBuilder LO16, HI16; - LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVi16), + LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(Opcode == ARM::MOVi32imm ? + ARM::MOVi16 : ARM::t2MOVi16), DstReg); - HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVTi16)) + HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(Opcode == ARM::MOVi32imm ? + ARM::MOVTi16 : ARM::t2MOVTi16)) .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead)) .addReg(DstReg); @@ -128,7 +270,6 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { HI16.addImm(Pred).addReg(PredReg); TransferImpOps(MI, LO16, HI16); MI.eraseFromParent(); - Modified = true; break; } @@ -155,9 +296,211 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { .addReg(OddSrc, getKillRegState(SrcIsKill))); TransferImpOps(MI, Even, Odd); MI.eraseFromParent(); - Modified = true; } + + case ARM::VLD1q8Pseudo: + ExpandVLD(MBBI, ARM::VLD1q8, false, SingleSpc, 2); break; + case ARM::VLD1q16Pseudo: + ExpandVLD(MBBI, ARM::VLD1q16, false, SingleSpc, 2); break; + case ARM::VLD1q32Pseudo: + ExpandVLD(MBBI, ARM::VLD1q32, false, SingleSpc, 2); break; + case ARM::VLD1q64Pseudo: + ExpandVLD(MBBI, ARM::VLD1q64, false, SingleSpc, 2); break; + case ARM::VLD1q8Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD1q8, true, SingleSpc, 2); break; + case ARM::VLD1q16Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD1q16, true, SingleSpc, 2); break; + case ARM::VLD1q32Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD1q32, true, SingleSpc, 2); break; + case ARM::VLD1q64Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD1q64, true, SingleSpc, 2); break; + + case ARM::VLD2d8Pseudo: + ExpandVLD(MBBI, ARM::VLD2d8, false, SingleSpc, 2); break; + case ARM::VLD2d16Pseudo: + ExpandVLD(MBBI, ARM::VLD2d16, false, SingleSpc, 2); break; + case ARM::VLD2d32Pseudo: + ExpandVLD(MBBI, ARM::VLD2d32, false, SingleSpc, 2); break; + case ARM::VLD2q8Pseudo: + ExpandVLD(MBBI, ARM::VLD2q8, false, SingleSpc, 4); break; + case ARM::VLD2q16Pseudo: + ExpandVLD(MBBI, ARM::VLD2q16, false, SingleSpc, 4); break; + case ARM::VLD2q32Pseudo: + ExpandVLD(MBBI, ARM::VLD2q32, false, SingleSpc, 4); break; + case ARM::VLD2d8Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD2d8, true, SingleSpc, 2); break; + case ARM::VLD2d16Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD2d16, true, SingleSpc, 2); break; + case ARM::VLD2d32Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD2d32, true, SingleSpc, 2); break; + case ARM::VLD2q8Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD2q8, true, SingleSpc, 4); break; + case ARM::VLD2q16Pseudo_UPD: + ExpandVLD(MBBI, 
ARM::VLD2q16, true, SingleSpc, 4); break; + case ARM::VLD2q32Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD2q32, true, SingleSpc, 4); break; + + case ARM::VLD3d8Pseudo: + ExpandVLD(MBBI, ARM::VLD3d8, false, SingleSpc, 3); break; + case ARM::VLD3d16Pseudo: + ExpandVLD(MBBI, ARM::VLD3d16, false, SingleSpc, 3); break; + case ARM::VLD3d32Pseudo: + ExpandVLD(MBBI, ARM::VLD3d32, false, SingleSpc, 3); break; + case ARM::VLD1d64TPseudo: + ExpandVLD(MBBI, ARM::VLD1d64T, false, SingleSpc, 3); break; + case ARM::VLD3d8Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD3d8_UPD, true, SingleSpc, 3); break; + case ARM::VLD3d16Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD3d16_UPD, true, SingleSpc, 3); break; + case ARM::VLD3d32Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD3d32_UPD, true, SingleSpc, 3); break; + case ARM::VLD1d64TPseudo_UPD: + ExpandVLD(MBBI, ARM::VLD1d64T_UPD, true, SingleSpc, 3); break; + case ARM::VLD3q8Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD3q8_UPD, true, EvenDblSpc, 3); break; + case ARM::VLD3q16Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD3q16_UPD, true, EvenDblSpc, 3); break; + case ARM::VLD3q32Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD3q32_UPD, true, EvenDblSpc, 3); break; + case ARM::VLD3q8oddPseudo_UPD: + ExpandVLD(MBBI, ARM::VLD3q8_UPD, true, OddDblSpc, 3); break; + case ARM::VLD3q16oddPseudo_UPD: + ExpandVLD(MBBI, ARM::VLD3q16_UPD, true, OddDblSpc, 3); break; + case ARM::VLD3q32oddPseudo_UPD: + ExpandVLD(MBBI, ARM::VLD3q32_UPD, true, OddDblSpc, 3); break; + + case ARM::VLD4d8Pseudo: + ExpandVLD(MBBI, ARM::VLD4d8, false, SingleSpc, 4); break; + case ARM::VLD4d16Pseudo: + ExpandVLD(MBBI, ARM::VLD4d16, false, SingleSpc, 4); break; + case ARM::VLD4d32Pseudo: + ExpandVLD(MBBI, ARM::VLD4d32, false, SingleSpc, 4); break; + case ARM::VLD1d64QPseudo: + ExpandVLD(MBBI, ARM::VLD1d64Q, false, SingleSpc, 4); break; + case ARM::VLD4d8Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD4d8_UPD, true, SingleSpc, 4); break; + case ARM::VLD4d16Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD4d16_UPD, true, SingleSpc, 4); break; + case ARM::VLD4d32Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD4d32_UPD, true, SingleSpc, 4); break; + case ARM::VLD1d64QPseudo_UPD: + ExpandVLD(MBBI, ARM::VLD1d64Q_UPD, true, SingleSpc, 4); break; + case ARM::VLD4q8Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD4q8_UPD, true, EvenDblSpc, 4); break; + case ARM::VLD4q16Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD4q16_UPD, true, EvenDblSpc, 4); break; + case ARM::VLD4q32Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD4q32_UPD, true, EvenDblSpc, 4); break; + case ARM::VLD4q8oddPseudo_UPD: + ExpandVLD(MBBI, ARM::VLD4q8_UPD, true, OddDblSpc, 4); break; + case ARM::VLD4q16oddPseudo_UPD: + ExpandVLD(MBBI, ARM::VLD4q16_UPD, true, OddDblSpc, 4); break; + case ARM::VLD4q32oddPseudo_UPD: + ExpandVLD(MBBI, ARM::VLD4q32_UPD, true, OddDblSpc, 4); break; + + case ARM::VST1q8Pseudo: + ExpandVST(MBBI, ARM::VST1q8, false, SingleSpc, 2); break; + case ARM::VST1q16Pseudo: + ExpandVST(MBBI, ARM::VST1q16, false, SingleSpc, 2); break; + case ARM::VST1q32Pseudo: + ExpandVST(MBBI, ARM::VST1q32, false, SingleSpc, 2); break; + case ARM::VST1q64Pseudo: + ExpandVST(MBBI, ARM::VST1q64, false, SingleSpc, 2); break; + case ARM::VST1q8Pseudo_UPD: + ExpandVST(MBBI, ARM::VST1q8_UPD, true, SingleSpc, 2); break; + case ARM::VST1q16Pseudo_UPD: + ExpandVST(MBBI, ARM::VST1q16_UPD, true, SingleSpc, 2); break; + case ARM::VST1q32Pseudo_UPD: + ExpandVST(MBBI, ARM::VST1q32_UPD, true, SingleSpc, 2); break; + case ARM::VST1q64Pseudo_UPD: + ExpandVST(MBBI, ARM::VST1q64_UPD, true, SingleSpc, 2); break; + + case ARM::VST2d8Pseudo: + ExpandVST(MBBI, ARM::VST2d8, false, 
SingleSpc, 2); break; + case ARM::VST2d16Pseudo: + ExpandVST(MBBI, ARM::VST2d16, false, SingleSpc, 2); break; + case ARM::VST2d32Pseudo: + ExpandVST(MBBI, ARM::VST2d32, false, SingleSpc, 2); break; + case ARM::VST2q8Pseudo: + ExpandVST(MBBI, ARM::VST2q8, false, SingleSpc, 4); break; + case ARM::VST2q16Pseudo: + ExpandVST(MBBI, ARM::VST2q16, false, SingleSpc, 4); break; + case ARM::VST2q32Pseudo: + ExpandVST(MBBI, ARM::VST2q32, false, SingleSpc, 4); break; + case ARM::VST2d8Pseudo_UPD: + ExpandVST(MBBI, ARM::VST2d8_UPD, true, SingleSpc, 2); break; + case ARM::VST2d16Pseudo_UPD: + ExpandVST(MBBI, ARM::VST2d16_UPD, true, SingleSpc, 2); break; + case ARM::VST2d32Pseudo_UPD: + ExpandVST(MBBI, ARM::VST2d32_UPD, true, SingleSpc, 2); break; + case ARM::VST2q8Pseudo_UPD: + ExpandVST(MBBI, ARM::VST2q8_UPD, true, SingleSpc, 4); break; + case ARM::VST2q16Pseudo_UPD: + ExpandVST(MBBI, ARM::VST2q16_UPD, true, SingleSpc, 4); break; + case ARM::VST2q32Pseudo_UPD: + ExpandVST(MBBI, ARM::VST2q32_UPD, true, SingleSpc, 4); break; + + case ARM::VST3d8Pseudo: + ExpandVST(MBBI, ARM::VST3d8, false, SingleSpc, 3); break; + case ARM::VST3d16Pseudo: + ExpandVST(MBBI, ARM::VST3d16, false, SingleSpc, 3); break; + case ARM::VST3d32Pseudo: + ExpandVST(MBBI, ARM::VST3d32, false, SingleSpc, 3); break; + case ARM::VST1d64TPseudo: + ExpandVST(MBBI, ARM::VST1d64T, false, SingleSpc, 3); break; + case ARM::VST3d8Pseudo_UPD: + ExpandVST(MBBI, ARM::VST3d8_UPD, true, SingleSpc, 3); break; + case ARM::VST3d16Pseudo_UPD: + ExpandVST(MBBI, ARM::VST3d16_UPD, true, SingleSpc, 3); break; + case ARM::VST3d32Pseudo_UPD: + ExpandVST(MBBI, ARM::VST3d32_UPD, true, SingleSpc, 3); break; + case ARM::VST1d64TPseudo_UPD: + ExpandVST(MBBI, ARM::VST1d64T_UPD, true, SingleSpc, 3); break; + case ARM::VST3q8Pseudo_UPD: + ExpandVST(MBBI, ARM::VST3q8_UPD, true, EvenDblSpc, 3); break; + case ARM::VST3q16Pseudo_UPD: + ExpandVST(MBBI, ARM::VST3q16_UPD, true, EvenDblSpc, 3); break; + case ARM::VST3q32Pseudo_UPD: + ExpandVST(MBBI, ARM::VST3q32_UPD, true, EvenDblSpc, 3); break; + case ARM::VST3q8oddPseudo_UPD: + ExpandVST(MBBI, ARM::VST3q8_UPD, true, OddDblSpc, 3); break; + case ARM::VST3q16oddPseudo_UPD: + ExpandVST(MBBI, ARM::VST3q16_UPD, true, OddDblSpc, 3); break; + case ARM::VST3q32oddPseudo_UPD: + ExpandVST(MBBI, ARM::VST3q32_UPD, true, OddDblSpc, 3); break; + + case ARM::VST4d8Pseudo: + ExpandVST(MBBI, ARM::VST4d8, false, SingleSpc, 4); break; + case ARM::VST4d16Pseudo: + ExpandVST(MBBI, ARM::VST4d16, false, SingleSpc, 4); break; + case ARM::VST4d32Pseudo: + ExpandVST(MBBI, ARM::VST4d32, false, SingleSpc, 4); break; + case ARM::VST1d64QPseudo: + ExpandVST(MBBI, ARM::VST1d64Q, false, SingleSpc, 4); break; + case ARM::VST4d8Pseudo_UPD: + ExpandVST(MBBI, ARM::VST4d8_UPD, true, SingleSpc, 4); break; + case ARM::VST4d16Pseudo_UPD: + ExpandVST(MBBI, ARM::VST4d16_UPD, true, SingleSpc, 4); break; + case ARM::VST4d32Pseudo_UPD: + ExpandVST(MBBI, ARM::VST4d32_UPD, true, SingleSpc, 4); break; + case ARM::VST1d64QPseudo_UPD: + ExpandVST(MBBI, ARM::VST1d64Q_UPD, true, SingleSpc, 4); break; + case ARM::VST4q8Pseudo_UPD: + ExpandVST(MBBI, ARM::VST4q8_UPD, true, EvenDblSpc, 4); break; + case ARM::VST4q16Pseudo_UPD: + ExpandVST(MBBI, ARM::VST4q16_UPD, true, EvenDblSpc, 4); break; + case ARM::VST4q32Pseudo_UPD: + ExpandVST(MBBI, ARM::VST4q32_UPD, true, EvenDblSpc, 4); break; + case ARM::VST4q8oddPseudo_UPD: + ExpandVST(MBBI, ARM::VST4q8_UPD, true, OddDblSpc, 4); break; + case ARM::VST4q16oddPseudo_UPD: + ExpandVST(MBBI, ARM::VST4q16_UPD, true, OddDblSpc, 4); break; + 
case ARM::VST4q32oddPseudo_UPD: + ExpandVST(MBBI, ARM::VST4q32_UPD, true, OddDblSpc, 4); break; } + + if (ModifiedOp) + Modified = true; MBBI = NMBBI; } diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp new file mode 100644 index 000000000000..4892eae95833 --- /dev/null +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -0,0 +1,665 @@ +//===-- ARMFastISel.cpp - ARM FastISel implementation ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the ARM-specific support for the FastISel class. Some +// of the target-specific code is generated by tablegen in the file +// ARMGenFastISel.inc, which is #included here. +// +//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "ARMBaseInstrInfo.h" +#include "ARMRegisterInfo.h" +#include "ARMTargetMachine.h" +#include "ARMSubtarget.h" +#include "llvm/CallingConv.h" +#include "llvm/DerivedTypes.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +using namespace llvm; + +static cl::opt<bool> +EnableARMFastISel("arm-fast-isel", + cl::desc("Turn on experimental ARM fast-isel support"), + cl::init(false), cl::Hidden); + +namespace { + +class ARMFastISel : public FastISel { + + /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can + /// make the right decision when generating code for different targets. + const ARMSubtarget *Subtarget; + const TargetMachine &TM; + const TargetInstrInfo &TII; + const TargetLowering &TLI; + const ARMFunctionInfo *AFI; + + // Convenience variable to avoid checking all the time. + bool isThumb; + + public: + explicit ARMFastISel(FunctionLoweringInfo &funcInfo) + : FastISel(funcInfo), + TM(funcInfo.MF->getTarget()), + TII(*TM.getInstrInfo()), + TLI(*TM.getTargetLowering()) { + Subtarget = &TM.getSubtarget<ARMSubtarget>(); + AFI = funcInfo.MF->getInfo<ARMFunctionInfo>(); + isThumb = AFI->isThumbFunction(); + } + + // Code from FastISel.cpp. 
+ virtual unsigned FastEmitInst_(unsigned MachineInstOpcode, + const TargetRegisterClass *RC); + virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill); + virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill); + virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + uint64_t Imm); + virtual unsigned FastEmitInst_rf(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + const ConstantFP *FPImm); + virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + uint64_t Imm); + virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill, + uint64_t Imm); + virtual unsigned FastEmitInst_extractsubreg(MVT RetVT, + unsigned Op0, bool Op0IsKill, + uint32_t Idx); + + // Backend specific FastISel code. + virtual bool TargetSelectInstruction(const Instruction *I); + virtual unsigned TargetMaterializeConstant(const Constant *C); + + #include "ARMGenFastISel.inc" + + // Instruction selection routines. + virtual bool ARMSelectLoad(const Instruction *I); + virtual bool ARMSelectStore(const Instruction *I); + virtual bool ARMSelectBranch(const Instruction *I); + + // Utility routines. + private: + bool isTypeLegal(const Type *Ty, EVT &VT); + bool isLoadTypeLegal(const Type *Ty, EVT &VT); + bool ARMEmitLoad(EVT VT, unsigned &ResultReg, unsigned Reg, int Offset); + bool ARMEmitStore(EVT VT, unsigned SrcReg, unsigned Reg, int Offset); + bool ARMLoadAlloca(const Instruction *I); + bool ARMStoreAlloca(const Instruction *I, unsigned SrcReg); + bool ARMComputeRegOffset(const Value *Obj, unsigned &Reg, int &Offset); + bool ARMMaterializeConstant(const ConstantInt *Val, unsigned &Reg); + + bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR); + const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB); +}; + +} // end anonymous namespace + +// #include "ARMGenCallingConv.inc" + +// DefinesOptionalPredicate - This is different from DefinesPredicate in that +// we don't care about implicit defs here, just places we'll need to add a +// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR. +bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) { + const TargetInstrDesc &TID = MI->getDesc(); + if (!TID.hasOptionalDef()) + return false; + + // Look to see if our OptionalDef is defining CPSR or CCR. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) continue; + if (MO.getReg() == ARM::CPSR) + *CPSR = true; + } + return true; +} + +// If the machine is predicable go ahead and add the predicate operands, if +// it needs default CC operands add those. +const MachineInstrBuilder & +ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) { + MachineInstr *MI = &*MIB; + + // Do we use a predicate? + if (TII.isPredicable(MI)) + AddDefaultPred(MIB); + + // Do we optionally set a predicate? Preds is size > 0 iff the predicate + // defines CPSR. All other OptionalDefines in ARM are the CCR register. 
+ bool CPSR = false; + if (DefinesOptionalPredicate(MI, &CPSR)) { + if (CPSR) + AddDefaultT1CC(MIB); + else + AddDefaultCC(MIB); + } + return MIB; +} + +unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode, + const TargetRegisterClass* RC) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)); + return ResultReg; +} + +unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill)); + else { + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(II.ImplicitDefs[0])); + } + return ResultReg; +} + +unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill)); + else { + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(II.ImplicitDefs[0])); + } + return ResultReg; +} + +unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + uint64_t Imm) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addImm(Imm)); + else { + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addImm(Imm)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(II.ImplicitDefs[0])); + } + return ResultReg; +} + +unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + const ConstantFP *FPImm) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addFPImm(FPImm)); + else { + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addFPImm(FPImm)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(II.ImplicitDefs[0])); + } + return ResultReg; +} + +unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, 
bool Op1IsKill, + uint64_t Imm) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addImm(Imm)); + else { + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addImm(Imm)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(II.ImplicitDefs[0])); + } + return ResultReg; +} + +unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + uint64_t Imm) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addImm(Imm)); + else { + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addImm(Imm)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(II.ImplicitDefs[0])); + } + return ResultReg; +} + +unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT, + unsigned Op0, bool Op0IsKill, + uint32_t Idx) { + unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); + assert(TargetRegisterInfo::isVirtualRegister(Op0) && + "Cannot yet extract from physregs"); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, + DL, TII.get(TargetOpcode::COPY), ResultReg) + .addReg(Op0, getKillRegState(Op0IsKill), Idx)); + return ResultReg; +} + +unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) { + EVT VT = TLI.getValueType(C->getType(), true); + + // Only handle simple types. + if (!VT.isSimple()) return 0; + + // TODO: This should be safe for fp because they're just bits from the + // Constant. + // TODO: Theoretically we could materialize fp constants with instructions + // from VFP3. + + // MachineConstantPool wants an explicit alignment. + unsigned Align = TD.getPrefTypeAlignment(C->getType()); + if (Align == 0) { + // TODO: Figure out if this is correct. + Align = TD.getTypeAllocSize(C->getType()); + } + unsigned Idx = MCP.getConstantPoolIndex(C, Align); + + unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); + // Different addressing modes between ARM/Thumb2 for constant pool loads. + if (isThumb) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(ARM::t2LDRpci)) + .addReg(DestReg).addConstantPoolIndex(Idx)); + else + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(ARM::LDRcp)) + .addReg(DestReg).addConstantPoolIndex(Idx) + .addReg(0).addImm(0)); + + return DestReg; +} + +bool ARMFastISel::isTypeLegal(const Type *Ty, EVT &VT) { + VT = TLI.getValueType(Ty, true); + + // Only handle simple types. + if (VT == MVT::Other || !VT.isSimple()) return false; + + // Handle all legal types, i.e. a register that will directly hold this + // value. + return TLI.isTypeLegal(VT); +} + +bool ARMFastISel::isLoadTypeLegal(const Type *Ty, EVT &VT) { + if (isTypeLegal(Ty, VT)) return true; + + // If this is a type that can be sign or zero-extended to a basic operation, + // go ahead and accept it now. + if (VT == MVT::i8 || VT == MVT::i16) + return true; + + return false; +} + +// Computes the Reg+Offset to get to an object.
+bool ARMFastISel::ARMComputeRegOffset(const Value *Obj, unsigned &Reg, + int &Offset) { + // Some boilerplate from the X86 FastISel. + const User *U = NULL; + unsigned Opcode = Instruction::UserOp1; + if (const Instruction *I = dyn_cast<Instruction>(Obj)) { + // Don't walk into other basic blocks; it's possible we haven't + // visited them yet, so the instructions may not yet be assigned + // virtual registers. + if (FuncInfo.MBBMap[I->getParent()] != FuncInfo.MBB) + return false; + + Opcode = I->getOpcode(); + U = I; + } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) { + Opcode = C->getOpcode(); + U = C; + } + + if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType())) + if (Ty->getAddressSpace() > 255) + // Fast instruction selection doesn't support the special + // address spaces. + return false; + + switch (Opcode) { + default: + //errs() << "Failing Opcode is: " << *Op1 << "\n"; + break; + case Instruction::Alloca: { + assert(false && "Alloca should have been handled earlier!"); + return false; + } + } + + if (const GlobalValue *GV = dyn_cast<GlobalValue>(Obj)) { + //errs() << "Failing GV is: " << GV << "\n"; + (void)GV; + return false; + } + + // Try to get this in a register if nothing else has worked. + Reg = getRegForValue(Obj); + if (Reg == 0) return false; + + // Since the offset may be too large for the load instruction + // get the reg+offset into a register. + // TODO: Verify the additions work, otherwise we'll need to add the + // offset instead of 0 to the instructions and do all sorts of operand + // munging. + // TODO: Optimize this somewhat. + if (Offset != 0) { + ARMCC::CondCodes Pred = ARMCC::AL; + unsigned PredReg = 0; + + if (!isThumb) + emitARMRegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + Reg, Reg, Offset, Pred, PredReg, + static_cast<const ARMBaseInstrInfo&>(TII)); + else { + assert(AFI->isThumb2Function()); + emitT2RegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + Reg, Reg, Offset, Pred, PredReg, + static_cast<const ARMBaseInstrInfo&>(TII)); + } + } + + return true; +} + +bool ARMFastISel::ARMLoadAlloca(const Instruction *I) { + Value *Op0 = I->getOperand(0); + + // Verify it's an alloca. + if (const AllocaInst *AI = dyn_cast<AllocaInst>(Op0)) { + DenseMap<const AllocaInst*, int>::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + + if (SI != FuncInfo.StaticAllocaMap.end()) { + TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy()); + unsigned ResultReg = createResultReg(RC); + TII.loadRegFromStackSlot(*FuncInfo.MBB, *FuncInfo.InsertPt, + ResultReg, SI->second, RC, + TM.getRegisterInfo()); + UpdateValueMap(I, ResultReg); + return true; + } + } + return false; +} + +bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, + unsigned Reg, int Offset) { + + assert(VT.isSimple() && "Non-simple types are invalid here!"); + unsigned Opc; + + switch (VT.getSimpleVT().SimpleTy) { + default: + assert(false && "Trying to emit for an unhandled type!"); + return false; + case MVT::i16: + Opc = isThumb ? ARM::tLDRH : ARM::LDRH; + VT = MVT::i32; + break; + case MVT::i8: + Opc = isThumb ? ARM::tLDRB : ARM::LDRB; + VT = MVT::i32; + break; + case MVT::i32: + Opc = isThumb ? ARM::tLDR : ARM::LDR; + break; + } + + ResultReg = createResultReg(TLI.getRegClassFor(VT)); + + // TODO: Fix the Addressing modes so that these can share some code. + // Since this is a Thumb1 load this will work in Thumb1 or 2 mode. 
+ if (isThumb) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(Opc), ResultReg) + .addReg(Reg).addImm(Offset).addReg(0)); + else + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(Opc), ResultReg) + .addReg(Reg).addReg(0).addImm(Offset)); + + return true; +} + +bool ARMFastISel::ARMStoreAlloca(const Instruction *I, unsigned SrcReg) { + Value *Op1 = I->getOperand(1); + + // Verify it's an alloca. + if (const AllocaInst *AI = dyn_cast<AllocaInst>(Op1)) { + DenseMap<const AllocaInst*, int>::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + + if (SI != FuncInfo.StaticAllocaMap.end()) { + TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy()); + assert(SrcReg != 0 && "Nothing to store!"); + TII.storeRegToStackSlot(*FuncInfo.MBB, *FuncInfo.InsertPt, + SrcReg, true /*isKill*/, SI->second, RC, + TM.getRegisterInfo()); + return true; + } + } + return false; +} + +bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, + unsigned DstReg, int Offset) { + unsigned StrOpc; + switch (VT.getSimpleVT().SimpleTy) { + default: return false; + case MVT::i1: + case MVT::i8: StrOpc = isThumb ? ARM::tSTRB : ARM::STRB; break; + case MVT::i16: StrOpc = isThumb ? ARM::tSTRH : ARM::STRH; break; + case MVT::i32: StrOpc = isThumb ? ARM::tSTR : ARM::STR; break; + case MVT::f32: + if (!Subtarget->hasVFP2()) return false; + StrOpc = ARM::VSTRS; + break; + case MVT::f64: + if (!Subtarget->hasVFP2()) return false; + StrOpc = ARM::VSTRD; + break; + } + + if (isThumb) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(StrOpc), SrcReg) + .addReg(DstReg).addImm(Offset).addReg(0)); + else + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(StrOpc), SrcReg) + .addReg(DstReg).addReg(0).addImm(Offset)); + + return true; +} + +bool ARMFastISel::ARMSelectStore(const Instruction *I) { + Value *Op0 = I->getOperand(0); + unsigned SrcReg = 0; + + // Yay type legalization + EVT VT; + if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT)) + return false; + + // Get the value to be stored into a register. + SrcReg = getRegForValue(Op0); + if (SrcReg == 0) + return false; + + // If we're an alloca we know we have a frame index and can emit the store + // quickly. + if (ARMStoreAlloca(I, SrcReg)) + return true; + + // Our register and offset with innocuous defaults. + unsigned Reg = 0; + int Offset = 0; + + // See if we can handle this as Reg + Offset + if (!ARMComputeRegOffset(I->getOperand(1), Reg, Offset)) + return false; + + if (!ARMEmitStore(VT, SrcReg, Reg, Offset /* 0 */)) return false; + + return false; + +} + +bool ARMFastISel::ARMSelectLoad(const Instruction *I) { + // If we're an alloca we know we have a frame index and can emit the load + // directly in short order. + if (ARMLoadAlloca(I)) + return true; + + // Verify we have a legal type before going any further. + EVT VT; + if (!isLoadTypeLegal(I->getType(), VT)) + return false; + + // Our register and offset with innocuous defaults. 
+  unsigned Reg = 0;
+  int Offset = 0;
+
+  // See if we can handle this as Reg + Offset
+  if (!ARMComputeRegOffset(I->getOperand(0), Reg, Offset))
+    return false;
+
+  unsigned ResultReg;
+  if (!ARMEmitLoad(VT, ResultReg, Reg, Offset /* 0 */)) return false;
+
+  UpdateValueMap(I, ResultReg);
+  return true;
+}
+
+bool ARMFastISel::ARMSelectBranch(const Instruction *I) {
+  const BranchInst *BI = cast<BranchInst>(I);
+  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
+  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
+
+  // Simple branch support.
+  unsigned CondReg = getRegForValue(BI->getCondition());
+  if (CondReg == 0) return false;
+
+  unsigned CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr;
+  unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
+  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
+                  .addReg(CondReg).addReg(CondReg));
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
+    .addMBB(TBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+  FastEmitBranch(FBB, DL);
+  FuncInfo.MBB->addSuccessor(TBB);
+  return true;
+}
+
+// TODO: SoftFP support.
+bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
+  // No Thumb-1 for now.
+  if (isThumb && !AFI->isThumb2Function()) return false;
+
+  switch (I->getOpcode()) {
+    case Instruction::Load:
+      return ARMSelectLoad(I);
+    case Instruction::Store:
+      return ARMSelectStore(I);
+    case Instruction::Br:
+      return ARMSelectBranch(I);
+    default: break;
+  }
+  return false;
+}
+
+namespace llvm {
+  llvm::FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) {
+    if (EnableARMFastISel) return new ARMFastISel(funcInfo);
+    return 0;
+  }
+}
diff --git a/lib/Target/ARM/ARMGlobalMerge.cpp b/lib/Target/ARM/ARMGlobalMerge.cpp
new file mode 100644
index 000000000000..85b0c6c248d0
--- /dev/null
+++ b/lib/Target/ARM/ARMGlobalMerge.cpp
@@ -0,0 +1,212 @@
+//===-- ARMGlobalMerge.cpp - Internal globals merging --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass merges globals with internal linkage into a single big one. All
+// the globals merged this way can then be addressed using offsets from the
+// same base pointer (there is no need for a separate base pointer for each
+// of them). Such a transformation can significantly reduce the register
+// pressure when many globals are involved.
+//
+// For example, consider the code which touches several global variables at
+// once:
+//
+// static int foo[N], bar[N], baz[N];
+//
+// for (i = 0; i < N; ++i) {
+//   foo[i] = bar[i] * baz[i];
+// }
+//
+// On ARM the addresses of all 3 arrays have to be kept in registers, so this
+// code has quite large register pressure (loop body):
+//
+// ldr r1, [r5], #4
+// ldr r2, [r6], #4
+// mul r1, r2, r1
+// str r1, [r0], #4
+//
+// The pass converts the code to something like:
+//
+// static struct {
+//   int foo[N];
+//   int bar[N];
+//   int baz[N];
+// } merged;
+//
+// for (i = 0; i < N; ++i) {
+//   merged.foo[i] = merged.bar[i] * merged.baz[i];
+// }
+//
+// and in ARM code this becomes:
+//
+// ldr r0, [r5, #40]
+// ldr r1, [r5, #80]
+// mul r0, r1, r0
+// str r0, [r5], #4
+//
+// Note that we saved 2 registers here almost "for free".
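[Editor's sketch, not part of the patch. In the typed-pointer LLVM IR of this era (names hypothetical), the rewrite described above roughly corresponds to turning

    @foo = internal global [16 x i32] zeroinitializer
    @bar = internal global [16 x i32] zeroinitializer

into

    @merged = internal global { [16 x i32], [16 x i32] } zeroinitializer

with every use of @foo replaced by "getelementptr inbounds ({ [16 x i32], [16 x i32] }* @merged, i32 0, i32 0)" and every use of @bar by the corresponding i32 1 element; doMerge below builds exactly this kind of struct type and the inbounds GEPs.]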
+// ===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "arm-global-merge" +#include "ARM.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Attributes.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLowering.h" +using namespace llvm; + +namespace { + class LLVM_LIBRARY_VISIBILITY ARMGlobalMerge : public FunctionPass { + /// TLI - Keep a pointer of a TargetLowering to consult for determining + /// target type sizes. + const TargetLowering *TLI; + + bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals, + Module &M, bool) const; + + public: + static char ID; // Pass identification, replacement for typeid. + explicit ARMGlobalMerge(const TargetLowering *tli) + : FunctionPass(ID), TLI(tli) {} + + virtual bool doInitialization(Module &M); + virtual bool runOnFunction(Function& F); + + const char *getPassName() const { + return "Merge internal globals"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + FunctionPass::getAnalysisUsage(AU); + } + + struct GlobalCmp { + const TargetData *TD; + + GlobalCmp(const TargetData *td): + TD(td) { } + + bool operator() (const GlobalVariable* GV1, + const GlobalVariable* GV2) { + const Type* Ty1 = cast<PointerType>(GV1->getType())->getElementType(); + const Type* Ty2 = cast<PointerType>(GV2->getType())->getElementType(); + + return (TD->getTypeAllocSize(Ty1) < TD->getTypeAllocSize(Ty2)); + } + }; + }; +} // end anonymous namespace + +char ARMGlobalMerge::ID = 0; + +bool ARMGlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, + Module &M, bool isConst) const { + const TargetData *TD = TLI->getTargetData(); + + // FIXME: Infer the maximum possible offset depending on the actual users + // (these max offsets are different for the users inside Thumb or ARM + // functions) + unsigned MaxOffset = TLI->getMaximalGlobalOffset(); + + // FIXME: Find better heuristics + std::stable_sort(Globals.begin(), Globals.end(), GlobalCmp(TD)); + + const Type *Int32Ty = Type::getInt32Ty(M.getContext()); + + for (size_t i = 0, e = Globals.size(); i != e; ) { + size_t j = 0; + uint64_t MergedSize = 0; + std::vector<const Type*> Tys; + std::vector<Constant*> Inits; + for (j = i; MergedSize < MaxOffset && j != e; ++j) { + const Type* Ty = Globals[j]->getType()->getElementType(); + Tys.push_back(Ty); + Inits.push_back(Globals[j]->getInitializer()); + MergedSize += TD->getTypeAllocSize(Ty); + } + + StructType* MergedTy = StructType::get(M.getContext(), Tys); + Constant* MergedInit = ConstantStruct::get(MergedTy, Inits); + GlobalVariable* MergedGV = new GlobalVariable(M, MergedTy, isConst, + GlobalValue::InternalLinkage, + MergedInit, "merged"); + for (size_t k = i; k < j; ++k) { + SmallVector<Constant*, 2> Idx; + Idx.push_back(ConstantInt::get(Int32Ty, 0)); + Idx.push_back(ConstantInt::get(Int32Ty, k-i)); + + Constant* GEP = + ConstantExpr::getInBoundsGetElementPtr(MergedGV, + &Idx[0], Idx.size()); + + Globals[k]->replaceAllUsesWith(GEP); + Globals[k]->eraseFromParent(); + } + i = j; + } + + return true; +} + + +bool ARMGlobalMerge::doInitialization(Module& M) { + SmallVector<GlobalVariable*, 16> Globals, ConstGlobals; + const TargetData *TD = TLI->getTargetData(); + unsigned MaxOffset = TLI->getMaximalGlobalOffset(); + bool Changed 
= false;
+
+  // Grab all non-const globals.
+  for (Module::global_iterator I = M.global_begin(),
+         E = M.global_end(); I != E; ++I) {
+    // Merge is safe for "normal" internal globals only.
+    if (!I->hasLocalLinkage() || I->isThreadLocal() || I->hasSection())
+      continue;
+
+    // Ignore fancy-aligned globals for now.
+    if (I->getAlignment() != 0)
+      continue;
+
+    // Ignore all 'special' globals.
+    if (I->getName().startswith("llvm.") ||
+        I->getName().startswith(".llvm."))
+      continue;
+
+    if (TD->getTypeAllocSize(I->getType()) < MaxOffset) {
+      if (I->isConstant())
+        ConstGlobals.push_back(I);
+      else
+        Globals.push_back(I);
+    }
+  }
+
+  if (Globals.size() > 1)
+    Changed |= doMerge(Globals, M, false);
+  // FIXME: This currently breaks the EH processing due to the way the
+  // typeinfo detection works. We might want to detect the TIs and ignore
+  // them in the future.
+
+  // if (ConstGlobals.size() > 1)
+  //   Changed |= doMerge(ConstGlobals, M, true);
+
+  return Changed;
+}
+
+bool ARMGlobalMerge::runOnFunction(Function& F) {
+  return false;
+}
+
+FunctionPass *llvm::createARMGlobalMergePass(const TargetLowering *tli) {
+  return new ARMGlobalMerge(tli);
+}
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index c84d3ff81324..51a30c158dd1 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -36,6 +36,11 @@
 using namespace llvm;
 
+static cl::opt<bool>
+DisableShifterOp("disable-shifter-op", cl::Hidden,
+  cl::desc("Disable isel of shifter-op"),
+  cl::init(false));
+
 //===--------------------------------------------------------------------===//
 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
 /// instructions for SelectionDAG operations.
@@ -113,6 +118,16 @@ public:
   bool SelectT2AddrModeSoReg(SDNode *Op, SDValue N, SDValue &Base,
                              SDValue &OffReg, SDValue &ShImm);
 
+  inline bool Pred_so_imm(SDNode *inN) const {
+    ConstantSDNode *N = cast<ConstantSDNode>(inN);
+    return ARM_AM::getSOImmVal(N->getZExtValue()) != -1;
+  }
+
+  inline bool Pred_t2_so_imm(SDNode *inN) const {
+    ConstantSDNode *N = cast<ConstantSDNode>(inN);
+    return ARM_AM::getT2SOImmVal(N->getZExtValue()) != -1;
+  }
+
   // Include the pieces autogenerated from the target description.
 #include "ARMGenDAGISel.inc"
 
@@ -220,6 +235,9 @@ bool ARMDAGToDAGISel::SelectShifterOperandReg(SDNode *Op,
                                               SDValue &BaseReg,
                                               SDValue &ShReg,
                                               SDValue &Opc) {
+  if (DisableShifterOp)
+    return false;
+
   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
 
   // Don't match base register only case. That is matched to a separate
@@ -463,7 +481,7 @@ bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
 bool ARMDAGToDAGISel::SelectAddrMode4(SDNode *Op, SDValue N,
                                       SDValue &Addr, SDValue &Mode) {
   Addr = N;
-  Mode = CurDAG->getTargetConstant(0, MVT::i32);
+  Mode = CurDAG->getTargetConstant(ARM_AM::getAM4ModeImm(ARM_AM::ia), MVT::i32);
   return true;
 }
 
@@ -666,6 +684,9 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDNode *Op, SDValue N,
 bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDNode *Op, SDValue N,
                                                 SDValue &BaseReg,
                                                 SDValue &Opc) {
+  if (DisableShifterOp)
+    return false;
+
   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
 
   // Don't match base register only case. That is matched to a separate
@@ -1090,110 +1111,79 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
     break;
   }
 
+  EVT ResTy;
+  if (NumVecs == 1)
+    ResTy = VT;
+  else {
+    unsigned ResTyElts = (NumVecs == 3) ?
4 : NumVecs; + if (!is64BitVector) + ResTyElts *= 2; + ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts); + } + SDValue Pred = getAL(CurDAG); SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); + SDValue SuperReg; if (is64BitVector) { unsigned Opc = DOpcodes[OpcodeIndex]; const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain }; - std::vector<EVT> ResTys(NumVecs, VT); - ResTys.push_back(MVT::Other); - SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5); - if (NumVecs < 2) + SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other, Ops, 5); + if (NumVecs == 1) return VLd; - SDValue RegSeq; - SDValue V0 = SDValue(VLd, 0); - SDValue V1 = SDValue(VLd, 1); - - // Form a REG_SEQUENCE to force register allocation. - if (NumVecs == 2) - RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); - else { - SDValue V2 = SDValue(VLd, 2); - // If it's a vld3, form a quad D-register but discard the last part. - SDValue V3 = (NumVecs == 3) - ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) - : SDValue(VLd, 3); - RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); - } - + SuperReg = SDValue(VLd, 0); assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { SDValue D = CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec, - dl, VT, RegSeq); + dl, VT, SuperReg); ReplaceUses(SDValue(N, Vec), D); } - ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, NumVecs)); + ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1)); return NULL; } - EVT RegVT = GetNEONSubregVT(VT); if (NumVecs <= 2) { // Quad registers are directly supported for VLD1 and VLD2, // loading pairs of D regs. unsigned Opc = QOpcodes0[OpcodeIndex]; const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain }; - std::vector<EVT> ResTys(2 * NumVecs, RegVT); - ResTys.push_back(MVT::Other); - SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5); - Chain = SDValue(VLd, 2 * NumVecs); + SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other, Ops, 5); + if (NumVecs == 1) + return VLd; + + SuperReg = SDValue(VLd, 0); + Chain = SDValue(VLd, 1); - // Combine the even and odd subregs to produce the result. - if (NumVecs == 1) { - SDNode *Q = PairDRegs(VT, SDValue(VLd, 0), SDValue(VLd, 1)); - ReplaceUses(SDValue(N, 0), SDValue(Q, 0)); - } else { - SDValue QQ = SDValue(QuadDRegs(MVT::v4i64, - SDValue(VLd, 0), SDValue(VLd, 1), - SDValue(VLd, 2), SDValue(VLd, 3)), 0); - SDValue Q0 = CurDAG->getTargetExtractSubreg(ARM::qsub_0, dl, VT, QQ); - SDValue Q1 = CurDAG->getTargetExtractSubreg(ARM::qsub_1, dl, VT, QQ); - ReplaceUses(SDValue(N, 0), Q0); - ReplaceUses(SDValue(N, 1), Q1); - } } else { // Otherwise, quad registers are loaded with two separate instructions, // where one loads the even registers and the other loads the odd registers. - - std::vector<EVT> ResTys(NumVecs, RegVT); - ResTys.push_back(MemAddr.getValueType()); - ResTys.push_back(MVT::Other); + EVT AddrTy = MemAddr.getValueType(); // Load the even subregs. unsigned Opc = QOpcodes0[OpcodeIndex]; - const SDValue OpsA[] = { MemAddr, Align, Reg0, Pred, Reg0, Chain }; - SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 6); - Chain = SDValue(VLdA, NumVecs+1); + SDValue ImplDef = + SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0); + const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain }; + SDNode *VLdA = + CurDAG->getMachineNode(Opc, dl, ResTy, AddrTy, MVT::Other, OpsA, 7); + Chain = SDValue(VLdA, 2); // Load the odd subregs. 
Opc = QOpcodes1[OpcodeIndex]; - const SDValue OpsB[] = { SDValue(VLdA, NumVecs), - Align, Reg0, Pred, Reg0, Chain }; - SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 6); - Chain = SDValue(VLdB, NumVecs+1); - - SDValue V0 = SDValue(VLdA, 0); - SDValue V1 = SDValue(VLdB, 0); - SDValue V2 = SDValue(VLdA, 1); - SDValue V3 = SDValue(VLdB, 1); - SDValue V4 = SDValue(VLdA, 2); - SDValue V5 = SDValue(VLdB, 2); - SDValue V6 = (NumVecs == 3) - ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), 0) - : SDValue(VLdA, 3); - SDValue V7 = (NumVecs == 3) - ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), 0) - : SDValue(VLdB, 3); - SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V0, V1, V2, V3, - V4, V5, V6, V7), 0); - - // Extract out the 3 / 4 Q registers. - assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering"); - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { - SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec, - dl, VT, RegSeq); - ReplaceUses(SDValue(N, Vec), Q); - } + const SDValue OpsB[] = { SDValue(VLdA, 1), Align, Reg0, SDValue(VLdA, 0), + Pred, Reg0, Chain }; + SDNode *VLdB = + CurDAG->getMachineNode(Opc, dl, ResTy, AddrTy, MVT::Other, OpsB, 7); + SuperReg = SDValue(VLdB, 0); + Chain = SDValue(VLdB, 2); + } + + // Extract out the Q registers. + assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering"); + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { + SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec, + dl, VT, SuperReg); + ReplaceUses(SDValue(N, Vec), Q); } ReplaceUses(SDValue(N, NumVecs), Chain); return NULL; @@ -1235,12 +1225,14 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, SDValue Pred = getAL(CurDAG); SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); - SmallVector<SDValue, 10> Ops; + SmallVector<SDValue, 7> Ops; Ops.push_back(MemAddr); Ops.push_back(Align); if (is64BitVector) { - if (NumVecs >= 2) { + if (NumVecs == 1) { + Ops.push_back(N->getOperand(3)); + } else { SDValue RegSeq; SDValue V0 = N->getOperand(0+3); SDValue V1 = N->getOperand(1+3); @@ -1257,111 +1249,61 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, : N->getOperand(3+3); RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); } - - // Now extract the D registers back out. - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT, - RegSeq)); - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT, - RegSeq)); - if (NumVecs > 2) - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT, - RegSeq)); - if (NumVecs > 3) - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT, - RegSeq)); - } else { - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops.push_back(N->getOperand(Vec+3)); + Ops.push_back(RegSeq); } Ops.push_back(Pred); Ops.push_back(Reg0); // predicate register Ops.push_back(Chain); unsigned Opc = DOpcodes[OpcodeIndex]; - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 6); } - EVT RegVT = GetNEONSubregVT(VT); if (NumVecs <= 2) { - // Quad registers are directly supported for VST1 and VST2, - // storing pairs of D regs. + // Quad registers are directly supported for VST1 and VST2. unsigned Opc = QOpcodes0[OpcodeIndex]; - if (NumVecs == 2) { - // First extract the pair of Q registers. + if (NumVecs == 1) { + Ops.push_back(N->getOperand(3)); + } else { + // Form a QQ register. 
SDValue Q0 = N->getOperand(3); SDValue Q1 = N->getOperand(4); - - // Form a QQ register. - SDValue QQ = SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0); - - // Now extract the D registers back out. - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT, - QQ)); - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT, - QQ)); - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, RegVT, - QQ)); - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, RegVT, - QQ)); - Ops.push_back(Pred); - Ops.push_back(Reg0); // predicate register - Ops.push_back(Chain); - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 5 + 4); - } else { - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT, - N->getOperand(Vec+3))); - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT, - N->getOperand(Vec+3))); - } - Ops.push_back(Pred); - Ops.push_back(Reg0); // predicate register - Ops.push_back(Chain); - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), - 5 + 2 * NumVecs); + Ops.push_back(SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0)); } + Ops.push_back(Pred); + Ops.push_back(Reg0); // predicate register + Ops.push_back(Chain); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 6); } // Otherwise, quad registers are stored with two separate instructions, // where one stores the even registers and the other stores the odd registers. // Form the QQQQ REG_SEQUENCE. - SDValue V[8]; - for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) { - V[i] = CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT, - N->getOperand(Vec+3)); - V[i+1] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT, - N->getOperand(Vec+3)); - } - if (NumVecs == 3) - V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, - dl, RegVT), 0); - - SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3], - V[4], V[5], V[6], V[7]), 0); + SDValue V0 = N->getOperand(0+3); + SDValue V1 = N->getOperand(1+3); + SDValue V2 = N->getOperand(2+3); + SDValue V3 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) + : N->getOperand(3+3); + SDValue RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0); // Store the even D registers. - assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); Ops.push_back(Reg0); // post-access address offset - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec*2, dl, - RegVT, RegSeq)); + Ops.push_back(RegSeq); Ops.push_back(Pred); Ops.push_back(Reg0); // predicate register Ops.push_back(Chain); unsigned Opc = QOpcodes0[OpcodeIndex]; SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+6); + MVT::Other, Ops.data(), 7); Chain = SDValue(VStA, 1); // Store the odd D registers. 
Ops[0] = SDValue(VStA, 0); // MemAddr - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1+Vec*2, dl, - RegVT, RegSeq); - Ops[NumVecs+5] = Chain; + Ops[6] = Chain; Opc = QOpcodes1[OpcodeIndex]; SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+6); + MVT::Other, Ops.data(), 7); Chain = SDValue(VStB, 1); ReplaceUses(SDValue(N, 0), Chain); return NULL; @@ -1675,7 +1617,7 @@ SelectT2CMOVSoImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, if (!T) return 0; - if (Predicate_t2_so_imm(TrueVal.getNode())) { + if (Pred_t2_so_imm(TrueVal.getNode())) { SDValue True = CurDAG->getTargetConstant(T->getZExtValue(), MVT::i32); SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag }; @@ -1692,7 +1634,7 @@ SelectARMCMOVSoImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, if (!T) return 0; - if (Predicate_so_imm(TrueVal.getNode())) { + if (Pred_so_imm(TrueVal.getNode())) { SDValue True = CurDAG->getTargetConstant(T->getZExtValue(), MVT::i32); SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag }; @@ -1740,7 +1682,7 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) { } // Pattern: (ARMcmov:i32 GPR:i32:$false, - // (imm:i32)<<P:Predicate_so_imm>>:$true, + // (imm:i32)<<P:Pred_so_imm>>:$true, // (imm:i32):$cc) // Emits: (MOVCCi:i32 GPR:i32:$false, // (so_imm:i32 (imm:i32):$true), (imm:i32):$cc) @@ -2013,43 +1955,6 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ResNode = SelectARMIndexedLoad(N); if (ResNode) return ResNode; - - // VLDMQ must be custom-selected for "v2f64 load" to set the AM5Opc value. - if (Subtarget->hasVFP2() && - N->getValueType(0).getSimpleVT().SimpleTy == MVT::v2f64) { - SDValue Chain = N->getOperand(0); - SDValue AM5Opc = - CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::ia, 4), MVT::i32); - SDValue Pred = getAL(CurDAG); - SDValue PredReg = CurDAG->getRegister(0, MVT::i32); - SDValue Ops[] = { N->getOperand(1), AM5Opc, Pred, PredReg, Chain }; - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = cast<MemSDNode>(N)->getMemOperand(); - SDNode *Ret = CurDAG->getMachineNode(ARM::VLDMQ, dl, - MVT::v2f64, MVT::Other, Ops, 5); - cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1); - return Ret; - } - // Other cases are autogenerated. - break; - } - case ISD::STORE: { - // VSTMQ must be custom-selected for "v2f64 store" to set the AM5Opc value. - if (Subtarget->hasVFP2() && - N->getOperand(1).getValueType().getSimpleVT().SimpleTy == MVT::v2f64) { - SDValue Chain = N->getOperand(0); - SDValue AM5Opc = - CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::ia, 4), MVT::i32); - SDValue Pred = getAL(CurDAG); - SDValue PredReg = CurDAG->getRegister(0, MVT::i32); - SDValue Ops[] = { N->getOperand(1), N->getOperand(2), - AM5Opc, Pred, PredReg, Chain }; - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = cast<MemSDNode>(N)->getMemOperand(); - SDNode *Ret = CurDAG->getMachineNode(ARM::VSTMQ, dl, MVT::Other, Ops, 6); - cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1); - return Ret; - } // Other cases are autogenerated. 
break; } @@ -2206,39 +2111,40 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case Intrinsic::arm_neon_vld1: { unsigned DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16, ARM::VLD1d32, ARM::VLD1d64 }; - unsigned QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, - ARM::VLD1q32, ARM::VLD1q64 }; + unsigned QOpcodes[] = { ARM::VLD1q8Pseudo, ARM::VLD1q16Pseudo, + ARM::VLD1q32Pseudo, ARM::VLD1q64Pseudo }; return SelectVLD(N, 1, DOpcodes, QOpcodes, 0); } case Intrinsic::arm_neon_vld2: { - unsigned DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16, - ARM::VLD2d32, ARM::VLD1q64 }; - unsigned QOpcodes[] = { ARM::VLD2q8, ARM::VLD2q16, ARM::VLD2q32 }; + unsigned DOpcodes[] = { ARM::VLD2d8Pseudo, ARM::VLD2d16Pseudo, + ARM::VLD2d32Pseudo, ARM::VLD1q64Pseudo }; + unsigned QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo, + ARM::VLD2q32Pseudo }; return SelectVLD(N, 2, DOpcodes, QOpcodes, 0); } case Intrinsic::arm_neon_vld3: { - unsigned DOpcodes[] = { ARM::VLD3d8, ARM::VLD3d16, - ARM::VLD3d32, ARM::VLD1d64T }; - unsigned QOpcodes0[] = { ARM::VLD3q8_UPD, - ARM::VLD3q16_UPD, - ARM::VLD3q32_UPD }; - unsigned QOpcodes1[] = { ARM::VLD3q8odd_UPD, - ARM::VLD3q16odd_UPD, - ARM::VLD3q32odd_UPD }; + unsigned DOpcodes[] = { ARM::VLD3d8Pseudo, ARM::VLD3d16Pseudo, + ARM::VLD3d32Pseudo, ARM::VLD1d64TPseudo }; + unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, + ARM::VLD3q16Pseudo_UPD, + ARM::VLD3q32Pseudo_UPD }; + unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD, + ARM::VLD3q16oddPseudo_UPD, + ARM::VLD3q32oddPseudo_UPD }; return SelectVLD(N, 3, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vld4: { - unsigned DOpcodes[] = { ARM::VLD4d8, ARM::VLD4d16, - ARM::VLD4d32, ARM::VLD1d64Q }; - unsigned QOpcodes0[] = { ARM::VLD4q8_UPD, - ARM::VLD4q16_UPD, - ARM::VLD4q32_UPD }; - unsigned QOpcodes1[] = { ARM::VLD4q8odd_UPD, - ARM::VLD4q16odd_UPD, - ARM::VLD4q32odd_UPD }; + unsigned DOpcodes[] = { ARM::VLD4d8Pseudo, ARM::VLD4d16Pseudo, + ARM::VLD4d32Pseudo, ARM::VLD1d64QPseudo }; + unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, + ARM::VLD4q16Pseudo_UPD, + ARM::VLD4q32Pseudo_UPD }; + unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD, + ARM::VLD4q16oddPseudo_UPD, + ARM::VLD4q32oddPseudo_UPD }; return SelectVLD(N, 4, DOpcodes, QOpcodes0, QOpcodes1); } @@ -2266,39 +2172,40 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case Intrinsic::arm_neon_vst1: { unsigned DOpcodes[] = { ARM::VST1d8, ARM::VST1d16, ARM::VST1d32, ARM::VST1d64 }; - unsigned QOpcodes[] = { ARM::VST1q8, ARM::VST1q16, - ARM::VST1q32, ARM::VST1q64 }; + unsigned QOpcodes[] = { ARM::VST1q8Pseudo, ARM::VST1q16Pseudo, + ARM::VST1q32Pseudo, ARM::VST1q64Pseudo }; return SelectVST(N, 1, DOpcodes, QOpcodes, 0); } case Intrinsic::arm_neon_vst2: { - unsigned DOpcodes[] = { ARM::VST2d8, ARM::VST2d16, - ARM::VST2d32, ARM::VST1q64 }; - unsigned QOpcodes[] = { ARM::VST2q8, ARM::VST2q16, ARM::VST2q32 }; + unsigned DOpcodes[] = { ARM::VST2d8Pseudo, ARM::VST2d16Pseudo, + ARM::VST2d32Pseudo, ARM::VST1q64Pseudo }; + unsigned QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo, + ARM::VST2q32Pseudo }; return SelectVST(N, 2, DOpcodes, QOpcodes, 0); } case Intrinsic::arm_neon_vst3: { - unsigned DOpcodes[] = { ARM::VST3d8, ARM::VST3d16, - ARM::VST3d32, ARM::VST1d64T }; - unsigned QOpcodes0[] = { ARM::VST3q8_UPD, - ARM::VST3q16_UPD, - ARM::VST3q32_UPD }; - unsigned QOpcodes1[] = { ARM::VST3q8odd_UPD, - ARM::VST3q16odd_UPD, - ARM::VST3q32odd_UPD }; + unsigned DOpcodes[] = { ARM::VST3d8Pseudo, ARM::VST3d16Pseudo, + ARM::VST3d32Pseudo, ARM::VST1d64TPseudo }; + unsigned QOpcodes0[] = { 
ARM::VST3q8Pseudo_UPD, + ARM::VST3q16Pseudo_UPD, + ARM::VST3q32Pseudo_UPD }; + unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD, + ARM::VST3q16oddPseudo_UPD, + ARM::VST3q32oddPseudo_UPD }; return SelectVST(N, 3, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vst4: { - unsigned DOpcodes[] = { ARM::VST4d8, ARM::VST4d16, - ARM::VST4d32, ARM::VST1d64Q }; - unsigned QOpcodes0[] = { ARM::VST4q8_UPD, - ARM::VST4q16_UPD, - ARM::VST4q32_UPD }; - unsigned QOpcodes1[] = { ARM::VST4q8odd_UPD, - ARM::VST4q16odd_UPD, - ARM::VST4q32odd_UPD }; + unsigned DOpcodes[] = { ARM::VST4d8Pseudo, ARM::VST4d16Pseudo, + ARM::VST4d32Pseudo, ARM::VST1d64QPseudo }; + unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD, + ARM::VST4q16Pseudo_UPD, + ARM::VST4q32Pseudo_UPD }; + unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD, + ARM::VST4q16oddPseudo_UPD, + ARM::VST4q32oddPseudo_UPD }; return SelectVST(N, 4, DOpcodes, QOpcodes0, QOpcodes1); } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 0091df753eb7..ce4a2c90689c 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -55,7 +55,14 @@ STATISTIC(NumTailCalls, "Number of tail calls"); static cl::opt<bool> EnableARMTailCalls("arm-tail-calls", cl::Hidden, cl::desc("Generate tail calls (TEMPORARY OPTION)."), - cl::init(true)); + cl::init(false)); + +// This option should go away when Machine LICM is smart enough to hoist a +// reg-to-reg VDUP. +static cl::opt<bool> +EnableARMVDUPsplat("arm-vdup-splat", cl::Hidden, + cl::desc("Generate VDUP for integer constant splats (TEMPORARY OPTION)."), + cl::init(false)); static cl::opt<bool> EnableARMLongCalls("arm-long-calls", cl::Hidden, @@ -122,7 +129,10 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom); setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom); setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom); + setLoadExtAction(ISD::SEXTLOAD, VT.getSimpleVT(), Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT.getSimpleVT(), Expand); } + setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand); // Promote all bit-wise operations. if (VT.isInteger() && VT != PromotedBitwiseVT) { @@ -166,6 +176,7 @@ static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) { ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) : TargetLowering(TM, createTLOF(TM)) { Subtarget = &TM.getSubtarget<ARMSubtarget>(); + RegInfo = TM.getRegisterInfo(); if (Subtarget->isTargetDarwin()) { // Uses VFP for Thumb libfuncs if available. @@ -264,7 +275,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) addRegisterClass(MVT::i32, ARM::GPRRegisterClass); if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { addRegisterClass(MVT::f32, ARM::SPRRegisterClass); - addRegisterClass(MVT::f64, ARM::DPRRegisterClass); + if (!Subtarget->isFPOnlySP()) + addRegisterClass(MVT::f64, ARM::DPRRegisterClass); setTruncStoreAction(MVT::f64, MVT::f32, Expand); } @@ -310,9 +322,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand); setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand); + setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand); + // Neon does not support some operations on v1i64 and v2i64 types. setOperationAction(ISD::MUL, MVT::v1i64, Expand); - setOperationAction(ISD::MUL, MVT::v2i64, Expand); + // Custom handling for some quad-vector types to detect VMULL. 
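[Editor's note: the custom handling referred to here is LowerMUL later in this diff, which rewrites, e.g., "mul (sext v4i16), (sext v4i16) : v4i32" into ARMISD::VMULLs (a vmull.s16), the zero-extended form into ARMISD::VMULLu, and falls back to expansion for v2i64 multiplies that match neither pattern.]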
+ setOperationAction(ISD::MUL, MVT::v8i16, Custom); + setOperationAction(ISD::MUL, MVT::v4i32, Custom); + setOperationAction(ISD::MUL, MVT::v2i64, Custom); setOperationAction(ISD::VSETCC, MVT::v1i64, Expand); setOperationAction(ISD::VSETCC, MVT::v2i64, Expand); @@ -410,12 +427,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // doesn't yet know how to not do that for SjLj. setExceptionSelectorRegister(ARM::R0); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); - // Handle atomics directly for ARMv[67] (except for Thumb1), otherwise - // use the default expansion. - bool canHandleAtomics = - (Subtarget->hasV7Ops() || - (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only())); - if (canHandleAtomics) { + // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use + // the default expansion. + if (Subtarget->hasDataBarrier() || + (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only())) { // membarrier needs custom lowering; the rest are legal and handled // normally. setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); @@ -466,10 +481,12 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); - if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) + if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR // iff target supports vfp2. setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom); + setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); + } // We want to custom lower some of our intrinsics. setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); @@ -481,9 +498,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::SETCC, MVT::i32, Expand); setOperationAction(ISD::SETCC, MVT::f32, Expand); setOperationAction(ISD::SETCC, MVT::f64, Expand); - setOperationAction(ISD::SELECT, MVT::i32, Expand); - setOperationAction(ISD::SELECT, MVT::f32, Expand); - setOperationAction(ISD::SELECT, MVT::f64, Expand); + setOperationAction(ISD::SELECT, MVT::i32, Custom); + setOperationAction(ISD::SELECT, MVT::f32, Custom); + setOperationAction(ISD::SELECT, MVT::f64, Custom); setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); @@ -530,6 +547,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setTargetDAGCombine(ISD::SUB); setTargetDAGCombine(ISD::MUL); + if (Subtarget->hasV6T2Ops()) + setTargetDAGCombine(ISD::OR); + setStackPointerRegisterToSaveRestore(ARM::SP); if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2()) @@ -547,6 +567,37 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) benefitFromCodePlacementOpt = true; } +std::pair<const TargetRegisterClass*, uint8_t> +ARMTargetLowering::findRepresentativeClass(EVT VT) const{ + const TargetRegisterClass *RRC = 0; + uint8_t Cost = 1; + switch (VT.getSimpleVT().SimpleTy) { + default: + return TargetLowering::findRepresentativeClass(VT); + // Use DPR as representative register class for all floating point + // and vector types. Since there are 32 SPR registers and 32 DPR registers so + // the cost is 1 for both f32 and f64. 
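[Editor's note: the pattern in the cases below suggests that Cost counts how many D registers one value of the type occupies: 1 for f32/f64 and the 64-bit vector types, 2 for 128-bit vectors such as v4i32, 4 for v4i64, and 8 for v8i64.]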
+ case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16: + case MVT::v2i32: case MVT::v1i64: case MVT::v2f32: + RRC = ARM::DPRRegisterClass; + break; + case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64: + case MVT::v4f32: case MVT::v2f64: + RRC = ARM::DPRRegisterClass; + Cost = 2; + break; + case MVT::v4i64: + RRC = ARM::DPRRegisterClass; + Cost = 4; + break; + case MVT::v8i64: + RRC = ARM::DPRRegisterClass; + Cost = 8; + break; + } + return std::make_pair(RRC, Cost); +} + const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { default: return 0; @@ -561,6 +612,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::BR2_JT: return "ARMISD::BR2_JT"; case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG"; case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD"; + case ARMISD::AND: return "ARMISD::AND"; case ARMISD::CMP: return "ARMISD::CMP"; case ARMISD::CMPZ: return "ARMISD::CMPZ"; case ARMISD::CMPFP: return "ARMISD::CMPFP"; @@ -635,9 +687,12 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VZIP: return "ARMISD::VZIP"; case ARMISD::VUZP: return "ARMISD::VUZP"; case ARMISD::VTRN: return "ARMISD::VTRN"; + case ARMISD::VMULLs: return "ARMISD::VMULLs"; + case ARMISD::VMULLu: return "ARMISD::VMULLu"; case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; case ARMISD::FMAX: return "ARMISD::FMAX"; case ARMISD::FMIN: return "ARMISD::FMIN"; + case ARMISD::BFI: return "ARMISD::BFI"; } } @@ -656,11 +711,23 @@ TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const { return TargetLowering::getRegClassFor(VT); } +// Create a fast isel object. +FastISel * +ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const { + return ARM::createFastISel(funcInfo); +} + /// getFunctionAlignment - Return the Log2 alignment of this function. unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const { return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 1 : 2; } +/// getMaximalGlobalOffset - Returns the maximal possible offset which can +/// be used for loads / stores from the global. +unsigned ARMTargetLowering::getMaximalGlobalOffset() const { + return (Subtarget->isThumb1Only() ? 127 : 4095); +} + Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { unsigned NumVals = N->getNumValues(); if (!NumVals) @@ -688,6 +755,24 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { return Sched::RegPressure; } +unsigned +ARMTargetLowering::getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const { + switch (RC->getID()) { + default: + return 0; + case ARM::tGPRRegClassID: + return RegInfo->hasFP(MF) ? 4 : 5; + case ARM::GPRRegClassID: { + unsigned FP = RegInfo->hasFP(MF) ? 1 : 0; + return 10 - FP - (Subtarget->isR9Reserved() ? 1 : 0); + } + case ARM::SPRRegClassID: // Currently not used as 'rep' register class. 
+ case ARM::DPRRegClassID: + return 32 - 10; + } +} + //===----------------------------------------------------------------------===// // Lowering Code //===----------------------------------------------------------------------===// @@ -793,8 +878,9 @@ static bool f64AssignAAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT, CCState &State, bool CanFail) { static const unsigned HiRegList[] = { ARM::R0, ARM::R2 }; static const unsigned LoRegList[] = { ARM::R1, ARM::R3 }; + static const unsigned ShadowRegList[] = { ARM::R0, ARM::R1 }; - unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2); + unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2); if (Reg == 0) { // For the 2nd half of a v2f64, do not just fail. if (CanFail) @@ -812,6 +898,10 @@ static bool f64AssignAAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT, if (HiRegList[i] == Reg) break; + unsigned T = State.AllocateReg(LoRegList[i]); + (void)T; + assert(T == LoRegList[i] && "Could not allocate register"); + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i], LocVT, LocInfo)); @@ -1624,6 +1714,10 @@ static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res); } +unsigned ARMTargetLowering::getJumpTableEncoding() const { + return MachineJumpTableInfo::EK_Inline; +} + SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); @@ -1917,17 +2011,19 @@ static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG, DebugLoc dl = Op.getDebugLoc(); SDValue Op5 = Op.getOperand(5); unsigned isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue(); - // v6 and v7 can both handle barriers directly, but need handled a bit - // differently. Thumb1 and pre-v6 ARM mode use a libcall instead and should + // Some subtargets which have dmb and dsb instructions can handle barriers + // directly. Some ARMv6 cpus can support them with the help of mcr + // instruction. Thumb1 and pre-v6 ARM mode use a libcall instead and should // never get here. unsigned Opc = isDeviceBarrier ? ARMISD::SYNCBARRIER : ARMISD::MEMBARRIER; - if (Subtarget->hasV7Ops()) + if (Subtarget->hasDataBarrier()) return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0)); - else if (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only()) + else { + assert(Subtarget->hasV6Ops() && !Subtarget->isThumb1Only() && + "Unexpected ISD::MEMBARRIER encountered. Should be libcall!"); return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0), DAG.getConstant(0, MVT::i32)); - assert(0 && "Unexpected ISD::MEMBARRIER encountered. Should be libcall!"); - return SDValue(); + } } static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { @@ -1945,54 +2041,6 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { } SDValue -ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, - SelectionDAG &DAG) const { - SDNode *Node = Op.getNode(); - DebugLoc dl = Node->getDebugLoc(); - EVT VT = Node->getValueType(0); - SDValue Chain = Op.getOperand(0); - SDValue Size = Op.getOperand(1); - SDValue Align = Op.getOperand(2); - - // Chain the dynamic stack allocation so that it doesn't modify the stack - // pointer when other instructions are using the stack. 
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true)); - - unsigned AlignVal = cast<ConstantSDNode>(Align)->getZExtValue(); - unsigned StackAlign = getTargetMachine().getFrameInfo()->getStackAlignment(); - if (AlignVal > StackAlign) - // Do this now since selection pass cannot introduce new target - // independent node. - Align = DAG.getConstant(-(uint64_t)AlignVal, VT); - - // In Thumb1 mode, there isn't a "sub r, sp, r" instruction, we will end up - // using a "add r, sp, r" instead. Negate the size now so we don't have to - // do even more horrible hack later. - MachineFunction &MF = DAG.getMachineFunction(); - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - if (AFI->isThumb1OnlyFunction()) { - bool Negate = true; - ConstantSDNode *C = dyn_cast<ConstantSDNode>(Size); - if (C) { - uint32_t Val = C->getZExtValue(); - if (Val <= 508 && ((Val & 3) == 0)) - Negate = false; - } - if (Negate) - Size = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, VT), Size); - } - - SDVTList VTList = DAG.getVTList(VT, MVT::Other); - SDValue Ops1[] = { Chain, Size, Align }; - SDValue Res = DAG.getNode(ARMISD::DYN_ALLOC, dl, VTList, Ops1, 3); - Chain = Res.getValue(1); - Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true), - DAG.getIntPtrConstant(0, true), SDValue()); - SDValue Ops2[] = { Res, Chain }; - return DAG.getMergeValues(Ops2, 2, dl); -} - -SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, SDValue &Root, SelectionDAG &DAG, DebugLoc dl) const { @@ -2229,28 +2277,28 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, default: break; case ISD::SETLT: case ISD::SETGE: - if (isLegalICmpImmediate(C-1)) { + if (C != 0x80000000 && isLegalICmpImmediate(C-1)) { CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT; RHS = DAG.getConstant(C-1, MVT::i32); } break; case ISD::SETULT: case ISD::SETUGE: - if (C > 0 && isLegalICmpImmediate(C-1)) { + if (C != 0 && isLegalICmpImmediate(C-1)) { CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; RHS = DAG.getConstant(C-1, MVT::i32); } break; case ISD::SETLE: case ISD::SETGT: - if (isLegalICmpImmediate(C+1)) { + if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) { CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; RHS = DAG.getConstant(C+1, MVT::i32); } break; case ISD::SETULE: case ISD::SETUGT: - if (C < 0xffffffff && isLegalICmpImmediate(C+1)) { + if (C != 0xffffffff && isLegalICmpImmediate(C+1)) { CC = (CC == ISD::SETULE) ? 
ISD::SETULT : ISD::SETUGE;
       RHS = DAG.getConstant(C+1, MVT::i32);
     }
@@ -2287,6 +2335,52 @@ ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
   return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp);
 }
 
+SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
+  SDValue Cond = Op.getOperand(0);
+  SDValue SelectTrue = Op.getOperand(1);
+  SDValue SelectFalse = Op.getOperand(2);
+  DebugLoc dl = Op.getDebugLoc();
+
+  // Convert:
+  //
+  //   (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
+  //   (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
+  //
+  if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
+    const ConstantSDNode *CMOVTrue =
+      dyn_cast<ConstantSDNode>(Cond.getOperand(0));
+    const ConstantSDNode *CMOVFalse =
+      dyn_cast<ConstantSDNode>(Cond.getOperand(1));
+
+    if (CMOVTrue && CMOVFalse) {
+      unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
+      unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
+
+      SDValue True;
+      SDValue False;
+      if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
+        True = SelectTrue;
+        False = SelectFalse;
+      } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
+        True = SelectFalse;
+        False = SelectTrue;
+      }
+
+      if (True.getNode() && False.getNode()) {
+        EVT VT = Cond.getValueType();
+        SDValue ARMcc = Cond.getOperand(2);
+        SDValue CCR = Cond.getOperand(3);
+        SDValue Cmp = Cond.getOperand(4);
+        return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp);
+      }
+    }
+  }
+
+  return DAG.getSelectCC(dl, Cond,
+                         DAG.getConstant(0, Cond.getValueType()),
+                         SelectTrue, SelectFalse, ISD::SETNE);
+}
+
 SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
   SDValue LHS = Op.getOperand(0);
@@ -2403,8 +2497,9 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
   bool SeenZero = false;
   if (canChangeToInt(LHS, SeenZero, Subtarget) &&
       canChangeToInt(RHS, SeenZero, Subtarget) &&
-      // If one of the operand is zero, it's safe to ignore the NaN case.
-      (FiniteOnlyFPMath() || SeenZero)) {
+      // If one of the operands is zero, it's safe to ignore the NaN case since
+      // we only care about equality comparisons.
+      (SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) {
     // If unsafe fp math optimization is enabled and there are no other uses of
     // the CMP operands, and the condition code is EQ or NE, we can optimize it
     // to an integer comparison.
@@ -2587,7 +2682,7 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
   }
 
   // Return LR, which contains the return address. Mark it an implicit live-in.
-  unsigned Reg = MF.addLiveIn(ARM::LR, ARM::GPRRegisterClass);
+  unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
   return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
 }
 
@@ -2730,6 +2825,24 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
   return DAG.getMergeValues(Ops, 2, dl);
 }
 
+SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
+                                            SelectionDAG &DAG) const {
+  // The rounding mode is in bits 23:22 of the FPSCR.
+  // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
+  // The formula we use to implement this is ((FPSCR + (1 << 22)) >> 22) & 3,
+  // so that the shift and the AND get folded into a bitfield extract.
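[Editor's worked example, not part of the patch. The mapping implemented below can be sanity-checked in plain C++:

    unsigned MapRMode(unsigned FPSCR) {
      // Same computation as the DAG built below: add 1<<22, shift right by 22,
      // and mask down to the two rounding-mode bits.
      return ((FPSCR + (1u << 22)) >> 22) & 3;  // 0->1, 1->2, 2->3, 3->0
    }

For RMode bits 23:22 equal to 0b11 (round toward zero), (0xC00000 + 0x400000) >> 22 is 4, and 4 & 3 == 0, matching the 3 -> 0 entry of the table above.]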
+ DebugLoc dl = Op.getDebugLoc(); + SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32, + DAG.getConstant(Intrinsic::arm_get_fpscr, + MVT::i32)); + SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR, + DAG.getConstant(1U << 22, MVT::i32)); + SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds, + DAG.getConstant(22, MVT::i32)); + return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE, + DAG.getConstant(3, MVT::i32)); +} + static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { EVT VT = N->getValueType(0); @@ -3046,6 +3159,11 @@ static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT, bool &ReverseVEXT, unsigned &Imm) { unsigned NumElts = VT.getVectorNumElements(); ReverseVEXT = false; + + // Assume that the first shuffle index is not UNDEF. Fail if it is. + if (M[0] < 0) + return false; + Imm = M[0]; // If this is a VEXT shuffle, the immediate value is the index of the first @@ -3061,6 +3179,7 @@ static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT, ReverseVEXT = true; } + if (M[i] < 0) continue; // ignore UNDEF indices if (ExpectedElt != static_cast<unsigned>(M[i])) return false; } @@ -3086,13 +3205,16 @@ static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT, unsigned NumElts = VT.getVectorNumElements(); unsigned BlockElts = M[0] + 1; + // If the first shuffle index is UNDEF, be optimistic. + if (M[0] < 0) + BlockElts = BlockSize / EltSz; if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) return false; for (unsigned i = 0; i < NumElts; ++i) { - if ((unsigned) M[i] != - (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts)) + if (M[i] < 0) continue; // ignore UNDEF indices + if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts)) return false; } @@ -3108,8 +3230,8 @@ static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT, unsigned NumElts = VT.getVectorNumElements(); WhichResult = (M[0] == 0 ? 0 : 1); for (unsigned i = 0; i < NumElts; i += 2) { - if ((unsigned) M[i] != i + WhichResult || - (unsigned) M[i+1] != i + NumElts + WhichResult) + if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) || + (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult)) return false; } return true; @@ -3127,8 +3249,8 @@ static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, unsigned NumElts = VT.getVectorNumElements(); WhichResult = (M[0] == 0 ? 0 : 1); for (unsigned i = 0; i < NumElts; i += 2) { - if ((unsigned) M[i] != i + WhichResult || - (unsigned) M[i+1] != i + WhichResult) + if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) || + (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult)) return false; } return true; @@ -3143,6 +3265,7 @@ static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT, unsigned NumElts = VT.getVectorNumElements(); WhichResult = (M[0] == 0 ? 0 : 1); for (unsigned i = 0; i != NumElts; ++i) { + if (M[i] < 0) continue; // ignore UNDEF indices if ((unsigned) M[i] != 2 * i + WhichResult) return false; } @@ -3168,7 +3291,8 @@ static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, for (unsigned j = 0; j != 2; ++j) { unsigned Idx = WhichResult; for (unsigned i = 0; i != Half; ++i) { - if ((unsigned) M[i + j * Half] != Idx) + int MIdx = M[i + j * Half]; + if (MIdx >= 0 && (unsigned) MIdx != Idx) return false; Idx += 2; } @@ -3191,8 +3315,8 @@ static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT, WhichResult = (M[0] == 0 ? 
0 : 1); unsigned Idx = WhichResult * NumElts / 2; for (unsigned i = 0; i != NumElts; i += 2) { - if ((unsigned) M[i] != Idx || - (unsigned) M[i+1] != Idx + NumElts) + if ((M[i] >= 0 && (unsigned) M[i] != Idx) || + (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts)) return false; Idx += 1; } @@ -3217,8 +3341,8 @@ static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, WhichResult = (M[0] == 0 ? 0 : 1); unsigned Idx = WhichResult * NumElts / 2; for (unsigned i = 0; i != NumElts; i += 2) { - if ((unsigned) M[i] != Idx || - (unsigned) M[i+1] != Idx) + if ((M[i] >= 0 && (unsigned) M[i] != Idx) || + (M[i+1] >= 0 && (unsigned) M[i+1] != Idx)) return false; Idx += 1; } @@ -3230,9 +3354,30 @@ static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, return true; } +// If N is an integer constant that can be moved into a register in one +// instruction, return an SDValue of such a constant (will become a MOV +// instruction). Otherwise return null. +static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, + const ARMSubtarget *ST, DebugLoc dl) { + uint64_t Val; + if (!isa<ConstantSDNode>(N)) + return SDValue(); + Val = cast<ConstantSDNode>(N)->getZExtValue(); + + if (ST->isThumb1Only()) { + if (Val <= 255 || ~Val <= 255) + return DAG.getConstant(Val, MVT::i32); + } else { + if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1) + return DAG.getConstant(Val, MVT::i32); + } + return SDValue(); +} + // If this is a case we can't handle, return null and let the default // expansion code take care of it. -static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { +static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) { BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode()); DebugLoc dl = Op.getDebugLoc(); EVT VT = Op.getValueType(); @@ -3292,15 +3437,41 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { if (isOnlyLowElement) return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value); - // If all elements are constants, fall back to the default expansion, which - // will generate a load from the constant pool. + unsigned EltSize = VT.getVectorElementType().getSizeInBits(); + + if (EnableARMVDUPsplat) { + // Use VDUP for non-constant splats. For f32 constant splats, reduce to + // i32 and try again. + if (usesOnlyOneValue && EltSize <= 32) { + if (!isConstant) + return DAG.getNode(ARMISD::VDUP, dl, VT, Value); + if (VT.getVectorElementType().isFloatingPoint()) { + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0; i < NumElts; ++i) + Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, + Op.getOperand(i))); + SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &Ops[0], + NumElts); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, + LowerBUILD_VECTOR(Val, DAG, ST)); + } + SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl); + if (Val.getNode()) + return DAG.getNode(ARMISD::VDUP, dl, VT, Val); + } + } + + // If all elements are constants and the case above didn't get hit, fall back + // to the default expansion, which will generate a load from the constant + // pool. if (isConstant) return SDValue(); - // Use VDUP for non-constant splats. - unsigned EltSize = VT.getVectorElementType().getSizeInBits(); - if (usesOnlyOneValue && EltSize <= 32) - return DAG.getNode(ARMISD::VDUP, dl, VT, Value); + if (!EnableARMVDUPsplat) { + // Use VDUP for non-constant splats. 
+ if (usesOnlyOneValue && EltSize <= 32) + return DAG.getNode(ARMISD::VDUP, dl, VT, Value); + } // Vectors with 32- or 64-bit elements can be built by directly assigning // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands @@ -3585,6 +3756,51 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Val); } +/// SkipExtension - For a node that is either a SIGN_EXTEND, ZERO_EXTEND, or +/// an extending load, return the unextended value. +static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) { + if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND) + return N->getOperand(0); + LoadSDNode *LD = cast<LoadSDNode>(N); + return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(), + LD->getBasePtr(), LD->getSrcValue(), + LD->getSrcValueOffset(), LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); +} + +static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { + // Multiplications are only custom-lowered for 128-bit vectors so that + // VMULL can be detected. Otherwise v2i64 multiplications are not legal. + EVT VT = Op.getValueType(); + assert(VT.is128BitVector() && "unexpected type for custom-lowering ISD::MUL"); + SDNode *N0 = Op.getOperand(0).getNode(); + SDNode *N1 = Op.getOperand(1).getNode(); + unsigned NewOpc = 0; + if ((N0->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N0)) && + (N1->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N1))) { + NewOpc = ARMISD::VMULLs; + } else if ((N0->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N0)) && + (N1->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N1))) { + NewOpc = ARMISD::VMULLu; + } else if (VT.getSimpleVT().SimpleTy == MVT::v2i64) { + // Fall through to expand this. It is not legal. + return SDValue(); + } else { + // Other vector multiplications are legal. + return Op; + } + + // Legalize to a VMULL instruction. + DebugLoc DL = Op.getDebugLoc(); + SDValue Op0 = SkipExtension(N0, DAG); + SDValue Op1 = SkipExtension(N1, DAG); + + assert(Op0.getValueType().is64BitVector() && + Op1.getValueType().is64BitVector() && + "unexpected types for extended operands to VMULL"); + return DAG.getNode(NewOpc, DL, VT, Op0, Op1); +} + SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Don't know how to custom lower this!"); @@ -3594,10 +3810,10 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return Subtarget->isTargetDarwin() ? 
LowerGlobalAddressDarwin(Op, DAG) : LowerGlobalAddressELF(Op, DAG); case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); + case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::BR_CC: return LowerBR_CC(Op, DAG); case ISD::BR_JT: return LowerBR_JT(Op, DAG); - case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget); case ISD::SINT_TO_FP: @@ -3621,10 +3837,12 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget); case ISD::VSETCC: return LowerVSETCC(Op, DAG); - case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); + case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); + case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); + case ISD::MUL: return LowerMUL(Op, DAG); } return SDValue(); } @@ -4002,78 +4220,6 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } - - case ARM::tANDsp: - case ARM::tADDspr_: - case ARM::tSUBspi_: - case ARM::t2SUBrSPi_: - case ARM::t2SUBrSPi12_: - case ARM::t2SUBrSPs_: { - MachineFunction *MF = BB->getParent(); - unsigned DstReg = MI->getOperand(0).getReg(); - unsigned SrcReg = MI->getOperand(1).getReg(); - bool DstIsDead = MI->getOperand(0).isDead(); - bool SrcIsKill = MI->getOperand(1).isKill(); - - if (SrcReg != ARM::SP) { - // Copy the source to SP from virtual register. - const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(SrcReg); - unsigned CopyOpc = (RC == ARM::tGPRRegisterClass) - ? ARM::tMOVtgpr2gpr : ARM::tMOVgpr2gpr; - BuildMI(*BB, MI, dl, TII->get(CopyOpc), ARM::SP) - .addReg(SrcReg, getKillRegState(SrcIsKill)); - } - - unsigned OpOpc = 0; - bool NeedPred = false, NeedCC = false, NeedOp3 = false; - switch (MI->getOpcode()) { - default: - llvm_unreachable("Unexpected pseudo instruction!"); - case ARM::tANDsp: - OpOpc = ARM::tAND; - NeedPred = true; - break; - case ARM::tADDspr_: - OpOpc = ARM::tADDspr; - break; - case ARM::tSUBspi_: - OpOpc = ARM::tSUBspi; - break; - case ARM::t2SUBrSPi_: - OpOpc = ARM::t2SUBrSPi; - NeedPred = true; NeedCC = true; - break; - case ARM::t2SUBrSPi12_: - OpOpc = ARM::t2SUBrSPi12; - NeedPred = true; - break; - case ARM::t2SUBrSPs_: - OpOpc = ARM::t2SUBrSPs; - NeedPred = true; NeedCC = true; NeedOp3 = true; - break; - } - MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(OpOpc), ARM::SP); - if (OpOpc == ARM::tAND) - AddDefaultT1CC(MIB); - MIB.addReg(ARM::SP); - MIB.addOperand(MI->getOperand(2)); - if (NeedOp3) - MIB.addOperand(MI->getOperand(3)); - if (NeedPred) - AddDefaultPred(MIB); - if (NeedCC) - AddDefaultCC(MIB); - - // Copy the result from SP to virtual register. - const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(DstReg); - unsigned CopyOpc = (RC == ARM::tGPRRegisterClass) - ? ARM::tMOVgpr2tgpr : ARM::tMOVgpr2gpr; - BuildMI(*BB, MI, dl, TII->get(CopyOpc)) - .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead)) - .addReg(ARM::SP); - MI->eraseFromParent(); // The pseudo instruction is gone now. 
- return BB; - } } } @@ -4141,30 +4287,42 @@ SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, return SDValue(); } -/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. -static SDValue PerformADDCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { - // added by evan in r37685 with no testcase. - SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); - +/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with +/// operands N0 and N1. This is a helper for PerformADDCombine that is +/// called with the default operands, and if that fails, with commuted +/// operands. +static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, + TargetLowering::DAGCombinerInfo &DCI) { // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) { SDValue Result = combineSelectAndUse(N, N0, N1, DCI); if (Result.getNode()) return Result; } - if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) { - SDValue Result = combineSelectAndUse(N, N1, N0, DCI); - if (Result.getNode()) return Result; - } - return SDValue(); } +/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. +/// +static SDValue PerformADDCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + // First try with the default operand order. + SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI); + if (Result.getNode()) + return Result; + + // If that didn't work, try again with the operands commuted. + return PerformADDCombineWithOperands(N, N1, N0, DCI); +} + /// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB. +/// static SDValue PerformSUBCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { - // added by evan in r37685 with no testcase. - SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) { @@ -4231,6 +4389,105 @@ static SDValue PerformMULCombine(SDNode *N, return SDValue(); } +/// PerformORCombine - Target-specific dag combine xforms for ISD::OR +static SDValue PerformORCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when + // reasonable. + + // BFI is only available on V6T2+ + if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops()) + return SDValue(); + + SelectionDAG &DAG = DCI.DAG; + SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); + DebugLoc DL = N->getDebugLoc(); + // 1) or (and A, mask), val => ARMbfi A, val, mask + // iff (val & mask) == val + // + // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask + // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2) + // && CountPopulation_32(mask) == CountPopulation_32(~mask2) + // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2) + // && CountPopulation_32(mask) == CountPopulation_32(~mask2) + // (i.e., copy a bitfield value into another bitfield of the same width) + if (N0.getOpcode() != ISD::AND) + return SDValue(); + + EVT VT = N->getValueType(0); + if (VT != MVT::i32) + return SDValue(); + + + // The value and the mask need to be constants so we can verify this is + // actually a bitfield set. 
If the mask is 0xffff, we can do better + // via a movt instruction, so don't use BFI in that case. + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (!C) + return SDValue(); + unsigned Mask = C->getZExtValue(); + if (Mask == 0xffff) + return SDValue(); + SDValue Res; + // Case (1): or (and A, mask), val => ARMbfi A, val, mask + if ((C = dyn_cast<ConstantSDNode>(N1))) { + unsigned Val = C->getZExtValue(); + if (!ARM::isBitFieldInvertedMask(Mask) || (Val & ~Mask) != Val) + return SDValue(); + Val >>= CountTrailingZeros_32(~Mask); + + Res = DAG.getNode(ARMISD::BFI, DL, VT, N0.getOperand(0), + DAG.getConstant(Val, MVT::i32), + DAG.getConstant(Mask, MVT::i32)); + + // Do not add new nodes to DAG combiner worklist. + DCI.CombineTo(N, Res, false); + } else if (N1.getOpcode() == ISD::AND) { + // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask + C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); + if (!C) + return SDValue(); + unsigned Mask2 = C->getZExtValue(); + + if (ARM::isBitFieldInvertedMask(Mask) && + ARM::isBitFieldInvertedMask(~Mask2) && + (CountPopulation_32(Mask) == CountPopulation_32(~Mask2))) { + // The pack halfword instruction works better for masks that fit it, + // so use that when it's available. + if (Subtarget->hasT2ExtractPack() && + (Mask == 0xffff || Mask == 0xffff0000)) + return SDValue(); + // 2a + unsigned lsb = CountTrailingZeros_32(Mask2); + Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0), + DAG.getConstant(lsb, MVT::i32)); + Res = DAG.getNode(ARMISD::BFI, DL, VT, N0.getOperand(0), Res, + DAG.getConstant(Mask, MVT::i32)); + // Do not add new nodes to DAG combiner worklist. + DCI.CombineTo(N, Res, false); + } else if (ARM::isBitFieldInvertedMask(~Mask) && + ARM::isBitFieldInvertedMask(Mask2) && + (CountPopulation_32(~Mask) == CountPopulation_32(Mask2))) { + // The pack halfword instruction works better for masks that fit it, + // so use that when it's available. + if (Subtarget->hasT2ExtractPack() && + (Mask2 == 0xffff || Mask2 == 0xffff0000)) + return SDValue(); + // 2b + unsigned lsb = CountTrailingZeros_32(Mask); + Res = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), + DAG.getConstant(lsb, MVT::i32)); + Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res, + DAG.getConstant(Mask2, MVT::i32)); + // Do not add new nodes to DAG combiner worklist. + DCI.CombineTo(N, Res, false); + } + } + + return SDValue(); +} + /// PerformVMOVRRDCombine - Target-specific dag combine xforms for /// ARMISD::VMOVRRD. static SDValue PerformVMOVRRDCombine(SDNode *N, @@ -4561,7 +4818,7 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { // If the target supports NEON, try to use vmax/vmin instructions for f32 - // selects like "x < y ? x : y". Unless the FiniteOnlyFPMath option is set, + // selects like "x < y ? x : y". Unless the NoNaNsFPMath option is set, // be careful about NaNs: NEON's vmax/vmin return NaN if either operand is // a NaN; only do the transformation when it matches that behavior. 
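A standalone C++ sketch of the vmax/vmin caveat above (not LLVM code; the helper names are illustrative). NEON's vmin/vmax return NaN if either operand is NaN, while the select "x < y ? x : y" returns the second operand whenever the compare is unordered, so the two only agree for some NaN placements:

    #include <cmath>
    #include <cstdio>

    // Semantics of the select being matched.
    static float select_min(float x, float y) { return x < y ? x : y; }

    // NaN-propagating semantics of NEON vmin, per the comment above.
    static float neon_like_min(float x, float y) {
      if (std::isnan(x) || std::isnan(y))
        return NAN;
      return x < y ? x : y;
    }

    int main() {
      float qnan = NAN;
      // y is NaN: both yield NaN, so the fold is safe here.
      std::printf("%f %f\n", select_min(1.0f, qnan), neon_like_min(1.0f, qnan));
      // x is NaN: select yields 2.0 but vmin yields NaN; folding would be
      // wrong unless NoNaNsFPMath promises NaNs never occur.
      std::printf("%f %f\n", select_min(qnan, 2.0f), neon_like_min(qnan, 2.0f));
      return 0;
    }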
@@ -4648,6 +4905,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ISD::ADD: return PerformADDCombine(N, DCI); case ISD::SUB: return PerformSUBCombine(N, DCI); case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget); + case ISD::OR: return PerformORCombine(N, DCI, Subtarget); case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI); case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); @@ -5379,6 +5637,21 @@ int ARM::getVFPf64Imm(const APFloat &FPImm) { return ((int)Sign << 7) | (Exp << 4) | Mantissa; } +bool ARM::isBitFieldInvertedMask(unsigned v) { + if (v == 0xffffffff) + return 0; + // there can be 1's on either or both "outsides", all the "inside" + // bits must be 0's + unsigned int lsb = 0, msb = 31; + while (v & (1 << msb)) --msb; + while (v & (1 << lsb)) ++lsb; + for (unsigned int i = lsb; i <= msb; ++i) { + if (v & (1 << i)) + return 0; + } + return 1; +} + /// isFPImmLegal - Returns true if the target can instruction select the /// specified FP immediate natively. If false, the legalizer will /// materialize the FP immediate as a load from a constant pool. diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 128b72e1e743..ba9ea7f15e7b 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -17,6 +17,8 @@ #include "ARMSubtarget.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/CallingConvLower.h" #include <vector> @@ -45,6 +47,8 @@ namespace llvm { PIC_ADD, // Add with a PC operand and a PIC label. + AND, // ARM "and" instruction that sets the 's' flag in CPSR. + CMP, // ARM compare instructions. CMPZ, // ARM compare that sets only Z flag. CMPFP, // ARM VFP compare instruction, sets FPSCR. @@ -80,7 +84,7 @@ namespace llvm { MEMBARRIER, // Memory barrier SYNCBARRIER, // Memory sync barrier - + VCEQ, // Vector compare equal. VCGE, // Vector compare greater than or equal. VCGEU, // Vector compare unsigned greater than or equal. @@ -141,6 +145,10 @@ namespace llvm { VUZP, // unzip (deinterleave) VTRN, // transpose + // Vector multiply long: + VMULLs, // ...signed + VMULLu, // ...unsigned + // Operands of the standard BUILD_VECTOR node are not legalized, which // is fine if BUILD_VECTORs are always lowered to shuffles or other // operations, but for ARM some BUILD_VECTORs are legal as-is and their @@ -150,7 +158,10 @@ namespace llvm { // Floating-point max and min: FMAX, - FMIN + FMIN, + + // Bit-field insert + BFI }; } @@ -162,6 +173,7 @@ namespace llvm { /// returns -1. int getVFPf32Imm(const APFloat &FPImm); int getVFPf64Imm(const APFloat &FPImm); + bool isBitFieldInvertedMask(unsigned v); } //===--------------------------------------------------------------------===// @@ -171,6 +183,8 @@ namespace llvm { public: explicit ARMTargetLowering(TargetMachine &TM); + virtual unsigned getJumpTableEncoding(void) const; + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; /// ReplaceNodeResults - Replace the results of node with an illegal result @@ -255,8 +269,19 @@ namespace llvm { /// getFunctionAlignment - Return the Log2 alignment of this function. virtual unsigned getFunctionAlignment(const Function *F) const; + /// getMaximalGlobalOffset - Returns the maximal possible offset which can + /// be used for loads / stores from the global. 
+ virtual unsigned getMaximalGlobalOffset() const; + + /// createFastISel - This method returns a target specific FastISel object, + /// or null if the target does not support "fast" ISel. + virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const; + Sched::Preference getSchedulingPreference(SDNode *N) const; + unsigned getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const; + bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const; bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; @@ -265,11 +290,17 @@ namespace llvm { /// materialize the FP immediate as a load from a constant pool. virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const; + protected: + std::pair<const TargetRegisterClass*, uint8_t> + findRepresentativeClass(EVT VT) const; + private: /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can /// make the right decision when generating code for different targets. const ARMSubtarget *Subtarget; + const TargetRegisterInfo *RegInfo; + /// ARMPCLabelIndex - Keep track of the number of ARM PC labels created. /// unsigned ARMPCLabelIndex; @@ -310,14 +341,15 @@ namespace llvm { SelectionDAG &DAG) const; SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, @@ -377,6 +409,10 @@ namespace llvm { unsigned BinOpcode) const; }; + + namespace ARM { + FastISel *createFastISel(FunctionLoweringInfo &funcInfo); + } } #endif // ARMISELLOWERING_H diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index ac568e75ccc4..113cfffe61f9 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -36,37 +36,38 @@ def LdStMulFrm : Format<10>; def LdStExFrm : Format<11>; def ArithMiscFrm : Format<12>; -def ExtFrm : Format<13>; - -def VFPUnaryFrm : Format<14>; -def VFPBinaryFrm : Format<15>; -def VFPConv1Frm : Format<16>; -def VFPConv2Frm : Format<17>; -def VFPConv3Frm : Format<18>; -def VFPConv4Frm : Format<19>; -def VFPConv5Frm : Format<20>; -def VFPLdStFrm : Format<21>; -def VFPLdStMulFrm : Format<22>; -def VFPMiscFrm : Format<23>; - -def ThumbFrm : Format<24>; -def MiscFrm : Format<25>; - -def NGetLnFrm : Format<26>; -def NSetLnFrm : Format<27>; -def NDupFrm : Format<28>; -def NLdStFrm : Format<29>; -def N1RegModImmFrm: Format<30>; -def N2RegFrm : Format<31>; -def NVCVTFrm : Format<32>; -def NVDupLnFrm : Format<33>; -def N2RegVShLFrm : Format<34>; -def N2RegVShRFrm : Format<35>; -def N3RegFrm : Format<36>; -def N3RegVShFrm : Format<37>; -def NVExtFrm : Format<38>; -def NVMulSLFrm : Format<39>; -def NVTBLFrm : Format<40>; +def SatFrm : Format<13>; +def ExtFrm : Format<14>; + +def VFPUnaryFrm : Format<15>; +def VFPBinaryFrm : Format<16>; +def VFPConv1Frm : Format<17>; 
+def VFPConv2Frm : Format<18>; +def VFPConv3Frm : Format<19>; +def VFPConv4Frm : Format<20>; +def VFPConv5Frm : Format<21>; +def VFPLdStFrm : Format<22>; +def VFPLdStMulFrm : Format<23>; +def VFPMiscFrm : Format<24>; + +def ThumbFrm : Format<25>; +def MiscFrm : Format<26>; + +def NGetLnFrm : Format<27>; +def NSetLnFrm : Format<28>; +def NDupFrm : Format<29>; +def NLdStFrm : Format<30>; +def N1RegModImmFrm: Format<31>; +def N2RegFrm : Format<32>; +def NVCVTFrm : Format<33>; +def NVDupLnFrm : Format<34>; +def N2RegVShLFrm : Format<35>; +def N2RegVShRFrm : Format<36>; +def N3RegFrm : Format<37>; +def N3RegVShFrm : Format<38>; +def NVExtFrm : Format<39>; +def NVMulSLFrm : Format<40>; +def NVTBLFrm : Format<41>; // Misc flags. @@ -87,21 +88,21 @@ class Xform16Bit { bit canXformTo16Bit = 1; } class AddrMode<bits<4> val> { bits<4> Value = val; } -def AddrModeNone : AddrMode<0>; -def AddrMode1 : AddrMode<1>; -def AddrMode2 : AddrMode<2>; -def AddrMode3 : AddrMode<3>; -def AddrMode4 : AddrMode<4>; -def AddrMode5 : AddrMode<5>; -def AddrMode6 : AddrMode<6>; -def AddrModeT1_1 : AddrMode<7>; -def AddrModeT1_2 : AddrMode<8>; -def AddrModeT1_4 : AddrMode<9>; -def AddrModeT1_s : AddrMode<10>; -def AddrModeT2_i12: AddrMode<11>; -def AddrModeT2_i8 : AddrMode<12>; -def AddrModeT2_so : AddrMode<13>; -def AddrModeT2_pc : AddrMode<14>; +def AddrModeNone : AddrMode<0>; +def AddrMode1 : AddrMode<1>; +def AddrMode2 : AddrMode<2>; +def AddrMode3 : AddrMode<3>; +def AddrMode4 : AddrMode<4>; +def AddrMode5 : AddrMode<5>; +def AddrMode6 : AddrMode<6>; +def AddrModeT1_1 : AddrMode<7>; +def AddrModeT1_2 : AddrMode<8>; +def AddrModeT1_4 : AddrMode<9>; +def AddrModeT1_s : AddrMode<10>; +def AddrModeT2_i12 : AddrMode<11>; +def AddrModeT2_i8 : AddrMode<12>; +def AddrModeT2_so : AddrMode<13>; +def AddrModeT2_pc : AddrMode<14>; def AddrModeT2_i8s4 : AddrMode<15>; // Instruction size. @@ -137,11 +138,17 @@ def VFPNeonDomain : Domain<3>; // Instructions in both VFP & Neon domains // ARM special operands. // +def CondCodeOperand : AsmOperandClass { + let Name = "CondCode"; + let SuperClasses = []; +} + // ARM Predicate operand. Default to 14 = always (AL). Second part is CC // register whose default is 0 (no register). def pred : PredicateOperand<OtherVT, (ops i32imm, CCR), (ops (i32 14), (i32 zero_reg))> { let PrintMethod = "printPredicateOperand"; + let ParserMatchClass = CondCodeOperand; } // Conditional code result for instructions whose 's' bit is set, e.g. subs. @@ -240,6 +247,7 @@ class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz, let Pattern = pattern; list<Predicate> Predicates = [IsARM]; } + // A few are not predicable class InoP<dag oops, dag iops, AddrMode am, SizeFlagVal sz, IndexMode im, Format f, InstrItinClass itin, @@ -254,9 +262,9 @@ class InoP<dag oops, dag iops, AddrMode am, SizeFlagVal sz, list<Predicate> Predicates = [IsARM]; } -// Same as I except it can optionally modify CPSR. Note it's modeled as -// an input operand since by default it's a zero register. It will -// become an implicit def once it's "flipped". +// Same as I except it can optionally modify CPSR. Note it's modeled as an input +// operand since by default it's a zero register. It will become an implicit def +// once it's "flipped". 
class sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, IndexMode im, Format f, InstrItinClass itin, string opc, string asm, string cstr, @@ -313,7 +321,7 @@ class ABXI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin, } class ABXIx2<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern> - : XI<oops, iops, AddrModeNone, Size8Bytes, IndexModeNone, BrMiscFrm, itin, + : XI<oops, iops, AddrModeNone, Size8Bytes, IndexModeNone, Pseudo, itin, asm, "", pattern>; // BR_JT instructions @@ -322,16 +330,14 @@ class JTI<dag oops, dag iops, InstrItinClass itin, : XI<oops, iops, AddrModeNone, SizeSpecial, IndexModeNone, BrMiscFrm, itin, asm, "", pattern>; - // Atomic load/store instructions - class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, LdStExFrm, itin, opc, asm, "", pattern> { let Inst{27-23} = 0b00011; let Inst{22-21} = opcod; - let Inst{20} = 1; + let Inst{20} = 1; let Inst{11-0} = 0b111110011111; } class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, @@ -340,7 +346,7 @@ class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, opc, asm, "", pattern> { let Inst{27-23} = 0b00011; let Inst{22-21} = opcod; - let Inst{20} = 0; + let Inst{20} = 0; let Inst{11-4} = 0b11111001; } @@ -350,21 +356,21 @@ class AI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin, : I<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin, opc, asm, "", pattern> { let Inst{24-21} = opcod; - let Inst{27-26} = {0,0}; + let Inst{27-26} = 0b00; } class AsI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, list<dag> pattern> : sI<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin, opc, asm, "", pattern> { let Inst{24-21} = opcod; - let Inst{27-26} = {0,0}; + let Inst{27-26} = 0b00; } class AXI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin, string asm, list<dag> pattern> : XI<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin, asm, "", pattern> { let Inst{24-21} = opcod; - let Inst{27-26} = {0,0}; + let Inst{27-26} = 0b00; } class AI1x2<dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, list<dag> pattern> @@ -377,7 +383,7 @@ class AI2<dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, list<dag> pattern> : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin, opc, asm, "", pattern> { - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } // loads @@ -389,7 +395,7 @@ class AI2ldw<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 0; // W bit let Inst{22} = 0; // B bit let Inst{24} = 1; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } class AXI2ldw<dag oops, dag iops, Format f, InstrItinClass itin, string asm, list<dag> pattern> @@ -399,7 +405,7 @@ class AXI2ldw<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 0; // W bit let Inst{22} = 0; // B bit let Inst{24} = 1; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } class AI2ldb<dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, list<dag> pattern> @@ -409,7 +415,7 @@ class AI2ldb<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 0; // W bit let Inst{22} = 1; // B bit let Inst{24} = 1; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } class AXI2ldb<dag oops, dag iops, Format f, InstrItinClass itin, string asm, list<dag> pattern> @@ -419,7 +425,7 @@ class 
AXI2ldb<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 0; // W bit let Inst{22} = 1; // B bit let Inst{24} = 1; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } // stores @@ -431,7 +437,7 @@ class AI2stw<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 0; // W bit let Inst{22} = 0; // B bit let Inst{24} = 1; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } class AXI2stw<dag oops, dag iops, Format f, InstrItinClass itin, string asm, list<dag> pattern> @@ -441,7 +447,7 @@ class AXI2stw<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 0; // W bit let Inst{22} = 0; // B bit let Inst{24} = 1; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } class AI2stb<dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, list<dag> pattern> @@ -451,7 +457,7 @@ class AI2stb<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 0; // W bit let Inst{22} = 1; // B bit let Inst{24} = 1; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } class AXI2stb<dag oops, dag iops, Format f, InstrItinClass itin, string asm, list<dag> pattern> @@ -461,7 +467,7 @@ class AXI2stb<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 0; // W bit let Inst{22} = 1; // B bit let Inst{24} = 1; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } // Pre-indexed loads @@ -473,7 +479,7 @@ class AI2ldwpr<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 1; // W bit let Inst{22} = 0; // B bit let Inst{24} = 1; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } class AI2ldbpr<dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> @@ -483,7 +489,7 @@ class AI2ldbpr<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 1; // W bit let Inst{22} = 1; // B bit let Inst{24} = 1; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } // Pre-indexed stores @@ -495,7 +501,7 @@ class AI2stwpr<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 1; // W bit let Inst{22} = 0; // B bit let Inst{24} = 1; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } class AI2stbpr<dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> @@ -505,7 +511,7 @@ class AI2stbpr<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 1; // W bit let Inst{22} = 1; // B bit let Inst{24} = 1; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } // Post-indexed loads @@ -517,7 +523,7 @@ class AI2ldwpo<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 0; // W bit let Inst{22} = 0; // B bit let Inst{24} = 0; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } class AI2ldbpo<dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> @@ -527,7 +533,7 @@ class AI2ldbpo<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 0; // W bit let Inst{22} = 1; // B bit let Inst{24} = 0; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } // Post-indexed stores @@ -539,7 +545,7 @@ class AI2stwpo<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 0; // W bit let Inst{22} = 0; // B bit let Inst{24} = 0; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } class AI2stbpo<dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> @@ -549,7 +555,7 @@ class AI2stbpo<dag 
oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 0; // W bit let Inst{22} = 1; // B bit let Inst{24} = 0; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } // addrmode3 instructions @@ -977,7 +983,7 @@ class TIx2<bits<5> opcod1, bits<2> opcod2, bit opcod3, Encoding { let Inst{31-27} = opcod1; let Inst{15-14} = opcod2; - let Inst{12} = opcod3; + let Inst{12} = opcod3; } // BR_JT instructions @@ -1099,13 +1105,13 @@ class T1Special<bits<4> opcode> : Encoding16 { // A6.2.4 Load/store single data item encoding. class T1LoadStore<bits<4> opA, bits<3> opB> : Encoding16 { let Inst{15-12} = opA; - let Inst{11-9} = opB; + let Inst{11-9} = opB; } -class T1LdSt<bits<3> opB> : T1LoadStore<0b0101, opB>; +class T1LdSt<bits<3> opB> : T1LoadStore<0b0101, opB>; class T1LdSt4Imm<bits<3> opB> : T1LoadStore<0b0110, opB>; // Immediate, 4 bytes class T1LdSt1Imm<bits<3> opB> : T1LoadStore<0b0111, opB>; // Immediate, 1 byte class T1LdSt2Imm<bits<3> opB> : T1LoadStore<0b1000, opB>; // Immediate, 2 bytes -class T1LdStSP<bits<3> opB> : T1LoadStore<0b1001, opB>; // SP relative +class T1LdStSP<bits<3> opB> : T1LoadStore<0b1001, opB>; // SP relative // A6.2.5 Miscellaneous 16-bit instructions encoding. class T1Misc<bits<7> opcode> : Encoding16 { @@ -1125,9 +1131,10 @@ class Thumb2I<dag oops, dag iops, AddrMode am, SizeFlagVal sz, list<Predicate> Predicates = [IsThumb2]; } -// Same as Thumb2I except it can optionally modify CPSR. Note it's modeled as -// an input operand since by default it's a zero register. It will -// become an implicit def once it's "flipped". +// Same as Thumb2I except it can optionally modify CPSR. Note it's modeled as an +// input operand since by default it's a zero register. It will become an +// implicit def once it's "flipped". +// // FIXME: This uses unified syntax so {s} comes before {p}. We should make it // more consistent. class Thumb2sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, @@ -1185,11 +1192,11 @@ class T2Ii8s4<bit P, bit W, bit load, dag oops, dag iops, InstrItinClass itin, pattern> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b00; - let Inst{24} = P; - let Inst{23} = ?; // The U bit. - let Inst{22} = 1; - let Inst{21} = W; - let Inst{20} = load; + let Inst{24} = P; + let Inst{23} = ?; // The U bit. + let Inst{22} = 1; + let Inst{21} = W; + let Inst{20} = load; } class T2sI<dag oops, dag iops, InstrItinClass itin, @@ -1225,14 +1232,14 @@ class T2Iidxldst<bit signed, bits<2> opcod, bit load, bit pre, list<Predicate> Predicates = [IsThumb2]; let Inst{31-27} = 0b11111; let Inst{26-25} = 0b00; - let Inst{24} = signed; - let Inst{23} = 0; + let Inst{24} = signed; + let Inst{23} = 0; let Inst{22-21} = opcod; - let Inst{20} = load; - let Inst{11} = 1; + let Inst{20} = load; + let Inst{11} = 1; // (P, W) = (1, 1) Pre-indexed or (0, 1) Post-indexed - let Inst{10} = pre; // The P bit. - let Inst{8} = 1; // The W bit. + let Inst{10} = pre; // The P bit. + let Inst{8} = 1; // The W bit. } // Helper class for disassembly only @@ -1243,9 +1250,9 @@ class T2I_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops, dag iops, : T2I<oops, iops, itin, opc, asm, pattern> { let Inst{31-27} = 0b11111; let Inst{26-24} = 0b011; - let Inst{23} = long; + let Inst{23} = long; let Inst{22-20} = op22_20; - let Inst{7-4} = op7_4; + let Inst{7-4} = op7_4; } // Tv5Pat - Same as Pat<>, but requires V5T Thumb mode. 
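A standalone C++ sketch (not LLVM code) of what the P and W bits set by the load/store classes above select. Pre-indexed: the offset is applied before the access and the updated address is written back; post-indexed: the access uses the base as-is and the base is advanced afterwards. Offsets are in words here for simplicity; the real encodings use byte offsets:

    #include <cstdint>

    // P=1, W=1: address = base + offset, loaded from and written back.
    static uint32_t loadPreIndexed(const uint32_t *&base, int32_t offset) {
      base += offset;
      return *base;
    }

    // P=0: address = base; the base register is advanced after the access.
    static uint32_t loadPostIndexed(const uint32_t *&base, int32_t offset) {
      uint32_t v = *base;
      base += offset;
      return v;
    }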
@@ -1325,9 +1332,9 @@ class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops, } // Load / store multiple -class AXDI5<dag oops, dag iops, IndexMode im, InstrItinClass itin, +class AXDI4<dag oops, dag iops, IndexMode im, InstrItinClass itin, string asm, string cstr, list<dag> pattern> - : VFPXI<oops, iops, AddrMode5, Size4Bytes, im, + : VFPXI<oops, iops, AddrMode4, Size4Bytes, im, VFPLdStMulFrm, itin, asm, cstr, pattern> { // TODO: Mark the instructions with the appropriate subtarget info. let Inst{27-25} = 0b110; @@ -1337,9 +1344,9 @@ class AXDI5<dag oops, dag iops, IndexMode im, InstrItinClass itin, let D = VFPNeonDomain; } -class AXSI5<dag oops, dag iops, IndexMode im, InstrItinClass itin, +class AXSI4<dag oops, dag iops, IndexMode im, InstrItinClass itin, string asm, string cstr, list<dag> pattern> - : VFPXI<oops, iops, AddrMode5, Size4Bytes, im, + : VFPXI<oops, iops, AddrMode4, Size4Bytes, im, VFPLdStMulFrm, itin, asm, cstr, pattern> { // TODO: Mark the instructions with the appropriate subtarget info. let Inst{27-25} = 0b110; @@ -1367,8 +1374,8 @@ class ADbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, let Inst{27-23} = opcod1; let Inst{21-20} = opcod2; let Inst{11-8} = 0b1011; - let Inst{6} = op6; - let Inst{4} = op4; + let Inst{6} = op6; + let Inst{4} = op4; } // Double precision, binary, VML[AS] (for additional predicate) @@ -1379,12 +1386,11 @@ class ADbI_vmlX<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, let Inst{27-23} = opcod1; let Inst{21-20} = opcod2; let Inst{11-8} = 0b1011; - let Inst{6} = op6; - let Inst{4} = op4; + let Inst{6} = op6; + let Inst{4} = op4; list<Predicate> Predicates = [HasVFP2, UseVMLx]; } - // Single precision, unary class ASuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, @@ -1415,8 +1421,8 @@ class ASbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops, let Inst{27-23} = opcod1; let Inst{21-20} = opcod2; let Inst{11-8} = 0b1010; - let Inst{6} = op6; - let Inst{4} = op4; + let Inst{6} = op6; + let Inst{4} = op4; } // Single precision binary, if no NEON @@ -1521,10 +1527,18 @@ class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4, : NeonI<oops, iops, AddrMode6, IndexModeNone, NLdStFrm, itin, opc, dt, asm, cstr, pattern> { let Inst{31-24} = 0b11110100; - let Inst{23} = op23; + let Inst{23} = op23; let Inst{21-20} = op21_20; - let Inst{11-8} = op11_8; - let Inst{7-4} = op7_4; + let Inst{11-8} = op11_8; + let Inst{7-4} = op7_4; +} + +class PseudoNLdSt<dag oops, dag iops, InstrItinClass itin, string cstr> + : InstARM<AddrMode6, Size4Bytes, IndexModeNone, Pseudo, NeonDomain, cstr, + itin> { + let OutOperandList = oops; + let InOperandList = !con(iops, (ins pred:$p)); + list<Predicate> Predicates = [HasNEON]; } class NDataI<dag oops, dag iops, Format f, InstrItinClass itin, @@ -1548,13 +1562,13 @@ class N1ModImm<bit op23, bits<3> op21_19, bits<4> op11_8, bit op7, bit op6, string opc, string dt, string asm, string cstr, list<dag> pattern> : NDataI<oops, iops, N1RegModImmFrm, itin, opc, dt, asm, cstr, pattern> { - let Inst{23} = op23; + let Inst{23} = op23; let Inst{21-19} = op21_19; - let Inst{11-8} = op11_8; - let Inst{7} = op7; - let Inst{6} = op6; - let Inst{5} = op5; - let Inst{4} = op4; + let Inst{11-8} = op11_8; + let Inst{7} = op7; + let Inst{6} = op6; + let Inst{5} = op5; + let Inst{4} = op4; } // NEON 2 vector register format. 
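Context for the AXDI5 -> AXDI4 and AXSI5 -> AXSI4 renames above: the VFP load/store multiple classes move from AddrMode5 to AddrMode4, the register-list addressing mode that ldm/stm-style transfers use. A rough standalone C++ sketch (not LLVM code) of the increment-after flavor of a load multiple:

    #include <cstdint>
    #include <vector>

    // Transfer consecutive words starting at the base address; the returned
    // pointer is what writeback would store into the base register.
    static const uint32_t *loadMultipleIA(const uint32_t *base,
                                          std::vector<uint32_t> &regs) {
      for (uint32_t &r : regs)
        r = *base++;
      return base;
    }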
@@ -1567,9 +1581,9 @@ class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, let Inst{21-20} = op21_20; let Inst{19-18} = op19_18; let Inst{17-16} = op17_16; - let Inst{11-7} = op11_7; - let Inst{6} = op6; - let Inst{4} = op4; + let Inst{11-7} = op11_7; + let Inst{6} = op6; + let Inst{4} = op4; } // Same as N2V except it doesn't have a datatype suffix. @@ -1582,9 +1596,9 @@ class N2VX<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, let Inst{21-20} = op21_20; let Inst{19-18} = op19_18; let Inst{17-16} = op17_16; - let Inst{11-7} = op11_7; - let Inst{6} = op6; - let Inst{4} = op4; + let Inst{11-7} = op11_7; + let Inst{6} = op6; + let Inst{4} = op4; } // NEON 2 vector register with immediate. @@ -1592,12 +1606,12 @@ class N2VImm<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, dag oops, dag iops, Format f, InstrItinClass itin, string opc, string dt, string asm, string cstr, list<dag> pattern> : NDataI<oops, iops, f, itin, opc, dt, asm, cstr, pattern> { - let Inst{24} = op24; - let Inst{23} = op23; + let Inst{24} = op24; + let Inst{23} = op23; let Inst{11-8} = op11_8; - let Inst{7} = op7; - let Inst{6} = op6; - let Inst{4} = op4; + let Inst{7} = op7; + let Inst{6} = op6; + let Inst{4} = op4; } // NEON 3 vector register format. @@ -1605,12 +1619,12 @@ class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4, dag oops, dag iops, Format f, InstrItinClass itin, string opc, string dt, string asm, string cstr, list<dag> pattern> : NDataI<oops, iops, f, itin, opc, dt, asm, cstr, pattern> { - let Inst{24} = op24; - let Inst{23} = op23; + let Inst{24} = op24; + let Inst{23} = op23; let Inst{21-20} = op21_20; - let Inst{11-8} = op11_8; - let Inst{6} = op6; - let Inst{4} = op4; + let Inst{11-8} = op11_8; + let Inst{6} = op6; + let Inst{4} = op4; } // Same as N3V except it doesn't have a data type suffix. @@ -1619,12 +1633,12 @@ class N3VX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> : NDataXI<oops, iops, f, itin, opc, asm, cstr, pattern> { - let Inst{24} = op24; - let Inst{23} = op23; + let Inst{24} = op24; + let Inst{23} = op23; let Inst{21-20} = op21_20; - let Inst{11-8} = op11_8; - let Inst{6} = op6; - let Inst{4} = op4; + let Inst{11-8} = op11_8; + let Inst{6} = op6; + let Inst{4} = op4; } // NEON VMOVs between scalar and core registers. 
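All of these NEON format classes do the same job: drop fixed opcode fragments into bit ranges of a 32-bit instruction word. A standalone C++ equivalent of "let Inst{hi-lo} = val" (not LLVM code), instantiated for the N3V fields above:

    #include <cstdint>

    // Replace bits [hi:lo] of inst with val, like "let Inst{hi-lo} = val".
    static uint32_t setBits(uint32_t inst, unsigned hi, unsigned lo,
                            uint32_t val) {
      uint32_t mask = (hi - lo == 31) ? ~0u
                                      : ((1u << (hi - lo + 1)) - 1) << lo;
      return (inst & ~mask) | ((val << lo) & mask);
    }

    static uint32_t encodeN3V(bool op24, bool op23, uint32_t op21_20,
                              uint32_t op11_8, bool op6, bool op4) {
      uint32_t inst = 0;
      inst = setBits(inst, 24, 24, op24);
      inst = setBits(inst, 23, 23, op23);
      inst = setBits(inst, 21, 20, op21_20);
      inst = setBits(inst, 11, 8, op11_8);
      inst = setBits(inst, 6, 6, op6);
      inst = setBits(inst, 4, 4, op4);
      return inst;
    }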
@@ -1634,9 +1648,9 @@ class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3, : InstARM<AddrModeNone, Size4Bytes, IndexModeNone, f, GenericDomain, "", itin> { let Inst{27-20} = opcod1; - let Inst{11-8} = opcod2; - let Inst{6-5} = opcod3; - let Inst{4} = 1; + let Inst{11-8} = opcod2; + let Inst{6-5} = opcod3; + let Inst{4} = 1; let OutOperandList = oops; let InOperandList = !con(iops, (ins pred:$p)); @@ -1670,9 +1684,9 @@ class NVDupLane<bits<4> op19_16, bit op6, dag oops, dag iops, let Inst{24-23} = 0b11; let Inst{21-20} = 0b11; let Inst{19-16} = op19_16; - let Inst{11-7} = 0b11000; - let Inst{6} = op6; - let Inst{4} = 0; + let Inst{11-7} = 0b11000; + let Inst{6} = op6; + let Inst{4} = 0; } // NEONFPPat - Same as Pat<>, but requires that the compiler be using NEON diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 51fc1522485f..e66f9b9ad0ac 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -44,6 +44,10 @@ def SDT_ARMBCC_i64 : SDTypeProfile<0, 6, SDTCisVT<3, i32>, SDTCisVT<4, i32>, SDTCisVT<5, OtherVT>]>; +def SDT_ARMAnd : SDTypeProfile<1, 2, + [SDTCisVT<0, i32>, SDTCisVT<1, i32>, + SDTCisVT<2, i32>]>; + def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>; def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, @@ -54,13 +58,16 @@ def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>]>; def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>; -def SDT_ARMMEMBARRIERV7 : SDTypeProfile<0, 0, []>; -def SDT_ARMSYNCBARRIERV7 : SDTypeProfile<0, 0, []>; -def SDT_ARMMEMBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>; -def SDT_ARMSYNCBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +def SDT_ARMMEMBARRIER : SDTypeProfile<0, 0, []>; +def SDT_ARMSYNCBARRIER : SDTypeProfile<0, 0, []>; +def SDT_ARMMEMBARRIERMCR : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +def SDT_ARMSYNCBARRIERMCR : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; +def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, + SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; + // Node definitions. 
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>; def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>; @@ -99,11 +106,14 @@ def ARMbr2jt : SDNode<"ARMISD::BR2_JT", SDT_ARMBr2JT, def ARMBcci64 : SDNode<"ARMISD::BCC_i64", SDT_ARMBCC_i64, [SDNPHasChain]>; +def ARMand : SDNode<"ARMISD::AND", SDT_ARMAnd, + [SDNPOutFlag]>; + def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp, [SDNPOutFlag]>; def ARMcmpZ : SDNode<"ARMISD::CMPZ", SDT_ARMCmp, - [SDNPOutFlag,SDNPCommutative]>; + [SDNPOutFlag, SDNPCommutative]>; def ARMpic_add : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>; @@ -117,51 +127,54 @@ def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP", SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain]>; -def ARMMemBarrierV7 : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERV7, - [SDNPHasChain]>; -def ARMSyncBarrierV7 : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERV7, - [SDNPHasChain]>; -def ARMMemBarrierV6 : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERV6, - [SDNPHasChain]>; -def ARMSyncBarrierV6 : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERV6, - [SDNPHasChain]>; +def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER, + [SDNPHasChain]>; +def ARMSyncBarrier : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIER, + [SDNPHasChain]>; +def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERMCR, + [SDNPHasChain]>; +def ARMSyncBarrierMCR : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERMCR, + [SDNPHasChain]>; def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>; def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET, [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>; + +def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>; + //===----------------------------------------------------------------------===// // ARM Instruction Predicate Definitions. 
// -def HasV4T : Predicate<"Subtarget->hasV4TOps()">; -def NoV4T : Predicate<"!Subtarget->hasV4TOps()">; -def HasV5T : Predicate<"Subtarget->hasV5TOps()">; -def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">; -def HasV6 : Predicate<"Subtarget->hasV6Ops()">; -def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">; -def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">; -def HasV7 : Predicate<"Subtarget->hasV7Ops()">; -def NoVFP : Predicate<"!Subtarget->hasVFP2()">; -def HasVFP2 : Predicate<"Subtarget->hasVFP2()">; -def HasVFP3 : Predicate<"Subtarget->hasVFP3()">; -def HasNEON : Predicate<"Subtarget->hasNEON()">; -def HasDivide : Predicate<"Subtarget->hasDivide()">; +def HasV4T : Predicate<"Subtarget->hasV4TOps()">; +def NoV4T : Predicate<"!Subtarget->hasV4TOps()">; +def HasV5T : Predicate<"Subtarget->hasV5TOps()">; +def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">; +def HasV6 : Predicate<"Subtarget->hasV6Ops()">; +def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">; +def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">; +def HasV7 : Predicate<"Subtarget->hasV7Ops()">; +def NoVFP : Predicate<"!Subtarget->hasVFP2()">; +def HasVFP2 : Predicate<"Subtarget->hasVFP2()">; +def HasVFP3 : Predicate<"Subtarget->hasVFP3()">; +def HasNEON : Predicate<"Subtarget->hasNEON()">; +def HasDivide : Predicate<"Subtarget->hasDivide()">; def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">; -def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">; +def HasDB : Predicate<"Subtarget->hasDataBarrier()">; +def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">; def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">; -def IsThumb : Predicate<"Subtarget->isThumb()">; -def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">; -def IsThumb2 : Predicate<"Subtarget->isThumb2()">; -def IsARM : Predicate<"!Subtarget->isThumb()">; -def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">; -def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">; +def IsThumb : Predicate<"Subtarget->isThumb()">; +def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">; +def IsThumb2 : Predicate<"Subtarget->isThumb2()">; +def IsARM : Predicate<"!Subtarget->isThumb()">; +def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">; +def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">; // FIXME: Eventually this will be just "hasV6T2Ops". -def UseMovt : Predicate<"Subtarget->useMovt()">; -def DontUseMovt : Predicate<"!Subtarget->useMovt()">; - -def UseVMLx : Predicate<"Subtarget->useVMLx()">; +def UseMovt : Predicate<"Subtarget->useMovt()">; +def DontUseMovt : Predicate<"!Subtarget->useMovt()">; +def UseVMLx : Predicate<"Subtarget->useVMLx()">; //===----------------------------------------------------------------------===// // ARM Flag Definitions. @@ -221,29 +234,12 @@ def sext_16_node : PatLeaf<(i32 GPR:$a), [{ /// e.g., 0xf000ffff def bf_inv_mask_imm : Operand<i32>, PatLeaf<(imm), [{ - uint32_t v = (uint32_t)N->getZExtValue(); - if (v == 0xffffffff) - return 0; - // there can be 1's on either or both "outsides", all the "inside" - // bits must be 0's - unsigned int lsb = 0, msb = 31; - while (v & (1 << msb)) --msb; - while (v & (1 << lsb)) ++lsb; - for (unsigned int i = lsb; i <= msb; ++i) { - if (v & (1 << i)) - return 0; - } - return 1; + return ARM::isBitFieldInvertedMask(N->getZExtValue()); }] > { let PrintMethod = "printBitfieldInvMaskImmOperand"; } /// Split a 32-bit immediate into two 16 bit parts. 
-def lo16 : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant((uint32_t)N->getZExtValue() & 0xffff, - MVT::i32); -}]>; - def hi16 : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant((uint32_t)N->getZExtValue() >> 16, MVT::i32); }]>; @@ -306,6 +302,13 @@ def pclabel : Operand<i32> { let PrintMethod = "printPCLabel"; } +// shift_imm: An integer that encodes a shift amount and the type of shift +// (currently either asr or lsl) using the same encoding used for the +// immediates in so_reg operands. +def shift_imm : Operand<i32> { + let PrintMethod = "printShiftImmOperand"; +} + // shifter_operand operands: so_reg and so_imm. def so_reg : Operand<i32>, // reg reg imm ComplexPattern<i32, 3, "SelectShifterOperandReg", @@ -319,10 +322,7 @@ def so_reg : Operand<i32>, // reg reg imm // represented in the imm field in the same 12-bit form that they are encoded // into so_imm instructions: the 8-bit immediate is the least significant bits // [bits 0-7], the 4-bit shift amount is the next 4 bits [bits 8-11]. -def so_imm : Operand<i32>, - PatLeaf<(imm), [{ - return ARM_AM::getSOImmVal(N->getZExtValue()) != -1; - }]> { +def so_imm : Operand<i32>, PatLeaf<(imm), [{ return Pred_so_imm(N); }]> { let PrintMethod = "printSOImmOperand"; } @@ -452,11 +452,15 @@ include "ARMInstrFormats.td" /// binop that produces a value. multiclass AsI1_bin_irs<bits<4> opcod, string opc, PatFrag opnode, bit Commutable = 0> { + // The register-immediate version is re-materializable. This is useful + // in particular for taking the address of a local. + let isReMaterializable = 1 in { def ri : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, IIC_iALUi, opc, "\t$dst, $a, $b", [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]> { let Inst{25} = 1; } + } def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, IIC_iALUr, opc, "\t$dst, $a, $b", [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]> { @@ -502,7 +506,7 @@ multiclass AI1_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode, /// AI1_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test /// patterns. Similar to AsI1_bin_irs except the instruction does not produce /// a explicit result, only implicitly set CPSR. 
-let Defs = [CPSR] in { +let isCompare = 1, Defs = [CPSR] in { multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode, bit Commutable = 0> { def ri : AI1<opcod, (outs), (ins GPR:$a, so_imm:$b), DPFrm, IIC_iCMPi, @@ -1117,7 +1121,7 @@ let isBranch = 1, isTerminator = 1 in { let isNotDuplicable = 1, isIndirectBranch = 1 in { def BR_JTr : JTI<(outs), (ins GPR:$target, jtblock_operand:$jt, i32imm:$id), - IIC_Br, "mov\tpc, $target \n$jt", + IIC_Br, "mov\tpc, $target$jt", [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]> { let Inst{11-4} = 0b00000000; let Inst{15-12} = 0b1111; @@ -1127,7 +1131,7 @@ let isBranch = 1, isTerminator = 1 in { } def BR_JTm : JTI<(outs), (ins addrmode2:$target, jtblock_operand:$jt, i32imm:$id), - IIC_Br, "ldr\tpc, $target \n$jt", + IIC_Br, "ldr\tpc, $target$jt", [(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt, imm:$id)]> { let Inst{15-12} = 0b1111; @@ -1139,7 +1143,7 @@ let isBranch = 1, isTerminator = 1 in { } def BR_JTadd : JTI<(outs), (ins GPR:$target, GPR:$idx, jtblock_operand:$jt, i32imm:$id), - IIC_Br, "add\tpc, $target, $idx \n$jt", + IIC_Br, "add\tpc, $target, $idx$jt", [(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt, imm:$id)]> { let Inst{15-12} = 0b1111; @@ -1573,8 +1577,12 @@ defm UXTH : AI_unary_rrot<0b01101111, defm UXTB16 : AI_unary_rrot<0b01101100, "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>; -def : ARMV6Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF), - (UXTB16r_rot GPR:$Src, 24)>; +// FIXME: This pattern incorrectly assumes the shl operator is a rotate. +// The transformation should probably be done as a combiner action +// instead so we can include a check for masking back in the upper +// eight bits of the source into the lower eight bits of the result. +//def : ARMV6Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF), +// (UXTB16r_rot GPR:$Src, 24)>; def : ARMV6Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF), (UXTB16r_rot GPR:$Src, 8)>; @@ -1631,16 +1639,24 @@ defm ADCS : AI1_adde_sube_s_irs<0b0101, "adcs", defm SBCS : AI1_adde_sube_s_irs<0b0110, "sbcs", BinOpFrag<(sube_live_carry node:$LHS, node:$RHS) >>; -// These don't define reg/reg forms, because they are handled above. def RSBri : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, - IIC_iALUi, "rsb", "\t$dst, $a, $b", - [(set GPR:$dst, (sub so_imm:$b, GPR:$a))]> { + IIC_iALUi, "rsb", "\t$dst, $a, $b", + [(set GPR:$dst, (sub so_imm:$b, GPR:$a))]> { let Inst{25} = 1; } +// The reg/reg form is only defined for the disassembler; for codegen it is +// equivalent to SUBrr. +def RSBrr : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, + IIC_iALUr, "rsb", "\t$dst, $a, $b", + [/* For disassembly only; pattern left blank */]> { + let Inst{25} = 0; + let Inst{11-4} = 0b00000000; +} + def RSBrs : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, - IIC_iALUsr, "rsb", "\t$dst, $a, $b", - [(set GPR:$dst, (sub so_reg:$b, GPR:$a))]> { + IIC_iALUsr, "rsb", "\t$dst, $a, $b", + [(set GPR:$dst, (sub so_reg:$b, GPR:$a))]> { let Inst{25} = 0; } @@ -1667,6 +1683,14 @@ def RSCri : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), Requires<[IsARM]> { let Inst{25} = 1; } +// The reg/reg form is only defined for the disassembler; for codegen it is +// equivalent to SUBrr. 
+def RSCrr : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + DPFrm, IIC_iALUr, "rsc", "\t$dst, $a, $b", + [/* For disassembly only; pattern left blank */]> { + let Inst{25} = 0; + let Inst{11-4} = 0b00000000; +} def RSCrs : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iALUsr, "rsc", "\t$dst, $a, $b", [(set GPR:$dst, (sube_dead_carry so_reg:$b, GPR:$a))]>, @@ -1716,24 +1740,26 @@ def : ARMPat<(adde GPR:$src, so_imm_not:$imm), // ARM Arithmetic Instruction -- for disassembly only // GPR:$dst = GPR:$a op GPR:$b -class AAI<bits<8> op27_20, bits<4> op7_4, string opc> +class AAI<bits<8> op27_20, bits<4> op7_4, string opc, + list<dag> pattern = [/* For disassembly only; pattern left blank */]> : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, IIC_iALUr, - opc, "\t$dst, $a, $b", - [/* For disassembly only; pattern left blank */]> { + opc, "\t$dst, $a, $b", pattern> { let Inst{27-20} = op27_20; let Inst{7-4} = op7_4; } // Saturating add/subtract -- for disassembly only -def QADD : AAI<0b00010000, 0b0101, "qadd">; +def QADD : AAI<0b00010000, 0b0101, "qadd", + [(set GPR:$dst, (int_arm_qadd GPR:$a, GPR:$b))]>; def QADD16 : AAI<0b01100010, 0b0001, "qadd16">; def QADD8 : AAI<0b01100010, 0b1001, "qadd8">; def QASX : AAI<0b01100010, 0b0011, "qasx">; def QDADD : AAI<0b00010100, 0b0101, "qdadd">; def QDSUB : AAI<0b00010110, 0b0101, "qdsub">; def QSAX : AAI<0b01100010, 0b0101, "qsax">; -def QSUB : AAI<0b00010010, 0b0101, "qsub">; +def QSUB : AAI<0b00010010, 0b0101, "qsub", + [(set GPR:$dst, (int_arm_qsub GPR:$a, GPR:$b))]>; def QSUB16 : AAI<0b01100010, 0b0111, "qsub16">; def QSUB8 : AAI<0b01100010, 0b1111, "qsub8">; def UQADD16 : AAI<0b01100110, 0b0001, "uqadd16">; @@ -1793,54 +1819,45 @@ def USADA8 : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), // Signed/Unsigned saturate -- for disassembly only -def SSATlsl : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt), - DPFrm, NoItinerary, "ssat", "\t$dst, $bit_pos, $a, lsl $shamt", - [/* For disassembly only; pattern left blank */]> { - let Inst{27-21} = 0b0110101; - let Inst{6-4} = 0b001; -} - -def SSATasr : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt), - DPFrm, NoItinerary, "ssat", "\t$dst, $bit_pos, $a, asr $shamt", - [/* For disassembly only; pattern left blank */]> { +def SSAT : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, shift_imm:$sh), + SatFrm, NoItinerary, "ssat", "\t$dst, $bit_pos, $a$sh", + [/* For disassembly only; pattern left blank */]> { let Inst{27-21} = 0b0110101; - let Inst{6-4} = 0b101; + let Inst{5-4} = 0b01; } -def SSAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), DPFrm, +def SSAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), SatFrm, NoItinerary, "ssat16", "\t$dst, $bit_pos, $a", [/* For disassembly only; pattern left blank */]> { let Inst{27-20} = 0b01101010; let Inst{7-4} = 0b0011; } -def USATlsl : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt), - DPFrm, NoItinerary, "usat", "\t$dst, $bit_pos, $a, lsl $shamt", - [/* For disassembly only; pattern left blank */]> { - let Inst{27-21} = 0b0110111; - let Inst{6-4} = 0b001; -} - -def USATasr : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt), - DPFrm, NoItinerary, "usat", "\t$dst, $bit_pos, $a, asr $shamt", - [/* For disassembly only; pattern left blank */]> { +def USAT : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, shift_imm:$sh), + SatFrm, NoItinerary, "usat", "\t$dst, $bit_pos, $a$sh", + [/* For disassembly only; pattern left blank */]> { let Inst{27-21} = 
0b0110111; - let Inst{6-4} = 0b101; + let Inst{5-4} = 0b01; } -def USAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), DPFrm, +def USAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), SatFrm, NoItinerary, "usat16", "\t$dst, $bit_pos, $a", [/* For disassembly only; pattern left blank */]> { let Inst{27-20} = 0b01101110; let Inst{7-4} = 0b0011; } +def : ARMV6Pat<(int_arm_ssat GPR:$a, imm:$pos), (SSAT imm:$pos, GPR:$a, 0)>; +def : ARMV6Pat<(int_arm_usat GPR:$a, imm:$pos), (USAT imm:$pos, GPR:$a, 0)>; + //===----------------------------------------------------------------------===// // Bitwise Instructions. // defm AND : AsI1_bin_irs<0b0000, "and", BinOpFrag<(and node:$LHS, node:$RHS)>, 1>; +defm ANDS : AI1_bin_s_irs<0b0000, "and", + BinOpFrag<(ARMand node:$LHS, node:$RHS)>, 1>; defm ORR : AsI1_bin_irs<0b1100, "orr", BinOpFrag<(or node:$LHS, node:$RHS)>, 1>; defm EOR : AsI1_bin_irs<0b0001, "eor", @@ -1858,11 +1875,11 @@ def BFC : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm), } // A8.6.18 BFI - Bitfield insert (Encoding A1) -// Added for disassembler with the pattern field purposely left blank. -def BFI : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm), +def BFI : I<(outs GPR:$dst), (ins GPR:$src, GPR:$val, bf_inv_mask_imm:$imm), AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi, - "bfi", "\t$dst, $src, $imm", "", - [/* For disassembly only; pattern left blank */]>, + "bfi", "\t$dst, $val, $imm", "$src = $dst", + [(set GPR:$dst, (ARMbfi GPR:$src, GPR:$val, + bf_inv_mask_imm:$imm))]>, Requires<[IsARM, HasV6T2]> { let Inst{27-21} = 0b0111110; let Inst{6-4} = 0b001; // Rn: Inst{3-0} != 15 @@ -2232,11 +2249,20 @@ def REVSH : AMiscA1I<0b01101111, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, let Inst{19-16} = 0b1111; } +def lsl_shift_imm : SDNodeXForm<imm, [{ + unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::lsl, N->getZExtValue()); + return CurDAG->getTargetConstant(Sh, MVT::i32); +}]>; + +def lsl_amt : PatLeaf<(i32 imm), [{ + return (N->getZExtValue() < 32); +}], lsl_shift_imm>; + def PKHBT : AMiscA1I<0b01101000, (outs GPR:$dst), - (ins GPR:$src1, GPR:$src2, i32imm:$shamt), - IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, lsl $shamt", + (ins GPR:$src1, GPR:$src2, shift_imm:$sh), + IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2$sh", [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF), - (and (shl GPR:$src2, (i32 imm:$shamt)), + (and (shl GPR:$src2, lsl_amt:$sh), 0xFFFF0000)))]>, Requires<[IsARM, HasV6]> { let Inst{6-4} = 0b001; @@ -2245,26 +2271,37 @@ def PKHBT : AMiscA1I<0b01101000, (outs GPR:$dst), // Alternate cases for PKHBT where identities eliminate some nodes. def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (and GPR:$src2, 0xFFFF0000)), (PKHBT GPR:$src1, GPR:$src2, 0)>; -def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)), - (PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>; +def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$sh)), + (PKHBT GPR:$src1, GPR:$src2, (lsl_shift_imm imm16_31:$sh))>; + +def asr_shift_imm : SDNodeXForm<imm, [{ + unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::asr, N->getZExtValue()); + return CurDAG->getTargetConstant(Sh, MVT::i32); +}]>; +def asr_amt : PatLeaf<(i32 imm), [{ + return (N->getZExtValue() <= 32); +}], asr_shift_imm>; +// Note: Shifts of 1-15 bits will be transformed to srl instead of sra and +// will match the pattern below. 
def PKHTB : AMiscA1I<0b01101000, (outs GPR:$dst), - (ins GPR:$src1, GPR:$src2, i32imm:$shamt), - IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, asr $shamt", + (ins GPR:$src1, GPR:$src2, shift_imm:$sh), + IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2$sh", [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000), - (and (sra GPR:$src2, imm16_31:$shamt), - 0xFFFF)))]>, Requires<[IsARM, HasV6]> { + (and (sra GPR:$src2, asr_amt:$sh), + 0xFFFF)))]>, + Requires<[IsARM, HasV6]> { let Inst{6-4} = 0b101; } // Alternate cases for PKHTB where identities eliminate some nodes. Note that // a shift amount of 0 is *not legal* here, it is PKHBT instead. -def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, (i32 16))), - (PKHTB GPR:$src1, GPR:$src2, 16)>; +def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, imm16_31:$sh)), + (PKHTB GPR:$src1, GPR:$src2, (asr_shift_imm imm16_31:$sh))>; def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), - (and (srl GPR:$src2, imm1_15:$shamt), 0xFFFF)), - (PKHTB GPR:$src1, GPR:$src2, imm1_15:$shamt)>; + (and (srl GPR:$src2, imm1_15:$sh), 0xFFFF)), + (PKHTB GPR:$src1, GPR:$src2, (asr_shift_imm imm1_15:$sh))>; //===----------------------------------------------------------------------===// // Comparison Instructions... @@ -2272,8 +2309,52 @@ def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), defm CMP : AI1_cmp_irs<0b1010, "cmp", BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>; -//FIXME: Disable CMN, as CCodes are backwards from compare expectations -// Compare-to-zero still works out, just not the relationals + +// FIXME: There seems to be a (potential) hardware bug with the CMN instruction +// and comparison with 0. These two pieces of code should give identical +// results: +// +// rsbs r1, r1, 0 +// cmp r0, r1 +// mov r0, #0 +// it ls +// mov r0, #1 +// +// and: +// +// cmn r0, r1 +// mov r0, #0 +// it ls +// mov r0, #1 +// +// However, the CMN gives the *opposite* result when r1 is 0. This is because +// the carry flag is set in the CMP case but not in the CMN case. In short, the +// CMP instruction doesn't perform a truncate of the (logical) NOT of 0 plus the +// value of r0 and the carry bit (because the "carry bit" parameter to +// AddWithCarry is defined as 1 in this case, the carry flag will always be set +// when r0 >= 0). The CMN instruction doesn't perform a NOT of 0 so there is +// never a "carry" when this AddWithCarry is performed (because the "carry bit" +// parameter to AddWithCarry is defined as 0). +// +// The AddWithCarry in the CMP case seems to be relying upon the identity: +// +// ~x + 1 = -x +// +// However when x is 0 and unsigned, this doesn't hold: +// +// x = 0 +// ~x = 0xFFFF FFFF +// ~x + 1 = 0x1 0000 0000 +// (-x = 0) != (0x1 0000 0000 = ~x + 1) +// +// Therefore, we should disable *all* versions of CMN, especially when comparing +// against zero, until we can limit when the CMN instruction is used (when we +// know that the RHS is not 0) or when we have a hardware fix for this. +// +// (See the ARM docs for the "AddWithCarry" pseudo-code.) +// +// This is related to <rdar://problem/7569620>. 
+// //defm CMN : AI1_cmp_irs<0b1011, "cmn", // BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>; @@ -2298,8 +2379,8 @@ def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm), let usesCustomInserter = 1, isBranch = 1, isTerminator = 1, Defs = [CPSR] in { def BCCi64 : PseudoInst<(outs), - (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst), - IIC_Br, + (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst), + IIC_Br, "${:comment} B\t$dst GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, imm:$cc", [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, bb:$dst)]>; @@ -2346,102 +2427,63 @@ def MOVCCi : AI1<0b1101, (outs GPR:$dst), // memory barriers protect the atomic sequences let hasSideEffects = 1 in { -def Int_MemBarrierV7 : AInoP<(outs), (ins), - Pseudo, NoItinerary, - "dmb", "", - [(ARMMemBarrierV7)]>, - Requires<[IsARM, HasV7]> { +def DMBsy : AInoP<(outs), (ins), MiscFrm, NoItinerary, "dmb", "", + [(ARMMemBarrier)]>, Requires<[IsARM, HasDB]> { let Inst{31-4} = 0xf57ff05; // FIXME: add support for options other than a full system DMB // See DMB disassembly-only variants below. let Inst{3-0} = 0b1111; } -def Int_SyncBarrierV7 : AInoP<(outs), (ins), - Pseudo, NoItinerary, - "dsb", "", - [(ARMSyncBarrierV7)]>, - Requires<[IsARM, HasV7]> { +def DSBsy : AInoP<(outs), (ins), MiscFrm, NoItinerary, "dsb", "", + [(ARMSyncBarrier)]>, Requires<[IsARM, HasDB]> { let Inst{31-4} = 0xf57ff04; // FIXME: add support for options other than a full system DSB // See DSB disassembly-only variants below. let Inst{3-0} = 0b1111; } -def Int_MemBarrierV6 : AInoP<(outs), (ins GPR:$zero), - Pseudo, NoItinerary, +def DMB_MCR : AInoP<(outs), (ins GPR:$zero), MiscFrm, NoItinerary, "mcr", "\tp15, 0, $zero, c7, c10, 5", - [(ARMMemBarrierV6 GPR:$zero)]>, + [(ARMMemBarrierMCR GPR:$zero)]>, Requires<[IsARM, HasV6]> { // FIXME: add support for options other than a full system DMB // FIXME: add encoding } -def Int_SyncBarrierV6 : AInoP<(outs), (ins GPR:$zero), - Pseudo, NoItinerary, +def DSB_MCR : AInoP<(outs), (ins GPR:$zero), MiscFrm, NoItinerary, "mcr", "\tp15, 0, $zero, c7, c10, 4", - [(ARMSyncBarrierV6 GPR:$zero)]>, + [(ARMSyncBarrierMCR GPR:$zero)]>, Requires<[IsARM, HasV6]> { // FIXME: add support for options other than a full system DSB // FIXME: add encoding } } -// Helper class for multiclass MemB -- for disassembly only -class AMBI<string opc, string asm> - : AInoP<(outs), (ins), MiscFrm, NoItinerary, opc, asm, - [/* For disassembly only; pattern left blank */]>, - Requires<[IsARM, HasV7]> { - let Inst{31-20} = 0xf57; -} - -multiclass MemB<bits<4> op7_4, string opc> { - - def st : AMBI<opc, "\tst"> { - let Inst{7-4} = op7_4; - let Inst{3-0} = 0b1110; - } - - def ish : AMBI<opc, "\tish"> { - let Inst{7-4} = op7_4; - let Inst{3-0} = 0b1011; - } - - def ishst : AMBI<opc, "\tishst"> { - let Inst{7-4} = op7_4; - let Inst{3-0} = 0b1010; - } - - def nsh : AMBI<opc, "\tnsh"> { - let Inst{7-4} = op7_4; - let Inst{3-0} = 0b0111; - } - - def nshst : AMBI<opc, "\tnshst"> { - let Inst{7-4} = op7_4; - let Inst{3-0} = 0b0110; - } +// Memory Barrier Operations Variants -- for disassembly only - def osh : AMBI<opc, "\tosh"> { - let Inst{7-4} = op7_4; - let Inst{3-0} = 0b0011; - } +def memb_opt : Operand<i32> { + let PrintMethod = "printMemBOption"; +} - def oshst : AMBI<opc, "\toshst"> { - let Inst{7-4} = op7_4; - let Inst{3-0} = 0b0010; - } +class AMBI<bits<4> op7_4, string opc> + : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, opc, "\t$opt", + [/* For disassembly only; pattern 
left blank */]>, + Requires<[IsARM, HasDB]> { + let Inst{31-8} = 0xf57ff0; + let Inst{7-4} = op7_4; } // These DMB variants are for disassembly only. -defm DMB : MemB<0b0101, "dmb">; +def DMBvar : AMBI<0b0101, "dmb">; // These DSB variants are for disassembly only. -defm DSB : MemB<0b0100, "dsb">; +def DSBvar : AMBI<0b0100, "dsb">; // ISB has only full system option -- for disassembly only -def ISBsy : AMBI<"isb", ""> { - let Inst{7-4} = 0b0110; +def ISBsy : AInoP<(outs), (ins), MiscFrm, NoItinerary, "isb", "", []>, + Requires<[IsARM, HasDB]> { + let Inst{31-4} = 0xf57ff06; let Inst{3-0} = 0b1111; } diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 7f7eb980abe8..4d2f1169061f 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -93,6 +93,11 @@ def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>; def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>; def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>; +def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, + SDTCisSameAs<1, 2>]>; +def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>; +def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>; + def SDTARMFMAX : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>]>; def NEONfmax : SDNode<"ARMISD::FMAX", SDTARMFMAX>; @@ -100,14 +105,14 @@ def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>; def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{ ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0)); - unsigned EltBits; + unsigned EltBits = 0; uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits); return (EltBits == 32 && EltVal == 0); }]>; def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{ ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0)); - unsigned EltBits; + unsigned EltBits = 0; uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits); return (EltBits == 8 && EltVal == 0xff); }]>; @@ -124,15 +129,16 @@ def nModImm : Operand<i32> { // NEON load / store instructions //===----------------------------------------------------------------------===// -let mayLoad = 1, neverHasSideEffects = 1 in { // Use vldmia to load a Q register as a D register pair. // This is equivalent to VLDMD except that it has a Q register operand // instead of a pair of D registers. def VLDMQ - : AXDI5<(outs QPR:$dst), (ins addrmode5:$addr, pred:$p), + : AXDI4<(outs QPR:$dst), (ins addrmode4:$addr, pred:$p), IndexModeNone, IIC_fpLoadm, - "vldm${addr:submode}${p}\t${addr:base}, ${dst:dregpair}", "", []>; + "vldm${addr:submode}${p}\t$addr, ${dst:dregpair}", "", + [(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]>; +let mayLoad = 1, neverHasSideEffects = 1 in { // Use vld1 to load a Q register as a D register pair. // This alternative to VLDMQ allows an alignment to be specified. // This is equivalent to VLD1q64 except that it has a Q register operand. @@ -141,15 +147,16 @@ def VLD1q IIC_VLD1, "vld1", "64", "${dst:dregpair}, $addr", "", []>; } // mayLoad = 1, neverHasSideEffects = 1 -let mayStore = 1, neverHasSideEffects = 1 in { // Use vstmia to store a Q register as a D register pair. // This is equivalent to VSTMD except that it has a Q register operand // instead of a pair of D registers. 
def VSTMQ - : AXDI5<(outs), (ins QPR:$src, addrmode5:$addr, pred:$p), + : AXDI4<(outs), (ins QPR:$src, addrmode4:$addr, pred:$p), IndexModeNone, IIC_fpStorem, - "vstm${addr:submode}${p}\t${addr:base}, ${src:dregpair}", "", []>; + "vstm${addr:submode}${p}\t$addr, ${src:dregpair}", "", + [(store (v2f64 QPR:$src), addrmode4:$addr)]>; +let mayStore = 1, neverHasSideEffects = 1 in { // Use vst1 to store a Q register as a D register pair. // This alternative to VSTMQ allows an alignment to be specified. // This is equivalent to VST1q64 except that it has a Q register operand. @@ -160,6 +167,25 @@ def VST1q let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { +// Classes for VLD* pseudo-instructions with multi-register operands. +// These are expanded to real instructions after register allocation. +class VLDQPseudo + : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), IIC_VST, "">; +class VLDQWBPseudo + : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VST, + "$addr.addr = $wb">; +class VLDQQPseudo + : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), IIC_VST, "">; +class VLDQQWBPseudo + : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VST, + "$addr.addr = $wb">; +class VLDQQQQWBPseudo + : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), IIC_VST, + "$addr.addr = $wb, $src = $dst">; + // VLD1 : Vector Load (multiple single elements) class VLD1D<bits<4> op7_4, string Dt> : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst), @@ -180,6 +206,11 @@ def VLD1q16 : VLD1Q<0b0100, "16">; def VLD1q32 : VLD1Q<0b1000, "32">; def VLD1q64 : VLD1Q<0b1100, "64">; +def VLD1q8Pseudo : VLDQPseudo; +def VLD1q16Pseudo : VLDQPseudo; +def VLD1q32Pseudo : VLDQPseudo; +def VLD1q64Pseudo : VLDQPseudo; + // ...with address register writeback: class VLD1DWB<bits<4> op7_4, string Dt> : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst, GPR:$wb), @@ -202,6 +233,11 @@ def VLD1q16_UPD : VLD1QWB<0b0100, "16">; def VLD1q32_UPD : VLD1QWB<0b1000, "32">; def VLD1q64_UPD : VLD1QWB<0b1100, "64">; +def VLD1q8Pseudo_UPD : VLDQWBPseudo; +def VLD1q16Pseudo_UPD : VLDQWBPseudo; +def VLD1q32Pseudo_UPD : VLDQWBPseudo; +def VLD1q64Pseudo_UPD : VLDQWBPseudo; + // ...with 3 registers (some of these are only for the disassembler): class VLD1D3<bits<4> op7_4, string Dt> : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), @@ -222,6 +258,9 @@ def VLD1d16T_UPD : VLD1D3WB<0b0100, "16">; def VLD1d32T_UPD : VLD1D3WB<0b1000, "32">; def VLD1d64T_UPD : VLD1D3WB<0b1100, "64">; +def VLD1d64TPseudo : VLDQQPseudo; +def VLD1d64TPseudo_UPD : VLDQQWBPseudo; + // ...with 4 registers (some of these are only for the disassembler): class VLD1D4<bits<4> op7_4, string Dt> : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), @@ -244,6 +283,9 @@ def VLD1d16Q_UPD : VLD1D4WB<0b0100, "16">; def VLD1d32Q_UPD : VLD1D4WB<0b1000, "32">; def VLD1d64Q_UPD : VLD1D4WB<0b1100, "64">; +def VLD1d64QPseudo : VLDQQPseudo; +def VLD1d64QPseudo_UPD : VLDQQWBPseudo; + // VLD2 : Vector Load (multiple 2-element structures) class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2), @@ -263,6 +305,14 @@ def VLD2q8 : VLD2Q<0b0000, "8">; def VLD2q16 : VLD2Q<0b0100, "16">; def VLD2q32 : VLD2Q<0b1000, "32">; +def VLD2d8Pseudo : VLDQPseudo; +def VLD2d16Pseudo : VLDQPseudo; +def VLD2d32Pseudo : VLDQPseudo; + +def VLD2q8Pseudo : VLDQQPseudo; +def VLD2q16Pseudo : 
VLDQQPseudo; +def VLD2q32Pseudo : VLDQQPseudo; + // ...with address register writeback: class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb), @@ -284,6 +334,14 @@ def VLD2q8_UPD : VLD2QWB<0b0000, "8">; def VLD2q16_UPD : VLD2QWB<0b0100, "16">; def VLD2q32_UPD : VLD2QWB<0b1000, "32">; +def VLD2d8Pseudo_UPD : VLDQWBPseudo; +def VLD2d16Pseudo_UPD : VLDQWBPseudo; +def VLD2d32Pseudo_UPD : VLDQWBPseudo; + +def VLD2q8Pseudo_UPD : VLDQQWBPseudo; +def VLD2q16Pseudo_UPD : VLDQQWBPseudo; +def VLD2q32Pseudo_UPD : VLDQQWBPseudo; + // ...with double-spaced registers (for disassembly only): def VLD2b8 : VLD2D<0b1001, 0b0000, "8">; def VLD2b16 : VLD2D<0b1001, 0b0100, "16">; @@ -302,6 +360,10 @@ def VLD3d8 : VLD3D<0b0100, 0b0000, "8">; def VLD3d16 : VLD3D<0b0100, 0b0100, "16">; def VLD3d32 : VLD3D<0b0100, 0b1000, "32">; +def VLD3d8Pseudo : VLDQQPseudo; +def VLD3d16Pseudo : VLDQQPseudo; +def VLD3d32Pseudo : VLDQQPseudo; + // ...with address register writeback: class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b10, op11_8, op7_4, @@ -314,6 +376,10 @@ def VLD3d8_UPD : VLD3DWB<0b0100, 0b0000, "8">; def VLD3d16_UPD : VLD3DWB<0b0100, 0b0100, "16">; def VLD3d32_UPD : VLD3DWB<0b0100, 0b1000, "32">; +def VLD3d8Pseudo_UPD : VLDQQWBPseudo; +def VLD3d16Pseudo_UPD : VLDQQWBPseudo; +def VLD3d32Pseudo_UPD : VLDQQWBPseudo; + // ...with double-spaced registers (non-updating versions for disassembly only): def VLD3q8 : VLD3D<0b0101, 0b0000, "8">; def VLD3q16 : VLD3D<0b0101, 0b0100, "16">; @@ -322,10 +388,14 @@ def VLD3q8_UPD : VLD3DWB<0b0101, 0b0000, "8">; def VLD3q16_UPD : VLD3DWB<0b0101, 0b0100, "16">; def VLD3q32_UPD : VLD3DWB<0b0101, 0b1000, "32">; +def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo; +def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo; +def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo; + // ...alternate versions to be allocated odd register numbers: -def VLD3q8odd_UPD : VLD3DWB<0b0101, 0b0000, "8">; -def VLD3q16odd_UPD : VLD3DWB<0b0101, 0b0100, "16">; -def VLD3q32odd_UPD : VLD3DWB<0b0101, 0b1000, "32">; +def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo; +def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo; +def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo; // VLD4 : Vector Load (multiple 4-element structures) class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt> @@ -338,6 +408,10 @@ def VLD4d8 : VLD4D<0b0000, 0b0000, "8">; def VLD4d16 : VLD4D<0b0000, 0b0100, "16">; def VLD4d32 : VLD4D<0b0000, 0b1000, "32">; +def VLD4d8Pseudo : VLDQQPseudo; +def VLD4d16Pseudo : VLDQQPseudo; +def VLD4d32Pseudo : VLDQQPseudo; + // ...with address register writeback: class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b10, op11_8, op7_4, @@ -350,6 +424,10 @@ def VLD4d8_UPD : VLD4DWB<0b0000, 0b0000, "8">; def VLD4d16_UPD : VLD4DWB<0b0000, 0b0100, "16">; def VLD4d32_UPD : VLD4DWB<0b0000, 0b1000, "32">; +def VLD4d8Pseudo_UPD : VLDQQWBPseudo; +def VLD4d16Pseudo_UPD : VLDQQWBPseudo; +def VLD4d32Pseudo_UPD : VLDQQWBPseudo; + // ...with double-spaced registers (non-updating versions for disassembly only): def VLD4q8 : VLD4D<0b0001, 0b0000, "8">; def VLD4q16 : VLD4D<0b0001, 0b0100, "16">; @@ -358,10 +436,14 @@ def VLD4q8_UPD : VLD4DWB<0b0001, 0b0000, "8">; def VLD4q16_UPD : VLD4DWB<0b0001, 0b0100, "16">; def VLD4q32_UPD : VLD4DWB<0b0001, 0b1000, "32">; +def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo; +def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo; +def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo; + // ...alternate versions to be allocated odd register numbers: -def VLD4q8odd_UPD : VLD4DWB<0b0001, 
0b0000, "8">; -def VLD4q16odd_UPD : VLD4DWB<0b0001, 0b0100, "16">; -def VLD4q32odd_UPD : VLD4DWB<0b0001, 0b1000, "32">; +def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo; +def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo; +def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo; // VLD1LN : Vector Load (single element to one lane) // FIXME: Not yet implemented. @@ -486,6 +568,25 @@ def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32">; let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { +// Classes for VST* pseudo-instructions with multi-register operands. +// These are expanded to real instructions after register allocation. +class VSTQPseudo + : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), IIC_VST, "">; +class VSTQWBPseudo + : PseudoNLdSt<(outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, QPR:$src), IIC_VST, + "$addr.addr = $wb">; +class VSTQQPseudo + : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), IIC_VST, "">; +class VSTQQWBPseudo + : PseudoNLdSt<(outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), IIC_VST, + "$addr.addr = $wb">; +class VSTQQQQWBPseudo + : PseudoNLdSt<(outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), IIC_VST, + "$addr.addr = $wb">; + // VST1 : Vector Store (multiple single elements) class VST1D<bits<4> op7_4, string Dt> : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST, @@ -505,6 +606,11 @@ def VST1q16 : VST1Q<0b0100, "16">; def VST1q32 : VST1Q<0b1000, "32">; def VST1q64 : VST1Q<0b1100, "64">; +def VST1q8Pseudo : VSTQPseudo; +def VST1q16Pseudo : VSTQPseudo; +def VST1q32Pseudo : VSTQPseudo; +def VST1q64Pseudo : VSTQPseudo; + // ...with address register writeback: class VST1DWB<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb), @@ -525,6 +631,11 @@ def VST1q16_UPD : VST1QWB<0b0100, "16">; def VST1q32_UPD : VST1QWB<0b1000, "32">; def VST1q64_UPD : VST1QWB<0b1100, "64">; +def VST1q8Pseudo_UPD : VSTQWBPseudo; +def VST1q16Pseudo_UPD : VSTQWBPseudo; +def VST1q32Pseudo_UPD : VSTQWBPseudo; +def VST1q64Pseudo_UPD : VSTQWBPseudo; + // ...with 3 registers (some of these are only for the disassembler): class VST1D3<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0110, op7_4, (outs), @@ -547,6 +658,9 @@ def VST1d16T_UPD : VST1D3WB<0b0100, "16">; def VST1d32T_UPD : VST1D3WB<0b1000, "32">; def VST1d64T_UPD : VST1D3WB<0b1100, "64">; +def VST1d64TPseudo : VSTQQPseudo; +def VST1d64TPseudo_UPD : VSTQQWBPseudo; + // ...with 4 registers (some of these are only for the disassembler): class VST1D4<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0010, op7_4, (outs), @@ -570,6 +684,9 @@ def VST1d16Q_UPD : VST1D4WB<0b0100, "16">; def VST1d32Q_UPD : VST1D4WB<0b1000, "32">; def VST1d64Q_UPD : VST1D4WB<0b1100, "64">; +def VST1d64QPseudo : VSTQQPseudo; +def VST1d64QPseudo_UPD : VSTQQWBPseudo; + // VST2 : Vector Store (multiple 2-element structures) class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b00, op11_8, op7_4, (outs), @@ -589,6 +706,14 @@ def VST2q8 : VST2Q<0b0000, "8">; def VST2q16 : VST2Q<0b0100, "16">; def VST2q32 : VST2Q<0b1000, "32">; +def VST2d8Pseudo : VSTQPseudo; +def VST2d16Pseudo : VSTQPseudo; +def VST2d32Pseudo : VSTQPseudo; + +def VST2q8Pseudo : VSTQQPseudo; +def VST2q16Pseudo : VSTQQPseudo; +def VST2q32Pseudo : VSTQQPseudo; + // ...with address register writeback: class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), @@ -610,6 +735,14 @@ def VST2q8_UPD : VST2QWB<0b0000, "8">; def VST2q16_UPD : VST2QWB<0b0100, 
"16">; def VST2q32_UPD : VST2QWB<0b1000, "32">; +def VST2d8Pseudo_UPD : VSTQWBPseudo; +def VST2d16Pseudo_UPD : VSTQWBPseudo; +def VST2d32Pseudo_UPD : VSTQWBPseudo; + +def VST2q8Pseudo_UPD : VSTQQWBPseudo; +def VST2q16Pseudo_UPD : VSTQQWBPseudo; +def VST2q32Pseudo_UPD : VSTQQWBPseudo; + // ...with double-spaced registers (for disassembly only): def VST2b8 : VST2D<0b1001, 0b0000, "8">; def VST2b16 : VST2D<0b1001, 0b0100, "16">; @@ -628,6 +761,10 @@ def VST3d8 : VST3D<0b0100, 0b0000, "8">; def VST3d16 : VST3D<0b0100, 0b0100, "16">; def VST3d32 : VST3D<0b0100, 0b1000, "32">; +def VST3d8Pseudo : VSTQQPseudo; +def VST3d16Pseudo : VSTQQPseudo; +def VST3d32Pseudo : VSTQQPseudo; + // ...with address register writeback: class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), @@ -640,6 +777,10 @@ def VST3d8_UPD : VST3DWB<0b0100, 0b0000, "8">; def VST3d16_UPD : VST3DWB<0b0100, 0b0100, "16">; def VST3d32_UPD : VST3DWB<0b0100, 0b1000, "32">; +def VST3d8Pseudo_UPD : VSTQQWBPseudo; +def VST3d16Pseudo_UPD : VSTQQWBPseudo; +def VST3d32Pseudo_UPD : VSTQQWBPseudo; + // ...with double-spaced registers (non-updating versions for disassembly only): def VST3q8 : VST3D<0b0101, 0b0000, "8">; def VST3q16 : VST3D<0b0101, 0b0100, "16">; @@ -648,10 +789,14 @@ def VST3q8_UPD : VST3DWB<0b0101, 0b0000, "8">; def VST3q16_UPD : VST3DWB<0b0101, 0b0100, "16">; def VST3q32_UPD : VST3DWB<0b0101, 0b1000, "32">; +def VST3q8Pseudo_UPD : VSTQQQQWBPseudo; +def VST3q16Pseudo_UPD : VSTQQQQWBPseudo; +def VST3q32Pseudo_UPD : VSTQQQQWBPseudo; + // ...alternate versions to be allocated odd register numbers: -def VST3q8odd_UPD : VST3DWB<0b0101, 0b0000, "8">; -def VST3q16odd_UPD : VST3DWB<0b0101, 0b0100, "16">; -def VST3q32odd_UPD : VST3DWB<0b0101, 0b1000, "32">; +def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo; +def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo; +def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo; // VST4 : Vector Store (multiple 4-element structures) class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt> @@ -664,6 +809,10 @@ def VST4d8 : VST4D<0b0000, 0b0000, "8">; def VST4d16 : VST4D<0b0000, 0b0100, "16">; def VST4d32 : VST4D<0b0000, 0b1000, "32">; +def VST4d8Pseudo : VSTQQPseudo; +def VST4d16Pseudo : VSTQQPseudo; +def VST4d32Pseudo : VSTQQPseudo; + // ...with address register writeback: class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), @@ -676,6 +825,10 @@ def VST4d8_UPD : VST4DWB<0b0000, 0b0000, "8">; def VST4d16_UPD : VST4DWB<0b0000, 0b0100, "16">; def VST4d32_UPD : VST4DWB<0b0000, 0b1000, "32">; +def VST4d8Pseudo_UPD : VSTQQWBPseudo; +def VST4d16Pseudo_UPD : VSTQQWBPseudo; +def VST4d32Pseudo_UPD : VSTQQWBPseudo; + // ...with double-spaced registers (non-updating versions for disassembly only): def VST4q8 : VST4D<0b0001, 0b0000, "8">; def VST4q16 : VST4D<0b0001, 0b0100, "16">; @@ -684,10 +837,14 @@ def VST4q8_UPD : VST4DWB<0b0001, 0b0000, "8">; def VST4q16_UPD : VST4DWB<0b0001, 0b0100, "16">; def VST4q32_UPD : VST4DWB<0b0001, 0b1000, "32">; +def VST4q8Pseudo_UPD : VSTQQQQWBPseudo; +def VST4q16Pseudo_UPD : VSTQQQQWBPseudo; +def VST4q32Pseudo_UPD : VSTQQQQWBPseudo; + // ...alternate versions to be allocated odd register numbers: -def VST4q8odd_UPD : VST4DWB<0b0001, 0b0000, "8">; -def VST4q16odd_UPD : VST4DWB<0b0001, 0b0100, "16">; -def VST4q32odd_UPD : VST4DWB<0b0001, 0b1000, "32">; +def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo; +def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo; +def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo; // VST1LN : Vector Store 
(single element from one lane) // FIXME: Not yet implemented. @@ -879,6 +1036,15 @@ class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "", [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>; +// Narrow 2-register operations. +class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, + bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyD, ValueType TyQ, SDNode OpNode> + : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$dst), + (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "", + [(set DPR:$dst, (TyD (OpNode (TyQ QPR:$src))))]>; + // Narrow 2-register intrinsics. class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, @@ -888,14 +1054,14 @@ class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "", [(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src))))]>; -// Long 2-register intrinsics (currently only used for VMOVL). -class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType TyQ, ValueType TyD, Intrinsic IntOp> +// Long 2-register operations (currently only used for VMOVL). +class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, + bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, SDNode OpNode> : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$dst), (ins DPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "", - [(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src))))]>; + [(set QPR:$dst, (TyQ (OpNode (TyD DPR:$src))))]>; // 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register. class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt> @@ -1150,6 +1316,24 @@ class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, (ResTy (NEONvduplane (OpTy DPR_8:$src3), imm:$lane)))))))]>; +// Neon Intrinsic-Op instructions (VABA): double- and quad-register. +class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType Ty, Intrinsic IntOp, SDNode OpNode> + : N3V<op24, op23, op21_20, op11_8, 0, op4, + (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin, + OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", + [(set DPR:$dst, (Ty (OpNode DPR:$src1, + (Ty (IntOp (Ty DPR:$src2), (Ty DPR:$src3))))))]>; +class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType Ty, Intrinsic IntOp, SDNode OpNode> + : N3V<op24, op23, op21_20, op11_8, 1, op4, + (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), N3RegFrm, itin, + OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", + [(set QPR:$dst, (Ty (OpNode QPR:$src1, + (Ty (IntOp (Ty QPR:$src2), (Ty QPR:$src3))))))]>; + // Neon 3-argument intrinsics, both double- and quad-register. // The destination register is also used as the first source operand register. class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, @@ -1169,6 +1353,53 @@ class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1), (OpTy QPR:$src2), (OpTy QPR:$src3))))]>; +// Long Multiply-Add/Sub operations. 
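The Long Multiply-Add/Sub classes below match an accumulate or subtract (OpNode) wrapped around a widening multiply (MulOp, the NEONvmulls/NEONvmullu nodes introduced at the top of this file). Per lane, the semantics reduce to the following sketch (a scalar stand-in for one vector lane):

#include <cstdint>

// One lane of VMLAL.S16 (Q += sext(D) * sext(D)): a widening multiply
// followed by an ordinary add into the already-wide accumulator.
static int32_t vmlal_s16_lane(int32_t acc, int16_t a, int16_t b) {
  return acc + (int32_t)a * (int32_t)b;
}

// One lane of VMLSL.S16 (Q -= sext(D) * sext(D)).
static int32_t vmlsl_s16_lane(int32_t acc, int16_t a, int16_t b) {
  return acc - (int32_t)a * (int32_t)b;
}

Factoring the multiply out as a target node lets the same classes serve both the accumulating and subtracting forms; the VMLAL/VMLSL instantiations further down just pass add or sub as OpNode.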
+class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode> + : N3V<op24, op23, op21_20, op11_8, 0, op4, + (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin, + OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", + [(set QPR:$dst, (OpNode (TyQ QPR:$src1), + (TyQ (MulOp (TyD DPR:$src2), + (TyD DPR:$src3)))))]>; +class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode> + : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), + (ins QPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), + NVMulSLFrm, itin, + OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst", + [(set QPR:$dst, + (OpNode (TyQ QPR:$src1), + (TyQ (MulOp (TyD DPR:$src2), + (TyD (NEONvduplane (TyD DPR_VFP2:$src3), + imm:$lane))))))]>; +class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode> + : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), + (ins QPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane), + NVMulSLFrm, itin, + OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst", + [(set QPR:$dst, + (OpNode (TyQ QPR:$src1), + (TyQ (MulOp (TyD DPR:$src2), + (TyD (NEONvduplane (TyD DPR_8:$src3), + imm:$lane))))))]>; + +// Long Intrinsic-Op vector operations with explicit extend (VABAL). +class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp, + SDNode OpNode> + : N3V<op24, op23, op21_20, op11_8, 0, op4, + (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin, + OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", + [(set QPR:$dst, (OpNode (TyQ QPR:$src1), + (TyQ (ExtOp (TyD (IntOp (TyD DPR:$src2), + (TyD DPR:$src3)))))))]>; + // Neon Long 3-argument intrinsic. The destination register is // a quad-register and is also used as the first source operand register. class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, @@ -1217,6 +1448,61 @@ class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, let isCommutable = Commutable; } +// Long 3-register operations. 
+class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable> + : N3V<op24, op23, op21_20, op11_8, 0, op4, + (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin, + OpcodeStr, Dt, "$dst, $src1, $src2", "", + [(set QPR:$dst, (TyQ (OpNode (TyD DPR:$src1), (TyD DPR:$src2))))]> { + let isCommutable = Commutable; +} +class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, SDNode OpNode> + : N3V<op24, 1, op21_20, op11_8, 1, 0, + (outs QPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", + [(set QPR:$dst, + (TyQ (OpNode (TyD DPR:$src1), + (TyD (NEONvduplane (TyD DPR_VFP2:$src2),imm:$lane)))))]>; +class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, SDNode OpNode> + : N3V<op24, 1, op21_20, op11_8, 1, 0, + (outs QPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", + [(set QPR:$dst, + (TyQ (OpNode (TyD DPR:$src1), + (TyD (NEONvduplane (TyD DPR_8:$src2), imm:$lane)))))]>; + +// Long 3-register operations with explicitly extended operands. +class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp, + bit Commutable> + : N3V<op24, op23, op21_20, op11_8, 0, op4, + (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin, + OpcodeStr, Dt, "$dst, $src1, $src2", "", + [(set QPR:$dst, (OpNode (TyQ (ExtOp (TyD DPR:$src1))), + (TyQ (ExtOp (TyD DPR:$src2)))))]> { + let isCommutable = Commutable; +} + +// Long 3-register intrinsics with explicit extend (VABDL). +class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp, + bit Commutable> + : N3V<op24, op23, op21_20, op11_8, 0, op4, + (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin, + OpcodeStr, Dt, "$dst, $src1, $src2", "", + [(set QPR:$dst, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$src1), + (TyD DPR:$src2))))))]> { + let isCommutable = Commutable; +} + // Long 3-register intrinsics. class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, @@ -1248,14 +1534,15 @@ class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8, (OpTy (NEONvduplane (OpTy DPR_8:$src2), imm:$lane)))))]>; -// Wide 3-register intrinsics. -class N3VWInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, - string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, - Intrinsic IntOp, bit Commutable> +// Wide 3-register operations. 
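Wide operations differ from the long forms in that only the second, narrow (D) operand is extended; the first source is already a full Q register. Per lane, VADDW.S16 behaves like this sketch:

#include <cstdint>

// One lane of VADDW.S16 (Q = Q + sext(D)): only the narrow operand is
// widened, matching the (add QPR, (ExtOp DPR)) pattern in N3VW below.
static int32_t vaddw_s16_lane(int32_t q, int16_t d) {
  return q + (int32_t)d;
}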
+class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, + SDNode OpNode, SDNode ExtOp, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 0, op4, (outs QPR:$dst), (ins QPR:$src1, DPR:$src2), N3RegFrm, IIC_VSUBiD, OpcodeStr, Dt, "$dst, $src1, $src2", "", - [(set QPR:$dst, (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2))))]> { + [(set QPR:$dst, (OpNode (TyQ QPR:$src1), + (TyQ (ExtOp (TyD DPR:$src2)))))]> { let isCommutable = Commutable; } @@ -1488,6 +1775,23 @@ multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, } +// Neon Narrowing 2-register vector operations, +// source operand element sizes of 16, 32 and 64 bits: +multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, + bits<5> op11_7, bit op6, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + SDNode OpNode> { + def v8i8 : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4, + itin, OpcodeStr, !strconcat(Dt, "16"), + v8i8, v8i16, OpNode>; + def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4, + itin, OpcodeStr, !strconcat(Dt, "32"), + v4i16, v4i32, OpNode>; + def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4, + itin, OpcodeStr, !strconcat(Dt, "64"), + v2i32, v2i64, OpNode>; +} + // Neon Narrowing 2-register vector intrinsics, // source operand element sizes of 16, 32 and 64 bits: multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, @@ -1508,14 +1812,14 @@ multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, // Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL). // source operand element sizes of 16, 32 and 64 bits: -multiclass N2VLInt_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4, - string OpcodeStr, string Dt, Intrinsic IntOp> { - def v8i16 : N2VLInt<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD, - OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>; - def v4i32 : N2VLInt<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD, - OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>; - def v2i64 : N2VLInt<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD, - OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>; +multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4, + string OpcodeStr, string Dt, SDNode OpNode> { + def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD, + OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>; + def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD, + OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>; + def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD, + OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>; } @@ -1607,6 +1911,47 @@ multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4, } +// Neon Long 3-register vector operations. 
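For the fully long forms, by contrast, both narrow sources are extended before the wide operation, which is what lets VADDL/VSUBL be expressed as a plain add/sub over sext/zext instead of the old int_arm_neon_vaddl*/vsubl* intrinsics. One lane, as a sketch:

#include <cstdint>

// One lane of VADDL.S8 (Q = sext(D) + sext(D)): with both i8 inputs
// widened first, the 16-bit add can never wrap.
static int16_t vaddl_s8_lane(int8_t a, int8_t b) {
  return (int16_t)((int16_t)a + (int16_t)b);
}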
+ +multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itin16, InstrItinClass itin32, + string OpcodeStr, string Dt, + SDNode OpNode, bit Commutable = 0> { + def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16, + OpcodeStr, !strconcat(Dt, "8"), + v8i16, v8i8, OpNode, Commutable>; + def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16, + OpcodeStr, !strconcat(Dt, "16"), + v4i32, v4i16, OpNode, Commutable>; + def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32, + OpcodeStr, !strconcat(Dt, "32"), + v2i64, v2i32, OpNode, Commutable>; +} + +multiclass N3VLSL_HS<bit op24, bits<4> op11_8, + InstrItinClass itin, string OpcodeStr, string Dt, + SDNode OpNode> { + def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr, + !strconcat(Dt, "16"), v4i32, v4i16, OpNode>; + def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr, + !strconcat(Dt, "32"), v2i64, v2i32, OpNode>; +} + +multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itin16, InstrItinClass itin32, + string OpcodeStr, string Dt, + SDNode OpNode, SDNode ExtOp, bit Commutable = 0> { + def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16, + OpcodeStr, !strconcat(Dt, "8"), + v8i16, v8i8, OpNode, ExtOp, Commutable>; + def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16, + OpcodeStr, !strconcat(Dt, "16"), + v4i32, v4i16, OpNode, ExtOp, Commutable>; + def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32, + OpcodeStr, !strconcat(Dt, "32"), + v2i64, v2i32, OpNode, ExtOp, Commutable>; +} + // Neon Long 3-register vector intrinsics. // First with only element sizes of 16 and 32 bits: @@ -1643,21 +1988,36 @@ multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, v8i16, v8i8, IntOp, Commutable>; } +// ....with explicit extend (VABDL). 
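The explicit-extend trick is sound for VABDL because the absolute difference of two N-bit lanes always fits back into N bits as an unsigned magnitude, so widening it is a plain zero-extension. Per lane (sketch):

#include <cstdint>

// One lane of VABDL.U8 (Q = zext(|a - b|)): the u8 absolute difference
// fits in 8 bits, so the widening is a zext of the narrow vabd result.
static uint16_t vabdl_u8_lane(uint8_t a, uint8_t b) {
  return (uint16_t)(a > b ? a - b : b - a);
}

Note that the VABDL instantiations later in this patch pair zext with both the signed and unsigned vabd intrinsics: the narrow result is a non-negative magnitude either way, so sign-extension is never required.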
+multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + Intrinsic IntOp, SDNode ExtOp, bit Commutable = 0> { + def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin, + OpcodeStr, !strconcat(Dt, "8"), + v8i16, v8i8, IntOp, ExtOp, Commutable>; + def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin, + OpcodeStr, !strconcat(Dt, "16"), + v4i32, v4i16, IntOp, ExtOp, Commutable>; + def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin, + OpcodeStr, !strconcat(Dt, "32"), + v2i64, v2i32, IntOp, ExtOp, Commutable>; +} + // Neon Wide 3-register vector intrinsics, // source operand element sizes of 8, 16 and 32 bits: -multiclass N3VWInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, - string OpcodeStr, string Dt, - Intrinsic IntOp, bit Commutable = 0> { - def v8i16 : N3VWInt<op24, op23, 0b00, op11_8, op4, - OpcodeStr, !strconcat(Dt, "8"), - v8i16, v8i8, IntOp, Commutable>; - def v4i32 : N3VWInt<op24, op23, 0b01, op11_8, op4, - OpcodeStr, !strconcat(Dt, "16"), - v4i32, v4i16, IntOp, Commutable>; - def v2i64 : N3VWInt<op24, op23, 0b10, op11_8, op4, - OpcodeStr, !strconcat(Dt, "32"), - v2i64, v2i32, IntOp, Commutable>; +multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + string OpcodeStr, string Dt, + SDNode OpNode, SDNode ExtOp, bit Commutable = 0> { + def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4, + OpcodeStr, !strconcat(Dt, "8"), + v8i16, v8i8, OpNode, ExtOp, Commutable>; + def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4, + OpcodeStr, !strconcat(Dt, "16"), + v4i32, v4i16, OpNode, ExtOp, Commutable>; + def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4, + OpcodeStr, !strconcat(Dt, "32"), + v2i64, v2i32, OpNode, ExtOp, Commutable>; } @@ -1700,6 +2060,29 @@ multiclass N3VMulOpSL_HS<bits<4> op11_8, mul, ShOp>; } +// Neon Intrinsic-Op vector operations, +// element sizes of 8, 16 and 32 bits: +multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itinD, InstrItinClass itinQ, + string OpcodeStr, string Dt, Intrinsic IntOp, + SDNode OpNode> { + // 64-bit vector types. + def v8i8 : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD, + OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>; + def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD, + OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>; + def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD, + OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>; + + // 128-bit vector types. 
+ def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ, + OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>; + def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ, + OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>; + def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ, + OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>; +} + // Neon 3-argument intrinsics, // element sizes of 8, 16 and 32 bits: multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, @@ -1723,6 +2106,29 @@ multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, } +// Neon Long Multiply-Op vector operations, +// element sizes of 8, 16 and 32 bits: +multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itin16, InstrItinClass itin32, + string OpcodeStr, string Dt, SDNode MulOp, + SDNode OpNode> { + def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr, + !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>; + def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr, + !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>; + def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr, + !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>; +} + +multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr, + string Dt, SDNode MulOp, SDNode OpNode> { + def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr, + !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>; + def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr, + !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>; +} + + // Neon Long 3-argument intrinsics. // First with only element sizes of 16 and 32 bits: @@ -1752,6 +2158,21 @@ multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>; } +// ....with explicit extend (VABAL). 
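VABAL then composes all three pieces: the narrow absolute-difference intrinsic (IntOp), the explicit zext (ExtOp), and a generic add into the wide accumulator (OpNode). One lane, as a sketch:

#include <cstdint>

// One lane of VABAL.U8 (Q += zext(|a - b|)).
static uint16_t vabal_u8_lane(uint16_t acc, uint8_t a, uint8_t b) {
  uint8_t abd = (uint8_t)(a > b ? a - b : b - a);  // the vabd part
  return (uint16_t)(acc + abd);                    // zext + accumulate
}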
+multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + Intrinsic IntOp, SDNode ExtOp, SDNode OpNode> { + def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin, + OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, + IntOp, ExtOp, OpNode>; + def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin, + OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, + IntOp, ExtOp, OpNode>; + def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin, + OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, + IntOp, ExtOp, OpNode>; +} + // Neon 2-register vector intrinsics, // element sizes of 8, 16 and 32 bits: @@ -1996,13 +2417,13 @@ def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32", def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32", v4f32, v4f32, fadd, 1>; // VADDL : Vector Add Long (Q = D + D) -defm VADDLs : N3VLInt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, - "vaddl", "s", int_arm_neon_vaddls, 1>; -defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, - "vaddl", "u", int_arm_neon_vaddlu, 1>; +defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, + "vaddl", "s", add, sext, 1>; +defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, + "vaddl", "u", add, zext, 1>; // VADDW : Vector Add Wide (Q = Q + D) -defm VADDWs : N3VWInt_QHS<0,1,0b0001,0, "vaddw", "s", int_arm_neon_vaddws, 0>; -defm VADDWu : N3VWInt_QHS<1,1,0b0001,0, "vaddw", "u", int_arm_neon_vaddwu, 0>; +defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>; +defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>; // VHADD : Vector Halving Add defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, @@ -2113,16 +2534,14 @@ def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D) -defm VMULLs : N3VLInt_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, - "vmull", "s", int_arm_neon_vmulls, 1>; -defm VMULLu : N3VLInt_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, - "vmull", "u", int_arm_neon_vmullu, 1>; +defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, + "vmull", "s", NEONvmulls, 1>; +defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, + "vmull", "u", NEONvmullu, 1>; def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8", v8i16, v8i8, int_arm_neon_vmullp, 1>; -defm VMULLsls : N3VLIntSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", - int_arm_neon_vmulls>; -defm VMULLslu : N3VLIntSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", - int_arm_neon_vmullu>; +defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>; +defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>; // VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D) defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D, @@ -2172,13 +2591,13 @@ def : Pat<(v4f32 (fadd (v4f32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VMLAL : Vector Multiply Accumulate Long (Q += D * D) -defm VMLALs : N3VLInt3_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, - "vmlal", "s", int_arm_neon_vmlals>; -defm VMLALu : N3VLInt3_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, - "vmlal", "u", int_arm_neon_vmlalu>; +defm VMLALs : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, + "vmlal", "s", NEONvmulls, add>; +defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, + "vmlal", "u", NEONvmullu, add>; -defm VMLALsls : 
N3VLInt3SL_HS<0, 0b0010, "vmlal", "s", int_arm_neon_vmlals>; -defm VMLALslu : N3VLInt3SL_HS<1, 0b0010, "vmlal", "u", int_arm_neon_vmlalu>; +defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>; +defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>; // VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D) defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, @@ -2224,13 +2643,13 @@ def : Pat<(v4f32 (fsub (v4f32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VMLSL : Vector Multiply Subtract Long (Q -= D * D) -defm VMLSLs : N3VLInt3_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, - "vmlsl", "s", int_arm_neon_vmlsls>; -defm VMLSLu : N3VLInt3_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, - "vmlsl", "u", int_arm_neon_vmlslu>; +defm VMLSLs : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, + "vmlsl", "s", NEONvmulls, sub>; +defm VMLSLu : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, + "vmlsl", "u", NEONvmullu, sub>; -defm VMLSLsls : N3VLInt3SL_HS<0, 0b0110, "vmlsl", "s", int_arm_neon_vmlsls>; -defm VMLSLslu : N3VLInt3SL_HS<1, 0b0110, "vmlsl", "u", int_arm_neon_vmlslu>; +defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>; +defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>; // VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D) defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D, @@ -2247,13 +2666,13 @@ def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32", def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32", v4f32, v4f32, fsub, 0>; // VSUBL : Vector Subtract Long (Q = D - D) -defm VSUBLs : N3VLInt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, - "vsubl", "s", int_arm_neon_vsubls, 1>; -defm VSUBLu : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, - "vsubl", "u", int_arm_neon_vsublu, 1>; +defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, + "vsubl", "s", sub, sext, 0>; +defm VSUBLu : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, + "vsubl", "u", sub, zext, 0>; // VSUBW : Vector Subtract Wide (Q = Q - D) -defm VSUBWs : N3VWInt_QHS<0,1,0b0011,0, "vsubw", "s", int_arm_neon_vsubws, 0>; -defm VSUBWu : N3VWInt_QHS<1,1,0b0011,0, "vsubw", "u", int_arm_neon_vsubwu, 0>; +defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>; +defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>; // VHSUB : Vector Halving Subtract defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, @@ -2469,32 +2888,32 @@ def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1, // VABD : Vector Absolute Difference defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, - "vabd", "s", int_arm_neon_vabds, 0>; + "vabd", "s", int_arm_neon_vabds, 1>; defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, - "vabd", "u", int_arm_neon_vabdu, 0>; + "vabd", "u", int_arm_neon_vabdu, 1>; def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND, - "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 0>; + "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>; def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ, - "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 0>; + "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>; // VABDL : Vector Absolute Difference Long (Q = | D - D |) -defm VABDLs : N3VLInt_QHS<0,1,0b0111,0, IIC_VSUBi4Q, IIC_VSUBi4Q, - "vabdl", "s", 
int_arm_neon_vabdls, 0>; -defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, IIC_VSUBi4Q, IIC_VSUBi4Q, - "vabdl", "u", int_arm_neon_vabdlu, 0>; +defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q, + "vabdl", "s", int_arm_neon_vabds, zext, 1>; +defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q, + "vabdl", "u", int_arm_neon_vabdu, zext, 1>; // VABA : Vector Absolute Difference and Accumulate -defm VABAs : N3VInt3_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ, - "vaba", "s", int_arm_neon_vabas>; -defm VABAu : N3VInt3_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ, - "vaba", "u", int_arm_neon_vabau>; +defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ, + "vaba", "s", int_arm_neon_vabds, add>; +defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ, + "vaba", "u", int_arm_neon_vabdu, add>; // VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |) -defm VABALs : N3VLInt3_QHS<0,1,0b0101,0, IIC_VABAD, IIC_VABAD, - "vabal", "s", int_arm_neon_vabals>; -defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, IIC_VABAD, IIC_VABAD, - "vabal", "u", int_arm_neon_vabalu>; +defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD, + "vabal", "s", int_arm_neon_vabds, zext, add>; +defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD, + "vabal", "u", int_arm_neon_vabdu, zext, add>; // Vector Maximum and Minimum. @@ -3113,8 +3532,8 @@ def VDUPfqf : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 1, 0, [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>; // VMOVN : Vector Narrowing Move -defm VMOVN : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD, - "vmovn", "i", int_arm_neon_vmovn>; +defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD, + "vmovn", "i", trunc>; // VQMOVN : Vector Saturating Narrowing Move defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD, "vqmovn", "s", int_arm_neon_vqmovns>; @@ -3123,10 +3542,8 @@ defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD, defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD, "vqmovun", "s", int_arm_neon_vqmovnsu>; // VMOVL : Vector Lengthening Move -defm VMOVLs : N2VLInt_QHS<0b01,0b10100,0,1, "vmovl", "s", - int_arm_neon_vmovls>; -defm VMOVLu : N2VLInt_QHS<0b11,0b10100,0,1, "vmovl", "u", - int_arm_neon_vmovlu>; +defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>; +defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>; // Vector Conversions. diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index bc0790dccbb5..a13ff1232749 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -221,9 +221,13 @@ def tADDrPCi : T1I<(outs tGPR:$dst), (ins t_imm_s4:$rhs), IIC_iALUi, T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10 // ADD rd, sp, #imm8 +// This is rematerializable, which is particularly useful for taking the +// address of locals. +let isReMaterializable = 1 in { def tADDrSPi : T1I<(outs tGPR:$dst), (ins GPR:$sp, t_imm_s4:$rhs), IIC_iALUi, "add\t$dst, $sp, $rhs", []>, T1Encoding<{1,0,1,0,1,?}>; // A6.2 & A8.6.8 +} // ADD sp, sp, #imm7 def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), IIC_iALUi, @@ -251,19 +255,6 @@ def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, let Inst{2-0} = 0b101; } -// Pseudo instruction that will expand into a tSUBspi + a copy. -let usesCustomInserter = 1 in { // Expanded after instruction selection. 
-def tSUBspi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), - NoItinerary, "${:comment} sub\t$dst, $rhs", []>; - -def tADDspr_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), - NoItinerary, "${:comment} add\t$dst, $rhs", []>; - -let Defs = [CPSR] in -def tANDsp : PseudoInst<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), - NoItinerary, "${:comment} and\t$dst, $rhs", []>; -} // usesCustomInserter - //===----------------------------------------------------------------------===// // Control Flow Instructions. // @@ -378,7 +369,7 @@ let isBranch = 1, isTerminator = 1 in { def tBR_JTr : T1JTI<(outs), (ins tGPR:$target, jtblock_operand:$jt, i32imm:$id), - IIC_Br, "mov\tpc, $target\n\t.align\t2\n$jt", + IIC_Br, "mov\tpc, $target\n\t.align\t2$jt", [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]>, Encoding16 { let Inst{15-7} = 0b010001101; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index bbe675e81ab1..6ba0a44be470 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -32,7 +32,7 @@ def t2_so_reg : Operand<i32>, // reg imm ComplexPattern<i32, 2, "SelectT2ShifterOperandReg", [shl,srl,sra,rotr]> { let PrintMethod = "printT2SOOperand"; - let MIOperandInfo = (ops GPR, i32imm); + let MIOperandInfo = (ops rGPR, i32imm); } // t2_so_imm_not_XFORM - Return the complement of a t2_so_imm value @@ -51,10 +51,7 @@ def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{ // represented in the imm field in the same 12-bit form that they are encoded // into t2_so_imm instructions: the 8-bit immediate is the least significant // bits [bits 0-7], the 4-bit shift/splat amount is the next 4 bits [bits 8-11]. -def t2_so_imm : Operand<i32>, - PatLeaf<(imm), [{ - return ARM_AM::getT2SOImmVal((uint32_t)N->getZExtValue()) != -1; -}]>; +def t2_so_imm : Operand<i32>, PatLeaf<(imm), [{ return Pred_t2_so_imm(N); }]>; // t2_so_imm_not - Match an immediate that is a complement // of a t2_so_imm. 
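As background for the t2_so_imm change above: the predicate accepts exactly the Thumb-2 "modified immediate" constants, i.e. an 8-bit value either replicated into one of the splat patterns or rotated right by 8..31 with its top bit set. A self-contained C++ sketch of the test (mirroring the forms ARM_AM::getT2SOImmVal accepts; illustrative, not the in-tree code):

#include <cstdint>

static bool isT2SOImm(uint32_t V) {
  uint32_t Hi = V >> 16, Lo = V & 0xffff;
  if (Hi == 0 && Lo <= 0xff) return true;           // 0x000000XY
  if (Hi == Lo) {
    if ((Lo >> 8) == 0)           return true;      // 0x00XY00XY
    if ((Lo & 0xff) == 0)         return true;      // 0xXY00XY00
    if ((Lo >> 8) == (Lo & 0xff)) return true;      // 0xXYXYXYXY
  }
  for (unsigned Rot = 8; Rot < 32; ++Rot) {         // rotated 8-bit form
    uint32_t Imm8 = (V << Rot) | (V >> (32 - Rot)); // undo ROR by Rot
    if (Imm8 <= 0xff && (Imm8 & 0x80)) return true;
  }
  return false;
}

The 12-bit operand keeps the encoded form described in the comment (imm8 in bits 0-7, the 4-bit control field in bits 8-11); the predicate above only decides whether a 32-bit constant is representable at all.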
@@ -162,7 +159,7 @@ def t2am_imm8s4_offset : Operand<i32> { def t2addrmode_so_reg : Operand<i32>, ComplexPattern<i32, 3, "SelectT2AddrModeSoReg", []> { let PrintMethod = "printT2AddrModeSoRegOperand"; - let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm); + let MIOperandInfo = (ops GPR:$base, rGPR:$offsreg, i32imm:$offsimm); } @@ -176,9 +173,9 @@ def t2addrmode_so_reg : Operand<i32>, multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode, bit Cheap = 0, bit ReMat = 0> { // shifted imm - def i : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi, + def i : T2sI<(outs rGPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi, opc, "\t$dst, $src", - [(set GPR:$dst, (opnode t2_so_imm:$src))]> { + [(set rGPR:$dst, (opnode t2_so_imm:$src))]> { let isAsCheapAsAMove = Cheap; let isReMaterializable = ReMat; let Inst{31-27} = 0b11110; @@ -189,9 +186,9 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{15} = 0; } // register - def r : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr, + def r : T2sI<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVr, opc, ".w\t$dst, $src", - [(set GPR:$dst, (opnode GPR:$src))]> { + [(set rGPR:$dst, (opnode rGPR:$src))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -202,9 +199,9 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{5-4} = 0b00; // type } // shifted register - def s : T2sI<(outs GPR:$dst), (ins t2_so_reg:$src), IIC_iMOVsi, + def s : T2sI<(outs rGPR:$dst), (ins t2_so_reg:$src), IIC_iMOVsi, opc, ".w\t$dst, $src", - [(set GPR:$dst, (opnode t2_so_reg:$src))]> { + [(set rGPR:$dst, (opnode t2_so_reg:$src))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -217,11 +214,11 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode, /// binary operation that produces a value. These are predicable and can be /// changed to modify CPSR. 
multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode, - bit Commutable = 0, string wide =""> { + bit Commutable = 0, string wide = ""> { // shifted imm - def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, + def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, opc, "\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> { + [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_imm:$rhs))]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = opcod; @@ -229,9 +226,9 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{15} = 0; } // register - def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, + def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), IIC_iALUr, opc, !strconcat(wide, "\t$dst, $lhs, $rhs"), - [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> { + [(set rGPR:$dst, (opnode rGPR:$lhs, rGPR:$rhs))]> { let isCommutable = Commutable; let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -242,9 +239,9 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{5-4} = 0b00; // type } // shifted register - def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, + def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, opc, !strconcat(wide, "\t$dst, $lhs, $rhs"), - [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> { + [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_reg:$rhs))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -259,23 +256,35 @@ multiclass T2I_bin_w_irs<bits<4> opcod, string opc, PatFrag opnode, T2I_bin_irs<opcod, opc, opnode, Commutable, ".w">; /// T2I_rbin_is - Same as T2I_bin_irs except the order of operands are -/// reversed. It doesn't define the 'rr' form since it's handled by its -/// T2I_bin_irs counterpart. -multiclass T2I_rbin_is<bits<4> opcod, string opc, PatFrag opnode> { +/// reversed. The 'rr' form is only defined for the disassembler; for codegen +/// it is equivalent to the T2I_bin_irs counterpart. +multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> { // shifted imm - def ri : T2sI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), IIC_iALUi, + def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_imm:$lhs), IIC_iALUi, opc, ".w\t$dst, $rhs, $lhs", - [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]> { + [(set rGPR:$dst, (opnode t2_so_imm:$lhs, rGPR:$rhs))]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = opcod; let Inst{20} = ?; // The S bit. let Inst{15} = 0; } + // register + def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$rhs, rGPR:$lhs), IIC_iALUr, + opc, "\t$dst, $rhs, $lhs", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b01; + let Inst{24-21} = opcod; + let Inst{20} = ?; // The S bit. 
+ let Inst{14-12} = 0b000; // imm3 + let Inst{7-6} = 0b00; // imm2 + let Inst{5-4} = 0b00; // type + } // shifted register - def rs : T2sI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi, + def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi, opc, "\t$dst, $rhs, $lhs", - [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]> { + [(set rGPR:$dst, (opnode t2_so_reg:$lhs, rGPR:$rhs))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -289,9 +298,9 @@ let Defs = [CPSR] in { multiclass T2I_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode, bit Commutable = 0> { // shifted imm - def ri : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, + def ri : T2I<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, !strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> { + [(set rGPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = opcod; @@ -299,9 +308,9 @@ multiclass T2I_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{15} = 0; } // register - def rr : T2I<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, + def rr : T2I<(outs rGPR:$dst), (ins GPR:$lhs, rGPR:$rhs), IIC_iALUr, !strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> { + [(set rGPR:$dst, (opnode GPR:$lhs, rGPR:$rhs))]> { let isCommutable = Commutable; let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -312,9 +321,9 @@ multiclass T2I_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{5-4} = 0b00; // type } // shifted register - def rs : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, + def rs : T2I<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, !strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> { + [(set rGPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -328,9 +337,12 @@ multiclass T2I_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode, multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, bit Commutable = 0> { // shifted imm - def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, + // The register-immediate version is re-materializable. This is useful + // in particular for taking the address of a local. + let isReMaterializable = 1 in { + def ri : T2sI<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, opc, ".w\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> { + [(set rGPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24} = 1; @@ -338,10 +350,11 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, let Inst{20} = 0; // The S bit. 
let Inst{15} = 0; } + } // 12-bit imm - def ri12 : T2I<(outs GPR:$dst), (ins GPR:$lhs, imm0_4095:$rhs), IIC_iALUi, + def ri12 : T2I<(outs rGPR:$dst), (ins GPR:$lhs, imm0_4095:$rhs), IIC_iALUi, !strconcat(opc, "w"), "\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, imm0_4095:$rhs))]> { + [(set rGPR:$dst, (opnode GPR:$lhs, imm0_4095:$rhs))]> { let Inst{31-27} = 0b11110; let Inst{25} = 1; let Inst{24} = 0; @@ -350,9 +363,9 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, let Inst{15} = 0; } // register - def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, + def rr : T2sI<(outs rGPR:$dst), (ins GPR:$lhs, rGPR:$rhs), IIC_iALUr, opc, ".w\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> { + [(set rGPR:$dst, (opnode GPR:$lhs, rGPR:$rhs))]> { let isCommutable = Commutable; let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -364,9 +377,9 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, let Inst{5-4} = 0b00; // type } // shifted register - def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, + def rs : T2sI<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, opc, ".w\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> { + [(set rGPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24} = 1; @@ -382,9 +395,9 @@ let Uses = [CPSR] in { multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, bit Commutable = 0> { // shifted imm - def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, + def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, opc, "\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>, + [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_imm:$rhs))]>, Requires<[IsThumb2]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; @@ -393,9 +406,9 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{15} = 0; } // register - def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, + def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), IIC_iALUr, opc, ".w\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>, + [(set rGPR:$dst, (opnode rGPR:$lhs, rGPR:$rhs))]>, Requires<[IsThumb2]> { let isCommutable = Commutable; let Inst{31-27} = 0b11101; @@ -407,9 +420,9 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{5-4} = 0b00; // type } // shifted register - def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, + def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, opc, ".w\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>, + [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_reg:$rhs))]>, Requires<[IsThumb2]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -423,9 +436,9 @@ let Defs = [CPSR] in { multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode, bit Commutable = 0> { // shifted imm - def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, + def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, opc, "\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>, + [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_imm:$rhs))]>, Requires<[IsThumb2]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; @@ -434,9 +447,9 @@ multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{15} = 0; } // 
register - def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, + def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), IIC_iALUr, opc, ".w\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>, + [(set rGPR:$dst, (opnode rGPR:$lhs, rGPR:$rhs))]>, Requires<[IsThumb2]> { let isCommutable = Commutable; let Inst{31-27} = 0b11101; @@ -448,9 +461,9 @@ multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{5-4} = 0b00; // type } // shifted register - def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, + def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, opc, ".w\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>, + [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_reg:$rhs))]>, Requires<[IsThumb2]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -461,13 +474,14 @@ multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode, } } -/// T2I_rbin_s_is - Same as T2I_rbin_is except sets 's' bit. +/// T2I_rbin_s_is - Same as T2I_rbin_irs except sets 's' bit and the register +/// version is not needed since this is only for codegen. let Defs = [CPSR] in { multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> { // shifted imm - def ri : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), IIC_iALUi, + def ri : T2I<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_imm:$lhs), IIC_iALUi, !strconcat(opc, "s"), ".w\t$dst, $rhs, $lhs", - [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]> { + [(set rGPR:$dst, (opnode t2_so_imm:$lhs, rGPR:$rhs))]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = opcod; @@ -475,9 +489,9 @@ multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> { let Inst{15} = 0; } // shifted register - def rs : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi, + def rs : T2I<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi, !strconcat(opc, "s"), "\t$dst, $rhs, $lhs", - [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]> { + [(set rGPR:$dst, (opnode t2_so_reg:$lhs, rGPR:$rhs))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -490,18 +504,18 @@ multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> { // rotate operation that produces a value. multiclass T2I_sh_ir<bits<2> opcod, string opc, PatFrag opnode> { // 5-bit imm - def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iMOVsi, + def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, i32imm:$rhs), IIC_iMOVsi, opc, ".w\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, imm1_31:$rhs))]> { + [(set rGPR:$dst, (opnode rGPR:$lhs, imm1_31:$rhs))]> { let Inst{31-27} = 0b11101; let Inst{26-21} = 0b010010; let Inst{19-16} = 0b1111; // Rn let Inst{5-4} = opcod; } // register - def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iMOVsr, + def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), IIC_iMOVsr, opc, ".w\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> { + [(set rGPR:$dst, (opnode rGPR:$lhs, rGPR:$rhs))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; let Inst{22-21} = opcod; @@ -513,7 +527,7 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, PatFrag opnode> { /// T2I_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test /// patterns. Similar to T2I_bin_irs except the instruction does not produce /// an explicit result, only implicitly sets CPSR.
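Since the cmp / tst multiclasses below produce no register result, a short C model of what a Thumb-2 compare computes may help. This is an illustrative sketch only (the helper name cmp_flags and the flag packing are invented): cmp performs a subtract, discards the value, and keeps the NZCV flags.

#include <stdint.h>

/* Illustrative model of Thumb-2 cmp rn, op2: compute rn - op2, discard the
   result, and derive NZCV (packed here as N=bit3, Z=bit2, C=bit1, V=bit0). */
static unsigned cmp_flags(uint32_t rn, uint32_t op2) {
    uint32_t res = rn - op2;
    unsigned n = (res >> 31) & 1;                        /* negative */
    unsigned z = (res == 0);                             /* zero */
    unsigned c = (rn >= op2);                            /* C = no borrow */
    unsigned v = (((rn ^ op2) & (rn ^ res)) >> 31) & 1;  /* signed overflow */
    return (n << 3) | (z << 2) | (c << 1) | v;
}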
-let Defs = [CPSR] in { +let isCompare = 1, Defs = [CPSR] in { multiclass T2I_cmp_irs<bits<4> opcod, string opc, PatFrag opnode> { // shifted imm def ri : T2I<(outs), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iCMPi, @@ -527,9 +541,9 @@ multiclass T2I_cmp_irs<bits<4> opcod, string opc, PatFrag opnode> { let Inst{11-8} = 0b1111; // Rd } // register - def rr : T2I<(outs), (ins GPR:$lhs, GPR:$rhs), IIC_iCMPr, + def rr : T2I<(outs), (ins GPR:$lhs, rGPR:$rhs), IIC_iCMPr, opc, ".w\t$lhs, $rhs", - [(opnode GPR:$lhs, GPR:$rhs)]> { + [(opnode GPR:$lhs, rGPR:$rhs)]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -639,9 +653,9 @@ multiclass T2I_st<bits<2> opcod, string opc, PatFrag opnode> { /// T2I_unary_rrot - A unary operation with two forms: one whose operand is a /// register and one whose operand is a register rotated by 8/16/24. multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> { - def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, + def r : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr, opc, ".w\t$dst, $src", - [(set GPR:$dst, (opnode GPR:$src))]> { + [(set rGPR:$dst, (opnode rGPR:$src))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; let Inst{22-20} = opcod; let Inst{19-16} = 0b1111; // Rn let Inst{7} = 1; let Inst{5-4} = 0b00; // rotate } - def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi, + def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iUNAsi, opc, ".w\t$dst, $src, ror $rot", - [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]> { + [(set rGPR:$dst, (opnode (rotr rGPR:$src, rot_imm:$rot)))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; let Inst{22-20} = opcod; @@ -665,9 +679,9 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> { // UXTB16 - Requires T2ExtractPack, does not need the .w qualifier. multiclass T2I_unary_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> { - def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, + def r : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr, opc, "\t$dst, $src", - [(set GPR:$dst, (opnode GPR:$src))]>, + [(set rGPR:$dst, (opnode rGPR:$src))]>, Requires<[HasT2ExtractPack]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; @@ -677,9 +691,9 @@ multiclass T2I_unary_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> { let Inst{7} = 1; let Inst{5-4} = 0b00; // rotate } - def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi, + def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iUNAsi, opc, "\t$dst, $src, ror $rot", - [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]>, + [(set rGPR:$dst, (opnode (rotr rGPR:$src, rot_imm:$rot)))]>, Requires<[HasT2ExtractPack]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; @@ -694,7 +708,7 @@ multiclass T2I_unary_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> { // SXTB16 - Requires T2ExtractPack, does not need the .w qualifier, no pattern // supported yet.
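As a hedged illustration of the operation a future SXTB16 pattern would have to describe (the helper name sxtb16 is invented): the instruction sign-extends the low byte of each halfword lane, and a rotated source selects the other byte lanes.

#include <stdint.h>

/* Illustrative model of sxtb16 rd, rm: sign-extend byte 0 into halfword 0
   and byte 2 into halfword 1. */
static uint32_t sxtb16(uint32_t x) {
    uint32_t lo = (uint32_t)(int32_t)(int8_t)(x & 0xFF) & 0xFFFFu;
    uint32_t hi = (uint32_t)(int32_t)(int8_t)((x >> 16) & 0xFF) << 16;
    return hi | lo;
}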
multiclass T2I_unary_rrot_sxtb16<bits<3> opcod, string opc> { - def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, + def r : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr, opc, "\t$dst, $src", []> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; @@ -704,7 +718,7 @@ multiclass T2I_unary_rrot_sxtb16<bits<3> opcod, string opc> { let Inst{7} = 1; let Inst{5-4} = 0b00; // rotate } - def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi, + def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iUNAsi, opc, "\t$dst, $src, ror $rot", []> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; @@ -719,9 +733,9 @@ multiclass T2I_unary_rrot_sxtb16<bits<3> opcod, string opc> { /// T2I_bin_rrot - A binary operation with two forms: one whose operand is a /// register and one whose operand is a register rotated by 8/16/24. multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> { - def rr : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS), IIC_iALUr, + def rr : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS), IIC_iALUr, opc, "\t$dst, $LHS, $RHS", - [(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]>, + [(set rGPR:$dst, (opnode rGPR:$LHS, rGPR:$RHS))]>, Requires<[HasT2ExtractPack]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; @@ -730,10 +744,10 @@ multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> { let Inst{7} = 1; let Inst{5-4} = 0b00; // rotate } - def rr_rot : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot), + def rr_rot : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS, i32imm:$rot), IIC_iALUsr, opc, "\t$dst, $LHS, $RHS, ror $rot", - [(set GPR:$dst, (opnode GPR:$LHS, - (rotr GPR:$RHS, rot_imm:$rot)))]>, + [(set rGPR:$dst, (opnode rGPR:$LHS, + (rotr rGPR:$RHS, rot_imm:$rot)))]>, Requires<[HasT2ExtractPack]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; @@ -747,7 +761,7 @@ multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> { // DO variant - disassembly only, no pattern multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> { - def rr : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS), IIC_iALUr, + def rr : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS), IIC_iALUr, opc, "\t$dst, $LHS, $RHS", []> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; @@ -756,7 +770,7 @@ multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> { let Inst{7} = 1; let Inst{5-4} = 0b00; // rotate } - def rr_rot : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot), + def rr_rot : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS, i32imm:$rot), IIC_iALUsr, opc, "\t$dst, $LHS, $RHS, ror $rot", []> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; @@ -779,8 +793,8 @@ multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> { // assembler. 
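The adr definitions that follow materialize a PC-relative address. A hedged C model of the computation (the helper name adr and its operands are illustrative; the add/sub sense is what the Inst{23:21} bits noted below select):

#include <stdint.h>

/* Illustrative model of Thumb-2 adr rd, #label: add a signed byte offset,
   resolved by the assembler, to the word-aligned PC (which in Thumb state
   reads as the instruction address plus 4). */
static uint32_t adr(uint32_t pc, int32_t offset) {
    uint32_t base = pc & ~3u;   /* Align(PC, 4) */
    return base + (uint32_t)offset;
}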
let neverHasSideEffects = 1 in { let isReMaterializable = 1 in -def t2LEApcrel : T2XI<(outs GPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi, - "adr$p.w\t$dst, #$label", []> { +def t2LEApcrel : T2XI<(outs rGPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi, + "adr${p}.w\t$dst, #$label", []> { let Inst{31-27} = 0b11110; let Inst{25-24} = 0b10; // Inst{23:21} = '11' (add = FALSE) or '00' (add = TRUE) @@ -790,9 +804,9 @@ def t2LEApcrel : T2XI<(outs GPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi, let Inst{15} = 0; } } // neverHasSideEffects -def t2LEApcrelJT : T2XI<(outs GPR:$dst), +def t2LEApcrelJT : T2XI<(outs rGPR:$dst), (ins i32imm:$label, nohash_imm:$id, pred:$p), IIC_iALUi, - "adr$p.w\t$dst, #${label}_${id}", []> { + "adr${p}.w\t$dst, #${label}_${id}", []> { let Inst{31-27} = 0b11110; let Inst{25-24} = 0b10; // Inst{23:21} = '11' (add = FALSE) or '00' (add = TRUE) @@ -866,9 +880,9 @@ def t2SUBrSPs : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), } // Signed and unsigned division on v7-M -def t2SDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi, +def t2SDIV : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iALUi, "sdiv", "\t$dst, $a, $b", - [(set GPR:$dst, (sdiv GPR:$a, GPR:$b))]>, + [(set rGPR:$dst, (sdiv rGPR:$a, rGPR:$b))]>, Requires<[HasDivide]> { let Inst{31-27} = 0b11111; let Inst{26-21} = 0b011100; @@ -877,9 +891,9 @@ def t2SDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi, let Inst{7-4} = 0b1111; } -def t2UDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi, +def t2UDIV : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iALUi, "udiv", "\t$dst, $a, $b", - [(set GPR:$dst, (udiv GPR:$a, GPR:$b))]>, + [(set rGPR:$dst, (udiv rGPR:$a, rGPR:$b))]>, Requires<[HasDivide]> { let Inst{31-27} = 0b11111; let Inst{26-21} = 0b011101; @@ -888,17 +902,6 @@ def t2UDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi, let Inst{7-4} = 0b1111; } -// Pseudo instruction that will expand into a t2SUBrSPi + a copy. -let usesCustomInserter = 1 in { // Expanded after instruction selection. -def t2SUBrSPi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), - NoItinerary, "${:comment} sub.w\t$dst, $sp, $imm", []>; -def t2SUBrSPi12_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm), - NoItinerary, "${:comment} subw\t$dst, $sp, $imm", []>; -def t2SUBrSPs_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), - NoItinerary, "${:comment} sub\t$dst, $sp, $rhs", []>; -} // usesCustomInserter - - //===----------------------------------------------------------------------===// // Load / store Instructions. // @@ -917,10 +920,10 @@ defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", UnOpFrag<(sextloadi8 node:$Src)>>; let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // Load doubleword -def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs GPR:$dst1, GPR:$dst2), +def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$dst1, rGPR:$dst2), (ins t2addrmode_imm8s4:$addr), IIC_iLoadi, "ldrd", "\t$dst1, $addr", []>; -def t2LDRDpci : T2Ii8s4<1, 0, 1, (outs GPR:$dst1, GPR:$dst2), +def t2LDRDpci : T2Ii8s4<1, 0, 1, (outs rGPR:$dst1, rGPR:$dst2), (ins i32imm:$addr), IIC_iLoadi, "ldrd", "\t$dst1, $addr", []> { let Inst{19-16} = 0b1111; // Rn @@ -967,6 +970,11 @@ def : T2Pat<(extloadi16 t2addrmode_so_reg:$addr), def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)), (t2LDRHpci tconstpool:$addr)>; +// FIXME: The destination register of the loads and stores can't be PC, but +// can be SP. We need another regclass (similar to rGPR) to represent +// that. 
Not a pressing issue since these are selected manually, +// not via pattern. + // Indexed loads let mayLoad = 1, neverHasSideEffects = 1 in { def t2LDR_PRE : T2Iidxldst<0, 0b10, 1, 1, (outs GPR:$dst, GPR:$base_wb), @@ -1286,9 +1294,9 @@ def t2MOVr : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr, // AddedComplexity to ensure isel tries t2MOVi before t2MOVi16. let isReMaterializable = 1, isAsCheapAsAMove = 1, AddedComplexity = 1 in -def t2MOVi : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi, +def t2MOVi : T2sI<(outs rGPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi, "mov", ".w\t$dst, $src", - [(set GPR:$dst, t2_so_imm:$src)]> { + [(set rGPR:$dst, t2_so_imm:$src)]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = 0b0010; @@ -1298,9 +1306,9 @@ def t2MOVi : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi, } let isReMaterializable = 1, isAsCheapAsAMove = 1 in -def t2MOVi16 : T2I<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi, +def t2MOVi16 : T2I<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVi, "movw", "\t$dst, $src", - [(set GPR:$dst, imm0_65535:$src)]> { + [(set rGPR:$dst, imm0_65535:$src)]> { let Inst{31-27} = 0b11110; let Inst{25} = 1; let Inst{24-21} = 0b0010; @@ -1309,10 +1317,10 @@ def t2MOVi16 : T2I<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi, } let Constraints = "$src = $dst" in -def t2MOVTi16 : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm), IIC_iMOVi, +def t2MOVTi16 : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$imm), IIC_iMOVi, "movt", "\t$dst, $imm", - [(set GPR:$dst, - (or (and GPR:$src, 0xffff), lo16AllZero:$imm))]> { + [(set rGPR:$dst, + (or (and rGPR:$src, 0xffff), lo16AllZero:$imm))]> { let Inst{31-27} = 0b11110; let Inst{25} = 1; let Inst{24-21} = 0b0110; @@ -1320,7 +1328,7 @@ def t2MOVTi16 : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm), IIC_iMOVi, let Inst{15} = 0; } -def : T2Pat<(or GPR:$src, 0xffff0000), (t2MOVTi16 GPR:$src, 0xffff)>; +def : T2Pat<(or rGPR:$src, 0xffff0000), (t2MOVTi16 rGPR:$src, 0xffff)>; //===----------------------------------------------------------------------===// // Extend Instructions. @@ -1352,10 +1360,14 @@ defm t2UXTH : T2I_unary_rrot<0b001, "uxth", defm t2UXTB16 : T2I_unary_rrot_uxtb16<0b011, "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>; -def : T2Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF), - (t2UXTB16r_rot GPR:$Src, 24)>, Requires<[HasT2ExtractPack]>; -def : T2Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF), - (t2UXTB16r_rot GPR:$Src, 8)>, Requires<[HasT2ExtractPack]>; +// FIXME: This pattern incorrectly assumes the shl operator is a rotate. +// The transformation should probably be done as a combiner action +// instead so we can include a check for masking back in the upper +// eight bits of the source into the lower eight bits of the result. 
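To make the FIXME above concrete, this small illustrative C program (invented helper names) shows why the disabled shl pattern is not equivalent to uxtb16 with ror #24: a rotate wraps byte 3 around into the low lane, while a left shift brings in zeros.

#include <stdint.h>
#include <stdio.h>

static uint32_t ror32(uint32_t x, unsigned r) {
    return (x >> r) | (x << ((32 - r) & 31));
}
/* Illustrative model of uxtb16 rd, rm, ror #r: zero-extend bytes 0 and 2
   of the rotated source into the two halfword lanes. */
static uint32_t uxtb16_ror(uint32_t x, unsigned r) {
    uint32_t t = ror32(x, r);
    return (t & 0xFFu) | (t & 0x00FF0000u);
}
int main(void) {
    uint32_t x = 0xAABBCCDD;
    printf("%08X\n", (x << 8) & 0x00FF00FF); /* 00CC0000: low byte lost */
    printf("%08X\n", uxtb16_ror(x, 24));     /* 00CC00AA: byte 3 wraps in */
    return 0;
}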
+//def : T2Pat<(and (shl rGPR:$Src, (i32 8)), 0xFF00FF), +// (t2UXTB16r_rot rGPR:$Src, 24)>, Requires<[HasT2ExtractPack]>; +def : T2Pat<(and (srl rGPR:$Src, (i32 8)), 0xFF00FF), + (t2UXTB16r_rot rGPR:$Src, 8)>, Requires<[HasT2ExtractPack]>; defm t2UXTAB : T2I_bin_rrot<0b101, "uxtab", BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>; @@ -1389,7 +1401,7 @@ defm t2SBCS : T2I_adde_sube_s_irs<0b1011, "sbc", BinOpFrag<(sube_live_carry node:$LHS, node:$RHS)>>; // RSB -defm t2RSB : T2I_rbin_is <0b1110, "rsb", +defm t2RSB : T2I_rbin_irs <0b1110, "rsb", BinOpFrag<(sub node:$LHS, node:$RHS)>>; defm t2RSBS : T2I_rbin_s_is <0b1110, "rsb", BinOpFrag<(subc node:$LHS, node:$RHS)>>; @@ -1409,18 +1421,18 @@ def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm), def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm), (t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>; let AddedComplexity = 1 in -def : T2Pat<(addc GPR:$src, imm0_255_neg:$imm), - (t2SUBSri GPR:$src, imm0_255_neg:$imm)>; -def : T2Pat<(addc GPR:$src, t2_so_imm_neg:$imm), - (t2SUBSri GPR:$src, t2_so_imm_neg:$imm)>; +def : T2Pat<(addc rGPR:$src, imm0_255_neg:$imm), + (t2SUBSri rGPR:$src, imm0_255_neg:$imm)>; +def : T2Pat<(addc rGPR:$src, t2_so_imm_neg:$imm), + (t2SUBSri rGPR:$src, t2_so_imm_neg:$imm)>; // The with-carry-in form matches bitwise not instead of the negation. // Effectively, the inverse interpretation of the carry flag already accounts // for part of the negation. let AddedComplexity = 1 in -def : T2Pat<(adde GPR:$src, imm0_255_not:$imm), - (t2SBCSri GPR:$src, imm0_255_not:$imm)>; -def : T2Pat<(adde GPR:$src, t2_so_imm_not:$imm), - (t2SBCSri GPR:$src, t2_so_imm_not:$imm)>; +def : T2Pat<(adde rGPR:$src, imm0_255_not:$imm), + (t2SBCSri rGPR:$src, imm0_255_not:$imm)>; +def : T2Pat<(adde rGPR:$src, t2_so_imm_not:$imm), + (t2SBCSri rGPR:$src, t2_so_imm_not:$imm)>; // Select Bytes -- for disassembly only @@ -1437,9 +1449,10 @@ def t2SEL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), NoItinerary, "sel", // A6.3.13, A6.3.14, A6.3.15 Parallel addition and subtraction (signed/unsigned) // And Miscellaneous operations -- for disassembly only -class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc> - : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), NoItinerary, opc, - "\t$dst, $a, $b", [/* For disassembly only; pattern left blank */]> { +class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc, + list<dag> pat = [/* For disassembly only; pattern left blank */]> + : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), NoItinerary, opc, + "\t$dst, $a, $b", pat> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0101; let Inst{22-20} = op22_20; @@ -1449,14 +1462,16 @@ class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc> // Saturating add/subtract -- for disassembly only -def t2QADD : T2I_pam<0b000, 0b1000, "qadd">; +def t2QADD : T2I_pam<0b000, 0b1000, "qadd", + [(set rGPR:$dst, (int_arm_qadd rGPR:$a, rGPR:$b))]>; def t2QADD16 : T2I_pam<0b001, 0b0001, "qadd16">; def t2QADD8 : T2I_pam<0b000, 0b0001, "qadd8">; def t2QASX : T2I_pam<0b010, 0b0001, "qasx">; def t2QDADD : T2I_pam<0b000, 0b1001, "qdadd">; def t2QDSUB : T2I_pam<0b000, 0b1011, "qdsub">; def t2QSAX : T2I_pam<0b110, 0b0001, "qsax">; -def t2QSUB : T2I_pam<0b000, 0b1010, "qsub">; +def t2QSUB : T2I_pam<0b000, 0b1010, "qsub", + [(set rGPR:$dst, (int_arm_qsub rGPR:$a, rGPR:$b))]>; def t2QSUB16 : T2I_pam<0b101, 0b0001, "qsub16">; def t2QSUB8 : T2I_pam<0b100, 0b0001, "qsub8">; def t2UQADD16 : T2I_pam<0b001, 0b0101, "uqadd16">; @@ -1498,37 +1513,27 @@ def t2UHSUB8 : T2I_pam<0b100, 0b0110, "uhsub8">; // Unsigned Sum of Absolute 
Differences [and Accumulate] -- for disassembly only -def t2USAD8 : T2I_mac<0, 0b111, 0b0000, (outs GPR:$dst), (ins GPR:$a, GPR:$b), +def t2USAD8 : T2I_mac<0, 0b111, 0b0000, (outs rGPR:$dst), + (ins rGPR:$a, rGPR:$b), NoItinerary, "usad8", "\t$dst, $a, $b", []> { let Inst{15-12} = 0b1111; } -def t2USADA8 : T2I_mac<0, 0b111, 0b0000, (outs GPR:$dst), - (ins GPR:$a, GPR:$b, GPR:$acc), NoItinerary, "usada8", +def t2USADA8 : T2I_mac<0, 0b111, 0b0000, (outs rGPR:$dst), + (ins rGPR:$a, rGPR:$b, rGPR:$acc), NoItinerary, "usada8", "\t$dst, $a, $b, $acc", []>; // Signed/Unsigned saturate -- for disassembly only -def t2SSATlsl : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt), - NoItinerary, "ssat", "\t$dst, $bit_pos, $a, lsl $shamt", - [/* For disassembly only; pattern left blank */]> { - let Inst{31-27} = 0b11110; - let Inst{25-22} = 0b1100; - let Inst{20} = 0; - let Inst{15} = 0; - let Inst{21} = 0; // sh = '0' -} - -def t2SSATasr : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt), - NoItinerary, "ssat", "\t$dst, $bit_pos, $a, asr $shamt", - [/* For disassembly only; pattern left blank */]> { +def t2SSAT: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a, shift_imm:$sh), + NoItinerary, "ssat", "\t$dst, $bit_pos, $a$sh", + [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11110; let Inst{25-22} = 0b1100; let Inst{20} = 0; let Inst{15} = 0; - let Inst{21} = 1; // sh = '1' } -def t2SSAT16 : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), NoItinerary, +def t2SSAT16: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a), NoItinerary, "ssat16", "\t$dst, $bit_pos, $a", [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11110; @@ -1540,27 +1545,16 @@ def t2SSAT16 : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), NoItinerary, let Inst{7-6} = 0b00; // imm2 = '00' } -def t2USATlsl : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt), - NoItinerary, "usat", "\t$dst, $bit_pos, $a, lsl $shamt", - [/* For disassembly only; pattern left blank */]> { - let Inst{31-27} = 0b11110; - let Inst{25-22} = 0b1110; - let Inst{20} = 0; - let Inst{15} = 0; - let Inst{21} = 0; // sh = '0' -} - -def t2USATasr : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt), - NoItinerary, "usat", "\t$dst, $bit_pos, $a, asr $shamt", - [/* For disassembly only; pattern left blank */]> { +def t2USAT: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a, shift_imm:$sh), + NoItinerary, "usat", "\t$dst, $bit_pos, $a$sh", + [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11110; let Inst{25-22} = 0b1110; let Inst{20} = 0; let Inst{15} = 0; - let Inst{21} = 1; // sh = '1' } -def t2USAT16 : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), NoItinerary, +def t2USAT16: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a), NoItinerary, "usat16", "\t$dst, $bit_pos, $a", [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11110; @@ -1572,6 +1566,9 @@ def t2USAT16 : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), NoItinerary, let Inst{7-6} = 0b00; // imm2 = '00' } +def : T2Pat<(int_arm_ssat GPR:$a, imm:$pos), (t2SSAT imm:$pos, GPR:$a, 0)>; +def : T2Pat<(int_arm_usat GPR:$a, imm:$pos), (t2USAT imm:$pos, GPR:$a, 0)>; + //===----------------------------------------------------------------------===// // Shift and rotate Instructions. 
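Before the shift and rotate section, a hedged C model of the saturation operations that the int_arm_ssat / int_arm_usat patterns above select (helper names invented; the Q flag set on saturation is omitted from the sketch):

#include <stdint.h>

/* Illustrative models of ssat/usat with a zero shift: clamp to an n-bit
   signed range (1 <= n <= 32) or an n-bit unsigned range (0 <= n <= 31). */
static int32_t ssat(int32_t x, unsigned n) {
    int32_t max = (int32_t)((1u << (n - 1)) - 1);
    int32_t min = -max - 1;
    return x > max ? max : (x < min ? min : x);
}
static uint32_t usat(int32_t x, unsigned n) {
    uint32_t max = (1u << n) - 1;
    if (x < 0) return 0;
    return (uint32_t)x > max ? max : (uint32_t)x;
}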
// @@ -1582,9 +1579,9 @@ defm t2ASR : T2I_sh_ir<0b10, "asr", BinOpFrag<(sra node:$LHS, node:$RHS)>>; defm t2ROR : T2I_sh_ir<0b11, "ror", BinOpFrag<(rotr node:$LHS, node:$RHS)>>; let Uses = [CPSR] in { -def t2MOVrx : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, +def t2MOVrx : T2sI<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVsi, "rrx", "\t$dst, $src", - [(set GPR:$dst, (ARMrrx GPR:$src))]> { + [(set rGPR:$dst, (ARMrrx rGPR:$src))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = 0b0010; @@ -1596,9 +1593,9 @@ def t2MOVrx : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, } let Defs = [CPSR] in { -def t2MOVsrl_flag : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, +def t2MOVsrl_flag : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVsi, "lsrs", ".w\t$dst, $src, #1", - [(set GPR:$dst, (ARMsrl_flag GPR:$src))]> { + [(set rGPR:$dst, (ARMsrl_flag rGPR:$src))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = 0b0010; @@ -1609,9 +1606,9 @@ def t2MOVsrl_flag : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, let Inst{14-12} = 0b000; let Inst{7-6} = 0b01; } -def t2MOVsra_flag : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, +def t2MOVsra_flag : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVsi, "asrs", ".w\t$dst, $src, #1", - [(set GPR:$dst, (ARMsra_flag GPR:$src))]> { + [(set rGPR:$dst, (ARMsra_flag rGPR:$src))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = 0b0010; @@ -1638,10 +1635,13 @@ defm t2EOR : T2I_bin_w_irs<0b0100, "eor", defm t2BIC : T2I_bin_w_irs<0b0001, "bic", BinOpFrag<(and node:$LHS, (not node:$RHS))>>; +defm t2ANDS : T2I_bin_s_irs<0b0000, "and", + BinOpFrag<(ARMand node:$LHS, node:$RHS)>, 1>; + let Constraints = "$src = $dst" in -def t2BFC : T2I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm), +def t2BFC : T2I<(outs rGPR:$dst), (ins rGPR:$src, bf_inv_mask_imm:$imm), IIC_iUNAsi, "bfc", "\t$dst, $imm", - [(set GPR:$dst, (and GPR:$src, bf_inv_mask_imm:$imm))]> { + [(set rGPR:$dst, (and rGPR:$src, bf_inv_mask_imm:$imm))]> { let Inst{31-27} = 0b11110; let Inst{25} = 1; let Inst{24-20} = 0b10110; @@ -1649,7 +1649,7 @@ def t2BFC : T2I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm), let Inst{15} = 0; } -def t2SBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width), +def t2SBFX: T2I<(outs rGPR:$dst), (ins rGPR:$src, imm0_31:$lsb, imm0_31:$width), IIC_iALUi, "sbfx", "\t$dst, $src, $lsb, $width", []> { let Inst{31-27} = 0b11110; let Inst{25} = 1; @@ -1657,7 +1657,7 @@ def t2SBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width), let Inst{15} = 0; } -def t2UBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width), +def t2UBFX: T2I<(outs rGPR:$dst), (ins rGPR:$src, imm0_31:$lsb, imm0_31:$width), IIC_iALUi, "ubfx", "\t$dst, $src, $lsb, $width", []> { let Inst{31-27} = 0b11110; let Inst{25} = 1; @@ -1666,10 +1666,12 @@ def t2UBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width), } // A8.6.18 BFI - Bitfield insert (Encoding T1) -// Added for disassembler with the pattern field purposely left blank. -// FIXME: Utilize this instruction in codgen. 
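The change below turns t2BFI from a disassembly-only definition into one selected through the ARMbfi node, with bf_inv_mask_imm carrying the inverted insertion mask. As a rough illustrative model of the operation itself (helper name invented):

#include <stdint.h>

/* Illustrative model of bfi rd, rn, #lsb, #width: insert the low 'width'
   bits of rn into rd at bit 'lsb', leaving the remaining bits of rd
   unchanged. */
static uint32_t bfi(uint32_t rd, uint32_t rn, unsigned lsb, unsigned width) {
    uint32_t mask = (width == 32) ? 0xFFFFFFFFu : ((1u << width) - 1) << lsb;
    return (rd & ~mask) | ((rn << lsb) & mask);
}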
-def t2BFI : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width), - IIC_iALUi, "bfi", "\t$dst, $src, $lsb, $width", []> { +let Constraints = "$src = $dst" in +def t2BFI : T2I<(outs rGPR:$dst), + (ins rGPR:$src, rGPR:$val, bf_inv_mask_imm:$imm), + IIC_iALUi, "bfi", "\t$dst, $val, $imm", + [(set rGPR:$dst, (ARMbfi rGPR:$src, rGPR:$val, + bf_inv_mask_imm:$imm))]> { let Inst{31-27} = 0b11110; let Inst{25} = 1; let Inst{24-20} = 0b10110; @@ -1677,19 +1679,20 @@ def t2BFI : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width), } defm t2ORN : T2I_bin_irs<0b0011, "orn", BinOpFrag<(or node:$LHS, - (not node:$RHS))>>; + (not node:$RHS))>, 0, "">; // Preferred over t2EORri ra, rb, -1 because mvn has a 16-bit version let AddedComplexity = 1 in defm t2MVN : T2I_un_irs <0b0011, "mvn", UnOpFrag<(not node:$Src)>, 1, 1>; -def : T2Pat<(and GPR:$src, t2_so_imm_not:$imm), - (t2BICri GPR:$src, t2_so_imm_not:$imm)>; +let AddedComplexity = 1 in +def : T2Pat<(and rGPR:$src, t2_so_imm_not:$imm), + (t2BICri rGPR:$src, t2_so_imm_not:$imm)>; // FIXME: Disable this pattern on Darwin to workaround an assembler bug. -def : T2Pat<(or GPR:$src, t2_so_imm_not:$imm), - (t2ORNri GPR:$src, t2_so_imm_not:$imm)>, +def : T2Pat<(or rGPR:$src, t2_so_imm_not:$imm), + (t2ORNri rGPR:$src, t2_so_imm_not:$imm)>, Requires<[IsThumb2]>; def : T2Pat<(t2_so_imm_not:$src), @@ -1699,9 +1702,9 @@ def : T2Pat<(t2_so_imm_not:$src), // Multiply Instructions. // let isCommutable = 1 in -def t2MUL: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, +def t2MUL: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32, "mul", "\t$dst, $a, $b", - [(set GPR:$dst, (mul GPR:$a, GPR:$b))]> { + [(set rGPR:$dst, (mul rGPR:$a, rGPR:$b))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b000; @@ -1709,9 +1712,9 @@ def t2MUL: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, let Inst{7-4} = 0b0000; // Multiply } -def t2MLA: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, +def t2MLA: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32, "mla", "\t$dst, $a, $b, $c", - [(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]> { + [(set rGPR:$dst, (add (mul rGPR:$a, rGPR:$b), rGPR:$c))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b000; @@ -1719,9 +1722,9 @@ def t2MLA: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, let Inst{7-4} = 0b0000; // Multiply } -def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, +def t2MLS: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32, "mls", "\t$dst, $a, $b, $c", - [(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]> { + [(set rGPR:$dst, (sub rGPR:$c, (mul rGPR:$a, rGPR:$b)))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b000; @@ -1732,7 +1735,8 @@ def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, // Extra precision multiplies with low / high results let neverHasSideEffects = 1 in { let isCommutable = 1 in { -def t2SMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMUL64, +def t2SMULL : T2I<(outs rGPR:$ldst, rGPR:$hdst), + (ins rGPR:$a, rGPR:$b), IIC_iMUL64, "smull", "\t$ldst, $hdst, $a, $b", []> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0111; @@ -1740,7 +1744,8 @@ def t2SMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMUL64, let Inst{7-4} = 0b0000; } -def t2UMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMUL64, +def t2UMULL : T2I<(outs rGPR:$ldst,
rGPR:$hdst), + (ins rGPR:$a, rGPR:$b), IIC_iMUL64, "umull", "\t$ldst, $hdst, $a, $b", []> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0111; @@ -1750,7 +1755,8 @@ def t2UMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMUL64, } // isCommutable // Multiply + accumulate -def t2SMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64, +def t2SMLAL : T2I<(outs rGPR:$ldst, rGPR:$hdst), + (ins rGPR:$a, rGPR:$b), IIC_iMAC64, "smlal", "\t$ldst, $hdst, $a, $b", []>{ let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0111; @@ -1758,7 +1764,8 @@ def t2SMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64, let Inst{7-4} = 0b0000; } -def t2UMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64, +def t2UMLAL : T2I<(outs rGPR:$ldst, rGPR:$hdst), + (ins rGPR:$a, rGPR:$b), IIC_iMAC64, "umlal", "\t$ldst, $hdst, $a, $b", []>{ let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0111; @@ -1766,7 +1773,8 @@ def t2UMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64, let Inst{7-4} = 0b0000; } -def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64, +def t2UMAAL : T2I<(outs rGPR:$ldst, rGPR:$hdst), + (ins rGPR:$a, rGPR:$b), IIC_iMAC64, "umaal", "\t$ldst, $hdst, $a, $b", []>{ let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0111; @@ -1778,9 +1786,9 @@ def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64, // Rounding variants of the below included for disassembly only // Most significant word multiply -def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, +def t2SMMUL : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32, "smmul", "\t$dst, $a, $b", - [(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]> { + [(set rGPR:$dst, (mulhs rGPR:$a, rGPR:$b))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b101; @@ -1788,7 +1796,7 @@ def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0) } -def t2SMMULR : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, +def t2SMMULR : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32, "smmulr", "\t$dst, $a, $b", []> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; @@ -1797,9 +1805,9 @@ def t2SMMULR : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1) } -def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, +def t2SMMLA : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32, "smmla", "\t$dst, $a, $b, $c", - [(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]> { + [(set rGPR:$dst, (add (mulhs rGPR:$a, rGPR:$b), rGPR:$c))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b101; @@ -1807,7 +1815,7 @@ def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0) } -def t2SMMLAR : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, +def t2SMMLAR: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32, "smmlar", "\t$dst, $a, $b, $c", []> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; @@ -1816,9 +1824,9 @@ def t2SMMLAR : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1) } -def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, +def t2SMMLS: T2I <(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32, "smmls", "\t$dst, $a, $b, $c", - [(set 
GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]> { + [(set rGPR:$dst, (sub rGPR:$c, (mulhs rGPR:$a, rGPR:$b)))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b110; @@ -1826,7 +1834,7 @@ def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0) } -def t2SMMLSR : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, +def t2SMMLSR:T2I <(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32, "smmlsr", "\t$dst, $a, $b, $c", []> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; @@ -1836,10 +1844,10 @@ def t2SMMLSR : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, } multiclass T2I_smul<string opc, PatFrag opnode> { - def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, + def BB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32, !strconcat(opc, "bb"), "\t$dst, $a, $b", - [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), - (sext_inreg GPR:$b, i16)))]> { + [(set rGPR:$dst, (opnode (sext_inreg rGPR:$a, i16), + (sext_inreg rGPR:$b, i16)))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -1848,10 +1856,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> { let Inst{5-4} = 0b00; } - def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, + def BT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32, !strconcat(opc, "bt"), "\t$dst, $a, $b", - [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), - (sra GPR:$b, (i32 16))))]> { + [(set rGPR:$dst, (opnode (sext_inreg rGPR:$a, i16), + (sra rGPR:$b, (i32 16))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -1860,10 +1868,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> { let Inst{5-4} = 0b01; } - def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, + def TB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32, !strconcat(opc, "tb"), "\t$dst, $a, $b", - [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)), - (sext_inreg GPR:$b, i16)))]> { + [(set rGPR:$dst, (opnode (sra rGPR:$a, (i32 16)), + (sext_inreg rGPR:$b, i16)))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -1872,10 +1880,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> { let Inst{5-4} = 0b10; } - def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, + def TT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32, !strconcat(opc, "tt"), "\t$dst, $a, $b", - [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)), - (sra GPR:$b, (i32 16))))]> { + [(set rGPR:$dst, (opnode (sra rGPR:$a, (i32 16)), + (sra rGPR:$b, (i32 16))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -1884,10 +1892,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> { let Inst{5-4} = 0b11; } - def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL16, + def WB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL16, !strconcat(opc, "wb"), "\t$dst, $a, $b", - [(set GPR:$dst, (sra (opnode GPR:$a, - (sext_inreg GPR:$b, i16)), (i32 16)))]> { + [(set rGPR:$dst, (sra (opnode rGPR:$a, + (sext_inreg rGPR:$b, i16)), (i32 16)))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; @@ -1896,10 +1904,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> { let Inst{5-4} = 0b00; } - def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL16, + def WT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL16, !strconcat(opc, "wt"), "\t$dst, $a, $b", - [(set 
GPR:$dst, (sra (opnode GPR:$a, - (sra GPR:$b, (i32 16))), (i32 16)))]> { + [(set rGPR:$dst, (sra (opnode rGPR:$a, + (sra rGPR:$b, (i32 16))), (i32 16)))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; @@ -1911,11 +1919,11 @@ multiclass T2I_smul<string opc, PatFrag opnode> { multiclass T2I_smla<string opc, PatFrag opnode> { - def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, + def BB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16, !strconcat(opc, "bb"), "\t$dst, $a, $b, $acc", - [(set GPR:$dst, (add GPR:$acc, - (opnode (sext_inreg GPR:$a, i16), - (sext_inreg GPR:$b, i16))))]> { + [(set rGPR:$dst, (add rGPR:$acc, + (opnode (sext_inreg rGPR:$a, i16), + (sext_inreg rGPR:$b, i16))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -1924,10 +1932,10 @@ multiclass T2I_smla<string opc, PatFrag opnode> { let Inst{5-4} = 0b00; } - def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, + def BT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16, !strconcat(opc, "bt"), "\t$dst, $a, $b, $acc", - [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16), - (sra GPR:$b, (i32 16)))))]> { + [(set rGPR:$dst, (add rGPR:$acc, (opnode (sext_inreg rGPR:$a, i16), + (sra rGPR:$b, (i32 16)))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -1936,10 +1944,10 @@ multiclass T2I_smla<string opc, PatFrag opnode> { let Inst{5-4} = 0b01; } - def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, + def TB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16, !strconcat(opc, "tb"), "\t$dst, $a, $b, $acc", - [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)), - (sext_inreg GPR:$b, i16))))]> { + [(set rGPR:$dst, (add rGPR:$acc, (opnode (sra rGPR:$a, (i32 16)), + (sext_inreg rGPR:$b, i16))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -1948,10 +1956,10 @@ multiclass T2I_smla<string opc, PatFrag opnode> { let Inst{5-4} = 0b10; } - def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, + def TT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16, !strconcat(opc, "tt"), "\t$dst, $a, $b, $acc", - [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)), - (sra GPR:$b, (i32 16)))))]> { + [(set rGPR:$dst, (add rGPR:$acc, (opnode (sra rGPR:$a, (i32 16)), + (sra rGPR:$b, (i32 16)))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -1960,10 +1968,10 @@ multiclass T2I_smla<string opc, PatFrag opnode> { let Inst{5-4} = 0b11; } - def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, + def WB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16, !strconcat(opc, "wb"), "\t$dst, $a, $b, $acc", - [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, - (sext_inreg GPR:$b, i16)), (i32 16))))]> { + [(set rGPR:$dst, (add rGPR:$acc, (sra (opnode rGPR:$a, + (sext_inreg rGPR:$b, i16)), (i32 16))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; @@ -1972,10 +1980,10 @@ multiclass T2I_smla<string opc, PatFrag opnode> { let Inst{5-4} = 0b00; } - def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, + def WT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16, !strconcat(opc, "wt"), "\t$dst, $a, $b, $acc", - [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, - (sra GPR:$b, (i32 16))), 
(i32 16))))]> { + [(set rGPR:$dst, (add rGPR:$acc, (sra (opnode rGPR:$a, + (sra rGPR:$b, (i32 16))), (i32 16))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; @@ -1989,61 +1997,61 @@ defm t2SMUL : T2I_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>; defm t2SMLA : T2I_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>; // Halfword multiply accumulate long: SMLAL<x><y> -- for disassembly only -def t2SMLALBB : T2I_mac<1, 0b100, 0b1000, (outs GPR:$ldst,GPR:$hdst), - (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlalbb", "\t$ldst, $hdst, $a, $b", +def t2SMLALBB : T2I_mac<1, 0b100, 0b1000, (outs rGPR:$ldst,rGPR:$hdst), + (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlalbb", "\t$ldst, $hdst, $a, $b", [/* For disassembly only; pattern left blank */]>; -def t2SMLALBT : T2I_mac<1, 0b100, 0b1001, (outs GPR:$ldst,GPR:$hdst), - (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlalbt", "\t$ldst, $hdst, $a, $b", +def t2SMLALBT : T2I_mac<1, 0b100, 0b1001, (outs rGPR:$ldst,rGPR:$hdst), + (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlalbt", "\t$ldst, $hdst, $a, $b", [/* For disassembly only; pattern left blank */]>; -def t2SMLALTB : T2I_mac<1, 0b100, 0b1010, (outs GPR:$ldst,GPR:$hdst), - (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlaltb", "\t$ldst, $hdst, $a, $b", +def t2SMLALTB : T2I_mac<1, 0b100, 0b1010, (outs rGPR:$ldst,rGPR:$hdst), + (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlaltb", "\t$ldst, $hdst, $a, $b", [/* For disassembly only; pattern left blank */]>; -def t2SMLALTT : T2I_mac<1, 0b100, 0b1011, (outs GPR:$ldst,GPR:$hdst), - (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlaltt", "\t$ldst, $hdst, $a, $b", +def t2SMLALTT : T2I_mac<1, 0b100, 0b1011, (outs rGPR:$ldst,rGPR:$hdst), + (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlaltt", "\t$ldst, $hdst, $a, $b", [/* For disassembly only; pattern left blank */]>; // Dual halfword multiply: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD // These are for disassembly only.
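As a hedged illustration of the dual-halfword forms listed above (helper name invented; smusd subtracts the top product instead of adding it, and the X variants swap the halfwords of the second operand):

#include <stdint.h>

/* Illustrative model of smuad rd, rn, rm: a signed dot product of the two
   16-bit lanes. */
static int32_t smuad(uint32_t rn, uint32_t rm) {
    int32_t lo = (int16_t)(rn & 0xFFFF) * (int16_t)(rm & 0xFFFF);
    int32_t hi = (int16_t)(rn >> 16) * (int16_t)(rm >> 16);
    return lo + hi;
}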
-def t2SMUAD : T2I_mac<0, 0b010, 0b0000, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iMAC32, "smuad", "\t$dst, $a, $b", []> { +def t2SMUAD: T2I_mac<0, 0b010, 0b0000, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), + IIC_iMAC32, "smuad", "\t$dst, $a, $b", []> { let Inst{15-12} = 0b1111; } -def t2SMUADX : T2I_mac<0, 0b010, 0b0001, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iMAC32, "smuadx", "\t$dst, $a, $b", []> { +def t2SMUADX:T2I_mac<0, 0b010, 0b0001, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), + IIC_iMAC32, "smuadx", "\t$dst, $a, $b", []> { let Inst{15-12} = 0b1111; } -def t2SMUSD : T2I_mac<0, 0b100, 0b0000, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iMAC32, "smusd", "\t$dst, $a, $b", []> { +def t2SMUSD: T2I_mac<0, 0b100, 0b0000, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), + IIC_iMAC32, "smusd", "\t$dst, $a, $b", []> { let Inst{15-12} = 0b1111; } -def t2SMUSDX : T2I_mac<0, 0b100, 0b0001, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iMAC32, "smusdx", "\t$dst, $a, $b", []> { +def t2SMUSDX:T2I_mac<0, 0b100, 0b0001, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), + IIC_iMAC32, "smusdx", "\t$dst, $a, $b", []> { let Inst{15-12} = 0b1111; } -def t2SMLAD : T2I_mac<0, 0b010, 0b0000, (outs GPR:$dst), - (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smlad", +def t2SMLAD : T2I_mac<0, 0b010, 0b0000, (outs rGPR:$dst), + (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC32, "smlad", "\t$dst, $a, $b, $acc", []>; -def t2SMLADX : T2I_mac<0, 0b010, 0b0001, (outs GPR:$dst), - (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smladx", +def t2SMLADX : T2I_mac<0, 0b010, 0b0001, (outs rGPR:$dst), + (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC32, "smladx", "\t$dst, $a, $b, $acc", []>; -def t2SMLSD : T2I_mac<0, 0b100, 0b0000, (outs GPR:$dst), - (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smlsd", +def t2SMLSD : T2I_mac<0, 0b100, 0b0000, (outs rGPR:$dst), + (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC32, "smlsd", "\t$dst, $a, $b, $acc", []>; -def t2SMLSDX : T2I_mac<0, 0b100, 0b0001, (outs GPR:$dst), - (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smlsdx", +def t2SMLSDX : T2I_mac<0, 0b100, 0b0001, (outs rGPR:$dst), + (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC32, "smlsdx", "\t$dst, $a, $b, $acc", []>; -def t2SMLALD : T2I_mac<1, 0b100, 0b1100, (outs GPR:$ldst,GPR:$hdst), - (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlald", +def t2SMLALD : T2I_mac<1, 0b100, 0b1100, (outs rGPR:$ldst,rGPR:$hdst), + (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlald", "\t$ldst, $hdst, $a, $b", []>; -def t2SMLALDX : T2I_mac<1, 0b100, 0b1101, (outs GPR:$ldst,GPR:$hdst), - (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlaldx", +def t2SMLALDX : T2I_mac<1, 0b100, 0b1101, (outs rGPR:$ldst,rGPR:$hdst), + (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlaldx", "\t$ldst, $hdst, $a, $b", []>; -def t2SMLSLD : T2I_mac<1, 0b101, 0b1100, (outs GPR:$ldst,GPR:$hdst), - (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlsld", +def t2SMLSLD : T2I_mac<1, 0b101, 0b1100, (outs rGPR:$ldst,rGPR:$hdst), + (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlsld", "\t$ldst, $hdst, $a, $b", []>; -def t2SMLSLDX : T2I_mac<1, 0b101, 0b1101, (outs GPR:$ldst,GPR:$hdst), - (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlsldx", +def t2SMLSLDX : T2I_mac<1, 0b101, 0b1101, (outs rGPR:$ldst,rGPR:$hdst), + (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlsldx", "\t$ldst, $hdst, $a, $b", []>; //===----------------------------------------------------------------------===// @@ -2061,35 +2069,35 @@ class T2I_misc<bits<2> op1, bits<2> op2, dag oops, dag iops, let Inst{5-4} = op2; } -def t2CLZ : T2I_misc<0b11, 0b00, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, - "clz", "\t$dst, $src", [(set 
GPR:$dst, (ctlz GPR:$src))]>; +def t2CLZ : T2I_misc<0b11, 0b00, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr, + "clz", "\t$dst, $src", [(set rGPR:$dst, (ctlz rGPR:$src))]>; -def t2RBIT : T2I_misc<0b01, 0b10, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, +def t2RBIT : T2I_misc<0b01, 0b10, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr, "rbit", "\t$dst, $src", - [(set GPR:$dst, (ARMrbit GPR:$src))]>; + [(set rGPR:$dst, (ARMrbit rGPR:$src))]>; -def t2REV : T2I_misc<0b01, 0b00, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, - "rev", ".w\t$dst, $src", [(set GPR:$dst, (bswap GPR:$src))]>; +def t2REV : T2I_misc<0b01, 0b00, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr, + "rev", ".w\t$dst, $src", [(set rGPR:$dst, (bswap rGPR:$src))]>; -def t2REV16 : T2I_misc<0b01, 0b01, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, +def t2REV16 : T2I_misc<0b01, 0b01, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr, "rev16", ".w\t$dst, $src", - [(set GPR:$dst, - (or (and (srl GPR:$src, (i32 8)), 0xFF), - (or (and (shl GPR:$src, (i32 8)), 0xFF00), - (or (and (srl GPR:$src, (i32 8)), 0xFF0000), - (and (shl GPR:$src, (i32 8)), 0xFF000000)))))]>; + [(set rGPR:$dst, + (or (and (srl rGPR:$src, (i32 8)), 0xFF), + (or (and (shl rGPR:$src, (i32 8)), 0xFF00), + (or (and (srl rGPR:$src, (i32 8)), 0xFF0000), + (and (shl rGPR:$src, (i32 8)), 0xFF000000)))))]>; -def t2REVSH : T2I_misc<0b01, 0b11, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, +def t2REVSH : T2I_misc<0b01, 0b11, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr, "revsh", ".w\t$dst, $src", - [(set GPR:$dst, + [(set rGPR:$dst, (sext_inreg - (or (srl (and GPR:$src, 0xFF00), (i32 8)), - (shl GPR:$src, (i32 8))), i16))]>; + (or (srl (and rGPR:$src, 0xFF00), (i32 8)), + (shl rGPR:$src, (i32 8))), i16))]>; -def t2PKHBT : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), - IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, lsl $shamt", - [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF), - (and (shl GPR:$src2, (i32 imm:$shamt)), +def t2PKHBT : T2I<(outs rGPR:$dst), (ins rGPR:$src1, rGPR:$src2, shift_imm:$sh), + IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2$sh", + [(set rGPR:$dst, (or (and rGPR:$src1, 0xFFFF), + (and (shl rGPR:$src2, lsl_amt:$sh), 0xFFFF0000)))]>, Requires<[HasT2ExtractPack]> { let Inst{31-27} = 0b11101; @@ -2100,18 +2108,20 @@ def t2PKHBT : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), } // Alternate cases for PKHBT where identities eliminate some nodes. -def : T2Pat<(or (and GPR:$src1, 0xFFFF), (and GPR:$src2, 0xFFFF0000)), - (t2PKHBT GPR:$src1, GPR:$src2, 0)>, +def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (and rGPR:$src2, 0xFFFF0000)), + (t2PKHBT rGPR:$src1, rGPR:$src2, 0)>, Requires<[HasT2ExtractPack]>; -def : T2Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)), - (t2PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>, +def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (shl rGPR:$src2, imm16_31:$sh)), + (t2PKHBT rGPR:$src1, rGPR:$src2, (lsl_shift_imm imm16_31:$sh))>, Requires<[HasT2ExtractPack]>; -def t2PKHTB : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), - IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, asr $shamt", - [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000), - (and (sra GPR:$src2, imm16_31:$shamt), - 0xFFFF)))]>, +// Note: Shifts of 1-15 bits will be transformed to srl instead of sra and +// will match the pattern below. 
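The note above is easy to check: once the result is masked to the low 16 bits, arithmetic and logical right shifts by 1 through 15 agree, since the sign-filled bits all land above bit 15. That is why the srl-based pattern further below can still select pkhtb, whose formal semantics use asr. A small illustrative C check (assuming the usual arithmetic behavior of >> on signed values):

#include <assert.h>
#include <stdint.h>

int main(void) {
    uint32_t x = 0x8000BEEFu;    /* negative when viewed as int32_t */
    for (unsigned sh = 1; sh <= 15; ++sh) {
        uint32_t asr = (uint32_t)((int32_t)x >> sh) & 0xFFFFu;
        uint32_t srl = (x >> sh) & 0xFFFFu;
        assert(asr == srl);
    }
    return 0;
}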
+def t2PKHTB : T2I<(outs rGPR:$dst), (ins rGPR:$src1, rGPR:$src2, shift_imm:$sh), + IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2$sh", + [(set rGPR:$dst, (or (and rGPR:$src1, 0xFFFF0000), + (and (sra rGPR:$src2, asr_amt:$sh), + 0xFFFF)))]>, Requires<[HasT2ExtractPack]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -2122,18 +2132,17 @@ def t2PKHTB : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), // Alternate cases for PKHTB where identities eliminate some nodes. Note that // a shift amount of 0 is *not legal* here, it is PKHBT instead. -def : T2Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, (i32 16))), - (t2PKHTB GPR:$src1, GPR:$src2, 16)>, +def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (srl rGPR:$src2, imm16_31:$sh)), + (t2PKHTB rGPR:$src1, rGPR:$src2, (asr_shift_imm imm16_31:$sh))>, Requires<[HasT2ExtractPack]>; -def : T2Pat<(or (and GPR:$src1, 0xFFFF0000), - (and (srl GPR:$src2, imm1_15:$shamt), 0xFFFF)), - (t2PKHTB GPR:$src1, GPR:$src2, imm1_15:$shamt)>, +def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), + (and (srl rGPR:$src2, imm1_15:$sh), 0xFFFF)), + (t2PKHTB rGPR:$src1, rGPR:$src2, (asr_shift_imm imm1_15:$sh))>, Requires<[HasT2ExtractPack]>; //===----------------------------------------------------------------------===// // Comparison Instructions... // - defm t2CMP : T2I_cmp_irs<0b1101, "cmp", BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>; defm t2CMPz : T2I_cmp_irs<0b1101, "cmp", @@ -2157,18 +2166,13 @@ defm t2TST : T2I_cmp_irs<0b0000, "tst", defm t2TEQ : T2I_cmp_irs<0b0100, "teq", BinOpFrag<(ARMcmpZ (xor node:$LHS, node:$RHS), 0)>>; -// A8.6.27 CBNZ, CBZ - Compare and branch on (non)zero. -// Short range conditional branch. Looks awesome for loops. Need to figure -// out how to use this one. - - // Conditional moves // FIXME: should be able to write a pattern for ARMcmov, but can't use // a two-value operand where a dag node expects two operands. :( let neverHasSideEffects = 1 in { -def t2MOVCCr : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true), IIC_iCMOVr, +def t2MOVCCr : T2I<(outs rGPR:$dst), (ins rGPR:$false, rGPR:$true), IIC_iCMOVr, "mov", ".w\t$dst, $true", - [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>, + [/*(set rGPR:$dst, (ARMcmov rGPR:$false, rGPR:$true, imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $dst"> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -2179,9 +2183,9 @@ def t2MOVCCr : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true), IIC_iCMOVr, let Inst{7-4} = 0b0000; } -def t2MOVCCi : T2I<(outs GPR:$dst), (ins GPR:$false, t2_so_imm:$true), +def t2MOVCCi : T2I<(outs rGPR:$dst), (ins rGPR:$false, t2_so_imm:$true), IIC_iCMOVi, "mov", ".w\t$dst, $true", -[/*(set GPR:$dst, (ARMcmov GPR:$false, t2_so_imm:$true, imm:$cc, CCR:$ccr))*/]>, +[/*(set rGPR:$dst,(ARMcmov rGPR:$false,t2_so_imm:$true, imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $dst"> { let Inst{31-27} = 0b11110; let Inst{25} = 0; @@ -2201,20 +2205,20 @@ class T2I_movcc_sh<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, let Inst{19-16} = 0b1111; // Rn let Inst{5-4} = opcod; // Shift type. 
} -def t2MOVCClsl : T2I_movcc_sh<0b00, (outs GPR:$dst), - (ins GPR:$false, GPR:$true, i32imm:$rhs), +def t2MOVCClsl : T2I_movcc_sh<0b00, (outs rGPR:$dst), + (ins rGPR:$false, rGPR:$true, i32imm:$rhs), IIC_iCMOVsi, "lsl", ".w\t$dst, $true, $rhs", []>, RegConstraint<"$false = $dst">; -def t2MOVCClsr : T2I_movcc_sh<0b01, (outs GPR:$dst), - (ins GPR:$false, GPR:$true, i32imm:$rhs), +def t2MOVCClsr : T2I_movcc_sh<0b01, (outs rGPR:$dst), + (ins rGPR:$false, rGPR:$true, i32imm:$rhs), IIC_iCMOVsi, "lsr", ".w\t$dst, $true, $rhs", []>, RegConstraint<"$false = $dst">; -def t2MOVCCasr : T2I_movcc_sh<0b10, (outs GPR:$dst), - (ins GPR:$false, GPR:$true, i32imm:$rhs), +def t2MOVCCasr : T2I_movcc_sh<0b10, (outs rGPR:$dst), + (ins rGPR:$false, rGPR:$true, i32imm:$rhs), IIC_iCMOVsi, "asr", ".w\t$dst, $true, $rhs", []>, RegConstraint<"$false = $dst">; -def t2MOVCCror : T2I_movcc_sh<0b11, (outs GPR:$dst), - (ins GPR:$false, GPR:$true, i32imm:$rhs), +def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$dst), + (ins rGPR:$false, rGPR:$true, i32imm:$rhs), IIC_iCMOVsi, "ror", ".w\t$dst, $true, $rhs", []>, RegConstraint<"$false = $dst">; } // neverHasSideEffects @@ -2225,21 +2229,15 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs GPR:$dst), // memory barriers protect the atomic sequences let hasSideEffects = 1 in { -def t2Int_MemBarrierV7 : AInoP<(outs), (ins), - ThumbFrm, NoItinerary, - "dmb", "", - [(ARMMemBarrierV7)]>, - Requires<[IsThumb2]> { +def t2DMBsy : AInoP<(outs), (ins), ThumbFrm, NoItinerary, "dmb", "", + [(ARMMemBarrier)]>, Requires<[IsThumb, HasDB]> { let Inst{31-4} = 0xF3BF8F5; // FIXME: add support for options other than a full system DMB let Inst{3-0} = 0b1111; } -def t2Int_SyncBarrierV7 : AInoP<(outs), (ins), - ThumbFrm, NoItinerary, - "dsb", "", - [(ARMSyncBarrierV7)]>, - Requires<[IsThumb2]> { +def t2DSBsy : AInoP<(outs), (ins), ThumbFrm, NoItinerary, "dsb", "", + [(ARMSyncBarrier)]>, Requires<[IsThumb, HasDB]> { let Inst{31-4} = 0xF3BF8F4; // FIXME: add support for options other than a full system DSB let Inst{3-0} = 0b1111; @@ -2329,13 +2327,13 @@ class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz, } let mayLoad = 1 in { -def t2LDREXB : T2I_ldrex<0b00, (outs GPR:$dest), (ins GPR:$ptr), AddrModeNone, +def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$dest), (ins rGPR:$ptr), AddrModeNone, Size4Bytes, NoItinerary, "ldrexb", "\t$dest, [$ptr]", "", []>; -def t2LDREXH : T2I_ldrex<0b01, (outs GPR:$dest), (ins GPR:$ptr), AddrModeNone, +def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$dest), (ins rGPR:$ptr), AddrModeNone, Size4Bytes, NoItinerary, "ldrexh", "\t$dest, [$ptr]", "", []>; -def t2LDREX : Thumb2I<(outs GPR:$dest), (ins GPR:$ptr), AddrModeNone, +def t2LDREX : Thumb2I<(outs rGPR:$dest), (ins rGPR:$ptr), AddrModeNone, Size4Bytes, NoItinerary, "ldrex", "\t$dest, [$ptr]", "", []> { @@ -2344,20 +2342,20 @@ def t2LDREX : Thumb2I<(outs GPR:$dest), (ins GPR:$ptr), AddrModeNone, let Inst{11-8} = 0b1111; let Inst{7-0} = 0b00000000; // imm8 = 0 } -def t2LDREXD : T2I_ldrex<0b11, (outs GPR:$dest, GPR:$dest2), (ins GPR:$ptr), +def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$dest, rGPR:$dest2), (ins rGPR:$ptr), AddrModeNone, Size4Bytes, NoItinerary, "ldrexd", "\t$dest, $dest2, [$ptr]", "", [], {?, ?, ?, ?}>; } let mayStore = 1, Constraints = "@earlyclobber $success" in { -def t2STREXB : T2I_strex<0b00, (outs GPR:$success), (ins GPR:$src, GPR:$ptr), +def t2STREXB : T2I_strex<0b00, (outs rGPR:$success), (ins rGPR:$src, rGPR:$ptr), AddrModeNone, Size4Bytes, NoItinerary, "strexb", "\t$success, $src, [$ptr]", 
"", []>; -def t2STREXH : T2I_strex<0b01, (outs GPR:$success), (ins GPR:$src, GPR:$ptr), +def t2STREXH : T2I_strex<0b01, (outs rGPR:$success), (ins rGPR:$src, rGPR:$ptr), AddrModeNone, Size4Bytes, NoItinerary, "strexh", "\t$success, $src, [$ptr]", "", []>; -def t2STREX : Thumb2I<(outs GPR:$success), (ins GPR:$src, GPR:$ptr), +def t2STREX : Thumb2I<(outs rGPR:$success), (ins rGPR:$src, rGPR:$ptr), AddrModeNone, Size4Bytes, NoItinerary, "strex", "\t$success, $src, [$ptr]", "", []> { @@ -2365,8 +2363,8 @@ def t2STREX : Thumb2I<(outs GPR:$success), (ins GPR:$src, GPR:$ptr), let Inst{26-20} = 0b0000100; let Inst{7-0} = 0b00000000; // imm8 = 0 } -def t2STREXD : T2I_strex<0b11, (outs GPR:$success), - (ins GPR:$src, GPR:$src2, GPR:$ptr), +def t2STREXD : T2I_strex<0b11, (outs rGPR:$success), + (ins rGPR:$src, rGPR:$src2, rGPR:$ptr), AddrModeNone, Size4Bytes, NoItinerary, "strexd", "\t$success, $src, $src2, [$ptr]", "", [], {?, ?, ?, ?}>; @@ -2416,7 +2414,7 @@ let Defs = D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15, D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30, D31 ], hasSideEffects = 1, isBarrier = 1 in { - def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val), + def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val), AddrModeNone, SizeSpecial, NoItinerary, "mov\t$val, pc\t${:comment} begin eh.setjmp\n\t" "adds\t$val, #7\n\t" @@ -2425,14 +2423,14 @@ let Defs = "b\t1f\n\t" "movs\tr0, #1\t${:comment} end eh.setjmp\n\t" "1:", "", - [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>, + [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>, Requires<[IsThumb2, HasVFP2]>; } let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ], hasSideEffects = 1, isBarrier = 1 in { - def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val), + def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val), AddrModeNone, SizeSpecial, NoItinerary, "mov\t$val, pc\t${:comment} begin eh.setjmp\n\t" "adds\t$val, #7\n\t" @@ -2441,7 +2439,7 @@ let Defs = "b\t1f\n\t" "movs\tr0, #1\t${:comment} end eh.setjmp\n\t" "1:", "", - [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>, + [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>, Requires<[IsThumb2, NoVFP]>; } @@ -2482,7 +2480,7 @@ let isNotDuplicable = 1, isIndirectBranch = 1 in { def t2BR_JT : T2JTI<(outs), (ins GPR:$target, GPR:$index, jt2block_operand:$jt, i32imm:$id), - IIC_Br, "mov\tpc, $target\n$jt", + IIC_Br, "mov\tpc, $target$jt", [(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt, imm:$id)]> { let Inst{31-27} = 0b11101; let Inst{26-20} = 0b0100100; @@ -2496,7 +2494,7 @@ def t2BR_JT : def t2TBB : T2JTI<(outs), (ins tb_addrmode:$index, jt2block_operand:$jt, i32imm:$id), - IIC_Br, "tbb\t$index\n$jt", []> { + IIC_Br, "tbb\t$index$jt", []> { let Inst{31-27} = 0b11101; let Inst{26-20} = 0b0001101; let Inst{19-16} = 0b1111; // Rn = pc (table follows this instruction) @@ -2507,7 +2505,7 @@ def t2TBB : def t2TBH : T2JTI<(outs), (ins tb_addrmode:$index, jt2block_operand:$jt, i32imm:$id), - IIC_Br, "tbh\t$index\n$jt", []> { + IIC_Br, "tbh\t$index$jt", []> { let Inst{31-27} = 0b11101; let Inst{26-20} = 0b0001101; let Inst{19-16} = 0b1111; // Rn = pc (table follows this instruction) @@ -2560,7 +2558,7 @@ def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask), // Branch and Exchange Jazelle -- for disassembly only // Rm = Inst{19-16} -def t2BXJ : T2I<(outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func", +def t2BXJ : T2I<(outs), (ins 
rGPR:$func), NoItinerary, "bxj", "\t$func", [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11110; let Inst{26} = 0; @@ -2647,25 +2645,25 @@ def t2SRSIA : T2I<(outs), (ins i32imm:$mode),NoItinerary,"srsia","\tsp, $mode", } // Return From Exception is a system instruction -- for disassembly only -def t2RFEDBW : T2I<(outs), (ins GPR:$base), NoItinerary, "rfedb", "\t$base!", +def t2RFEDBW : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfedb", "\t$base!", [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11101; let Inst{26-20} = 0b0000011; // W = 1 } -def t2RFEDB : T2I<(outs), (ins GPR:$base), NoItinerary, "rfeab", "\t$base", +def t2RFEDB : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfeab", "\t$base", [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11101; let Inst{26-20} = 0b0000001; // W = 0 } -def t2RFEIAW : T2I<(outs), (ins GPR:$base), NoItinerary, "rfeia", "\t$base!", +def t2RFEIAW : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfeia", "\t$base!", [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11101; let Inst{26-20} = 0b0011011; // W = 1 } -def t2RFEIA : T2I<(outs), (ins GPR:$base), NoItinerary, "rfeia", "\t$base", +def t2RFEIA : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfeia", "\t$base", [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11101; let Inst{26-20} = 0b0011001; // W = 0 @@ -2676,26 +2674,26 @@ def t2RFEIA : T2I<(outs), (ins GPR:$base), NoItinerary, "rfeia", "\t$base", // // Two piece so_imms. -def : T2Pat<(or GPR:$LHS, t2_so_imm2part:$RHS), - (t2ORRri (t2ORRri GPR:$LHS, (t2_so_imm2part_1 imm:$RHS)), +def : T2Pat<(or rGPR:$LHS, t2_so_imm2part:$RHS), + (t2ORRri (t2ORRri rGPR:$LHS, (t2_so_imm2part_1 imm:$RHS)), (t2_so_imm2part_2 imm:$RHS))>; -def : T2Pat<(xor GPR:$LHS, t2_so_imm2part:$RHS), - (t2EORri (t2EORri GPR:$LHS, (t2_so_imm2part_1 imm:$RHS)), +def : T2Pat<(xor rGPR:$LHS, t2_so_imm2part:$RHS), + (t2EORri (t2EORri rGPR:$LHS, (t2_so_imm2part_1 imm:$RHS)), (t2_so_imm2part_2 imm:$RHS))>; -def : T2Pat<(add GPR:$LHS, t2_so_imm2part:$RHS), - (t2ADDri (t2ADDri GPR:$LHS, (t2_so_imm2part_1 imm:$RHS)), +def : T2Pat<(add rGPR:$LHS, t2_so_imm2part:$RHS), + (t2ADDri (t2ADDri rGPR:$LHS, (t2_so_imm2part_1 imm:$RHS)), (t2_so_imm2part_2 imm:$RHS))>; -def : T2Pat<(add GPR:$LHS, t2_so_neg_imm2part:$RHS), - (t2SUBri (t2SUBri GPR:$LHS, (t2_so_neg_imm2part_1 imm:$RHS)), +def : T2Pat<(add rGPR:$LHS, t2_so_neg_imm2part:$RHS), + (t2SUBri (t2SUBri rGPR:$LHS, (t2_so_neg_imm2part_1 imm:$RHS)), (t2_so_neg_imm2part_2 imm:$RHS))>; // 32-bit immediate using movw + movt. // This is a single pseudo instruction to make it re-materializable. Remove // when we can do generalized remat. 
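The t2MOVi32imm pseudo defined just below expands to a movw/movt pair, with ${src:lo16} and ${src:hi16} selecting the two halves of the immediate. A minimal sketch of the split (plain C++; register and value are illustrative):

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t Imm = 0xDEADBEEF;
  uint16_t Lo = Imm & 0xFFFF;         // movw r0, #0xBEEF  (${src:lo16})
  uint16_t Hi = uint16_t(Imm >> 16);  // movt r0, #0xDEAD  (${src:hi16}), low half preserved
  printf("movw\tr0, #0x%04X\nmovt\tr0, #0x%04X\n", Lo, Hi);
  return 0;
}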
let isReMaterializable = 1 in -def t2MOVi32imm : T2Ix2<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi, +def t2MOVi32imm : T2Ix2<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVi, "movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}", - [(set GPR:$dst, (i32 imm:$src))]>; + [(set rGPR:$dst, (i32 imm:$src))]>; // ConstantPool, GlobalAddress, and JumpTable def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2LEApcrel tglobaladdr :$dst)>, @@ -2723,7 +2721,7 @@ def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp), // // Rd = Instr{11-8} -def t2MRS : T2I<(outs GPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, cpsr", +def t2MRS : T2I<(outs rGPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, cpsr", [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11110; let Inst{26} = 0; @@ -2734,7 +2732,7 @@ def t2MRS : T2I<(outs GPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, cpsr", } // Rd = Instr{11-8} -def t2MRSsys : T2I<(outs GPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, spsr", +def t2MRSsys : T2I<(outs rGPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, spsr", [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11110; let Inst{26} = 0; @@ -2745,7 +2743,7 @@ def t2MRSsys : T2I<(outs GPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, spsr", } // Rn = Inst{19-16} -def t2MSR : T2I<(outs), (ins GPR:$src, msr_mask:$mask), NoItinerary, "msr", +def t2MSR : T2I<(outs), (ins rGPR:$src, msr_mask:$mask), NoItinerary, "msr", "\tcpsr$mask, $src", [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11110; @@ -2757,7 +2755,7 @@ def t2MSR : T2I<(outs), (ins GPR:$src, msr_mask:$mask), NoItinerary, "msr", } // Rn = Inst{19-16} -def t2MSRsys : T2I<(outs), (ins GPR:$src, msr_mask:$mask), NoItinerary, "msr", +def t2MSRsys : T2I<(outs), (ins rGPR:$src, msr_mask:$mask), NoItinerary, "msr", "\tspsr$mask, $src", [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11110; diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 84c23e1a784c..c29e09606bd4 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -77,61 +77,61 @@ def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr), // let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { -def VLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dsts, +def VLDMD : AXDI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), IndexModeNone, IIC_fpLoadm, - "vldm${addr:submode}${p}\t${addr:base}, $dsts", "", []> { + "vldm${addr:submode}${p}\t$addr, $dsts", "", []> { let Inst{20} = 1; } -def VLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dsts, +def VLDMS : AXSI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), IndexModeNone, IIC_fpLoadm, - "vldm${addr:submode}${p}\t${addr:base}, $dsts", "", []> { + "vldm${addr:submode}${p}\t$addr, $dsts", "", []> { let Inst{20} = 1; } -def VLDMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, +def VLDMD_UPD : AXDI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), IndexModeUpd, IIC_fpLoadm, - "vldm${addr:submode}${p}\t${addr:base}!, $dsts", - "$addr.base = $wb", []> { + "vldm${addr:submode}${p}\t$addr!, $dsts", + "$addr.addr = $wb", []> { let Inst{20} = 1; } -def VLDMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, +def VLDMS_UPD : AXSI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), IndexModeUpd, IIC_fpLoadm, - 
"vldm${addr:submode}${p}\t${addr:base}!, $dsts", - "$addr.base = $wb", []> { + "vldm${addr:submode}${p}\t$addr!, $dsts", + "$addr.addr = $wb", []> { let Inst{20} = 1; } } // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { -def VSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$srcs, +def VSTMD : AXDI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops), IndexModeNone, IIC_fpStorem, - "vstm${addr:submode}${p}\t${addr:base}, $srcs", "", []> { + "vstm${addr:submode}${p}\t$addr, $srcs", "", []> { let Inst{20} = 0; } -def VSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$srcs, +def VSTMS : AXSI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops), IndexModeNone, IIC_fpStorem, - "vstm${addr:submode}${p}\t${addr:base}, $srcs", "", []> { + "vstm${addr:submode}${p}\t$addr, $srcs", "", []> { let Inst{20} = 0; } -def VSTMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, +def VSTMD_UPD : AXDI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops), IndexModeUpd, IIC_fpStorem, - "vstm${addr:submode}${p}\t${addr:base}!, $srcs", - "$addr.base = $wb", []> { + "vstm${addr:submode}${p}\t$addr!, $srcs", + "$addr.addr = $wb", []> { let Inst{20} = 0; } -def VSTMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, +def VSTMS_UPD : AXSI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops), IndexModeUpd, IIC_fpStorem, - "vstm${addr:submode}${p}\t${addr:base}!, $srcs", - "$addr.base = $wb", []> { + "vstm${addr:submode}${p}\t$addr!, $srcs", + "$addr.addr = $wb", []> { let Inst{20} = 0; } } // mayStore, neverHasSideEffects, hasExtraSrcRegAllocReq @@ -420,34 +420,35 @@ def VTOUIZS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010, // And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR. // For disassembly only. 
- +let Uses = [FPSCR] in { def VTOSIRD : AVConv1I<0b11101, 0b11, 0b1101, 0b1011, (outs SPR:$dst), (ins DPR:$a), IIC_fpCVTDI, "vcvtr", ".s32.f64\t$dst, $a", - [/* For disassembly only; pattern left blank */]> { + [(set SPR:$dst, (int_arm_vcvtr (f64 DPR:$a)))]> { let Inst{7} = 0; // Z bit } def VTOSIRS : AVConv1In<0b11101, 0b11, 0b1101, 0b1010, (outs SPR:$dst), (ins SPR:$a), IIC_fpCVTSI, "vcvtr", ".s32.f32\t$dst, $a", - [/* For disassembly only; pattern left blank */]> { + [(set SPR:$dst, (int_arm_vcvtr SPR:$a))]> { let Inst{7} = 0; // Z bit } def VTOUIRD : AVConv1I<0b11101, 0b11, 0b1100, 0b1011, (outs SPR:$dst), (ins DPR:$a), IIC_fpCVTDI, "vcvtr", ".u32.f64\t$dst, $a", - [/* For disassembly only; pattern left blank */]> { + [(set SPR:$dst, (int_arm_vcvtru (f64 DPR:$a)))]> { let Inst{7} = 0; // Z bit } def VTOUIRS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010, (outs SPR:$dst), (ins SPR:$a), IIC_fpCVTSI, "vcvtr", ".u32.f32\t$dst, $a", - [/* For disassembly only; pattern left blank */]> { + [(set SPR:$dst, (int_arm_vcvtru SPR:$a))]> { let Inst{7} = 0; // Z bit } +} // Convert between floating-point and fixed-point // Data type for fixed-point naming convention: @@ -460,6 +461,7 @@ let Constraints = "$a = $dst" in { // FP to Fixed-Point: +let isCodeGenOnly = 1 in { def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", @@ -499,9 +501,11 @@ def VTOULD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 1, (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", [/* For disassembly only; pattern left blank */]>; +} // Fixed-Point to FP: +let isCodeGenOnly = 1 in { def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", @@ -541,6 +545,7 @@ def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1, (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", [/* For disassembly only; pattern left blank */]>; +} } // End of 'let Constraints = "$src = $dst" in' @@ -654,32 +659,27 @@ def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs", } // FPSCR <-> GPR (for disassembly only) - -let neverHasSideEffects = 1 in { -let Uses = [FPSCR] in { -def VMRS : VFPAI<(outs GPR:$dst), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs", - "\t$dst, fpscr", - [/* For disassembly only; pattern left blank */]> { +let hasSideEffects = 1, Uses = [FPSCR] in +def VMRS : VFPAI<(outs GPR:$dst), (ins), VFPMiscFrm, IIC_fpSTAT, + "vmrs", "\t$dst, fpscr", + [(set GPR:$dst, (int_arm_get_fpscr))]> { let Inst{27-20} = 0b11101111; let Inst{19-16} = 0b0001; let Inst{11-8} = 0b1010; let Inst{7} = 0; let Inst{4} = 1; } -} -let Defs = [FPSCR] in { -def VMSR : VFPAI<(outs), (ins GPR:$src), VFPMiscFrm, IIC_fpSTAT, "vmsr", - "\tfpscr, $src", - [/* For disassembly only; pattern left blank */]> { +let Defs = [FPSCR] in +def VMSR : VFPAI<(outs), (ins GPR:$src), VFPMiscFrm, IIC_fpSTAT, + "vmsr", "\tfpscr, $src", + [(int_arm_set_fpscr GPR:$src)]> { let Inst{27-20} = 0b11101110; let Inst{19-16} = 0b0001; let Inst{11-8} = 0b1010; let Inst{7} = 0; let Inst{4} = 1; } -} -} // neverHasSideEffects // Materialize FP immediates. VFP3 only. 
let isReMaterializable = 1 in { diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index f80e316d23e8..2b7645a42119 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -57,7 +57,7 @@ STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's"); namespace { struct ARMLoadStoreOpt : public MachineFunctionPass { static char ID; - ARMLoadStoreOpt() : MachineFunctionPass(&ID) {} + ARMLoadStoreOpt() : MachineFunctionPass(ID) {} const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; @@ -193,20 +193,17 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, return false; ARM_AM::AMSubMode Mode = ARM_AM::ia; - bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode); - if (isAM4 && Offset == 4) { - if (isThumb2) - // Thumb2 does not support ldmib / stmib. - return false; + // VFP and Thumb2 do not support IB or DA modes. + bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); + bool haveIBAndDA = isNotVFP && !isThumb2; + if (Offset == 4 && haveIBAndDA) Mode = ARM_AM::ib; - } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) { - if (isThumb2) - // Thumb2 does not support ldmda / stmda. - return false; + else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) Mode = ARM_AM::da; - } else if (isAM4 && Offset == -4 * (int)NumRegs) { + else if (Offset == -4 * (int)NumRegs && isNotVFP) + // VLDM/VSTM do not support DB mode without also updating the base reg. Mode = ARM_AM::db; - } else if (Offset != 0) { + else if (Offset != 0) { // If starting offset isn't zero, insert a MI to materialize a new base. // But only do so if it is cost effective, i.e. merging more than two // loads / stores. @@ -246,18 +243,12 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, BaseKill = true; // New base is always killed right after its use. } - bool isDPR = (Opcode == ARM::VLDRD || Opcode == ARM::VSTRD); bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD); Opcode = getLoadStoreMultipleOpcode(Opcode); - MachineInstrBuilder MIB = (isAM4) - ? BuildMI(MBB, MBBI, dl, TII->get(Opcode)) - .addReg(Base, getKillRegState(BaseKill)) - .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg) - : BuildMI(MBB, MBBI, dl, TII->get(Opcode)) - .addReg(Base, getKillRegState(BaseKill)) - .addImm(ARM_AM::getAM5Opc(Mode, isDPR ?
NumRegs<<1 : NumRegs)) - .addImm(Pred).addReg(PredReg); + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)) + .addReg(Base, getKillRegState(BaseKill)) + .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg); for (unsigned i = 0; i != NumRegs; ++i) MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef) | getKillRegState(Regs[i].second)); @@ -333,6 +324,7 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, if (KilledRegs.count(Reg)) { unsigned j = Killer[Reg]; memOps[j].MBBI->getOperand(0).setIsKill(false); + memOps[j].isKill = false; } } MBB.erase(memOps[i].MBBI); @@ -348,7 +340,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, MemOpQueue &MemOps, SmallVector<MachineBasicBlock::iterator, 4> &Merges) { - bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode); + bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); int Offset = MemOps[SIndex].Offset; int SOffset = Offset; unsigned insertAfter = SIndex; @@ -366,12 +358,12 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Reg = MO.getReg(); unsigned RegNum = MO.isUndef() ? UINT_MAX : ARMRegisterInfo::getRegisterNumbering(Reg); - // AM4 - register numbers in ascending order. - // AM5 - consecutive register numbers in ascending order. - // Can only do up to 16 double-word registers per insn. + // Register numbers must be in ascending order. For VFP, the registers + // must also be consecutive and there is a limit of 16 double-word + // registers per instruction. if (Reg != ARM::SP && NewOffset == Offset + (int)Size && - ((isAM4 && RegNum > PRegNum) + ((isNotVFP && RegNum > PRegNum) || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) { Offset += Size; PRegNum = RegNum; @@ -409,7 +401,7 @@ static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base, return false; // Make sure the offset fits in 8 bits. - if (Bytes <= 0 || (Limit && Bytes >= Limit)) + if (Bytes == 0 || (Limit && Bytes >= Limit)) return false; unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME @@ -433,7 +425,7 @@ static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base, MI->getOpcode() != ARM::ADDri) return false; - if (Bytes <= 0 || (Limit && Bytes >= Limit)) + if (Bytes == 0 || (Limit && Bytes >= Limit)) // Make sure the offset fits in 8 bits. return false; @@ -464,12 +456,12 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { case ARM::STM: case ARM::t2LDM: case ARM::t2STM: - return (MI->getNumOperands() - 4) * 4; case ARM::VLDMS: case ARM::VSTMS: + return (MI->getNumOperands() - 4) * 4; case ARM::VLDMD: case ARM::VSTMD: - return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4; + return (MI->getNumOperands() - 4) * 8; } } @@ -512,26 +504,17 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg); int Opcode = MI->getOpcode(); DebugLoc dl = MI->getDebugLoc(); - bool isAM4 = (Opcode == ARM::LDM || Opcode == ARM::t2LDM || - Opcode == ARM::STM || Opcode == ARM::t2STM); bool DoMerge = false; ARM_AM::AMSubMode Mode = ARM_AM::ia; - unsigned Offset = 0; - if (isAM4) { - // Can't use an updating ld/st if the base register is also a dest - // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined. 
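As background for the loop that follows (which rejects an updating merge whose base register is also a destination), the sub-mode selection rewritten above reduces to a small decision table. A sketch under the new rules, assuming 4-byte slots; names are illustrative, not the actual MergeOps signature:

enum SubMode { ia, ib, da, db };

// Returns false when the starting offset forces materializing a new base.
static bool pickSubMode(int Offset, int NumRegs, bool HaveIBAndDA,
                        bool IsNotVFP, SubMode &Mode) {
  if (Offset == 0)                                    Mode = ia;
  else if (Offset == 4 && HaveIBAndDA)                Mode = ib;
  else if (Offset == -4 * NumRegs + 4 && HaveIBAndDA) Mode = da;
  else if (Offset == -4 * NumRegs && IsNotVFP)        Mode = db;
  else return false;
  return true;
}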
- for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) { - if (MI->getOperand(i).getReg() == Base) - return false; - } - Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm()); - } else { - // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops. - Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm()); - Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm()); + // Can't use an updating ld/st if the base register is also a dest + // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined. + for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) { + if (MI->getOperand(i).getReg() == Base) + return false; } + Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm()); // Try merging with the previous instruction. MachineBasicBlock::iterator BeginMBBI = MBB.begin(); @@ -539,22 +522,14 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, MachineBasicBlock::iterator PrevMBBI = prior(MBBI); while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) --PrevMBBI; - if (isAM4) { - if (Mode == ARM_AM::ia && - isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { - DoMerge = true; - Mode = ARM_AM::db; - } else if (isAM4 && Mode == ARM_AM::ib && - isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { - DoMerge = true; - Mode = ARM_AM::da; - } - } else { - if (Mode == ARM_AM::ia && - isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { - Mode = ARM_AM::db; - DoMerge = true; - } + if (Mode == ARM_AM::ia && + isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { + Mode = ARM_AM::db; + DoMerge = true; + } else if (Mode == ARM_AM::ib && + isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { + Mode = ARM_AM::da; + DoMerge = true; } if (DoMerge) MBB.erase(PrevMBBI); @@ -566,19 +541,12 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI); while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) ++NextMBBI; - if (isAM4) { - if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) && - isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { - DoMerge = true; - } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) && - isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { - DoMerge = true; - } - } else { - if (Mode == ARM_AM::ia && - isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { - DoMerge = true; - } + if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) && + isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { + DoMerge = true; + } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) && + isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { + DoMerge = true; } if (DoMerge) { if (NextMBBI == I) { @@ -595,16 +563,9 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode); MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) .addReg(Base, getDefRegState(true)) // WB base register - .addReg(Base, getKillRegState(BaseKill)); - if (isAM4) { - // [t2]LDM_UPD, [t2]STM_UPD - MIB.addImm(ARM_AM::getAM4ModeImm(Mode)) - .addImm(Pred).addReg(PredReg); - } else { - // VLDM[SD}_UPD, VSTM[SD]_UPD - MIB.addImm(ARM_AM::getAM5Opc(Mode, Offset)) - .addImm(Pred).addReg(PredReg); - } + .addReg(Base, getKillRegState(BaseKill)) + .addImm(ARM_AM::getAM4ModeImm(Mode)) + .addImm(Pred).addReg(PredReg); // Transfer the rest of operands. 
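The effect of this merge is easiest to see at the assembly level. A hedged sketch, together with the transfer-size rule from the reworked getLSMultipleTransferSize above (4 bytes per i32 or S register, 8 per D register):

// Before:                        After:
//   sub   r0, r0, #12             ldmdb r0!, {r1, r2, r3}
//   ldmia r0, {r1, r2, r3}
// The fold is legal when the add/sub amount equals the bytes transferred.
static unsigned transferSize(unsigned NumRegs, bool IsDReg) {
  return NumRegs * (IsDReg ? 8u : 4u);
}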
for (unsigned OpNum = 4, e = MI->getNumOperands(); OpNum != e; ++OpNum) MIB.addOperand(MI->getOperand(OpNum)); @@ -736,11 +697,10 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, if (!DoMerge) return false; - bool isDPR = NewOpc == ARM::VLDMD || NewOpc == ARM::VSTMD; unsigned Offset = 0; if (isAM5) - Offset = ARM_AM::getAM5Opc(AddSub == ARM_AM::sub ? ARM_AM::db : ARM_AM::ia, - (isDPR ? 2 : 1)); + Offset = ARM_AM::getAM4ModeImm(AddSub == ARM_AM::sub ? + ARM_AM::db : ARM_AM::ia); else if (isAM2) Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); else @@ -748,6 +708,9 @@ if (isAM5) { // VLDM[SD]_UPD, VSTM[SD]_UPD + // (There are no base-updating versions of VLDR/VSTR instructions, but the + // updating load/store-multiple instructions can be used with only one + // register.) MachineOperand &MO = MI->getOperand(0); BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) .addReg(Base, getDefRegState(true)) // WB base register @@ -1268,7 +1231,7 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { namespace { struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{ static char ID; - ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {} + ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {} const TargetData *TD; const TargetInstrInfo *TII; diff --git a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp index ab2b06b60783..ab2b06b60783 100644 --- a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp +++ b/lib/Target/ARM/ARMMCInstLower.cpp diff --git a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h b/lib/Target/ARM/ARMMCInstLower.h index b81a30690ce2..b81a30690ce2 100644 --- a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h +++ b/lib/Target/ARM/ARMMCInstLower.h diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h index 7e57a1ca5576..514c26b4daf0 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -43,6 +43,10 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// processFunctionBeforeCalleeSavedScan(). bool HasStackFrame; + /// RestoreSPFromFP - True if epilogue should restore SP from FP. Set by + /// emitPrologue. + bool RestoreSPFromFP; + /// LRSpilledForFarJump - True if the LR register has been spilled to /// enable a far jump.
bool LRSpilledForFarJump; @@ -95,7 +99,7 @@ public: ARMFunctionInfo() : isThumb(false), hasThumb2(false), - VarArgsRegSaveSize(0), HasStackFrame(false), + VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false), LRSpilledForFarJump(false), FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), @@ -106,7 +110,7 @@ public: explicit ARMFunctionInfo(MachineFunction &MF) : isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()), hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()), - VarArgsRegSaveSize(0), HasStackFrame(false), + VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false), LRSpilledForFarJump(false), FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), @@ -125,6 +129,9 @@ public: bool hasStackFrame() const { return HasStackFrame; } void setHasStackFrame(bool s) { HasStackFrame = s; } + bool shouldRestoreSPFromFP() const { return RestoreSPFromFP; } + void setShouldRestoreSPFromFP(bool s) { RestoreSPFromFP = s; } + bool isLRSpilledForFarJump() const { return LRSpilledForFarJump; } void setLRIsSpilledForFarJump(bool s) { LRSpilledForFarJump = s; } diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index d020f3c74bde..305b232e6a99 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -1,4 +1,4 @@ -//===- ARMRegisterInfo.td - ARM Register defs -------------------*- C++ -*-===// +//===- ARMRegisterInfo.td - ARM Register defs --------------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -220,41 +220,11 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, iterator allocation_order_end(const MachineFunction &MF) const; }]; let MethodBodies = [{ - // FP is R11, R9 is available. - static const unsigned ARM_GPR_AO_1[] = { + static const unsigned ARM_GPR_AO[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R12,ARM::LR, ARM::R4, ARM::R5, ARM::R6, ARM::R7, - ARM::R8, ARM::R9, ARM::R10, - ARM::R11 }; - // FP is R11, R9 is not available. - static const unsigned ARM_GPR_AO_2[] = { - ARM::R0, ARM::R1, ARM::R2, ARM::R3, - ARM::R12,ARM::LR, - ARM::R4, ARM::R5, ARM::R6, ARM::R7, - ARM::R8, ARM::R10, - ARM::R11 }; - // FP is R7, R9 is available as non-callee-saved register. - // This is used by Darwin. - static const unsigned ARM_GPR_AO_3[] = { - ARM::R0, ARM::R1, ARM::R2, ARM::R3, - ARM::R9, ARM::R12,ARM::LR, - ARM::R4, ARM::R5, ARM::R6, - ARM::R8, ARM::R10,ARM::R11,ARM::R7 }; - // FP is R7, R9 is not available. - static const unsigned ARM_GPR_AO_4[] = { - ARM::R0, ARM::R1, ARM::R2, ARM::R3, - ARM::R12,ARM::LR, - ARM::R4, ARM::R5, ARM::R6, - ARM::R8, ARM::R10,ARM::R11, - ARM::R7 }; - // FP is R7, R9 is available as callee-saved register. - // This is used by non-Darwin platform in Thumb mode. - static const unsigned ARM_GPR_AO_5[] = { - ARM::R0, ARM::R1, ARM::R2, ARM::R3, - ARM::R12,ARM::LR, - ARM::R4, ARM::R5, ARM::R6, - ARM::R8, ARM::R9, ARM::R10,ARM::R11,ARM::R7 }; + ARM::R8, ARM::R9, ARM::R10, ARM::R11 }; // For Thumb1 mode, we don't want to allocate hi regs at all, as we // don't know how to spill them. 
If we make our prologue/epilogue code @@ -270,85 +240,71 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); if (Subtarget.isThumb1Only()) return THUMB_GPR_AO; - if (Subtarget.isTargetDarwin()) { - if (Subtarget.isR9Reserved()) - return ARM_GPR_AO_4; - else - return ARM_GPR_AO_3; - } else { - if (Subtarget.isR9Reserved()) - return ARM_GPR_AO_2; - else if (Subtarget.isThumb()) - return ARM_GPR_AO_5; - else - return ARM_GPR_AO_1; - } + return ARM_GPR_AO; } GPRClass::iterator GPRClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); - GPRClass::iterator I; - - if (Subtarget.isThumb1Only()) { - I = THUMB_GPR_AO + (sizeof(THUMB_GPR_AO)/sizeof(unsigned)); - // Mac OS X requires FP not to be clobbered for backtracing purpose. - return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I; - } - - if (Subtarget.isTargetDarwin()) { - if (Subtarget.isR9Reserved()) - I = ARM_GPR_AO_4 + (sizeof(ARM_GPR_AO_4)/sizeof(unsigned)); - else - I = ARM_GPR_AO_3 + (sizeof(ARM_GPR_AO_3)/sizeof(unsigned)); - } else { - if (Subtarget.isR9Reserved()) - I = ARM_GPR_AO_2 + (sizeof(ARM_GPR_AO_2)/sizeof(unsigned)); - else if (Subtarget.isThumb()) - I = ARM_GPR_AO_5 + (sizeof(ARM_GPR_AO_5)/sizeof(unsigned)); - else - I = ARM_GPR_AO_1 + (sizeof(ARM_GPR_AO_1)/sizeof(unsigned)); - } - - // Mac OS X requires FP not to be clobbered for backtracing purpose. - return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I; + if (Subtarget.isThumb1Only()) + return THUMB_GPR_AO + (sizeof(THUMB_GPR_AO)/sizeof(unsigned)); + return ARM_GPR_AO + (sizeof(ARM_GPR_AO)/sizeof(unsigned)); } }]; } -// Thumb registers are R0-R7 normally. Some instructions can still use -// the general GPR register class above (MOV, e.g.) -def tGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> { +// restricted GPR register class. Many Thumb2 instructions allow the full +// register range for operands, but have undefined behaviours when PC +// or SP (R13 or R15) are used. The ARM ARM refers to these operands +// via the BadReg() pseudo-code description. +def rGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, + R7, R8, R9, R10, R11, R12, LR]> { let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; }]; let MethodBodies = [{ - static const unsigned THUMB_tGPR_AO[] = { + static const unsigned ARM_rGPR_AO[] = { + ARM::R0, ARM::R1, ARM::R2, ARM::R3, + ARM::R12,ARM::LR, + ARM::R4, ARM::R5, ARM::R6, ARM::R7, + ARM::R8, ARM::R9, ARM::R10, + ARM::R11 }; + + // For Thumb1 mode, we don't want to allocate hi regs at all, as we + // don't know how to spill them. If we make our prologue/epilogue code + // smarter at some point, we can go back to using the above allocation + // orders for the Thumb1 instructions that know how to use hi regs. + static const unsigned THUMB_rGPR_AO[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4, ARM::R5, ARM::R6, ARM::R7 }; - // FP is R7, only low registers available. 
- tGPRClass::iterator - tGPRClass::allocation_order_begin(const MachineFunction &MF) const { - return THUMB_tGPR_AO; + rGPRClass::iterator + rGPRClass::allocation_order_begin(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); + if (Subtarget.isThumb1Only()) + return THUMB_rGPR_AO; + return ARM_rGPR_AO; } - tGPRClass::iterator - tGPRClass::allocation_order_end(const MachineFunction &MF) const { + rGPRClass::iterator + rGPRClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); - tGPRClass::iterator I = - THUMB_tGPR_AO + (sizeof(THUMB_tGPR_AO)/sizeof(unsigned)); - // Mac OS X requires FP not to be clobbered for backtracing purpose. - return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I; + + if (Subtarget.isThumb1Only()) + return THUMB_rGPR_AO + (sizeof(THUMB_rGPR_AO)/sizeof(unsigned)); + return ARM_rGPR_AO + (sizeof(ARM_rGPR_AO)/sizeof(unsigned)); } }]; } +// Thumb registers are R0-R7 normally. Some instructions can still use +// the general GPR register class above (MOV, e.g.) +def tGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> {} + // For tail calls, we can't use callee-saved registers, as they are restored // to the saved value before the tail call, which would clobber a call address. // Note, getMinimalPhysRegClass(R0) returns tGPR because of the names of @@ -381,36 +337,20 @@ def tcGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R9, R12]> { const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); if (Subtarget.isThumb1Only()) return THUMB_GPR_AO_TC; - if (Subtarget.isTargetDarwin()) { - if (Subtarget.isR9Reserved()) - return ARM_GPR_NOR9_TC; - else - return ARM_GPR_R9_TC; - } else - // R9 is either callee-saved or reserved; can't use it. - return ARM_GPR_NOR9_TC; + return Subtarget.isTargetDarwin() ? ARM_GPR_R9_TC : ARM_GPR_NOR9_TC; } tcGPRClass::iterator tcGPRClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); - GPRClass::iterator I; - - if (Subtarget.isThumb1Only()) { - I = THUMB_GPR_AO_TC + (sizeof(THUMB_GPR_AO_TC)/sizeof(unsigned)); - return I; - } - - if (Subtarget.isTargetDarwin()) { - if (Subtarget.isR9Reserved()) - I = ARM_GPR_NOR9_TC + (sizeof(ARM_GPR_NOR9_TC)/sizeof(unsigned)); - else - I = ARM_GPR_R9_TC + (sizeof(ARM_GPR_R9_TC)/sizeof(unsigned)); - } else - // R9 is either callee-saved or reserved; can't use it. - I = ARM_GPR_NOR9_TC + (sizeof(ARM_GPR_NOR9_TC)/sizeof(unsigned)); - return I; + + if (Subtarget.isThumb1Only()) + return THUMB_GPR_AO_TC + (sizeof(THUMB_GPR_AO_TC)/sizeof(unsigned)); + + return Subtarget.isTargetDarwin() ? 
+ ARM_GPR_R9_TC + (sizeof(ARM_GPR_R9_TC)/sizeof(unsigned)) : + ARM_GPR_NOR9_TC + (sizeof(ARM_GPR_NOR9_TC)/sizeof(unsigned)); } }]; } diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 10fd257055fb..cb539f4c01ec 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -33,14 +33,19 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, , ARMFPUType(None) , UseNEONForSinglePrecisionFP(false) , SlowVMLx(false) + , SlowFPBrcc(false) , IsThumb(isT) , ThumbMode(Thumb1) + , NoARM(false) , PostRAScheduler(false) , IsR9Reserved(ReserveR9) , UseMovt(UseMOVT) , HasFP16(false) , HasHardwareDivide(false) , HasT2ExtractPack(false) + , HasDataBarrier(false) + , Pref32BitThumb(false) + , FPOnlySP(false) , stackAlignment(4) , CPUString("generic") , TargetType(isELF) // Default to ELF unless otherwise specified. diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index e7d92ede9b98..67e58038ee77 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -26,7 +26,7 @@ class GlobalValue; class ARMSubtarget : public TargetSubtarget { protected: enum ARMArchEnum { - V4, V4T, V5T, V5TE, V6, V6T2, V7A, V7M + V4, V4T, V5T, V5TE, V6, V6M, V6T2, V7A, V7M }; enum ARMFPEnum { @@ -63,6 +63,9 @@ protected: /// ThumbMode - Indicates supported Thumb version. ThumbTypeEnum ThumbMode; + /// NoARM - True if subtarget does not support ARM mode execution. + bool NoARM; + /// PostRAScheduler - True if using post-register-allocation scheduler. bool PostRAScheduler; @@ -84,6 +87,18 @@ protected: /// instructions. bool HasT2ExtractPack; + /// HasDataBarrier - True if the subtarget supports DMB / DSB data barrier + /// instructions. + bool HasDataBarrier; + + /// Pref32BitThumb - If true, codegen would prefer 32-bit Thumb instructions + /// over 16-bit ones. + bool Pref32BitThumb; + + /// FPOnlySP - If true, the floating point unit only supports single + /// precision. + bool FPOnlySP; + /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned stackAlignment; @@ -128,6 +143,8 @@ protected: bool hasV6T2Ops() const { return ARMArchVersion >= V6T2; } bool hasV7Ops() const { return ARMArchVersion >= V7A; } + bool hasARMOps() const { return !NoARM; } + bool hasVFP2() const { return ARMFPUType >= VFPv2; } bool hasVFP3() const { return ARMFPUType >= VFPv3; } bool hasNEON() const { return ARMFPUType >= NEON; } @@ -135,8 +152,11 @@ protected: return hasNEON() && UseNEONForSinglePrecisionFP; } bool hasDivide() const { return HasHardwareDivide; } bool hasT2ExtractPack() const { return HasT2ExtractPack; } + bool hasDataBarrier() const { return HasDataBarrier; } bool useVMLx() const {return hasVFP2() && !SlowVMLx; } bool isFPBrccSlow() const { return SlowFPBrcc; } + bool isFPOnlySP() const { return FPOnlySP; } + bool prefers32BitThumb() const { return Pref32BitThumb; } bool hasFP16() const { return HasFP16; } diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 09203f9304df..30ff8276cdaa 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -31,7 +31,6 @@ static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { } } - extern "C" void LLVMInitializeARMTarget() { // Register the target. 
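One detail of the ARMSubtarget changes above deserves a note before the target-machine hunks continue: the version accessors compare the arch enum with >=, so new entries such as V6M must be inserted in architectural order or hasV6T2Ops() and friends silently change meaning. A minimal sketch of the idiom:

enum ARMArchEnum { V4, V4T, V5T, V5TE, V6, V6M, V6T2, V7A, V7M };

static bool hasV6T2Ops(ARMArchEnum A) { return A >= V6T2; } // false for V6M
static bool hasV7Ops(ARMArchEnum A)   { return A >= V7A; }  // true for V7A and V7M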
RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget); @@ -66,6 +65,9 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT, "v128:64:128-v64:64:64-n32")), TLInfo(*this), TSInfo(*this) { + if (!Subtarget.hasARMOps()) + report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not " + "support ARM mode execution!"); } ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT, @@ -85,9 +87,15 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT, TSInfo(*this) { } +// Pass Pipeline Configuration +bool ARMBaseTargetMachine::addPreISel(PassManagerBase &PM, + CodeGenOpt::Level OptLevel) { + if (OptLevel != CodeGenOpt::None) + PM.add(createARMGlobalMergePass(getTargetLowering())); + return false; +} -// Pass Pipeline Configuration bool ARMBaseTargetMachine::addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { PM.add(createARMISelDag(*this, OptLevel)); @@ -132,7 +140,7 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM, bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { - if (Subtarget.isThumb2()) + if (Subtarget.isThumb2() && !Subtarget.prefers32BitThumb()) PM.add(createThumb2SizeReductionPass()); PM.add(createARMConstantIslandPass()); diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index a222e57b13ff..17e5425a9d37 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -50,6 +50,7 @@ public: } // Pass Pipeline Configuration + virtual bool addPreISel(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPreSched2(PassManagerBase &PM, CodeGenOpt::Level OptLevel); diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 4b083244b241..75e2a739bf1f 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "ARM.h" +#include "ARMSubtarget.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" @@ -18,8 +19,10 @@ #include "llvm/Target/TargetAsmParser.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" using namespace llvm; @@ -37,6 +40,7 @@ enum ShiftType { class ARMAsmParser : public TargetAsmParser { MCAsmParser &Parser; + TargetMachine &TM; private: MCAsmParser &getParser() const { return Parser; } @@ -76,26 +80,33 @@ private: bool ParseDirectiveSyntax(SMLoc L); - // TODO - For now hacked versions of the next two are in here in this file to - // allow some parser testing until the table gen versions are implemented. + bool MatchInstruction(SMLoc IDLoc, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands, + MCInst &Inst) { + if (!MatchInstructionImpl(Operands, Inst)) + return false; + + // FIXME: We should give nicer diagnostics about the exact failure. 
+ Error(IDLoc, "unrecognized instruction"); + + return true; + } /// @name Auto-generated Match Functions /// { - bool MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCInst &Inst); - /// MatchRegisterName - Match the given string to a register name and return - /// its register number, or -1 if there is no match. To allow return values - /// to be used directly in register lists, arm registers have values between - /// 0 and 15. - int MatchRegisterName(StringRef Name); + unsigned ComputeAvailableFeatures(const ARMSubtarget *Subtarget) const; + + bool MatchInstructionImpl(const SmallVectorImpl<MCParsedAsmOperand*> + &Operands, + MCInst &Inst); /// } public: - ARMAsmParser(const Target &T, MCAsmParser &_Parser) - : TargetAsmParser(T), Parser(_Parser) {} + ARMAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &_TM) + : TargetAsmParser(T), Parser(_Parser), TM(_TM) {} virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands); @@ -110,16 +121,21 @@ private: ARMOperand() {} public: enum KindTy { - Token, - Register, + CondCode, Immediate, - Memory + Memory, + Register, + Token } Kind; SMLoc StartLoc, EndLoc; union { struct { + ARMCC::CondCodes Val; + } CC; + + struct { const char *Data; unsigned Length; } Tok; @@ -151,16 +167,19 @@ public: }; - ARMOperand(KindTy K, SMLoc S, SMLoc E) - : Kind(K), StartLoc(S), EndLoc(E) {} + //ARMOperand(KindTy K, SMLoc S, SMLoc E) + // : Kind(K), StartLoc(S), EndLoc(E) {} ARMOperand(const ARMOperand &o) : MCParsedAsmOperand() { Kind = o.Kind; StartLoc = o.StartLoc; EndLoc = o.EndLoc; switch (Kind) { + case CondCode: + CC = o.CC; + break; case Token: - Tok = o.Tok; + Tok = o.Tok; break; case Register: Reg = o.Reg; @@ -179,6 +198,11 @@ public: /// getEndLoc - Get the location of the last token of this operand. SMLoc getEndLoc() const { return EndLoc; } + ARMCC::CondCodes getCondCode() const { + assert(Kind == CondCode && "Invalid access!"); + return CC.Val; + } + StringRef getToken() const { assert(Kind == Token && "Invalid access!"); return StringRef(Tok.Data, Tok.Length); @@ -194,15 +218,50 @@ public: return Imm.Val; } - bool isToken() const {return Kind == Token; } + bool isCondCode() const { return Kind == CondCode; } + + bool isImm() const { return Kind == Immediate; } bool isReg() const { return Kind == Register; } + bool isToken() const {return Kind == Token; } + + void addExpr(MCInst &Inst, const MCExpr *Expr) const { + // Add as immediates when possible. + if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) + Inst.addOperand(MCOperand::CreateImm(CE->getValue())); + else + Inst.addOperand(MCOperand::CreateExpr(Expr)); + } + + void addCondCodeOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(unsigned(getCondCode()))); + // FIXME: What belongs here? 
+ Inst.addOperand(MCOperand::CreateReg(0)); + } + void addRegOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateReg(getReg())); } + void addImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + addExpr(Inst, getImm()); + } + + virtual void dump(raw_ostream &OS) const; + + static void CreateCondCode(OwningPtr<ARMOperand> &Op, ARMCC::CondCodes CC, + SMLoc S) { + Op.reset(new ARMOperand); + Op->Kind = CondCode; + Op->CC.Val = CC; + Op->StartLoc = S; + Op->EndLoc = S; + } + static void CreateToken(OwningPtr<ARMOperand> &Op, StringRef Str, SMLoc S) { Op.reset(new ARMOperand); @@ -262,6 +321,33 @@ public: } // end anonymous namespace. +void ARMOperand::dump(raw_ostream &OS) const { + switch (Kind) { + case CondCode: + OS << ARMCondCodeToString(getCondCode()); + break; + case Immediate: + getImm()->print(OS); + break; + case Memory: + OS << "<memory>"; + break; + case Register: + OS << "<register " << getReg() << ">"; + break; + case Token: + OS << "'" << getToken() << "'"; + break; + } +} + +/// @name Auto-generated Match Functions +/// { + +static unsigned MatchRegisterName(StringRef Name); + +/// } + /// Try to parse a register name. The token must be an Identifier when called, /// and if it is a register name a Reg operand is created, the token is eaten /// and false is returned. Else true is returned and no token is eaten. @@ -548,77 +634,6 @@ bool ARMAsmParser::ParseShift(ShiftType &St, return false; } -/// A hack to allow some testing, to be replaced by a real table gen version. -int ARMAsmParser::MatchRegisterName(StringRef Name) { - if (Name == "r0" || Name == "R0") - return 0; - else if (Name == "r1" || Name == "R1") - return 1; - else if (Name == "r2" || Name == "R2") - return 2; - else if (Name == "r3" || Name == "R3") - return 3; - else if (Name == "r3" || Name == "R3") - return 3; - else if (Name == "r4" || Name == "R4") - return 4; - else if (Name == "r5" || Name == "R5") - return 5; - else if (Name == "r6" || Name == "R6") - return 6; - else if (Name == "r7" || Name == "R7") - return 7; - else if (Name == "r8" || Name == "R8") - return 8; - else if (Name == "r9" || Name == "R9") - return 9; - else if (Name == "r10" || Name == "R10") - return 10; - else if (Name == "r11" || Name == "R11" || Name == "fp") - return 11; - else if (Name == "r12" || Name == "R12" || Name == "ip") - return 12; - else if (Name == "r13" || Name == "R13" || Name == "sp") - return 13; - else if (Name == "r14" || Name == "R14" || Name == "lr") - return 14; - else if (Name == "r15" || Name == "R15" || Name == "pc") - return 15; - return -1; -} - -/// A hack to allow some testing, to be replaced by a real table gen version. -bool ARMAsmParser:: -MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCInst &Inst) { - ARMOperand &Op0 = *(ARMOperand*)Operands[0]; - assert(Op0.Kind == ARMOperand::Token && "First operand not a Token"); - StringRef Mnemonic = Op0.getToken(); - if (Mnemonic == "add" || - Mnemonic == "stmfd" || - Mnemonic == "str" || - Mnemonic == "ldmfd" || - Mnemonic == "ldr" || - Mnemonic == "mov" || - Mnemonic == "sub" || - Mnemonic == "bl" || - Mnemonic == "push" || - Mnemonic == "blx" || - Mnemonic == "pop") { - // Hard-coded to a valid instruction, till we have a real matcher. 
- Inst = MCInst(); - Inst.setOpcode(ARM::MOVr); - Inst.addOperand(MCOperand::CreateReg(2)); - Inst.addOperand(MCOperand::CreateReg(2)); - Inst.addOperand(MCOperand::CreateImm(0)); - Inst.addOperand(MCOperand::CreateImm(0)); - Inst.addOperand(MCOperand::CreateReg(0)); - return false; - } - - return true; -} - /// Parse an ARM instruction operand. For now this parses the operand regardless /// of the mnemonic. bool ARMAsmParser::ParseOperand(OwningPtr<ARMOperand> &Op) { @@ -661,12 +676,56 @@ bool ARMAsmParser::ParseOperand(OwningPtr<ARMOperand> &Op) { bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands) { OwningPtr<ARMOperand> Op; - ARMOperand::CreateToken(Op, Name, NameLoc); - + + // Create the leading tokens for the mnemonic, split by '.' characters. + size_t Start = 0, Next = Name.find('.'); + StringRef Head = Name.slice(Start, Next); + + // Determine the predicate, if any. + // + // FIXME: We need a way to check whether a prefix supports predication, + // otherwise we will end up with an ambiguity for instructions that happen to + // end with a predicate name. + unsigned CC = StringSwitch<unsigned>(Head.substr(Head.size()-2)) + .Case("eq", ARMCC::EQ) + .Case("ne", ARMCC::NE) + .Case("hs", ARMCC::HS) + .Case("lo", ARMCC::LO) + .Case("mi", ARMCC::MI) + .Case("pl", ARMCC::PL) + .Case("vs", ARMCC::VS) + .Case("vc", ARMCC::VC) + .Case("hi", ARMCC::HI) + .Case("ls", ARMCC::LS) + .Case("ge", ARMCC::GE) + .Case("lt", ARMCC::LT) + .Case("gt", ARMCC::GT) + .Case("le", ARMCC::LE) + .Case("al", ARMCC::AL) + .Default(~0U); + if (CC != ~0U) { + Head = Head.slice(0, Head.size() - 2); + } else + CC = ARMCC::AL; + + ARMOperand::CreateToken(Op, Head, NameLoc); Operands.push_back(Op.take()); - if (getLexer().isNot(AsmToken::EndOfStatement)) { + ARMOperand::CreateCondCode(Op, ARMCC::CondCodes(CC), NameLoc); + Operands.push_back(Op.take()); + + // Add the remaining tokens in the mnemonic. + while (Next != StringRef::npos) { + Start = Next; + Next = Name.find('.', Start + 1); + Head = Name.slice(Start, Next); + ARMOperand::CreateToken(Op, Head, NameLoc); + Operands.push_back(Op.take()); + } + + // Read the remaining operands. + if (getLexer().isNot(AsmToken::EndOfStatement)) { // Read the first operand.
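The StringSwitch above peels a trailing condition code off the mnemonic, which is exactly what the FIXME warns about: a mnemonic that merely ends in a condition-code spelling gets mis-split. A self-contained sketch of the same scan (illustrative, using std::string rather than StringRef); the operand loop then continues below:

#include <string>
#include <utility>

// Returns {bare mnemonic, cc}; cc defaults to "al" (always).
static std::pair<std::string, std::string>
splitCondCode(const std::string &Name) {
  static const char *const CCs[] = {"eq","ne","hs","lo","mi","pl","vs","vc",
                                    "hi","ls","ge","lt","gt","le","al"};
  if (Name.size() > 2) {
    std::string Tail = Name.substr(Name.size() - 2);
    for (const char *CC : CCs)
      if (Tail == CC)
        return {Name.substr(0, Name.size() - 2), Tail};
  }
  return {Name, "al"};
}

// splitCondCode("addeq") == {"add", "eq"}; the hazard: "teq" would split
// as {"t", "eq"}, which is the ambiguity the FIXME describes.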
OwningPtr<ARMOperand> Op; if (ParseOperand(Op)) return true; @@ -809,3 +868,5 @@ extern "C" void LLVMInitializeARMAsmParser() { RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget); LLVMInitializeARMAsmLexer(); } + +#include "ARMGenAsmMatcher.inc" diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp index edc934549b28..8026e7718ca9 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp @@ -158,7 +158,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) { if ((MI->getOpcode() == ARM::VSTMS_UPD || MI->getOpcode() ==ARM::VSTMD_UPD) && MI->getOperand(0).getReg() == ARM::SP) { const MCOperand &MO1 = MI->getOperand(2); - if (ARM_AM::getAM5SubMode(MO1.getImm()) == ARM_AM::db) { + if (ARM_AM::getAM4SubMode(MO1.getImm()) == ARM_AM::db) { O << '\t' << "vpush"; printPredicateOperand(MI, 3, O); O << '\t'; @@ -171,7 +171,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) { if ((MI->getOpcode() == ARM::VLDMS_UPD || MI->getOpcode() ==ARM::VLDMD_UPD) && MI->getOperand(0).getReg() == ARM::SP) { const MCOperand &MO1 = MI->getOperand(2); - if (ARM_AM::getAM5SubMode(MO1.getImm()) == ARM_AM::ia) { + if (ARM_AM::getAM4SubMode(MO1.getImm()) == ARM_AM::ia) { O << '\t' << "vpop"; printPredicateOperand(MI, 3, O); O << '\t'; @@ -278,15 +278,13 @@ void ARMInstPrinter::printSORegOperand(const MCInst *MI, unsigned OpNum, O << getRegisterName(MO1.getReg()); // Print the shift opc. - O << ", " - << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm())) - << ' '; - + ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO3.getImm()); + O << ", " << ARM_AM::getShiftOpcStr(ShOpc); if (MO2.getReg()) { - O << getRegisterName(MO2.getReg()); + O << ' ' << getRegisterName(MO2.getReg()); assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0); - } else { - O << "#" << ARM_AM::getSORegOffset(MO3.getImm()); + } else if (ShOpc != ARM_AM::rrx) { + O << " #" << ARM_AM::getSORegOffset(MO3.getImm()); } } @@ -414,16 +412,6 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, return; } - if (Modifier && strcmp(Modifier, "submode") == 0) { - ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MO2.getImm()); - O << ARM_AM::getAMSubModeStr(Mode); - return; - } else if (Modifier && strcmp(Modifier, "base") == 0) { - // Used for FSTM{D|S} and LSTM{D|S} operations. 
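The shifted-operand printers in these hunks now special-case rrx: it is a fixed one-bit rotate through the carry flag and takes no amount, so a suffix like "rrx #0" would be wrong. A sketch of the printing rule (illustrative helper, not the InstPrinter API):

#include <cstdio>

// Emits e.g. ", lsl #2" or ", rrx" after the register operand.
static void printShiftSuffix(const char *Opc, unsigned Imm, bool IsRRX) {
  if (IsRRX)
    printf(", %s", Opc);        // rrx never carries an immediate
  else
    printf(", %s #%u", Opc, Imm);
}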
- O << getRegisterName(MO1.getReg()); - return; - } - O << "[" << getRegisterName(MO1.getReg()); if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) { @@ -463,9 +451,9 @@ void ARMInstPrinter::printAddrModePCOperand(const MCInst *MI, unsigned OpNum, assert(0 && "FIXME: Implement printAddrModePCOperand"); } -void ARMInstPrinter::printBitfieldInvMaskImmOperand (const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { +void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); uint32_t v = ~MO.getImm(); int32_t lsb = CountTrailingZeros_32(v); @@ -474,6 +462,31 @@ void ARMInstPrinter::printBitfieldInvMaskImmOperand (const MCInst *MI, O << '#' << lsb << ", #" << width; } +void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + unsigned val = MI->getOperand(OpNum).getImm(); + O << ARM_MB::MemBOptToString(val); +} + +void ARMInstPrinter::printShiftImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + unsigned ShiftOp = MI->getOperand(OpNum).getImm(); + ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp); + switch (Opc) { + case ARM_AM::no_shift: + return; + case ARM_AM::lsl: + O << ", lsl #"; + break; + case ARM_AM::asr: + O << ", asr #"; + break; + default: + assert(0 && "unexpected shift opcode for shift immediate operand"); + } + O << ARM_AM::getSORegOffset(ShiftOp); +} + void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum, raw_ostream &O) { O << "{"; @@ -669,12 +682,11 @@ void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum, O << getRegisterName(Reg); // Print the shift opc. - O << ", " - << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm())) - << " "; - assert(MO2.isImm() && "Not a valid t2_so_reg value!"); - O << "#" << ARM_AM::getSORegOffset(MO2.getImm()); + ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm()); + O << ", " << ARM_AM::getShiftOpcStr(ShOpc); + if (ShOpc != ARM_AM::rrx) + O << " #" << ARM_AM::getSORegOffset(MO2.getImm()); } void ARMInstPrinter::printT2AddrModeImm12Operand(const MCInst *MI, diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h index ddf5047793d2..e5ad0d07e9ba 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h @@ -57,6 +57,8 @@ public: void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printMemBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printShiftImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printThumbITMask(const MCInst *MI, unsigned OpNum, raw_ostream &O); diff --git a/lib/Target/ARM/AsmPrinter/CMakeLists.txt b/lib/Target/ARM/AsmPrinter/CMakeLists.txt index 4e299f86ecb6..18645c0864a3 100644 --- a/lib/Target/ARM/AsmPrinter/CMakeLists.txt +++ b/lib/Target/ARM/AsmPrinter/CMakeLists.txt @@ -1,8 +1,6 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) add_llvm_library(LLVMARMAsmPrinter - ARMAsmPrinter.cpp ARMInstPrinter.cpp - ARMMCInstLower.cpp ) add_dependencies(LLVMARMAsmPrinter ARMCodeGenTable_gen) diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index 0df34666b959..6b4dee5965d2 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -7,25 +7,32 @@ tablegen(ARMGenInstrNames.inc -gen-instr-enums) tablegen(ARMGenInstrInfo.inc -gen-instr-desc) tablegen(ARMGenCodeEmitter.inc -gen-emitter) tablegen(ARMGenAsmWriter.inc -gen-asm-writer) +tablegen(ARMGenAsmMatcher.inc -gen-asm-matcher) tablegen(ARMGenDAGISel.inc -gen-dag-isel) +tablegen(ARMGenFastISel.inc -gen-fast-isel) tablegen(ARMGenCallingConv.inc -gen-callingconv) tablegen(ARMGenSubtarget.inc -gen-subtarget) tablegen(ARMGenEDInfo.inc -gen-enhanced-disassembly-info) add_llvm_target(ARMCodeGen + ARMAsmPrinter.cpp ARMBaseInstrInfo.cpp ARMBaseRegisterInfo.cpp ARMCodeEmitter.cpp ARMConstantIslandPass.cpp ARMConstantPoolValue.cpp ARMExpandPseudoInsts.cpp + ARMFastISel.cpp + ARMGlobalMerge.cpp ARMISelDAGToDAG.cpp ARMISelLowering.cpp ARMInstrInfo.cpp ARMJITInfo.cpp ARMLoadStoreOptimizer.cpp ARMMCAsmInfo.cpp + ARMMCInstLower.cpp ARMRegisterInfo.cpp + ARMSelectionDAGInfo.cpp ARMSubtarget.cpp ARMTargetMachine.cpp ARMTargetObjectFile.cpp @@ -38,7 +45,6 @@ add_llvm_target(ARMCodeGen Thumb2InstrInfo.cpp Thumb2RegisterInfo.cpp Thumb2SizeReduction.cpp - ARMSelectionDAGInfo.cpp ) -target_link_libraries (LLVMARMCodeGen LLVMSelectionDAG) +target_link_libraries (LLVMARMCodeGen LLVMARMAsmPrinter LLVMSelectionDAG) diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 4de697e8bf67..e22028985b46 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -26,6 +26,8 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +//#define DEBUG(X) do { X; } while (0) + /// ARMGenDecoderTables.inc - ARMDecoderTables.inc is tblgen'ed from /// ARMDecoderEmitter.cpp TableGen backend. It contains: /// @@ -87,6 +89,11 @@ static unsigned decodeARMInstruction(uint32_t &insn) { return ARM::BFI; } + // Ditto for STRBT, which is a super-instruction for A8.6.199 Encoding A1 & A2. + // As a result, the decoder fails to decode USAT properly. + if (slice(insn, 27, 21) == 0x37 && slice(insn, 5, 4) == 1) + return ARM::USAT; + // Ditto for ADDSrs, which is a super-instruction for A8.6.7 & A8.6.8. // As a result, the decoder fails to decode UMULL properly. if (slice(insn, 27, 21) == 0x04 && slice(insn, 7, 4) == 9) { @@ -106,7 +113,7 @@ static unsigned decodeARMInstruction(uint32_t &insn) { // Ditto for STRT, which is a super-instruction for A8.6.210 Encoding A1 & A2. // As a result, the decoder fails to decode SSAT properly. if (slice(insn, 27, 21) == 0x35 && slice(insn, 5, 4) == 1) - return slice(insn, 6, 6) == 0 ? ARM::SSATlsl : ARM::SSATasr; + return ARM::SSAT; // Ditto for RSCrs, which is a super-instruction for A8.6.146 & A8.6.147. // As a result, the decoder fails to decode STRHT/LDRHT/LDRSHT/LDRSBT. @@ -291,7 +298,7 @@ static unsigned T2Morph2LoadLiteral(unsigned Opcode) { /// decodeInstruction(insn) is invoked on the original insn. /// /// Otherwise, decodeThumbInstruction is called with the original insn.
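All of these decoder fix-ups pattern-match raw encodings with slice(), which extracts an inclusive bit range; the USAT check above, for instance, tests Inst{27-21} == 0x37 and Inst{5-4} == 1. A sketch of what slice() computes (assumes the field is narrower than 32 bits, which holds for every use here):

#include <cstdint>

// Extract bits [Hi:Lo] (inclusive) of a 32-bit encoding.
static inline unsigned slice(uint32_t Insn, unsigned Hi, unsigned Lo) {
  return (Insn >> Lo) & ((1u << (Hi - Lo + 1)) - 1);
}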
-static unsigned decodeThumbSideEffect(bool IsThumb2, uint32_t &insn) { +static unsigned decodeThumbSideEffect(bool IsThumb2, unsigned &insn) { if (IsThumb2) { uint16_t op1 = slice(insn, 28, 27); uint16_t op2 = slice(insn, 26, 20); @@ -429,7 +436,7 @@ bool ThumbDisassembler::getInstruction(MCInst &MI, // passed to decodeThumbInstruction(). For 16-bit Thumb instruction, the top // halfword of insn is 0x00 0x00; otherwise, the first halfword is moved to // the top half followed by the second halfword. - uint32_t insn = 0; + unsigned insn = 0; // Possible second halfword. uint16_t insn1 = 0; diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp index a07ff2832aa7..9f493b9aee02 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp @@ -20,6 +20,8 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +//#define DEBUG(X) do { X; } while (0) + /// ARMGenInstrInfo.inc - ARMGenInstrInfo.inc contains the static const /// TargetInstrDesc ARMInsts[] definition and the TargetOperandInfo[]'s /// describing the operand info for each ARMInsts[i]. @@ -93,6 +95,9 @@ static unsigned getRegisterEnum(BO B, unsigned RegClassID, unsigned RawRegister, RegClassID = ARM::DPRRegClassID; } + // For this purpose, we can treat rGPR as if it were GPR. + if (RegClassID == ARM::rGPRRegClassID) RegClassID = ARM::GPRRegClassID; + // See also decodeNEONRd(), decodeNEONRn(), decodeNEONRm(). unsigned RegNum = RegClassID == ARM::QPRRegClassID ? RawRegister >> 1 : RawRegister; @@ -451,12 +456,23 @@ static inline ARM_AM::ShiftOpc getShiftOpcForBits(unsigned bits) { // // A8-11: DecodeImmShift() static inline void getImmShiftSE(ARM_AM::ShiftOpc &ShOp, unsigned &ShImm) { - // If type == 0b11 and imm5 == 0, we have an rrx, instead. - if (ShOp == ARM_AM::ror && ShImm == 0) - ShOp = ARM_AM::rrx; - // If (lsr or asr) and imm5 == 0, shift amount is 32. 
- if ((ShOp == ARM_AM::lsr || ShOp == ARM_AM::asr) && ShImm == 0) + if (ShImm != 0) + return; + switch (ShOp) { + case ARM_AM::no_shift: + case ARM_AM::rrx: + break; + case ARM_AM::lsl: + ShOp = ARM_AM::no_shift; + break; + case ARM_AM::lsr: + case ARM_AM::asr: ShImm = 32; + break; + case ARM_AM::ror: + ShOp = ARM_AM::rrx; + break; + } } // getAMSubModeForBits - getAMSubModeForBits translates from the ARM encoding @@ -490,9 +506,6 @@ static inline ARM_AM::AMSubMode getAMSubModeForBits(unsigned bits) { static bool DisassemblePseudo(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO) { - if (Opcode == ARM::Int_MemBarrierV7 || Opcode == ARM::Int_SyncBarrierV7) - return true; - assert(0 && "Unexpected pseudo instruction!"); return false; } @@ -887,7 +900,6 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, return true; } - assert(0 && "Unexpected BrMiscFrm Opcode"); return false; } @@ -906,34 +918,6 @@ static inline bool getBFCInvMask(uint32_t insn, uint32_t &mask) { return true; } -static inline bool SaturateOpcode(unsigned Opcode) { - switch (Opcode) { - case ARM::SSATlsl: case ARM::SSATasr: case ARM::SSAT16: - case ARM::USATlsl: case ARM::USATasr: case ARM::USAT16: - return true; - default: - return false; - } -} - -static inline unsigned decodeSaturatePos(unsigned Opcode, uint32_t insn) { - switch (Opcode) { - case ARM::SSATlsl: - case ARM::SSATasr: - return slice(insn, 20, 16) + 1; - case ARM::SSAT16: - return slice(insn, 19, 16) + 1; - case ARM::USATlsl: - case ARM::USATasr: - return slice(insn, 20, 16); - case ARM::USAT16: - return slice(insn, 19, 16); - default: - assert(0 && "Invalid opcode passed in"); - return 0; - } -} - // A major complication is the fact that some of the saturating add/subtract // operations have Rd Rm Rn, instead of the "normal" Rd Rn Rm. // They are QADD, QDADD, QDSUB, and QSUB. @@ -959,40 +943,14 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (OpIdx >= NumOps) return false; - // SSAT/SSAT16/USAT/USAT16 has imm operand after Rd. - if (SaturateOpcode(Opcode)) { - MI.addOperand(MCOperand::CreateImm(decodeSaturatePos(Opcode, insn))); - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRm(insn)))); - - if (Opcode == ARM::SSAT16 || Opcode == ARM::USAT16) { - OpIdx += 2; - return true; - } - - // For SSAT operand reg (Rm) has been disassembled above. - // Now disassemble the shift amount. - - // Inst{11-7} encodes the imm5 shift amount. - unsigned ShAmt = slice(insn, 11, 7); - - // A8.6.183. Possible ASR shift amount of 32... - if (Opcode == ARM::SSATasr && ShAmt == 0) - ShAmt = 32; - - MI.addOperand(MCOperand::CreateImm(ShAmt)); - - OpIdx += 3; - return true; - } - // Special-case handling of BFC/BFI/SBFX/UBFX. if (Opcode == ARM::BFC || Opcode == ARM::BFI) { - // TIED_TO operand skipped for BFC and Inst{3-0} (Reg) for BFI. - MI.addOperand(MCOperand::CreateReg(Opcode == ARM::BFC ? 0 - : getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(0)); + if (Opcode == ARM::BFI) { + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); + ++OpIdx; + } uint32_t mask = 0; if (!getBFCInvMask(insn, mask)) return false; @@ -1498,13 +1456,55 @@ static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { // Extract the 5-bit immediate field Inst{11-7}. 
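// Aside: the rewritten getImmShiftSE() above is the ARM ARM's DecodeImmShift()
// (A8-11) in switch form: an explicit imm5 of 0 never means "shift by zero".
// A standalone sketch of the rule, with a local enum standing in for
// ARM_AM::ShiftOpc (illustrative names, not part of this patch):
namespace sketch {
enum ShiftKind { NoShift, LSL, LSR, ASR, ROR, RRX };

inline void decodeImmShift(ShiftKind &Op, unsigned &Imm) {
  if (Imm != 0)
    return;                        // a nonzero imm5 is taken literally
  switch (Op) {
  case LSL: Op = NoShift; break;   // "lsl #0" is no shift at all
  case LSR:                        // for lsr/asr, an encoded 0 means 32
  case ASR: Imm = 32;     break;
  case ROR: Op = RRX;     break;   // "ror #0" denotes rrx, which rotates
  default:                break;   // by one bit through carry
  }
}
} // namespace sketch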
unsigned ShiftAmt = (insn >> ARMII::ShiftShift) & 0x1F; - MI.addOperand(MCOperand::CreateImm(ShiftAmt)); + ARM_AM::ShiftOpc Opc = ARM_AM::no_shift; + if (Opcode == ARM::PKHBT) + Opc = ARM_AM::lsl; + else if (Opcode == ARM::PKHTB) + Opc = ARM_AM::asr; + getImmShiftSE(Opc, ShiftAmt); + MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShiftAmt))); ++OpIdx; } return true; } +/// DisassembleSatFrm - Disassemble saturate instructions: +/// SSAT, SSAT16, USAT, and USAT16. +static bool DisassembleSatFrm(MCInst &MI, unsigned Opcode, uint32_t insn, + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { + + const TargetInstrDesc &TID = ARMInsts[Opcode]; + NumOpsAdded = TID.getNumOperands() - 2; // ignore predicate operands + + // Disassemble register def. + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + decodeRd(insn)))); + + unsigned Pos = slice(insn, 20, 16); + if (Opcode == ARM::SSAT || Opcode == ARM::SSAT16) + Pos += 1; + MI.addOperand(MCOperand::CreateImm(Pos)); + + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + decodeRm(insn)))); + + if (NumOpsAdded == 4) { + ARM_AM::ShiftOpc Opc = (slice(insn, 6, 6) != 0 ? ARM_AM::asr : ARM_AM::lsl); + // Inst{11-7} encodes the imm5 shift amount. + unsigned ShAmt = slice(insn, 11, 7); + if (ShAmt == 0) { + // A8.6.183. Possible ASR shift amount of 32... + if (Opc == ARM_AM::asr) + ShAmt = 32; + else + Opc = ARM_AM::no_shift; + } + MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShAmt))); + } + return true; +} + // Extend instructions. // SXT* and UXT*: Rd [Rn] Rm [rot_imm]. // The 2nd operand register is Rn and the 3rd operand register is Rm for the @@ -1863,7 +1863,7 @@ static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, assert(NumOps >= 3 && "VFPLdStFrm expects NumOps >= 3"); - bool isSPVFP = (Opcode == ARM::VLDRS || Opcode == ARM::VSTRS) ? true : false; + bool isSPVFP = (Opcode == ARM::VLDRS || Opcode == ARM::VSTRS); unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID; // Extract Dd/Sd for operand 0. @@ -1886,7 +1886,7 @@ static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // VFP Load/Store Multiple Instructions. // This is similar to the algorithm for LDM/STM in that operand 0 (the base) and -// operand 1 (the AM5 mode imm) is followed by two predicate operands. It is +// operand 1 (the AM4 mode imm) are followed by two predicate operands. It is // followed by a reglist of either DPR(s) or SPR(s). // // VLDMD[_UPD], VLDMS[_UPD], VSTMD[_UPD], VSTMS[_UPD] @@ -1910,16 +1910,14 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MCOperand::CreateReg(Base)); - // Next comes the AM5 Opcode. + // Next comes the AM4 Opcode. ARM_AM::AMSubMode SubMode = getAMSubModeForBits(getPUBits(insn)); // Must be either "ia" or "db" submode. if (SubMode != ARM_AM::ia && SubMode != ARM_AM::db) { - DEBUG(errs() << "Illegal addressing mode 5 sub-mode!\n"); + DEBUG(errs() << "Illegal addressing mode 4 sub-mode!\n"); return false; } - - unsigned char Imm8 = insn & 0xFF; - MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(SubMode, Imm8))); + MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(SubMode))); // Handling the two predicate operands before the reglist.
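// Aside: the sat_pos computation in DisassembleSatFrm above follows the
// encoding rule that SSAT/SSAT16 store (saturate_to - 1) in the sat_imm
// field while USAT/USAT16 store saturate_to directly. A one-line standalone
// sketch (illustrative name, not part of this patch):
static inline unsigned satPosition(bool isSigned, unsigned satImm) {
  // SSAT saturates to a width in [1,32], USAT to a bit position in [0,31].
  return isSigned ? satImm + 1 : satImm;
}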
int64_t CondVal = insn >> ARMII::CondShift; @@ -1929,13 +1927,14 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, OpIdx += 4; bool isSPVFP = (Opcode == ARM::VLDMS || Opcode == ARM::VLDMS_UPD || - Opcode == ARM::VSTMS || Opcode == ARM::VSTMS_UPD) ? true : false; + Opcode == ARM::VSTMS || Opcode == ARM::VSTMS_UPD); unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID; // Extract Dd/Sd. unsigned RegD = decodeVFPRd(insn, isSPVFP); // Fill the variadic part of reglist. + unsigned char Imm8 = insn & 0xFF; unsigned Regs = isSPVFP ? Imm8 : Imm8/2; for (unsigned i = 0; i < Regs; ++i) { MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, @@ -2244,9 +2243,10 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, // We have homogeneous NEON registers for Load/Store. unsigned RegClass = 0; + bool DRegPair = UseDRegPair(Opcode); // Double-spaced registers have increments of 2. - unsigned Inc = DblSpaced ? 2 : 1; + unsigned Inc = (DblSpaced || DRegPair) ? 2 : 1; unsigned Rn = decodeRn(insn); unsigned Rm = decodeRm(insn); @@ -2292,8 +2292,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, RegClass = OpInfo[OpIdx].RegClass; while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) { MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, RegClass, Rd, - UseDRegPair(Opcode)))); + getRegisterEnum(B, RegClass, Rd, DRegPair))); Rd += Inc; ++OpIdx; } @@ -2312,8 +2311,7 @@ while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) { MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, RegClass, Rd, - UseDRegPair(Opcode)))); + getRegisterEnum(B, RegClass, Rd, DRegPair))); Rd += Inc; ++OpIdx; } @@ -2351,6 +2349,11 @@ } } + // Accessing registers past the end of the NEON register file is not + // defined. + if (Rd > 32) + return false; + return true; } @@ -2423,10 +2426,14 @@ static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode, break; case ARM::VMOVv4i16: case ARM::VMOVv8i16: + case ARM::VMVNv4i16: + case ARM::VMVNv8i16: esize = ESize16; break; case ARM::VMOVv2i32: case ARM::VMOVv4i32: + case ARM::VMVNv2i32: + case ARM::VMVNv4i32: esize = ESize32; break; case ARM::VMOVv1i64: @@ -2944,7 +2951,7 @@ static bool DisassembleNDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // A8.6.49 ISB static inline bool MemBarrierInstr(uint32_t insn) { unsigned op7_4 = slice(insn, 7, 4); - if (slice(insn, 31, 20) == 0xf57 && (op7_4 >= 4 && op7_4 <= 6)) + if (slice(insn, 31, 8) == 0xf57ff0 && (op7_4 >= 4 && op7_4 <= 6)) return true; return false; @@ -3001,8 +3008,15 @@ static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - if (MemBarrierInstr(insn)) + if (MemBarrierInstr(insn)) { + // DMBsy, DSBsy, and ISBsy instructions have zero operands and are taken care + // of within the generic ARMBasicMCBuilder::BuildIt() method. + // + // Inst{3-0} encodes the memory barrier option for the variants.
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 3, 0))); + NumOpsAdded = 1; return true; + } switch (Opcode) { case ARM::CLREX: @@ -3073,6 +3087,7 @@ static const DisassembleFP FuncPtrs[] = { &DisassembleLdStMulFrm, &DisassembleLdStExFrm, &DisassembleArithMiscFrm, + &DisassembleSatFrm, &DisassembleExtFrm, &DisassembleVFPUnaryFrm, &DisassembleVFPBinaryFrm, diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h index 7d21256a14f9..9c30d332d1f2 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h +++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h @@ -23,7 +23,8 @@ #include "llvm/MC/MCInst.h" #include "llvm/Target/TargetInstrInfo.h" -#include "ARMInstrInfo.h" +#include "ARMBaseInstrInfo.h" +#include "ARMRegisterInfo.h" #include "ARMDisassembler.h" namespace llvm { @@ -53,36 +54,35 @@ public: ENTRY(ARM_FORMAT_LDSTMULFRM, 10) \ ENTRY(ARM_FORMAT_LDSTEXFRM, 11) \ ENTRY(ARM_FORMAT_ARITHMISCFRM, 12) \ - ENTRY(ARM_FORMAT_EXTFRM, 13) \ - ENTRY(ARM_FORMAT_VFPUNARYFRM, 14) \ - ENTRY(ARM_FORMAT_VFPBINARYFRM, 15) \ - ENTRY(ARM_FORMAT_VFPCONV1FRM, 16) \ - ENTRY(ARM_FORMAT_VFPCONV2FRM, 17) \ - ENTRY(ARM_FORMAT_VFPCONV3FRM, 18) \ - ENTRY(ARM_FORMAT_VFPCONV4FRM, 19) \ - ENTRY(ARM_FORMAT_VFPCONV5FRM, 20) \ - ENTRY(ARM_FORMAT_VFPLDSTFRM, 21) \ - ENTRY(ARM_FORMAT_VFPLDSTMULFRM, 22) \ - ENTRY(ARM_FORMAT_VFPMISCFRM, 23) \ - ENTRY(ARM_FORMAT_THUMBFRM, 24) \ - ENTRY(ARM_FORMAT_NEONFRM, 25) \ - ENTRY(ARM_FORMAT_NEONGETLNFRM, 26) \ - ENTRY(ARM_FORMAT_NEONSETLNFRM, 27) \ - ENTRY(ARM_FORMAT_NEONDUPFRM, 28) \ - ENTRY(ARM_FORMAT_MISCFRM, 29) \ - ENTRY(ARM_FORMAT_THUMBMISCFRM, 30) \ - ENTRY(ARM_FORMAT_NLdSt, 31) \ - ENTRY(ARM_FORMAT_N1RegModImm, 32) \ - ENTRY(ARM_FORMAT_N2Reg, 33) \ - ENTRY(ARM_FORMAT_NVCVT, 34) \ - ENTRY(ARM_FORMAT_NVecDupLn, 35) \ - ENTRY(ARM_FORMAT_N2RegVecShL, 36) \ - ENTRY(ARM_FORMAT_N2RegVecShR, 37) \ - ENTRY(ARM_FORMAT_N3Reg, 38) \ - ENTRY(ARM_FORMAT_N3RegVecSh, 39) \ - ENTRY(ARM_FORMAT_NVecExtract, 40) \ - ENTRY(ARM_FORMAT_NVecMulScalar, 41) \ - ENTRY(ARM_FORMAT_NVTBL, 42) + ENTRY(ARM_FORMAT_SATFRM, 13) \ + ENTRY(ARM_FORMAT_EXTFRM, 14) \ + ENTRY(ARM_FORMAT_VFPUNARYFRM, 15) \ + ENTRY(ARM_FORMAT_VFPBINARYFRM, 16) \ + ENTRY(ARM_FORMAT_VFPCONV1FRM, 17) \ + ENTRY(ARM_FORMAT_VFPCONV2FRM, 18) \ + ENTRY(ARM_FORMAT_VFPCONV3FRM, 19) \ + ENTRY(ARM_FORMAT_VFPCONV4FRM, 20) \ + ENTRY(ARM_FORMAT_VFPCONV5FRM, 21) \ + ENTRY(ARM_FORMAT_VFPLDSTFRM, 22) \ + ENTRY(ARM_FORMAT_VFPLDSTMULFRM, 23) \ + ENTRY(ARM_FORMAT_VFPMISCFRM, 24) \ + ENTRY(ARM_FORMAT_THUMBFRM, 25) \ + ENTRY(ARM_FORMAT_MISCFRM, 26) \ + ENTRY(ARM_FORMAT_NEONGETLNFRM, 27) \ + ENTRY(ARM_FORMAT_NEONSETLNFRM, 28) \ + ENTRY(ARM_FORMAT_NEONDUPFRM, 29) \ + ENTRY(ARM_FORMAT_NLdSt, 30) \ + ENTRY(ARM_FORMAT_N1RegModImm, 31) \ + ENTRY(ARM_FORMAT_N2Reg, 32) \ + ENTRY(ARM_FORMAT_NVCVT, 33) \ + ENTRY(ARM_FORMAT_NVecDupLn, 34) \ + ENTRY(ARM_FORMAT_N2RegVecShL, 35) \ + ENTRY(ARM_FORMAT_N2RegVecShR, 36) \ + ENTRY(ARM_FORMAT_N3Reg, 37) \ + ENTRY(ARM_FORMAT_N3RegVecSh, 38) \ + ENTRY(ARM_FORMAT_NVecExtract, 39) \ + ENTRY(ARM_FORMAT_NVecMulScalar, 40) \ + ENTRY(ARM_FORMAT_NVTBL, 41) // ARM instruction format specifies the encoding used by the instruction. #define ENTRY(n, v) n = v, @@ -126,8 +126,8 @@ static inline unsigned slice(uint32_t Bits, unsigned From, unsigned To) { } /// Utility function for setting [From, To] bits to Val for a uint32_t. 
-static inline void setSlice(uint32_t &Bits, unsigned From, unsigned To, - uint32_t Val) { +static inline void setSlice(unsigned &Bits, unsigned From, unsigned To, + unsigned Val) { assert(From < 32 && To < 32 && From >= To); uint32_t Mask = ((1 << (From - To + 1)) - 1); Bits &= ~(Mask << To); diff --git a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h index 4b7a0bf6fdb9..112817b13cf9 100644 --- a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h +++ b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h @@ -103,7 +103,7 @@ static inline unsigned getT1Cond(uint32_t insn) { } static inline bool IsGPR(unsigned RegClass) { - return RegClass == ARM::GPRRegClassID; + return RegClass == ARM::GPRRegClassID || RegClass == ARM::rGPRRegClassID; } // Utilities for 32-bit Thumb instructions. @@ -220,7 +220,7 @@ static inline unsigned decodeImmShift(unsigned bits2, unsigned imm5, switch (bits2) { default: assert(0 && "No such value"); case 0: - ShOp = ARM_AM::lsl; + ShOp = (imm5 == 0 ? ARM_AM::no_shift : ARM_AM::lsl); return imm5; case 1: ShOp = ARM_AM::lsr; @@ -1324,7 +1324,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, && OpInfo[1].RegClass == ARM::GPRRegClassID && OpInfo[2].RegClass < 0 && OpInfo[3].RegClass < 0 - && "Exactlt 4 operands expect and first two as reg operands"); + && "Exactly 4 operands expected and first two as reg operands"); // Only need to populate the src reg operand. MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); @@ -1338,17 +1338,20 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, OpIdx = 0; assert(NumOps >= 2 - && OpInfo[0].RegClass == ARM::GPRRegClassID - && OpInfo[1].RegClass == ARM::GPRRegClassID + && (OpInfo[0].RegClass == ARM::GPRRegClassID || + OpInfo[0].RegClass == ARM::rGPRRegClassID) + && (OpInfo[1].RegClass == ARM::GPRRegClassID || + OpInfo[1].RegClass == ARM::rGPRRegClassID) && "Expect >= 2 operands and first two as reg operands"); - bool ThreeReg = (NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID); + bool ThreeReg = (NumOps > 2 && (OpInfo[2].RegClass == ARM::GPRRegClassID || + OpInfo[2].RegClass == ARM::rGPRRegClassID)); bool NoDstReg = (decodeRs(insn) == 0xF); // Build the register operands, followed by the constant shift specifier. MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, ARM::GPRRegClassID, + getRegisterEnum(B, OpInfo[0].RegClass, NoDstReg ?
decodeRn(insn) : decodeRs(insn)))); ++OpIdx; @@ -1359,7 +1362,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MI.getOperand(Idx)); ++OpIdx; } else if (!NoDstReg) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[1].RegClass, decodeRn(insn)))); ++OpIdx; } else { @@ -1368,7 +1371,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, } } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, decodeRm(insn)))); ++OpIdx; @@ -1386,14 +1389,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned imm5 = getShiftAmtBits(insn); ARM_AM::ShiftOpc ShOp = ARM_AM::no_shift; unsigned ShAmt = decodeImmShift(bits2, imm5, ShOp); - - // PKHBT/PKHTB are special in that we need the decodeImmShift() call to - // decode the shift amount from raw imm5 and bits2, but we DO NOT need - // to encode the ShOp, as it's in the asm string already. - if (Opcode == ARM::t2PKHBT || Opcode == ARM::t2PKHTB) - MI.addOperand(MCOperand::CreateImm(ShAmt)); - else - MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(ShOp, ShAmt))); + MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(ShOp, ShAmt))); } ++OpIdx; } @@ -1416,16 +1412,20 @@ static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode, OpIdx = 0; - assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::GPRRegClassID + unsigned RdRegClassID = OpInfo[0].RegClass; + assert(NumOps >= 2 && (RdRegClassID == ARM::GPRRegClassID || + RdRegClassID == ARM::rGPRRegClassID) && "Expect >= 2 operands and first one as reg operand"); - bool TwoReg = (OpInfo[1].RegClass == ARM::GPRRegClassID); + unsigned RnRegClassID = OpInfo[1].RegClass; + bool TwoReg = (RnRegClassID == ARM::GPRRegClassID + || RnRegClassID == ARM::rGPRRegClassID); bool NoDstReg = (decodeRs(insn) == 0xF); // Build the register operands, followed by the modified immediate. MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, ARM::GPRRegClassID, + getRegisterEnum(B, RdRegClassID, NoDstReg ? 
decodeRn(insn) : decodeRs(insn)))); ++OpIdx; @@ -1434,7 +1434,7 @@ static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode, DEBUG(errs()<<"Thumb2 encoding error: d==15 for DPModImm 2-reg instr.\n"); return false; } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID, decodeRn(insn)))); ++OpIdx; } @@ -1455,30 +1455,48 @@ static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode, static inline bool Thumb2SaturateOpcode(unsigned Opcode) { switch (Opcode) { - case ARM::t2SSATlsl: case ARM::t2SSATasr: case ARM::t2SSAT16: - case ARM::t2USATlsl: case ARM::t2USATasr: case ARM::t2USAT16: + case ARM::t2SSAT: case ARM::t2SSAT16: + case ARM::t2USAT: case ARM::t2USAT16: return true; default: return false; } } -static inline unsigned decodeThumb2SaturatePos(unsigned Opcode, uint32_t insn) { - switch (Opcode) { - case ARM::t2SSATlsl: - case ARM::t2SSATasr: - return slice(insn, 4, 0) + 1; - case ARM::t2SSAT16: - return slice(insn, 3, 0) + 1; - case ARM::t2USATlsl: - case ARM::t2USATasr: - return slice(insn, 4, 0); - case ARM::t2USAT16: - return slice(insn, 3, 0); - default: - assert(0 && "Unexpected opcode"); - return 0; +/// DisassembleThumb2Sat - Disassemble Thumb2 saturate instructions: +/// o t2SSAT, t2USAT: Rs sat_pos Rn shamt +/// o t2SSAT16, t2USAT16: Rs sat_pos Rn +static bool DisassembleThumb2Sat(MCInst &MI, unsigned Opcode, uint32_t insn, + unsigned &NumOpsAdded, BO B) { + const TargetInstrDesc &TID = ARMInsts[Opcode]; + NumOpsAdded = TID.getNumOperands() - 2; // ignore predicate operands + + // Disassemble the register def. + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, + decodeRs(insn)))); + + unsigned Pos = slice(insn, 4, 0); + if (Opcode == ARM::t2SSAT || Opcode == ARM::t2SSAT16) + Pos += 1; + MI.addOperand(MCOperand::CreateImm(Pos)); + + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, + decodeRn(insn)))); + + if (NumOpsAdded == 4) { + ARM_AM::ShiftOpc Opc = (slice(insn, 21, 21) != 0 ? + ARM_AM::asr : ARM_AM::lsl); + // Inst{14-12:7-6} encodes the imm5 shift amount. 
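// Aside: Thumb2 splits the 5-bit shift amount across two fields, imm3 in
// Inst{14-12} and imm2 in Inst{7-6}; the code below reassembles imm5 as
// imm3:imm2. The same reassembly as a standalone sketch (illustrative name,
// not part of this patch):
static inline unsigned thumb2ShiftAmt(unsigned insn) {
  unsigned imm3 = (insn >> 12) & 0x7;  // Inst{14-12}
  unsigned imm2 = (insn >> 6) & 0x3;   // Inst{7-6}
  return (imm3 << 2) | imm2;           // imm5 = imm3:imm2
}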
+ unsigned ShAmt = slice(insn, 14, 12) << 2 | slice(insn, 7, 6); + if (ShAmt == 0) { + if (Opc == ARM_AM::asr) + ShAmt = 32; + else + Opc = ARM_AM::no_shift; + } + MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShAmt))); } + return true; } // A6.3.3 Data-processing (plain binary immediate) @@ -1492,11 +1510,6 @@ static inline unsigned decodeThumb2SaturatePos(unsigned Opcode, uint32_t insn) { // o t2SBFX (SBFX): Rs Rn lsb width // o t2UBFX (UBFX): Rs Rn lsb width // o t2BFI (BFI): Rs Rn lsb width -// -// [Signed|Unsigned] Saturate [16] -// -// o t2SSAT[lsl|asr], t2USAT[lsl|asr]: Rs sat_pos Rn shamt -// o t2SSAT16, t2USAT16: Rs sat_pos Rn static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { @@ -1506,41 +1519,21 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, OpIdx = 0; - assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::GPRRegClassID + unsigned RdRegClassID = OpInfo[0].RegClass; + assert(NumOps >= 2 && (RdRegClassID == ARM::GPRRegClassID || + RdRegClassID == ARM::rGPRRegClassID) && "Expect >= 2 operands and first one as reg operand"); - bool TwoReg = (OpInfo[1].RegClass == ARM::GPRRegClassID); + unsigned RnRegClassID = OpInfo[1].RegClass; + bool TwoReg = (RnRegClassID == ARM::GPRRegClassID + || RnRegClassID == ARM::rGPRRegClassID); // Build the register operand(s), followed by the immediate(s). - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RdRegClassID, decodeRs(insn)))); ++OpIdx; - // t2SSAT/t2SSAT16/t2USAT/t2USAT16 has imm operand after Rd. - if (Thumb2SaturateOpcode(Opcode)) { - MI.addOperand(MCOperand::CreateImm(decodeThumb2SaturatePos(Opcode, insn))); - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); - - if (Opcode == ARM::t2SSAT16 || Opcode == ARM::t2USAT16) { - OpIdx += 2; - return true; - } - - // For SSAT operand reg (Rn) has been disassembled above. - // Now disassemble the shift amount. - - // Inst{14-12:7-6} encodes the imm5 shift amount. - unsigned ShAmt = slice(insn, 14, 12) << 2 | slice(insn, 7, 6); - - MI.addOperand(MCOperand::CreateImm(ShAmt)); - - OpIdx += 3; - return true; - } - if (TwoReg) { assert(NumOps >= 3 && "Expect >= 3 operands"); int Idx; @@ -1549,12 +1542,19 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, MI.addOperand(MI.getOperand(Idx)); } else { // Add src reg operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID, decodeRn(insn)))); } ++OpIdx; } + if (Opcode == ARM::t2BFI) { + // Add val reg operand. 
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID, + decodeRn(insn)))); + ++OpIdx; + } + assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef() && "Pure imm operand expected"); @@ -1567,7 +1567,7 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, MI.addOperand(MCOperand::CreateImm(getIImm3Imm8(insn))); else if (Opcode == ARM::t2MOVi16 || Opcode == ARM::t2MOVTi16) MI.addOperand(MCOperand::CreateImm(getImm16(insn))); - else if (Opcode == ARM::t2BFC) { + else if (Opcode == ARM::t2BFC || Opcode == ARM::t2BFI) { uint32_t mask = 0; if (getBitfieldInvMask(insn, mask)) MI.addOperand(MCOperand::CreateImm(mask)); @@ -1575,17 +1575,10 @@ return false; } else { // Handle the case of: lsb width - assert((Opcode == ARM::t2SBFX || Opcode == ARM::t2UBFX || - Opcode == ARM::t2BFI) && "Unexpected opcode"); + assert((Opcode == ARM::t2SBFX || Opcode == ARM::t2UBFX) + && "Unexpected opcode"); MI.addOperand(MCOperand::CreateImm(getLsb(insn))); - if (Opcode == ARM::t2BFI) { - if (getMsb(insn) < getLsb(insn)) { - DEBUG(errs() << "Encoding error: msb < lsb\n"); - return false; - } - MI.addOperand(MCOperand::CreateImm(getMsb(insn) - getLsb(insn) + 1)); - } else - MI.addOperand(MCOperand::CreateImm(getWidthMinus1(insn) + 1)); + MI.addOperand(MCOperand::CreateImm(getWidthMinus1(insn) + 1)); ++OpIdx; } @@ -1618,8 +1611,8 @@ static inline bool t2MiscCtrlInstr(uint32_t insn) { // A8.6.26 // t2BXJ -> Rn // -// Miscellaneous control: t2Int_MemBarrierV7 (and its t2DMB variants), -// t2Int_SyncBarrierV7 (and its t2DSB varianst), t2ISBsy, t2CLREX +// Miscellaneous control: t2DMBsy (and its t2DMB variants), +// t2DSBsy (and its t2DSB variants), t2ISBsy, t2CLREX // -> no operand (except pred-imm pred-ccr for CLREX, memory barrier variants) // // Hint: t2NOP, t2YIELD, t2WFE, t2WFI, t2SEV @@ -1959,25 +1952,25 @@ static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn, OpIdx = 0; assert(NumOps >= 2 && - OpInfo[0].RegClass == ARM::GPRRegClassID && - OpInfo[1].RegClass == ARM::GPRRegClassID && + OpInfo[0].RegClass == ARM::rGPRRegClassID && + OpInfo[1].RegClass == ARM::rGPRRegClassID && "Expect >= 2 operands and first two as reg operands"); // Build the register operands, followed by the optional rotation amount.
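// Aside: the t2BFC/t2BFI path above stores the bitfield as an inverted mask
// immediate, which printBitfieldInvMaskImmOperand() later turns back into
// "#lsb, #width" via count-trailing-zeros and popcount. A standalone sketch
// of building that mask (illustrative name, not part of this patch):
static inline unsigned bitfieldInvMask(unsigned lsb, unsigned width) {
  unsigned field = (width >= 32 ? ~0u : ((1u << width) - 1)) << lsb;
  return ~field;  // BFC/BFI carry the complement of the field mask
}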
- bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID; + bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::rGPRRegClassID; - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, decodeRs(insn)))); ++OpIdx; if (ThreeReg) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, decodeRn(insn)))); ++OpIdx; } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, decodeRm(insn)))); ++OpIdx; @@ -2009,26 +2002,26 @@ static bool DisassembleThumb2Mul(MCInst &MI, unsigned Opcode, uint32_t insn, const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; assert(NumOps >= 3 && - OpInfo[0].RegClass == ARM::GPRRegClassID && - OpInfo[1].RegClass == ARM::GPRRegClassID && - OpInfo[2].RegClass == ARM::GPRRegClassID && + OpInfo[0].RegClass == ARM::rGPRRegClassID && + OpInfo[1].RegClass == ARM::rGPRRegClassID && + OpInfo[2].RegClass == ARM::rGPRRegClassID && "Expect >= 3 operands and first three as reg operands"); // Build the register operands. - bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::GPRRegClassID; + bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::rGPRRegClassID; - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, decodeRs(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, decodeRn(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, decodeRm(insn)))); if (FourReg) - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, decodeRd(insn)))); NumOpsAdded = FourReg ? 4 : 3; @@ -2054,26 +2047,26 @@ static bool DisassembleThumb2LongMul(MCInst &MI, unsigned Opcode, uint32_t insn, const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; assert(NumOps >= 3 && - OpInfo[0].RegClass == ARM::GPRRegClassID && - OpInfo[1].RegClass == ARM::GPRRegClassID && - OpInfo[2].RegClass == ARM::GPRRegClassID && + OpInfo[0].RegClass == ARM::rGPRRegClassID && + OpInfo[1].RegClass == ARM::rGPRRegClassID && + OpInfo[2].RegClass == ARM::rGPRRegClassID && "Expect >= 3 operands and first three as reg operands"); - bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::GPRRegClassID; + bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::rGPRRegClassID; // Build the register operands. 
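// Aside: the Thumb2 multiply forms above use rGPR, the restricted GPR class
// (GPR without SP and PC), since these encodings are unpredictable with
// r13/r15 operands. The constraint as a standalone sketch in ARM register
// numbering (13 = SP, 15 = PC; illustrative name, not part of this patch):
static inline bool isRestrictedGPRNum(unsigned RegNo) {
  return RegNo <= 12 || RegNo == 14;  // r0-r12 and lr
}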
if (FourReg) - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, decodeRd(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, decodeRs(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, decodeRn(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, decodeRm(insn)))); if (FourReg) @@ -2152,22 +2145,20 @@ static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op, break; case 2: if (op == 0) { - if (slice(op2, 5, 5) == 0) { + if (slice(op2, 5, 5) == 0) // Data-processing (modified immediate) return DisassembleThumb2DPModImm(MI, Opcode, insn, NumOps, NumOpsAdded, B); - } else { - // Data-processing (plain binary immediate) - return DisassembleThumb2DPBinImm(MI, Opcode, insn, NumOps, NumOpsAdded, - B); - } - } else { - // Branches and miscellaneous control on page A6-20. - return DisassembleThumb2BrMiscCtrl(MI, Opcode, insn, NumOps, NumOpsAdded, - B); - } + if (Thumb2SaturateOpcode(Opcode)) + return DisassembleThumb2Sat(MI, Opcode, insn, NumOpsAdded, B); - break; + // Data-processing (plain binary immediate) + return DisassembleThumb2DPBinImm(MI, Opcode, insn, NumOps, NumOpsAdded, + B); + } + // Branches and miscellaneous control on page A6-20. + return DisassembleThumb2BrMiscCtrl(MI, Opcode, insn, NumOps, NumOpsAdded, + B); case 3: switch (slice(op2, 6, 5)) { case 0: diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile index 9e3ff29e07c4..b3fcfaf6bda7 100644 --- a/lib/Target/ARM/Makefile +++ b/lib/Target/ARM/Makefile @@ -14,10 +14,11 @@ TARGET = ARM # Make sure that tblgen is run, first thing. 
BUILT_SOURCES = ARMGenRegisterInfo.h.inc ARMGenRegisterNames.inc \ ARMGenRegisterInfo.inc ARMGenInstrNames.inc \ - ARMGenInstrInfo.inc ARMGenAsmWriter.inc \ + ARMGenInstrInfo.inc ARMGenAsmWriter.inc ARMGenAsmMatcher.inc \ ARMGenDAGISel.inc ARMGenSubtarget.inc \ ARMGenCodeEmitter.inc ARMGenCallingConv.inc \ - ARMGenDecoderTables.inc ARMGenEDInfo.inc + ARMGenDecoderTables.inc ARMGenEDInfo.inc \ + ARMGenFastISel.inc DIRS = AsmPrinter AsmParser Disassembler TargetInfo diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp index bbdd3c7f7c3e..97e54bfaed9e 100644 --- a/lib/Target/ARM/NEONMoveFix.cpp +++ b/lib/Target/ARM/NEONMoveFix.cpp @@ -24,7 +24,7 @@ STATISTIC(NumVMovs, "Number of reg-reg moves converted"); namespace { struct NEONMoveFixPass : public MachineFunctionPass { static char ID; - NEONMoveFixPass() : MachineFunctionPass(&ID) {} + NEONMoveFixPass() : MachineFunctionPass(ID) {} virtual bool runOnMachineFunction(MachineFunction &Fn); diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp index f67717cdd56f..3407ac6fe08e 100644 --- a/lib/Target/ARM/NEONPreAllocPass.cpp +++ b/lib/Target/ARM/NEONPreAllocPass.cpp @@ -23,7 +23,7 @@ namespace { public: static char ID; - NEONPreAllocPass() : MachineFunctionPass(&ID) {} + NEONPreAllocPass() : MachineFunctionPass(ID) {} virtual bool runOnMachineFunction(MachineFunction &MF); @@ -51,13 +51,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, default: break; - case ARM::VLD1q8: - case ARM::VLD1q16: - case ARM::VLD1q32: - case ARM::VLD1q64: - case ARM::VLD2d8: - case ARM::VLD2d16: - case ARM::VLD2d32: case ARM::VLD2LNd8: case ARM::VLD2LNd16: case ARM::VLD2LNd32: @@ -65,13 +58,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, NumRegs = 2; return true; - case ARM::VLD2q8: - case ARM::VLD2q16: - case ARM::VLD2q32: - FirstOpnd = 0; - NumRegs = 4; - return true; - case ARM::VLD2LNq16: case ARM::VLD2LNq32: FirstOpnd = 0; @@ -88,10 +74,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, Stride = 2; return true; - case ARM::VLD3d8: - case ARM::VLD3d16: - case ARM::VLD3d32: - case ARM::VLD1d64T: case ARM::VLD3LNd8: case ARM::VLD3LNd16: case ARM::VLD3LNd32: @@ -99,24 +81,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, NumRegs = 3; return true; - case ARM::VLD3q8_UPD: - case ARM::VLD3q16_UPD: - case ARM::VLD3q32_UPD: - FirstOpnd = 0; - NumRegs = 3; - Offset = 0; - Stride = 2; - return true; - - case ARM::VLD3q8odd_UPD: - case ARM::VLD3q16odd_UPD: - case ARM::VLD3q32odd_UPD: - FirstOpnd = 0; - NumRegs = 3; - Offset = 1; - Stride = 2; - return true; - case ARM::VLD3LNq16: case ARM::VLD3LNq32: FirstOpnd = 0; @@ -133,10 +97,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, Stride = 2; return true; - case ARM::VLD4d8: - case ARM::VLD4d16: - case ARM::VLD4d32: - case ARM::VLD1d64Q: case ARM::VLD4LNd8: case ARM::VLD4LNd16: case ARM::VLD4LNd32: @@ -144,24 +104,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, NumRegs = 4; return true; - case ARM::VLD4q8_UPD: - case ARM::VLD4q16_UPD: - case ARM::VLD4q32_UPD: - FirstOpnd = 0; - NumRegs = 4; - Offset = 0; - Stride = 2; - return true; - - case ARM::VLD4q8odd_UPD: - case ARM::VLD4q16odd_UPD: - case ARM::VLD4q32odd_UPD: - FirstOpnd = 0; - NumRegs = 4; - Offset = 1; - Stride = 2; - return true; - case ARM::VLD4LNq16: case ARM::VLD4LNq32: FirstOpnd = 0; @@ 
-178,13 +120,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, Stride = 2; return true; - case ARM::VST1q8: - case ARM::VST1q16: - case ARM::VST1q32: - case ARM::VST1q64: - case ARM::VST2d8: - case ARM::VST2d16: - case ARM::VST2d32: case ARM::VST2LNd8: case ARM::VST2LNd16: case ARM::VST2LNd32: @@ -192,13 +127,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, NumRegs = 2; return true; - case ARM::VST2q8: - case ARM::VST2q16: - case ARM::VST2q32: - FirstOpnd = 2; - NumRegs = 4; - return true; - case ARM::VST2LNq16: case ARM::VST2LNq32: FirstOpnd = 2; @@ -215,10 +143,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, Stride = 2; return true; - case ARM::VST3d8: - case ARM::VST3d16: - case ARM::VST3d32: - case ARM::VST1d64T: case ARM::VST3LNd8: case ARM::VST3LNd16: case ARM::VST3LNd32: @@ -226,24 +150,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, NumRegs = 3; return true; - case ARM::VST3q8_UPD: - case ARM::VST3q16_UPD: - case ARM::VST3q32_UPD: - FirstOpnd = 4; - NumRegs = 3; - Offset = 0; - Stride = 2; - return true; - - case ARM::VST3q8odd_UPD: - case ARM::VST3q16odd_UPD: - case ARM::VST3q32odd_UPD: - FirstOpnd = 4; - NumRegs = 3; - Offset = 1; - Stride = 2; - return true; - case ARM::VST3LNq16: case ARM::VST3LNq32: FirstOpnd = 2; @@ -260,10 +166,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, Stride = 2; return true; - case ARM::VST4d8: - case ARM::VST4d16: - case ARM::VST4d32: - case ARM::VST1d64Q: case ARM::VST4LNd8: case ARM::VST4LNd16: case ARM::VST4LNd32: @@ -271,24 +173,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, NumRegs = 4; return true; - case ARM::VST4q8_UPD: - case ARM::VST4q16_UPD: - case ARM::VST4q32_UPD: - FirstOpnd = 4; - NumRegs = 4; - Offset = 0; - Stride = 2; - return true; - - case ARM::VST4q8odd_UPD: - case ARM::VST4q16odd_UPD: - case ARM::VST4q32odd_UPD: - FirstOpnd = 4; - NumRegs = 4; - Offset = 1; - Stride = 2; - return true; - case ARM::VST4LNq16: case ARM::VST4LNq32: FirstOpnd = 2; @@ -468,7 +352,34 @@ bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) { continue; if (FormsRegSequence(MI, FirstOpnd, NumRegs, Offset, Stride)) continue; - llvm_unreachable("expected a REG_SEQUENCE"); + + MachineBasicBlock::iterator NextI = llvm::next(MBBI); + for (unsigned R = 0; R < NumRegs; ++R) { + MachineOperand &MO = MI->getOperand(FirstOpnd + R); + assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand"); + unsigned VirtReg = MO.getReg(); + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "expected a virtual register"); + + // For now, just assign a fixed set of adjacent registers. + // This leaves plenty of room for future improvements. + static const unsigned NEONDRegs[] = { + ARM::D0, ARM::D1, ARM::D2, ARM::D3, + ARM::D4, ARM::D5, ARM::D6, ARM::D7 + }; + MO.setReg(NEONDRegs[Offset + R * Stride]); + + if (MO.isUse()) { + // Insert a copy from VirtReg. + BuildMI(MBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),MO.getReg()) + .addReg(VirtReg, getKillRegState(MO.isKill())); + MO.setIsKill(); + } else if (MO.isDef() && !MO.isDead()) { + // Add a copy to VirtReg. 
+ BuildMI(MBB, NextI, DebugLoc(), TII->get(TargetOpcode::COPY), VirtReg) + .addReg(MO.getReg()); + } + } } return Modified; diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt index 0cb8ff01181d..9fc3fb92cb2c 100644 --- a/lib/Target/ARM/README.txt +++ b/lib/Target/ARM/README.txt @@ -611,27 +611,6 @@ constant which was already loaded). Not sure what's necessary to do that. //===---------------------------------------------------------------------===// -Given the following on ARMv7: -int test1(int A, int B) { - return (A&-8388481)|(B&8388480); -} - -We currently generate: - bfc r0, #7, #16 - movw r2, #:lower16:8388480 - movt r2, #:upper16:8388480 - and r1, r1, r2 - orr r0, r1, r0 - bx lr - -The following is much shorter: - lsr r1, r1, #7 - bfi r0, r1, #7, #16 - bx lr - - -//===---------------------------------------------------------------------===// - The code generated for bswap on armv4/5 (CPUs without rev) is less than ideal: int a(int x) { return __builtin_bswap32(x); } @@ -657,3 +636,24 @@ A custom Thumb version would also be a slight improvement over the generic version. //===---------------------------------------------------------------------===// + +Consider the following simple C code: + +void foo(unsigned char *a, unsigned char *b, int *c) { + if ((*a | *b) == 0) *c = 0; +} + +currently llvm-gcc generates something like this (nice branchless code I'd say): + + ldrb r0, [r0] + ldrb r1, [r1] + orr r0, r1, r0 + tst r0, #255 + moveq r0, #0 + streq r0, [r2] + bx lr + +Note that both "tst" and "moveq" are redundant. + +//===---------------------------------------------------------------------===// + diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index 39b70b43b23f..a21a3da10bda 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -68,7 +68,7 @@ void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, .addConstantPoolIndex(Idx).addImm(Pred).addReg(PredReg); } -bool Thumb1RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const { +bool Thumb1RegisterInfo::hasReservedCallFrame(const MachineFunction &MF) const { const MachineFrameInfo *FFI = MF.getFrameInfo(); unsigned CFSize = FFI->getMaxCallFrameSize(); // It's not always a good idea to include the call frame as part of the @@ -363,107 +363,19 @@ static void removeOperands(MachineInstr &MI, unsigned i) { MI.RemoveOperand(Op); } -int Thumb1RegisterInfo:: -rewriteFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, - unsigned FrameReg, int Offset, - unsigned MOVOpc, unsigned ADDriOpc, unsigned SUBriOpc) const -{ - // if/when eliminateFrameIndex() conforms with ARMBaseRegisterInfo - // version then can pull out Thumb1 specific parts here - return 0; -} - -/// saveScavengerRegister - Spill the register so it can be used by the -/// register scavenger. Return true. -bool -Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - MachineBasicBlock::iterator &UseMI, - const TargetRegisterClass *RC, - unsigned Reg) const { - // Thumb1 can't use the emergency spill slot on the stack because - // ldr/str immediate offsets must be positive, and if we're referencing - // off the frame pointer (if, for example, there are alloca() calls in - // the function, the offset will be negative. Use R12 instead since that's - // a call clobbered register that we know won't be used in Thumb1 mode. - DebugLoc DL; - BuildMI(MBB, I, DL, TII.get(ARM::tMOVtgpr2gpr)). 
- addReg(ARM::R12, RegState::Define).addReg(Reg, RegState::Kill); - - // The UseMI is where we would like to restore the register. If there's - // interference with R12 before then, however, we'll need to restore it - // before that instead and adjust the UseMI. - bool done = false; - for (MachineBasicBlock::iterator II = I; !done && II != UseMI ; ++II) { - if (II->isDebugValue()) - continue; - // If this instruction affects R12, adjust our restore point. - for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = II->getOperand(i); - if (!MO.isReg() || MO.isUndef() || !MO.getReg() || - TargetRegisterInfo::isVirtualRegister(MO.getReg())) - continue; - if (MO.getReg() == ARM::R12) { - UseMI = II; - done = true; - break; - } - } - } - // Restore the register from R12 - BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVgpr2tgpr)). - addReg(Reg, RegState::Define).addReg(ARM::R12, RegState::Kill); - - return true; -} - -unsigned -Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value, - RegScavenger *RS) const{ - unsigned VReg = 0; - unsigned i = 0; +bool Thumb1RegisterInfo:: +rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, + unsigned FrameReg, int &Offset, + const ARMBaseInstrInfo &TII) const { MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); - MachineFunction &MF = *MBB.getParent(); - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); DebugLoc dl = MI.getDebugLoc(); - - while (!MI.getOperand(i).isFI()) { - ++i; - assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); - } - - unsigned FrameReg = ARM::SP; - int FrameIndex = MI.getOperand(i).getIndex(); - int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + - MF.getFrameInfo()->getStackSize() + SPAdj; - - if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex)) - Offset -= AFI->getGPRCalleeSavedArea1Offset(); - else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex)) - Offset -= AFI->getGPRCalleeSavedArea2Offset(); - else if (MF.getFrameInfo()->hasVarSizedObjects()) { - assert(SPAdj == 0 && hasFP(MF) && "Unexpected"); - // There are alloca()'s in this function, must reference off the frame - // pointer instead. - FrameReg = getFrameRegister(MF); - Offset -= AFI->getFramePtrSpillOffset(); - } - - // Special handling of dbg_value instructions. - if (MI.isDebugValue()) { - MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/); - MI.getOperand(i+1).ChangeToImmediate(Offset); - return 0; - } - unsigned Opcode = MI.getOpcode(); const TargetInstrDesc &Desc = MI.getDesc(); unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); if (Opcode == ARM::tADDrSPi) { - Offset += MI.getOperand(i+1).getImm(); + Offset += MI.getOperand(FrameRegIdx+1).getImm(); // Can't use tADDrSPi if it's based off the frame pointer. unsigned NumBits = 0; @@ -483,12 +395,13 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (Offset == 0 && getInstrPredicate(&MI, PredReg) == ARMCC::AL) { // Turn it into a move. MI.setDesc(TII.get(ARM::tMOVgpr2tgpr)); - MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); // Remove offset and remaining explicit predicate operands. 
- do MI.RemoveOperand(i+1); - while (MI.getNumOperands() > i+1 && - (!MI.getOperand(i+1).isReg() || !MI.getOperand(i+1).isImm())); - return 0; + do MI.RemoveOperand(FrameRegIdx+1); + while (MI.getNumOperands() > FrameRegIdx+1 && + (!MI.getOperand(FrameRegIdx+1).isReg() || + !MI.getOperand(FrameRegIdx+1).isImm())); + return true; } // Common case: small offset, fits into instruction. @@ -496,15 +409,15 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (((Offset / Scale) & ~Mask) == 0) { // Replace the FrameIndex with sp / fp if (Opcode == ARM::tADDi3) { - removeOperands(MI, i); + removeOperands(MI, FrameRegIdx); MachineInstrBuilder MIB(&MI); AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg) .addImm(Offset / Scale)); } else { - MI.getOperand(i).ChangeToRegister(FrameReg, false); - MI.getOperand(i+1).ChangeToImmediate(Offset / Scale); + MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); + MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset / Scale); } - return 0; + return true; } unsigned DestReg = MI.getOperand(0).getReg(); @@ -516,7 +429,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, emitThumbRegPlusImmediate(MBB, II, DestReg, FrameReg, Offset, TII, *this, dl); MBB.erase(II); - return 0; + return true; } if (Offset > 0) { @@ -524,12 +437,12 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // r0 = add sp, 255*4 // r0 = add r0, (imm - 255*4) if (Opcode == ARM::tADDi3) { - removeOperands(MI, i); + removeOperands(MI, FrameRegIdx); MachineInstrBuilder MIB(&MI); AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg).addImm(Mask)); } else { - MI.getOperand(i).ChangeToRegister(FrameReg, false); - MI.getOperand(i+1).ChangeToImmediate(Mask); + MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); + MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Mask); } Offset = (Offset - Mask * Scale); MachineBasicBlock::iterator NII = llvm::next(II); @@ -542,14 +455,14 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl); MI.setDesc(TII.get(ARM::tADDhirr)); - MI.getOperand(i).ChangeToRegister(DestReg, false, false, true); - MI.getOperand(i+1).ChangeToRegister(FrameReg, false); + MI.getOperand(FrameRegIdx).ChangeToRegister(DestReg, false, false, true); + MI.getOperand(FrameRegIdx+1).ChangeToRegister(FrameReg, false); if (Opcode == ARM::tADDi3) { MachineInstrBuilder MIB(&MI); AddDefaultPred(MIB); } } - return 0; + return true; } else { unsigned ImmIdx = 0; int InstrOffs = 0; @@ -557,7 +470,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned Scale = 1; switch (AddrMode) { case ARMII::AddrModeT1_s: { - ImmIdx = i+1; + ImmIdx = FrameRegIdx+1; InstrOffs = MI.getOperand(ImmIdx).getImm(); NumBits = (FrameReg == ARM::SP) ? 
8 : 5; Scale = 4; @@ -577,9 +490,9 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned Mask = (1 << NumBits) - 1; if ((unsigned)Offset <= Mask * Scale) { // Replace the FrameIndex with sp - MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); ImmOp.ChangeToImmediate(ImmedOffset); - return 0; + return true; } bool isThumSpillRestore = Opcode == ARM::tRestore || Opcode == ARM::tSpill; @@ -600,12 +513,126 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Offset &= ~(Mask*Scale); } } + return Offset == 0; +} + +void +Thumb1RegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I, + unsigned BaseReg, int64_t Offset) const { + MachineInstr &MI = *I; + int Off = Offset; // ARM doesn't need the general 64-bit offsets + unsigned i = 0; + + while (!MI.getOperand(i).isFI()) { + ++i; + assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); + } + bool Done = false; + Done = rewriteFrameIndex(MI, i, BaseReg, Off, TII); + assert (Done && "Unable to resolve frame index!"); +} + +/// saveScavengerRegister - Spill the register so it can be used by the +/// register scavenger. Return true. +bool +Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &UseMI, + const TargetRegisterClass *RC, + unsigned Reg) const { + // Thumb1 can't use the emergency spill slot on the stack because + // ldr/str immediate offsets must be positive, and if we're referencing + // off the frame pointer (if, for example, there are alloca() calls in + // the function), the offset will be negative. Use R12 instead since that's + // a call clobbered register that we know won't be used in Thumb1 mode. + DebugLoc DL; + BuildMI(MBB, I, DL, TII.get(ARM::tMOVtgpr2gpr)). + addReg(ARM::R12, RegState::Define).addReg(Reg, RegState::Kill); + + // The UseMI is where we would like to restore the register. If there's + // interference with R12 before then, however, we'll need to restore it + // before that instead and adjust the UseMI. + bool done = false; + for (MachineBasicBlock::iterator II = I; !done && II != UseMI ; ++II) { + if (II->isDebugValue()) + continue; + // If this instruction affects R12, adjust our restore point. + for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = II->getOperand(i); + if (!MO.isReg() || MO.isUndef() || !MO.getReg() || + TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; + if (MO.getReg() == ARM::R12) { + UseMI = II; + done = true; + break; + } + } + } + // Restore the register from R12 + BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVgpr2tgpr)).
+ addReg(Reg, RegState::Define).addReg(ARM::R12, RegState::Kill); + + return true; +} + +void +Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS) const { + unsigned VReg = 0; + unsigned i = 0; + MachineInstr &MI = *II; + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + DebugLoc dl = MI.getDebugLoc(); + + while (!MI.getOperand(i).isFI()) { + ++i; + assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); + } + + unsigned FrameReg = ARM::SP; + int FrameIndex = MI.getOperand(i).getIndex(); + int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + + MF.getFrameInfo()->getStackSize() + SPAdj; + + if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex)) + Offset -= AFI->getGPRCalleeSavedArea1Offset(); + else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex)) + Offset -= AFI->getGPRCalleeSavedArea2Offset(); + else if (MF.getFrameInfo()->hasVarSizedObjects()) { + assert(SPAdj == 0 && hasFP(MF) && "Unexpected"); + // There are alloca()'s in this function, must reference off the frame + // pointer or base pointer instead. + if (!hasBasePointer(MF)) { + FrameReg = getFrameRegister(MF); + Offset -= AFI->getFramePtrSpillOffset(); + } else + FrameReg = BasePtr; + } + + // Special handling of dbg_value instructions. + if (MI.isDebugValue()) { + MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/); + MI.getOperand(i+1).ChangeToImmediate(Offset); + return; + } + + // Modify MI as necessary to handle as much of 'Offset' as possible + assert(AFI->isThumbFunction() && + "This eliminateFrameIndex only supports Thumb1!"); + if (rewriteFrameIndex(MI, i, FrameReg, Offset, TII)) + return; // If we get here, the immediate doesn't fit into the instruction. We folded // as much as possible above, handle the rest, providing a register that is // SP+LargeImm. assert(Offset && "This code isn't needed if offset already handled!"); + unsigned Opcode = MI.getOpcode(); + const TargetInstrDesc &Desc = MI.getDesc(); + // Remove predicate first. int PIdx = MI.findFirstPredOperandIdx(); if (PIdx != -1) @@ -637,11 +664,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.addOperand(MachineOperand::CreateReg(0, false)); } else if (Desc.mayStore()) { VReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass); - assert (Value && "Frame index virtual allocated, but Value arg is NULL!"); bool UseRR = false; - bool TrackVReg = true; - Value->first = FrameReg; // use the frame register as a kind indicator - Value->second = Offset; if (Opcode == ARM::tSpill) { if (FrameReg == ARM::SP) @@ -650,7 +673,6 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, else { emitLoadConstPool(MBB, II, dl, VReg, 0, Offset); UseRR = true; - TrackVReg = false; } } else emitThumbRegPlusImmediate(MBB, II, VReg, FrameReg, Offset, TII, @@ -661,8 +683,6 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.addOperand(MachineOperand::CreateReg(FrameReg, false)); else // tSTR has an extra register operand. 
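// Aside: the rewritten eliminateFrameIndex() above chooses the register that
// frame references are anchored on. A standalone model of that policy only
// (illustrative names, not part of this patch; the real code also folds the
// FP spill offset and callee-saved-area offsets into Offset):
enum FrameAnchor { AnchorSP, AnchorFP, AnchorBP };
static inline FrameAnchor pickFrameAnchor(bool HasVarSizedObjects,
                                          bool HasBasePtr) {
  if (!HasVarSizedObjects)
    return AnchorSP;                         // default: reference off SP
  return HasBasePtr ? AnchorBP : AnchorFP;   // allocas force FP or BP
}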
MI.addOperand(MachineOperand::CreateReg(0, false)); - if (!ReuseFrameIndexVals || !TrackVReg) - VReg = 0; } else assert(false && "Unexpected opcode!"); @@ -671,7 +691,6 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineInstrBuilder MIB(&MI); AddDefaultPred(MIB); } - return VReg; } void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const { @@ -742,11 +761,11 @@ void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const { dl = MBBI->getDebugLoc(); } - // Darwin ABI requires FP to point to the stack slot that contains the - // previous FP. - if (STI.isTargetDarwin() || hasFP(MF)) { + // Adjust FP so it points to the stack slot that contains the previous FP. + if (hasFP(MF)) { BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) .addFrameIndex(FramePtrSpillFI).addImm(0); + AFI->setShouldRestoreSPFromFP(true); } // Determine starting offsets of spill areas. @@ -764,14 +783,20 @@ void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const { emitSPUpdate(MBB, MBBI, TII, dl, *this, -NumBytes); } - if (STI.isTargetELF() && hasFP(MF)) { + if (STI.isTargetELF() && hasFP(MF)) MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - AFI->getFramePtrSpillOffset()); - } AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); AFI->setDPRCalleeSavedAreaSize(DPRCSSize); + + // If we need a base pointer, set it up here. It's whatever the value + // of the stack pointer is at this point. Any variable size objects + // will be allocated after this, so we can still use the base pointer + // to reference locals. + if (hasBasePointer(MF)) + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), BasePtr).addReg(ARM::SP); } static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { @@ -828,7 +853,7 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF, AFI->getGPRCalleeSavedArea2Size() + AFI->getDPRCalleeSavedAreaSize()); - if (hasFP(MF)) { + if (AFI->shouldRestoreSPFromFP()) { NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; // Reset SP based on frame pointer only if the stack frame extends beyond // frame pointer stack slot or target is ELF and the function has FP. diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h index 9a0308afa20c..c578054a5d71 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.h +++ b/lib/Target/ARM/Thumb1RegisterInfo.h @@ -38,27 +38,27 @@ public: unsigned PredReg = 0) const; /// Code Generation virtual methods... - bool hasReservedCallFrame(MachineFunction &MF) const; + bool hasReservedCallFrame(const MachineFunction &MF) const; void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - // rewrite MI to access 'Offset' bytes from the FP. Return the offset that - // could not be handled directly in MI. - int rewriteFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, - unsigned FrameReg, int Offset, - unsigned MOVOpc, unsigned ADDriOpc, - unsigned SUBriOpc) const; - + // rewrite MI to access 'Offset' bytes from the FP. Update Offset to be + // however much remains to be handled. Return 'true' if no further + // work is required.
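// Aside: the rewriteFrameIndex() contract documented above ("update Offset to
// however much remains") is the usual fold-what-fits pattern. A standalone
// model, assuming a non-negative Offset and a NumBits-wide immediate field
// scaled by Scale (illustrative names, not part of this patch):
static inline bool foldFrameOffset(int &Offset, unsigned NumBits,
                                   unsigned Scale) {
  unsigned Mask = (1u << NumBits) - 1;
  unsigned Chunks = (unsigned)Offset / Scale;
  if (Chunks > Mask)
    Chunks = Mask;                   // fold as much as the field allows
  Offset -= (int)(Chunks * Scale);   // remainder is left for the caller
  return Offset == 0;                // true => no further work required
}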
+  bool rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
+                         unsigned FrameReg, int &Offset,
+                         const ARMBaseInstrInfo &TII) const;
+  void resolveFrameIndex(MachineBasicBlock::iterator I,
+                         unsigned BaseReg, int64_t Offset) const;
   bool saveScavengerRegister(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I,
                              MachineBasicBlock::iterator &UseMI,
                              const TargetRegisterClass *RC,
                              unsigned Reg) const;
-  unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
-                               int SPAdj, FrameIndexValue *Value = NULL,
-                               RegScavenger *RS = NULL) const;
+  void eliminateFrameIndex(MachineBasicBlock::iterator II,
+                           int SPAdj, RegScavenger *RS = NULL) const;
 
   void emitPrologue(MachineFunction &MF) const;
   void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index cd15bbed9f23..45e693744b80 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -27,7 +27,7 @@ namespace {
   public:
     static char ID;
-    Thumb2ITBlockPass() : MachineFunctionPass(&ID) {}
+    Thumb2ITBlockPass() : MachineFunctionPass(ID) {}
 
     const Thumb2InstrInfo *TII;
     const TargetRegisterInfo *TRI;
@@ -91,35 +91,53 @@ static void TrackDefUses(MachineInstr *MI,
   }
 }
 
+static bool isCopy(MachineInstr *MI) {
+  switch (MI->getOpcode()) {
+  default:
+    return false;
+  case ARM::MOVr:
+  case ARM::MOVr_TC:
+  case ARM::tMOVr:
+  case ARM::tMOVgpr2tgpr:
+  case ARM::tMOVtgpr2gpr:
+  case ARM::tMOVgpr2gpr:
+  case ARM::t2MOVr:
+    return true;
+  }
+}
+
 bool
 Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI,
                                         ARMCC::CondCodes CC, ARMCC::CondCodes OCC,
                                         SmallSet<unsigned, 4> &Defs,
                                         SmallSet<unsigned, 4> &Uses) {
-  unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
-  if (TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
-    assert(SrcSubIdx == 0 && DstSubIdx == 0 &&
-           "Sub-register indices still around?");
-    // llvm models select's as two-address instructions. That means a copy
-    // is inserted before a t2MOVccr, etc. If the copy is scheduled in
-    // between selects we would end up creating multiple IT blocks.
-
-    // First check if it's safe to move it.
-    if (Uses.count(DstReg) || Defs.count(SrcReg))
-      return false;
-
-    // Then peek at the next instruction to see if it's predicated on CC or OCC.
-    // If not, then there is nothing to be gained by moving the copy.
-    MachineBasicBlock::iterator I = MI; ++I;
-    MachineBasicBlock::iterator E = MI->getParent()->end();
-    while (I != E && I->isDebugValue())
-      ++I;
-    if (I != E) {
-      unsigned NPredReg = 0;
-      ARMCC::CondCodes NCC = llvm::getITInstrPredicate(I, NPredReg);
-      if (NCC == CC || NCC == OCC)
-        return true;
-    }
+  if (!isCopy(MI))
+    return false;
+  // LLVM models selects as two-address instructions. That means a copy
+  // is inserted before a t2MOVccr, etc. If the copy is scheduled in
+  // between selects we would end up creating multiple IT blocks.
+  assert(MI->getOperand(0).getSubReg() == 0 &&
+         MI->getOperand(1).getSubReg() == 0 &&
+         "Sub-register indices still around?");
+
+  unsigned DstReg = MI->getOperand(0).getReg();
+  unsigned SrcReg = MI->getOperand(1).getReg();
+
+  // First check if it's safe to move it.
+  if (Uses.count(DstReg) || Defs.count(SrcReg))
+    return false;
+
+  // Then peek at the next instruction to see if it's predicated on CC or OCC.
+  // If not, then there is nothing to be gained by moving the copy.
+  MachineBasicBlock::iterator I = MI; ++I;
+  MachineBasicBlock::iterator E = MI->getParent()->end();
+  while (I != E && I->isDebugValue())
+    ++I;
+  if (I != E) {
+    unsigned NPredReg = 0;
+    ARMCC::CondCodes NCC = llvm::getITInstrPredicate(I, NPredReg);
+    if (NCC == CC || NCC == OCC)
+      return true;
   }
 
   return false;
 }
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index ee517279c9d7..442f41da8a2d 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -147,8 +147,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                     unsigned SrcReg, bool isKill, int FI,
                     const TargetRegisterClass *RC,
                     const TargetRegisterInfo *TRI) const {
-  if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
-      RC == ARM::tcGPRRegisterClass) {
+  if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
+      RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass) {
     DebugLoc DL;
     if (I != MBB.end()) DL = I->getDebugLoc();
@@ -173,8 +173,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                      unsigned DestReg, int FI,
                      const TargetRegisterClass *RC,
                      const TargetRegisterInfo *TRI) const {
-  if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
-      RC == ARM::tcGPRRegisterClass) {
+  if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
+      RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass) {
     DebugLoc DL;
     if (I != MBB.end()) DL = I->getDebugLoc();
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index ba392f36d946..0c3962dd123d 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -173,7 +173,7 @@ namespace {
   char Thumb2SizeReduce::ID = 0;
 }
 
-Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(&ID) {
+Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) {
   for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) {
     unsigned FromOpc = ReduceTable[i].WideOpc;
     if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
@@ -315,6 +315,18 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
     ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
     if (!isARMLowRegister(BaseReg) || Mode != ARM_AM::ia)
       return false;
+    // For the non-writeback version (this one), the base register must be
+    // one of the registers being loaded.
+    bool isOK = false;
+    for (unsigned i = 4; i < MI->getNumOperands(); ++i) {
+      if (MI->getOperand(i).getReg() == BaseReg) {
+        isOK = true;
+        break;
+      }
+    }
+    if (!isOK)
+      return false;
+
     OpNum = 0;
     isLdStMul = true;
     break;
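
The heart of the new Thumb1RegisterInfo::eliminateFrameIndex above is choosing which register a frame access is based off: SP by default, and FP or the freshly introduced base pointer once variable-sized objects make SP-relative distances unknowable at compile time. Below is a condensed, standalone sketch of that selection, ignoring the callee-saved-area adjustments; FrameLayout, BaseKind, and selectFrameBase are illustrative stand-ins for the MachineFrameInfo/ARMFunctionInfo queries, not LLVM API.

// Standalone sketch of the base-register/offset selection performed by
// Thumb1RegisterInfo::eliminateFrameIndex above. All type and function
// names here are hypothetical stand-ins, not LLVM types.
#include <cassert>

enum BaseKind { SP, FP, BasePointer };

struct FrameLayout {
  int ObjectOffset;          // cf. MachineFrameInfo::getObjectOffset(FI)
  int StackSize;             // cf. MachineFrameInfo::getStackSize()
  int FramePtrSpillOffset;   // where FP was spilled, relative to the frame
  bool HasVarSizedObjects;   // dynamic allocas present
  bool HasBasePointer;       // SP and FP both move; a third register is kept
};

// Returns which register to address the slot from, and sets Offset to the
// byte offset relative to that register.
static BaseKind selectFrameBase(const FrameLayout &FL, int SPAdj, int &Offset) {
  // Frame indices start out SP-relative: object offset plus the fixed part
  // of the stack, plus any transient SP adjustment at this program point.
  Offset = FL.ObjectOffset + FL.StackSize + SPAdj;
  if (!FL.HasVarSizedObjects)
    return SP;
  // With dynamic allocas the distance from SP is unknown at compile time,
  // so reference off FP, or off the base pointer when even FP won't do.
  assert(SPAdj == 0 && "No SP adjustment expected with dynamic allocas");
  if (!FL.HasBasePointer) {
    Offset -= FL.FramePtrSpillOffset;  // rebase from SP-relative to FP-relative
    return FP;
  }
  return BasePointer;  // equals SP as of the end of the prologue
}

The base pointer case works because the prologue hunk above copies SP into BasePtr before any dynamic allocation happens, so base-pointer-relative offsets stay valid even after SP moves.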
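
The repurposed rewriteFrameIndex declared in Thumb1RegisterInfo.h is what lets eliminateFrameIndex bail out early: fold whatever the instruction's immediate field can encode, shrink Offset by reference to the leftover, and report completion. The mock below illustrates that contract only, assuming a word-scaled 5-bit immediate as in the Thumb1 tLDR/tSTR encodings; the real method's per-opcode folding rules are more involved, and rewriteImmediate is a hypothetical name.

// Minimal mock of the updated rewriteFrameIndex contract: fold as much of
// Offset as the immediate field allows, update Offset to the remainder,
// and return true only when nothing is left to do. The 5-bit, times-4
// encoding limit is illustrative of Thumb1 tLDR/tSTR, not the exact API.
#include <cassert>

static bool rewriteImmediate(int &Offset, int &EncodedImm) {
  const int Scale = 4;             // immediate is scaled by the access size
  const int Limit = 31 * Scale;    // imm5 field: 0..124 in steps of 4
  assert(Offset >= 0 && "negative offsets take a different path");
  int Folded = (Offset > Limit) ? Limit : (Offset / Scale) * Scale;
  EncodedImm = Folded / Scale;     // what gets written into the instruction
  Offset -= Folded;                // leftover the caller must materialize
  return Offset == 0;              // true: no further work required
}

Only when this returns false does the caller need a scratch register to materialize FrameReg plus the remainder, which is exactly the path guarded by the assert on Offset in the eliminateFrameIndex hunk above.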
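
In the Thumb2ITBlockPass change, the safety condition for hoisting a copy out of an IT block is worth restating: the copy may move above the predicated instructions it follows only if none of them reads the copy's destination or writes its source. A sketch of just that check, with std::set standing in for the pass's SmallSet bookkeeping and canHoistCopy as a hypothetical name:

// Sketch of the legality test behind MoveCopyOutOfITBlock above. Defs and
// Uses accumulate the registers defined/used by the instructions the copy
// would move across (TrackDefUses in the pass).
#include <set>

static bool canHoistCopy(unsigned CopyDst, unsigned CopySrc,
                         const std::set<unsigned> &Defs,
                         const std::set<unsigned> &Uses) {
  // Unsafe if a crossed instruction reads the copy's destination: it would
  // now observe the copied value too early ...
  if (Uses.count(CopyDst))
    return false;
  // ... or redefines the copy's source: the copy would read the wrong value.
  if (Defs.count(CopySrc))
    return false;
  return true;
}

The payoff is block merging: a sequence like moveq r0, r2 / mov r4, r1 / movne r4, r3 needs two IT blocks as scheduled, but hoisting the copy yields mov r4, r1 / ite eq / moveq r0, r2 / movne r4, r3, a single block, which is why the pass also peeks at the next instruction's predicate before bothering to move anything.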
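
Finally, the new guard in Thumb2SizeReduce::ReduceLoadStore encodes an architectural constraint rather than an optimization: the 16-bit Thumb LDM encoding has no writeback bit, and it writes the base register back unless the base also appears in the register list. A non-writeback 32-bit LDMIA can therefore be narrowed only when the base is among the loaded registers, which is all the added loop verifies. The same rule as a standalone predicate, with a plain vector standing in for the MI operand list:

// Sketch of the narrowing constraint added to ReduceLoadStore above.
#include <vector>

static bool canNarrowNonWritebackLDM(unsigned BaseReg,
                                     const std::vector<unsigned> &LoadedRegs) {
  for (unsigned i = 0, e = LoadedRegs.size(); i != e; ++i)
    if (LoadedRegs[i] == BaseReg)
      return true;  // 16-bit LDM leaves the base unchanged: safe to narrow
  return false;     // 16-bit LDM would write back and clobber the base
}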