Diffstat (limited to 'contrib/llvm/lib/Target/ARM')
74 files changed, 9562 insertions, 4220 deletions
diff --git a/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp b/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp
index f0d4dbe2bfb3..ff585b41a2aa 100644
--- a/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp
+++ b/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp
@@ -615,7 +615,7 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
   SmallVector<unsigned, 8> Defs = getReadDPRs(MI);
   bool Modified = false;
 
-  for (SmallVector<unsigned, 8>::iterator I = Defs.begin(), E = Defs.end();
+  for (SmallVectorImpl<unsigned>::iterator I = Defs.begin(), E = Defs.end();
      I != E; ++I) {
     // Follow the def-use chain for this DPR through COPYs, and also through
     // PHIs (which are essentially multi-way COPYs). It is because of PHIs that
@@ -630,7 +630,7 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
 
     elideCopiesAndPHIs(Def, DefSrcs);
 
-    for (SmallVector<MachineInstr*, 8>::iterator II = DefSrcs.begin(),
+    for (SmallVectorImpl<MachineInstr *>::iterator II = DefSrcs.begin(),
       EE = DefSrcs.end(); II != EE; ++II) {
       MachineInstr *MI = *II;
 
@@ -655,8 +655,15 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
       if (NewReg != 0) {
         Modified = true;
 
-        for (SmallVector<MachineOperand*, 8>::const_iterator I = Uses.begin(),
+        for (SmallVectorImpl<MachineOperand *>::const_iterator I = Uses.begin(),
                E = Uses.end(); I != E; ++I) {
+          // Make sure to constrain the register class of the new register to
+          // match what we're replacing. Otherwise we can optimize a DPR_VFP2
+          // reference into a plain DPR, and that will end poorly. NewReg is
+          // always virtual here, so there will always be a matching subclass
+          // to find.
+          MRI->constrainRegClass(NewReg, MRI->getRegClass((*I)->getReg()));
+
           DEBUG(dbgs() << "Replacing operand "
                 << **I << " with "
                 << PrintReg(NewReg) << "\n");
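The new comment in this hunk names the trap: elideCopiesAndPHIs can hand back a use whose operand was constrained to DPR_VFP2 (d0-d15), and substituting a vreg left in the wider DPR class (d0-d31) could later be allocated a register the VFP2 encoding cannot express. A standalone toy, with register classes modelled as plain sets rather than LLVM's TargetRegisterClass, shows the narrowing that MRI->constrainRegClass performs:

    // Toy model of the constrainRegClass fix: a virtual register that
    // replaces a DPR_VFP2-constrained operand must itself be narrowed to
    // DPR_VFP2 (d0-d15), not left in the full DPR class (d0-d31).
    #include <algorithm>
    #include <cassert>
    #include <cstdio>
    #include <iterator>
    #include <set>

    using RegClass = std::set<int>;  // register numbers the class may use

    static RegClass makeRange(int Lo, int Hi) {
      RegClass RC;
      for (int R = Lo; R <= Hi; ++R)
        RC.insert(R);
      return RC;
    }

    // Analogue of MachineRegisterInfo::constrainRegClass: narrow the new
    // register's class so every register it can receive is legal for the
    // operand being rewritten.
    static RegClass constrain(const RegClass &NewRegClass,
                              const RegClass &UseClass) {
      RegClass Result;
      std::set_intersection(NewRegClass.begin(), NewRegClass.end(),
                            UseClass.begin(), UseClass.end(),
                            std::inserter(Result, Result.begin()));
      return Result;
    }

    int main() {
      RegClass DPR = makeRange(0, 31);       // d0-d31
      RegClass DPR_VFP2 = makeRange(0, 15);  // d0-d15: all VFP2 can encode

      RegClass Constrained = constrain(DPR, DPR_VFP2);
      assert(Constrained == DPR_VFP2 && "must satisfy the stricter class");
      std::printf("constrained class keeps %zu registers\n",
                  Constrained.size());
      return 0;
    }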
"Enable support for CRC instructions">; // Some processors have FP multiply-accumulate instructions that don't // play nicely with other VFP / NEON instructions, and it's generally better @@ -108,10 +119,24 @@ def FeatureDSPThumb2 : SubtargetFeature<"t2dsp", "Thumb2DSP", "true", def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true", "Supports Multiprocessing extension">; -// M-series ISA? -def FeatureMClass : SubtargetFeature<"mclass", "IsMClass", "true", +// Virtualization extension - requires HW divide (ARMv7-AR ARMARM - 4.4.8). +def FeatureVirtualization : SubtargetFeature<"virtualization", + "HasVirtualization", "true", + "Supports Virtualization extension", + [FeatureHWDiv, FeatureHWDivARM]>; + +// M-series ISA +def FeatureMClass : SubtargetFeature<"mclass", "ARMProcClass", "MClass", "Is microcontroller profile ('M' series)">; +// R-series ISA +def FeatureRClass : SubtargetFeature<"rclass", "ARMProcClass", "RClass", + "Is realtime profile ('R' series)">; + +// A-series ISA +def FeatureAClass : SubtargetFeature<"aclass", "ARMProcClass", "AClass", + "Is application profile ('A' series)">; + // Special TRAP encoding for NaCl, which looks like a TRAP in Thumb too. // See ARMInstrInfo.td for details. def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true", @@ -129,12 +154,19 @@ def HasV5TEOps : SubtargetFeature<"v5te", "HasV5TEOps", "true", def HasV6Ops : SubtargetFeature<"v6", "HasV6Ops", "true", "Support ARM v6 instructions", [HasV5TEOps]>; +def HasV6MOps : SubtargetFeature<"v6m", "HasV6MOps", "true", + "Support ARM v6M instructions", + [HasV6Ops]>; def HasV6T2Ops : SubtargetFeature<"v6t2", "HasV6T2Ops", "true", "Support ARM v6t2 instructions", - [HasV6Ops, FeatureThumb2]>; + [HasV6MOps, FeatureThumb2]>; def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true", "Support ARM v7 instructions", - [HasV6T2Ops]>; + [HasV6T2Ops, FeaturePerfMon]>; +def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true", + "Support ARM v8 instructions", + [HasV7Ops, FeatureVirtualization, + FeatureMP]>; //===----------------------------------------------------------------------===// // ARM Processors supported. @@ -170,12 +202,27 @@ def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift", // FIXME: It has not been determined if A15 has these features. def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15", "Cortex-A15 ARM processors", - [FeatureT2XtPk, FeatureFP16, + [FeatureT2XtPk, FeatureVFP4, + FeatureMP, FeatureHWDiv, FeatureHWDivARM, FeatureAvoidPartialCPSR, - FeatureTrustZone]>; + FeatureTrustZone, FeatureVirtualization]>; + +def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53", + "Cortex-A53 ARM processors", + [FeatureHWDiv, FeatureHWDivARM, + FeatureTrustZone, FeatureT2XtPk, + FeatureCrypto, FeatureCRC]>; + +def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57", + "Cortex-A57 ARM processors", + [FeatureHWDiv, FeatureHWDivARM, + FeatureTrustZone, FeatureT2XtPk, + FeatureCrypto, FeatureCRC]>; + def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5", "Cortex-R5 ARM processors", - [FeatureSlowFPBrcc, FeatureHWDivARM, + [FeatureSlowFPBrcc, + FeatureHWDiv, FeatureHWDivARM, FeatureHasSlowFPVMLx, FeatureAvoidPartialCPSR, FeatureT2XtPk]>; @@ -233,7 +280,7 @@ def : Processor<"mpcore", ARMV6Itineraries, [HasV6Ops, FeatureVFP2, FeatureHasSlowFPVMLx]>; // V6M Processors. 
-def : Processor<"cortex-m0", ARMV6Itineraries, [HasV6Ops, FeatureNoARM, +def : Processor<"cortex-m0", ARMV6Itineraries, [HasV6MOps, FeatureNoARM, FeatureDB, FeatureMClass]>; // V6T2 Processors. @@ -248,26 +295,30 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, [HasV6T2Ops, FeatureVFP2, def : ProcessorModel<"cortex-a5", CortexA8Model, [ProcA5, HasV7Ops, FeatureNEON, FeatureDB, FeatureVFP4, FeatureDSPThumb2, - FeatureHasRAS]>; + FeatureHasRAS, FeatureAClass]>; def : ProcessorModel<"cortex-a8", CortexA8Model, [ProcA8, HasV7Ops, FeatureNEON, FeatureDB, - FeatureDSPThumb2, FeatureHasRAS]>; + FeatureDSPThumb2, FeatureHasRAS, + FeatureAClass]>; def : ProcessorModel<"cortex-a9", CortexA9Model, [ProcA9, HasV7Ops, FeatureNEON, FeatureDB, - FeatureDSPThumb2, FeatureHasRAS]>; + FeatureDSPThumb2, FeatureHasRAS, + FeatureAClass]>; def : ProcessorModel<"cortex-a9-mp", CortexA9Model, [ProcA9, HasV7Ops, FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureMP, - FeatureHasRAS]>; + FeatureHasRAS, FeatureAClass]>; // FIXME: A15 has currently the same ProcessorModel as A9. def : ProcessorModel<"cortex-a15", CortexA9Model, [ProcA15, HasV7Ops, FeatureNEON, FeatureDB, - FeatureDSPThumb2, FeatureHasRAS]>; + FeatureDSPThumb2, FeatureHasRAS, + FeatureAClass]>; // FIXME: R5 has currently the same ProcessorModel as A8. def : ProcessorModel<"cortex-r5", CortexA8Model, [ProcR5, HasV7Ops, FeatureDB, FeatureVFP3, FeatureDSPThumb2, - FeatureHasRAS]>; + FeatureHasRAS, FeatureVFPOnlySP, + FeatureD16, FeatureRClass]>; // V7M Processors. def : ProcNoItin<"cortex-m3", [HasV7Ops, @@ -279,13 +330,22 @@ def : ProcNoItin<"cortex-m4", [HasV7Ops, FeatureThumb2, FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureDSPThumb2, FeatureT2XtPk, FeatureVFP4, - FeatureVFPOnlySP, FeatureMClass]>; + FeatureVFPOnlySP, FeatureD16, + FeatureMClass]>; // Swift uArch Processors. def : ProcessorModel<"swift", SwiftModel, [ProcSwift, HasV7Ops, FeatureNEON, FeatureDB, FeatureDSPThumb2, - FeatureHasRAS]>; + FeatureHasRAS, FeatureAClass]>; + +// V8 Processors +def : ProcNoItin<"cortex-a53", [ProcA53, HasV8Ops, FeatureAClass, + FeatureDB, FeatureFPARMv8, + FeatureNEON, FeatureDSPThumb2]>; +def : ProcNoItin<"cortex-a57", [ProcA57, HasV8Ops, FeatureAClass, + FeatureDB, FeatureFPARMv8, + FeatureNEON, FeatureDSPThumb2]>; //===----------------------------------------------------------------------===// // Register File Description diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index 13ec2087938a..e79f88d4b6f1 100644 --- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -17,6 +17,7 @@ #include "ARM.h" #include "ARMBuildAttrs.h" #include "ARMConstantPoolValue.h" +#include "ARMFPUName.h" #include "ARMMachineFunctionInfo.h" #include "ARMTargetMachine.h" #include "ARMTargetObjectFile.h" @@ -55,235 +56,67 @@ #include <cctype> using namespace llvm; -namespace { - - // Per section and per symbol attributes are not supported. - // To implement them we would need the ability to delay this emission - // until the assembly file is fully parsed/generated as only then do we - // know the symbol and section numbers. 
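Most of the ARM.td churn above is implied-feature bookkeeping: SubtargetFeature's final operand lists features that must be switched on together, and the closure is transitive, so +v8 drags in v7, virtualization, and mp, and through virtualization both hardware-divide flags. A minimal sketch of that resolution, using feature names from the table above (the resolver itself is illustrative, not TableGen's):

    // Transitive closure over the implied-features edges copied from the
    // table above; enabling "v8" should surface everything below it.
    #include <cstdio>
    #include <map>
    #include <set>
    #include <string>
    #include <vector>

    static const std::map<std::string, std::vector<std::string>> Implies = {
      {"virtualization", {"hwdiv", "hwdiv-arm"}},
      {"v8",   {"v7", "virtualization", "mp"}},
      {"v7",   {"v6t2", "perfmon"}},
      {"v6t2", {"v6m", "thumb2"}},
      {"v6m",  {"v6"}},
    };

    // Depth-first walk: enabling F enables everything F implies.
    static void enable(const std::string &F, std::set<std::string> &On) {
      if (!On.insert(F).second)
        return;  // already on, stop recursing
      auto It = Implies.find(F);
      if (It == Implies.end())
        return;
      for (const std::string &Dep : It->second)
        enable(Dep, On);
    }

    int main() {
      std::set<std::string> On;
      enable("v8", On);  // like building with -mattr=+v8
      for (const std::string &F : On)
        std::printf("%s\n", F.c_str());
      return 0;
    }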
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index 13ec2087938a..e79f88d4b6f1 100644
--- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -17,6 +17,7 @@
 #include "ARM.h"
 #include "ARMBuildAttrs.h"
 #include "ARMConstantPoolValue.h"
+#include "ARMFPUName.h"
 #include "ARMMachineFunctionInfo.h"
 #include "ARMTargetMachine.h"
 #include "ARMTargetObjectFile.h"
@@ -55,235 +56,67 @@
 #include <cctype>
 using namespace llvm;
 
-namespace {
-
-  // Per section and per symbol attributes are not supported.
-  // To implement them we would need the ability to delay this emission
-  // until the assembly file is fully parsed/generated as only then do we
-  // know the symbol and section numbers.
-  class AttributeEmitter {
-  public:
-    virtual void MaybeSwitchVendor(StringRef Vendor) = 0;
-    virtual void EmitAttribute(unsigned Attribute, unsigned Value) = 0;
-    virtual void EmitTextAttribute(unsigned Attribute, StringRef String) = 0;
-    virtual void Finish() = 0;
-    virtual ~AttributeEmitter() {}
-  };
-
-  class AsmAttributeEmitter : public AttributeEmitter {
-    MCStreamer &Streamer;
-
-  public:
-    AsmAttributeEmitter(MCStreamer &Streamer_) : Streamer(Streamer_) {}
-    void MaybeSwitchVendor(StringRef Vendor) { }
-
-    void EmitAttribute(unsigned Attribute, unsigned Value) {
-      Streamer.EmitRawText("\t.eabi_attribute " +
-                           Twine(Attribute) + ", " + Twine(Value));
-    }
-
-    void EmitTextAttribute(unsigned Attribute, StringRef String) {
-      switch (Attribute) {
-      default: llvm_unreachable("Unsupported Text attribute in ASM Mode");
-      case ARMBuildAttrs::CPU_name:
-        Streamer.EmitRawText(StringRef("\t.cpu ") + String.lower());
-        break;
-      /* GAS requires .fpu to be emitted regardless of EABI attribute */
-      case ARMBuildAttrs::Advanced_SIMD_arch:
-      case ARMBuildAttrs::VFP_arch:
-        Streamer.EmitRawText(StringRef("\t.fpu ") + String.lower());
-        break;
-      }
-    }
-    void Finish() { }
-  };
-
-  class ObjectAttributeEmitter : public AttributeEmitter {
-    // This structure holds all attributes, accounting for
-    // their string/numeric value, so we can later emmit them
-    // in declaration order, keeping all in the same vector
-    struct AttributeItemType {
-      enum {
-        HiddenAttribute = 0,
-        NumericAttribute,
-        TextAttribute
-      } Type;
-      unsigned Tag;
-      unsigned IntValue;
-      StringRef StringValue;
-    } AttributeItem;
-
-    MCObjectStreamer &Streamer;
-    StringRef CurrentVendor;
-    SmallVector<AttributeItemType, 64> Contents;
-
-    // Account for the ULEB/String size of each item,
-    // not just the number of items
-    size_t ContentsSize;
-    // FIXME: this should be in a more generic place, but
-    // getULEBSize() is in MCAsmInfo and will be moved to MCDwarf
-    size_t getULEBSize(int Value) {
-      size_t Size = 0;
-      do {
-        Value >>= 7;
-        Size += sizeof(int8_t); // Is this really necessary?
-      } while (Value);
-      return Size;
-    }
-
-  public:
-    ObjectAttributeEmitter(MCObjectStreamer &Streamer_) :
-      Streamer(Streamer_), CurrentVendor(""), ContentsSize(0) { }
-
-    void MaybeSwitchVendor(StringRef Vendor) {
-      assert(!Vendor.empty() && "Vendor cannot be empty.");
-
-      if (CurrentVendor.empty())
-        CurrentVendor = Vendor;
-      else if (CurrentVendor == Vendor)
-        return;
-      else
-        Finish();
-
-      CurrentVendor = Vendor;
-
-      assert(Contents.size() == 0);
-    }
-
-    void EmitAttribute(unsigned Attribute, unsigned Value) {
-      AttributeItemType attr = {
-        AttributeItemType::NumericAttribute,
-        Attribute,
-        Value,
-        StringRef("")
-      };
-      ContentsSize += getULEBSize(Attribute);
-      ContentsSize += getULEBSize(Value);
-      Contents.push_back(attr);
-    }
-
-    void EmitTextAttribute(unsigned Attribute, StringRef String) {
-      AttributeItemType attr = {
-        AttributeItemType::TextAttribute,
-        Attribute,
-        0,
-        String
-      };
-      ContentsSize += getULEBSize(Attribute);
-      // String + \0
-      ContentsSize += String.size()+1;
-
-      Contents.push_back(attr);
-    }
-
-    void Finish() {
-      // Vendor size + Vendor name + '\0'
-      const size_t VendorHeaderSize = 4 + CurrentVendor.size() + 1;
-
-      // Tag + Tag Size
-      const size_t TagHeaderSize = 1 + 4;
-
-      Streamer.EmitIntValue(VendorHeaderSize + TagHeaderSize + ContentsSize, 4);
-      Streamer.EmitBytes(CurrentVendor);
-      Streamer.EmitIntValue(0, 1); // '\0'
-
-      Streamer.EmitIntValue(ARMBuildAttrs::File, 1);
-      Streamer.EmitIntValue(TagHeaderSize + ContentsSize, 4);
-
-      // Size should have been accounted for already, now
-      // emit each field as its type (ULEB or String)
-      for (unsigned int i=0; i<Contents.size(); ++i) {
-        AttributeItemType item = Contents[i];
-        Streamer.EmitULEB128IntValue(item.Tag);
-        switch (item.Type) {
-        default: llvm_unreachable("Invalid attribute type");
-        case AttributeItemType::NumericAttribute:
-          Streamer.EmitULEB128IntValue(item.IntValue);
-          break;
-        case AttributeItemType::TextAttribute:
-          Streamer.EmitBytes(item.StringValue.upper());
-          Streamer.EmitIntValue(0, 1); // '\0'
-          break;
-        }
-      }
-
-      Contents.clear();
-    }
-  };
-
-} // end of anonymous namespace
-
-MachineLocation ARMAsmPrinter::
-getDebugValueLocation(const MachineInstr *MI) const {
-  MachineLocation Location;
-  assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
-  // Frame address.  Currently handles register +- offset only.
-  if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm())
-    Location.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
-  else {
-    DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
-  }
-  return Location;
-}
-
 /// EmitDwarfRegOp - Emit dwarf register operation.
-void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const {
+void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc,
+                                   bool Indirect) const {
   const TargetRegisterInfo *RI = TM.getRegisterInfo();
-  if (RI->getDwarfRegNum(MLoc.getReg(), false) != -1)
-    AsmPrinter::EmitDwarfRegOp(MLoc);
-  else {
-    unsigned Reg = MLoc.getReg();
-    if (Reg >= ARM::S0 && Reg <= ARM::S31) {
-      assert(ARM::S0 + 31 == ARM::S31 && "Unexpected ARM S register numbering");
-      // S registers are described as bit-pieces of a register
-      // S[2x] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 0)
-      // S[2x+1] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 32)
-
-      unsigned SReg = Reg - ARM::S0;
-      bool odd = SReg & 0x1;
-      unsigned Rx = 256 + (SReg >> 1);
-
-      OutStreamer.AddComment("DW_OP_regx for S register");
-      EmitInt8(dwarf::DW_OP_regx);
-
-      OutStreamer.AddComment(Twine(SReg));
-      EmitULEB128(Rx);
-
-      if (odd) {
-        OutStreamer.AddComment("DW_OP_bit_piece 32 32");
-        EmitInt8(dwarf::DW_OP_bit_piece);
-        EmitULEB128(32);
-        EmitULEB128(32);
-      } else {
-        OutStreamer.AddComment("DW_OP_bit_piece 32 0");
-        EmitInt8(dwarf::DW_OP_bit_piece);
-        EmitULEB128(32);
-        EmitULEB128(0);
-      }
-    } else if (Reg >= ARM::Q0 && Reg <= ARM::Q15) {
-      assert(ARM::Q0 + 15 == ARM::Q15 && "Unexpected ARM Q register numbering");
-      // Q registers Q0-Q15 are described by composing two D registers together.
-      // Qx = DW_OP_regx(256+2x) DW_OP_piece(8) DW_OP_regx(256+2x+1)
-      // DW_OP_piece(8)
-
-      unsigned QReg = Reg - ARM::Q0;
-      unsigned D1 = 256 + 2 * QReg;
-      unsigned D2 = D1 + 1;
-
-      OutStreamer.AddComment("DW_OP_regx for Q register: D1");
-      EmitInt8(dwarf::DW_OP_regx);
-      EmitULEB128(D1);
-      OutStreamer.AddComment("DW_OP_piece 8");
-      EmitInt8(dwarf::DW_OP_piece);
-      EmitULEB128(8);
-
-      OutStreamer.AddComment("DW_OP_regx for Q register: D2");
-      EmitInt8(dwarf::DW_OP_regx);
-      EmitULEB128(D2);
-      OutStreamer.AddComment("DW_OP_piece 8");
-      EmitInt8(dwarf::DW_OP_piece);
-      EmitULEB128(8);
+  if (RI->getDwarfRegNum(MLoc.getReg(), false) != -1) {
+    AsmPrinter::EmitDwarfRegOp(MLoc, Indirect);
+    return;
+  }
+  assert(MLoc.isReg() && !Indirect &&
+         "This doesn't support offset/indirection - implement it if needed");
+  unsigned Reg = MLoc.getReg();
+  if (Reg >= ARM::S0 && Reg <= ARM::S31) {
+    assert(ARM::S0 + 31 == ARM::S31 && "Unexpected ARM S register numbering");
+    // S registers are described as bit-pieces of a register
+    // S[2x] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 0)
+    // S[2x+1] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 32)
+
+    unsigned SReg = Reg - ARM::S0;
+    bool odd = SReg & 0x1;
+    unsigned Rx = 256 + (SReg >> 1);
+
+    OutStreamer.AddComment("DW_OP_regx for S register");
+    EmitInt8(dwarf::DW_OP_regx);
+
+    OutStreamer.AddComment(Twine(SReg));
+    EmitULEB128(Rx);
+
+    if (odd) {
+      OutStreamer.AddComment("DW_OP_bit_piece 32 32");
+      EmitInt8(dwarf::DW_OP_bit_piece);
+      EmitULEB128(32);
+      EmitULEB128(32);
+    } else {
+      OutStreamer.AddComment("DW_OP_bit_piece 32 0");
+      EmitInt8(dwarf::DW_OP_bit_piece);
+      EmitULEB128(32);
+      EmitULEB128(0);
     }
+  } else if (Reg >= ARM::Q0 && Reg <= ARM::Q15) {
+    assert(ARM::Q0 + 15 == ARM::Q15 && "Unexpected ARM Q register numbering");
+    // Q registers Q0-Q15 are described by composing two D registers together.
+    // Qx = DW_OP_regx(256+2x) DW_OP_piece(8) DW_OP_regx(256+2x+1)
+    // DW_OP_piece(8)
+
+    unsigned QReg = Reg - ARM::Q0;
+    unsigned D1 = 256 + 2 * QReg;
+    unsigned D2 = D1 + 1;
+
+    OutStreamer.AddComment("DW_OP_regx for Q register: D1");
+    EmitInt8(dwarf::DW_OP_regx);
+    EmitULEB128(D1);
+    OutStreamer.AddComment("DW_OP_piece 8");
+    EmitInt8(dwarf::DW_OP_piece);
+    EmitULEB128(8);
+
+    OutStreamer.AddComment("DW_OP_regx for Q register: D2");
+    EmitInt8(dwarf::DW_OP_regx);
+    EmitULEB128(D2);
+    OutStreamer.AddComment("DW_OP_piece 8");
+    EmitInt8(dwarf::DW_OP_piece);
+    EmitULEB128(8);
   }
 }
 
@@ -312,7 +145,7 @@ void ARMAsmPrinter::EmitXXStructor(const Constant *CV) {
   const GlobalValue *GV = dyn_cast<GlobalValue>(CV->stripPointerCasts());
   assert(GV && "C++ constructor pointer was not a GlobalValue!");
 
-  const MCExpr *E = MCSymbolRefExpr::Create(Mang->getSymbol(GV),
+  const MCExpr *E = MCSymbolRefExpr::Create(getSymbol(GV),
                                             (Subtarget->isTargetDarwin() ?
                                              MCSymbolRefExpr::VK_None :
                                              MCSymbolRefExpr::VK_ARM_TARGET1),
@@ -373,7 +206,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
     else if ((Modifier && strcmp(Modifier, "hi16") == 0) ||
              (TF & ARMII::MO_HI16))
       O << ":upper16:";
-    O << *Mang->getSymbol(GV);
+    O << *getSymbol(GV);
 
     printOffset(MO.getOffset(), O);
     if (TF == ARMII::MO_PLT)
@@ -474,8 +307,14 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
       // This takes advantage of the 2 operand-ness of ldm/stm and that we've
       // already got the operands in registers that are operands to the
      // inline asm statement.
-
-      O << "{" << ARMInstPrinter::getRegisterName(RegBegin);
+      O << "{";
+      if (ARM::GPRPairRegClass.contains(RegBegin)) {
+        const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+        unsigned Reg0 = TRI->getSubReg(RegBegin, ARM::gsub_0);
+        O << ARMInstPrinter::getRegisterName(Reg0) << ", ";;
+        RegBegin = TRI->getSubReg(RegBegin, ARM::gsub_1);
+      }
+      O << ARMInstPrinter::getRegisterName(RegBegin);
 
       // FIXME: The register allocator not only may not have given us the
       // registers in sequence, but may not be in ascending registers. This
@@ -500,7 +339,38 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
       if (!FlagsOP.isImm())
         return true;
       unsigned Flags = FlagsOP.getImm();
+
+      // This operand may not be the one that actually provides the register. If
+      // it's tied to a previous one then we should refer instead to that one
+      // for registers and their classes.
+      unsigned TiedIdx;
+      if (InlineAsm::isUseOperandTiedToDef(Flags, TiedIdx)) {
+        for (OpNum = InlineAsm::MIOp_FirstOperand; TiedIdx; --TiedIdx) {
+          unsigned OpFlags = MI->getOperand(OpNum).getImm();
+          OpNum += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
+        }
+        Flags = MI->getOperand(OpNum).getImm();
+
+        // Later code expects OpNum to be pointing at the register rather than
+        // the flags.
+        OpNum += 1;
+      }
+
       unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+      unsigned RC;
+      InlineAsm::hasRegClassConstraint(Flags, RC);
+      if (RC == ARM::GPRPairRegClassID) {
+        if (NumVals != 1)
+          return true;
+        const MachineOperand &MO = MI->getOperand(OpNum);
+        if (!MO.isReg())
+          return true;
+        const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+        unsigned Reg = TRI->getSubReg(MO.getReg(), ExtraCode[0] == 'Q' ?
+            ARM::gsub_0 : ARM::gsub_1);
+        O << ARMInstPrinter::getRegisterName(Reg);
+        return false;
+      }
       if (NumVals != 2)
         return true;
       unsigned RegOp = ExtraCode[0] == 'Q' ? OpNum : OpNum + 1;
@@ -704,11 +574,6 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
     // generates code that does this, it is always safe to set.
     OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
   }
-  // FIXME: This should eventually end up somewhere else where more
-  // intelligent flag decisions can be made. For now we are just maintaining
-  // the status quo for ARM and setting EF_ARM_EABI_VER5 as the default.
-  if (MCELFStreamer *MES = dyn_cast<MCELFStreamer>(&OutStreamer))
-    MES->getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5);
 }
 
 //===----------------------------------------------------------------------===//
@@ -718,145 +583,150 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
 // to appear in the .ARM.attributes section in ELF.
 // Instead of subclassing the MCELFStreamer, we do the work here.
 
-void ARMAsmPrinter::emitAttributes() {
-
-  emitARMAttributeSection();
-
-  /* GAS expect .fpu to be emitted, regardless of VFP build attribute */
-  bool emitFPU = false;
-  AttributeEmitter *AttrEmitter;
-  if (OutStreamer.hasRawTextSupport()) {
-    AttrEmitter = new AsmAttributeEmitter(OutStreamer);
-    emitFPU = true;
-  } else {
-    MCObjectStreamer &O = static_cast<MCObjectStreamer&>(OutStreamer);
-    AttrEmitter = new ObjectAttributeEmitter(O);
-  }
-
-  AttrEmitter->MaybeSwitchVendor("aeabi");
-
-  std::string CPUString = Subtarget->getCPUString();
-
-  if (CPUString == "cortex-a8" ||
-      Subtarget->isCortexA8()) {
-    AttrEmitter->EmitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a8");
-    AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7);
-    AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch_profile,
-                               ARMBuildAttrs::ApplicationProfile);
-    AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use,
-                               ARMBuildAttrs::Allowed);
-    AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
-                               ARMBuildAttrs::AllowThumb32);
-    // Fixme: figure out when this is emitted.
-    //AttrEmitter->EmitAttribute(ARMBuildAttrs::WMMX_arch,
-    //                           ARMBuildAttrs::AllowWMMXv1);
-    //
-
-    /// ADD additional Else-cases here!
-  } else if (CPUString == "xscale") {
-    AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v5TEJ);
-    AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use,
-                               ARMBuildAttrs::Allowed);
-    AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
-                               ARMBuildAttrs::Allowed);
-  } else if (CPUString == "generic") {
-    // For a generic CPU, we assume a standard v7a architecture in Subtarget.
-    AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7);
-    AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch_profile,
-                               ARMBuildAttrs::ApplicationProfile);
-    AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use,
-                               ARMBuildAttrs::Allowed);
-    AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
-                               ARMBuildAttrs::AllowThumb32);
-  } else if (Subtarget->hasV7Ops()) {
-    AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7);
-    AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
-                               ARMBuildAttrs::AllowThumb32);
+static ARMBuildAttrs::CPUArch getArchForCPU(StringRef CPU,
+                                            const ARMSubtarget *Subtarget) {
+  if (CPU == "xscale")
+    return ARMBuildAttrs::v5TEJ;
+
+  if (Subtarget->hasV8Ops())
+    return ARMBuildAttrs::v8;
+  else if (Subtarget->hasV7Ops()) {
+    if (Subtarget->isMClass() && Subtarget->hasThumb2DSP())
+      return ARMBuildAttrs::v7E_M;
+    return ARMBuildAttrs::v7;
   } else if (Subtarget->hasV6T2Ops())
-    AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v6T2);
+    return ARMBuildAttrs::v6T2;
+  else if (Subtarget->hasV6MOps())
+    return ARMBuildAttrs::v6S_M;
   else if (Subtarget->hasV6Ops())
-    AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v6);
+    return ARMBuildAttrs::v6;
   else if (Subtarget->hasV5TEOps())
-    AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v5TE);
+    return ARMBuildAttrs::v5TE;
   else if (Subtarget->hasV5TOps())
-    AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v5T);
+    return ARMBuildAttrs::v5T;
   else if (Subtarget->hasV4TOps())
-    AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v4T);
+    return ARMBuildAttrs::v4T;
+  else
+    return ARMBuildAttrs::v4;
+}
 
-  if (Subtarget->hasNEON() && emitFPU) {
-    /* NEON is not exactly a VFP architecture, but GAS emit one of
-     * neon/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */
-    if (Subtarget->hasVFP4())
-      AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
-                                     "neon-vfpv4");
-    else
-      AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon");
-    /* If emitted for NEON, omit from VFP below, since you can have both
-     * NEON and VFP in build attributes but only one .fpu */
-    emitFPU = false;
+void ARMAsmPrinter::emitAttributes() {
+  MCTargetStreamer &TS = OutStreamer.getTargetStreamer();
+  ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
+
+  ATS.switchVendor("aeabi");
+
+  std::string CPUString = Subtarget->getCPUString();
+
+  if (CPUString != "generic")
+    ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, CPUString);
+
+  ATS.emitAttribute(ARMBuildAttrs::CPU_arch,
+                    getArchForCPU(CPUString, Subtarget));
+
+  if (Subtarget->isAClass()) {
+    ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile,
+                      ARMBuildAttrs::ApplicationProfile);
+  } else if (Subtarget->isRClass()) {
+    ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile,
+                      ARMBuildAttrs::RealTimeProfile);
+  } else if (Subtarget->isMClass()){
+    ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile,
+                      ARMBuildAttrs::MicroControllerProfile);
   }
 
-  /* VFPv4 + .fpu */
-  if (Subtarget->hasVFP4()) {
-    AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
-                               ARMBuildAttrs::AllowFPv4A);
-    if (emitFPU)
-      AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv4");
-
-  /* VFPv3 + .fpu */
-  } else if (Subtarget->hasVFP3()) {
-    AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
-                               ARMBuildAttrs::AllowFPv3A);
-    if (emitFPU)
-      AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv3");
-
-  /* VFPv2 + .fpu */
-  } else if (Subtarget->hasVFP2()) {
-    AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
-                               ARMBuildAttrs::AllowFPv2);
-    if (emitFPU)
-      AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv2");
+  ATS.emitAttribute(ARMBuildAttrs::ARM_ISA_use, Subtarget->hasARMOps() ?
+                      ARMBuildAttrs::Allowed : ARMBuildAttrs::Not_Allowed);
+  if (Subtarget->isThumb1Only()) {
+    ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use,
+                      ARMBuildAttrs::Allowed);
+  } else if (Subtarget->hasThumb2()) {
+    ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use,
+                      ARMBuildAttrs::AllowThumb32);
   }
 
-  /* TODO: ARMBuildAttrs::Allowed is not completely accurate,
-   * since NEON can have 1 (allowed) or 2 (MAC operations) */
   if (Subtarget->hasNEON()) {
-    AttrEmitter->EmitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
-                               ARMBuildAttrs::Allowed);
+    /* NEON is not exactly a VFP architecture, but GAS emit one of
+     * neon/neon-fp-armv8/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */
+    if (Subtarget->hasFPARMv8()) {
+      if (Subtarget->hasCrypto())
+        ATS.emitFPU(ARM::CRYPTO_NEON_FP_ARMV8);
+      else
+        ATS.emitFPU(ARM::NEON_FP_ARMV8);
+    }
+    else if (Subtarget->hasVFP4())
+      ATS.emitFPU(ARM::NEON_VFPV4);
+    else
+      ATS.emitFPU(ARM::NEON);
+    // Emit Tag_Advanced_SIMD_arch for ARMv8 architecture
+    if (Subtarget->hasV8Ops())
+      ATS.emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
+                        ARMBuildAttrs::AllowNeonARMv8);
+  } else {
+    if (Subtarget->hasFPARMv8())
+      ATS.emitFPU(ARM::FP_ARMV8);
+    else if (Subtarget->hasVFP4())
+      ATS.emitFPU(Subtarget->hasD16() ? ARM::VFPV4_D16 : ARM::VFPV4);
+    else if (Subtarget->hasVFP3())
+      ATS.emitFPU(Subtarget->hasD16() ? ARM::VFPV3_D16 : ARM::VFPV3);
+    else if (Subtarget->hasVFP2())
+      ATS.emitFPU(ARM::VFPV2);
   }
 
   // Signal various FP modes.
   if (!TM.Options.UnsafeFPMath) {
-    AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_denormal,
-                               ARMBuildAttrs::Allowed);
-    AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_exceptions,
-                               ARMBuildAttrs::Allowed);
+    ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal, ARMBuildAttrs::Allowed);
+    ATS.emitAttribute(ARMBuildAttrs::ABI_FP_exceptions,
+                      ARMBuildAttrs::Allowed);
   }
 
   if (TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath)
-    AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_number_model,
-                               ARMBuildAttrs::Allowed);
+    ATS.emitAttribute(ARMBuildAttrs::ABI_FP_number_model,
+                      ARMBuildAttrs::Allowed);
   else
-    AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_number_model,
-                               ARMBuildAttrs::AllowIEE754);
+    ATS.emitAttribute(ARMBuildAttrs::ABI_FP_number_model,
+                      ARMBuildAttrs::AllowIEE754);
 
   // FIXME: add more flags to ARMBuildAttrs.h
   // 8-bytes alignment stuff.
-  AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_align8_needed, 1);
-  AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_align8_preserved, 1);
+  ATS.emitAttribute(ARMBuildAttrs::ABI_align8_needed, 1);
+  ATS.emitAttribute(ARMBuildAttrs::ABI_align8_preserved, 1);
+
+  // ABI_HardFP_use attribute to indicate single precision FP.
+  if (Subtarget->isFPOnlySP())
+    ATS.emitAttribute(ARMBuildAttrs::ABI_HardFP_use,
+                      ARMBuildAttrs::HardFPSinglePrecision);
 
   // Hard float.  Use both S and D registers and conform to AAPCS-VFP.
-  if (Subtarget->isAAPCS_ABI() && TM.Options.FloatABIType == FloatABI::Hard) {
-    AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_HardFP_use, 3);
-    AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_VFP_args, 1);
-  }
+  if (Subtarget->isAAPCS_ABI() && TM.Options.FloatABIType == FloatABI::Hard)
+    ATS.emitAttribute(ARMBuildAttrs::ABI_VFP_args, ARMBuildAttrs::HardFPAAPCS);
+
   // FIXME: Should we signal R9 usage?
 
-  if (Subtarget->hasDivide())
-    AttrEmitter->EmitAttribute(ARMBuildAttrs::DIV_use, 1);
+  if (Subtarget->hasFP16())
+    ATS.emitAttribute(ARMBuildAttrs::FP_HP_extension, ARMBuildAttrs::AllowHPFP);
+
+  if (Subtarget->hasMPExtension())
+    ATS.emitAttribute(ARMBuildAttrs::MPextension_use, ARMBuildAttrs::AllowMP);
+
+  if (Subtarget->hasDivide()) {
+    // Check if hardware divide is only available in thumb2 or ARM as well.
+    ATS.emitAttribute(ARMBuildAttrs::DIV_use,
+      Subtarget->hasDivideInARMMode() ? ARMBuildAttrs::AllowDIVExt :
+        ARMBuildAttrs::AllowDIVIfExists);
+  }
 
-  AttrEmitter->Finish();
-  delete AttrEmitter;
+  if (Subtarget->hasTrustZone() && Subtarget->hasVirtualization())
+    ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
+                      ARMBuildAttrs::AllowTZVirtualization);
+  else if (Subtarget->hasTrustZone())
+    ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
+                      ARMBuildAttrs::AllowTZ);
+  else if (Subtarget->hasVirtualization())
+    ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
+                      ARMBuildAttrs::AllowVirtualization);
+
+  ATS.finishAttributeSection();
 }
 
 void ARMAsmPrinter::emitARMAttributeSection() {
@@ -908,7 +778,7 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV) {
   bool isIndirect = Subtarget->isTargetDarwin() &&
    Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel());
   if (!isIndirect)
-    return Mang->getSymbol(GV);
+    return getSymbol(GV);
 
   // FIXME: Remove this when Darwin transition to @GOT like syntax.
   MCSymbol *MCSym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
@@ -919,7 +789,7 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV) {
     MMIMachO.getGVStubEntry(MCSym);
   if (StubSym.getPointer() == 0)
    StubSym = MachineModuleInfoImpl::
-      StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+      StubValueTy(getSymbol(GV), !GV->hasInternalLinkage());
   return MCSym;
 }
 
@@ -1092,27 +962,12 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
     OutStreamer.EmitDataRegion(MCDR_DataRegionEnd);
 }
 
-void ARMAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
-                                           raw_ostream &OS) {
-  unsigned NOps = MI->getNumOperands();
-  assert(NOps==4);
-  OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
-  // cast away const; DIetc do not take const operands for some reason.
-  DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
-  OS << V.getName();
-  OS << " <- ";
-  // Frame address.  Currently handles register +- offset only.
-  assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
-  OS << '['; printOperand(MI, 0, OS); OS << '+'; printOperand(MI, 1, OS);
-  OS << ']';
-  OS << "+";
-  printOperand(MI, NOps-2, OS);
-}
-
 void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
   assert(MI->getFlag(MachineInstr::FrameSetup) &&
       "Only instruction which are involved into frame setup code are allowed");
 
+  MCTargetStreamer &TS = OutStreamer.getTargetStreamer();
+  ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
   const MachineFunction &MF = *MI->getParent()->getParent();
   const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
   const ARMFunctionInfo &AFI = *MF.getInfo<ARMFunctionInfo>();
@@ -1175,7 +1030,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
         RegList.push_back(SrcReg);
         break;
       }
-      OutStreamer.EmitRegSave(RegList, Opc == ARM::VSTMDDB_UPD);
+      ATS.emitRegSave(RegList, Opc == ARM::VSTMDDB_UPD);
     } else {
       // Changes of stack / frame pointer.
       if (SrcReg == ARM::SP) {
@@ -1223,11 +1078,11 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
       if (DstReg == FramePtr && FramePtr != ARM::SP)
         // Set-up of the frame pointer. Positive values correspond to "add"
         // instruction.
-        OutStreamer.EmitSetFP(FramePtr, ARM::SP, -Offset);
+        ATS.emitSetFP(FramePtr, ARM::SP, -Offset);
       else if (DstReg == ARM::SP) {
         // Change of SP by an offset. Positive values correspond to "sub"
        // instruction.
-        OutStreamer.EmitPad(Offset);
+        ATS.emitPad(Offset);
      } else {
         MI->dump();
         llvm_unreachable("Unsupported opcode for unwinding information");
@@ -1272,15 +1127,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
   unsigned Opc = MI->getOpcode();
   switch (Opc) {
   case ARM::t2MOVi32imm: llvm_unreachable("Should be lowered by thumb2it pass");
-  case ARM::DBG_VALUE: {
-    if (isVerbose() && OutStreamer.hasRawTextSupport()) {
-      SmallString<128> TmpStr;
-      raw_svector_ostream OS(TmpStr);
-      PrintDebugValueComment(MI, OS);
-      OutStreamer.EmitRawText(StringRef(OS.str()));
-    }
-    return;
-  }
+  case ARM::DBG_VALUE: llvm_unreachable("Should be handled by generic printing");
   case ARM::LEApcrel:
   case ARM::tLEApcrel:
   case ARM::t2LEApcrel: {
@@ -1376,7 +1223,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
       .addReg(0));
 
     const GlobalValue *GV = MI->getOperand(0).getGlobal();
-    MCSymbol *GVSym = Mang->getSymbol(GV);
+    MCSymbol *GVSym = getSymbol(GV);
     const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
     OutStreamer.EmitInstruction(MCInstBuilder(ARM::Bcc)
       .addExpr(GVSymExpr)
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
index c945e4f28699..de72e063e0d5 100644
--- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
@@ -97,13 +97,9 @@ private:
                             const MachineInstr *MI);
 
 public:
-  void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
-
-  virtual MachineLocation
-    getDebugValueLocation(const MachineInstr *MI) const LLVM_OVERRIDE;
-
   /// EmitDwarfRegOp - Emit dwarf register operation.
-  virtual void EmitDwarfRegOp(const MachineLocation &MLoc) const LLVM_OVERRIDE;
+  virtual void EmitDwarfRegOp(const MachineLocation &MLoc, bool Indirect) const
+    LLVM_OVERRIDE;
 
   virtual unsigned getISAEncoding() LLVM_OVERRIDE {
     // ARM/Darwin adds ISA to the DWARF info for each function.
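The EmitDwarfRegOp rewrite above keeps the established encoding for registers DWARF has no direct number for: an S register is half of a D register, and a Q register is a pair of D registers. A standalone sketch of the arithmetic, with the DW_OP constants reduced to printed names (this mirrors the comments in the diff, it is not LLVM code):

    // The S/Q register location recipes from the diff's comments:
    //   S[2x]   = DW_OP_regx(256 + x) DW_OP_bit_piece(32, 0)
    //   S[2x+1] = DW_OP_regx(256 + x) DW_OP_bit_piece(32, 32)
    //   Qn      = DW_OP_regx(256 + 2n) DW_OP_piece(8)
    //             DW_OP_regx(256 + 2n + 1) DW_OP_piece(8)
    #include <cstdio>

    static void describeS(unsigned SReg) {
      unsigned Rx = 256 + (SReg >> 1);        // the containing D register
      unsigned Offset = (SReg & 1) ? 32 : 0;  // high or low 32-bit half
      std::printf("s%u = DW_OP_regx(%u) DW_OP_bit_piece(32, %u)\n",
                  SReg, Rx, Offset);
    }

    static void describeQ(unsigned QReg) {
      unsigned D1 = 256 + 2 * QReg, D2 = D1 + 1;
      std::printf("q%u = DW_OP_regx(%u) DW_OP_piece(8) "
                  "DW_OP_regx(%u) DW_OP_piece(8)\n",
                  QReg, D1, D2);
    }

    int main() {
      describeS(5);  // s5 is the top half of d2: regx 258, offset 32
      describeQ(3);  // q3 is d6:d7: regx 262 and 263
      return 0;
    }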
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 60050542716c..f835a4e5b5fe 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -11,10 +11,11 @@ // //===----------------------------------------------------------------------===// -#include "ARMBaseInstrInfo.h" #include "ARM.h" +#include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" #include "ARMConstantPoolValue.h" +#include "ARMFeatures.h" #include "ARMHazardRecognizer.h" #include "ARMMachineFunctionInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" @@ -36,7 +37,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#define GET_INSTRINFO_CTOR +#define GET_INSTRINFO_CTOR_DTOR #include "ARMGenInstrInfo.inc" using namespace llvm; @@ -113,8 +114,7 @@ ScheduleHazardRecognizer *ARMBaseInstrInfo:: CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const { if (Subtarget.isThumb2() || Subtarget.hasVFP2()) - return (ScheduleHazardRecognizer *) - new ARMHazardRecognizer(II, *this, getRegisterInfo(), Subtarget, DAG); + return (ScheduleHazardRecognizer *)new ARMHazardRecognizer(II, DAG); return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG); } @@ -273,104 +273,90 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const { - // If the block has no terminators, it just falls into the block after it. + TBB = 0; + FBB = 0; + MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin()) - return false; + return false; // Empty blocks are easy. --I; - while (I->isDebugValue()) { - if (I == MBB.begin()) - return false; - --I; - } - - // Get the last instruction in the block. - MachineInstr *LastInst = I; - unsigned LastOpc = LastInst->getOpcode(); - // Check if it's an indirect branch first, this should return 'unanalyzable' - // even if it's predicated. - if (isIndirectBranchOpcode(LastOpc)) - return true; + // Walk backwards from the end of the basic block until the branch is + // analyzed or we give up. + while (isPredicated(I) || I->isTerminator()) { - if (!isUnpredicatedTerminator(I)) - return false; + // Flag to be raised on unanalyzeable instructions. This is useful in cases + // where we want to clean up on the end of the basic block before we bail + // out. + bool CantAnalyze = false; - // If there is only one terminator instruction, process it. - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { - if (isUncondBranchOpcode(LastOpc)) { - TBB = LastInst->getOperand(0).getMBB(); - return false; + // Skip over DEBUG values and predicated nonterminators. + while (I->isDebugValue() || !I->isTerminator()) { + if (I == MBB.begin()) + return false; + --I; } - if (isCondBranchOpcode(LastOpc)) { - // Block ends with fall-through condbranch. - TBB = LastInst->getOperand(0).getMBB(); - Cond.push_back(LastInst->getOperand(1)); - Cond.push_back(LastInst->getOperand(2)); - return false; + + if (isIndirectBranchOpcode(I->getOpcode()) || + isJumpTableBranchOpcode(I->getOpcode())) { + // Indirect branches and jump tables can't be analyzed, but we still want + // to clean up any instructions at the tail of the basic block. 
+ CantAnalyze = true; + } else if (isUncondBranchOpcode(I->getOpcode())) { + TBB = I->getOperand(0).getMBB(); + } else if (isCondBranchOpcode(I->getOpcode())) { + // Bail out if we encounter multiple conditional branches. + if (!Cond.empty()) + return true; + + assert(!FBB && "FBB should have been null."); + FBB = TBB; + TBB = I->getOperand(0).getMBB(); + Cond.push_back(I->getOperand(1)); + Cond.push_back(I->getOperand(2)); + } else if (I->isReturn()) { + // Returns can't be analyzed, but we should run cleanup. + CantAnalyze = !isPredicated(I); + } else { + // We encountered other unrecognized terminator. Bail out immediately. + return true; } - return true; // Can't handle indirect branch. - } - // Get the instruction before it if it is a terminator. - MachineInstr *SecondLastInst = I; - unsigned SecondLastOpc = SecondLastInst->getOpcode(); - - // If AllowModify is true and the block ends with two or more unconditional - // branches, delete all but the first unconditional branch. - if (AllowModify && isUncondBranchOpcode(LastOpc)) { - while (isUncondBranchOpcode(SecondLastOpc)) { - LastInst->eraseFromParent(); - LastInst = SecondLastInst; - LastOpc = LastInst->getOpcode(); - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { - // Return now the only terminator is an unconditional branch. - TBB = LastInst->getOperand(0).getMBB(); - return false; - } else { - SecondLastInst = I; - SecondLastOpc = SecondLastInst->getOpcode(); + // Cleanup code - to be run for unpredicated unconditional branches and + // returns. + if (!isPredicated(I) && + (isUncondBranchOpcode(I->getOpcode()) || + isIndirectBranchOpcode(I->getOpcode()) || + isJumpTableBranchOpcode(I->getOpcode()) || + I->isReturn())) { + // Forget any previous condition branch information - it no longer applies. + Cond.clear(); + FBB = 0; + + // If we can modify the function, delete everything below this + // unconditional branch. + if (AllowModify) { + MachineBasicBlock::iterator DI = llvm::next(I); + while (DI != MBB.end()) { + MachineInstr *InstToDelete = DI; + ++DI; + InstToDelete->eraseFromParent(); + } } } - } - // If there are three terminators, we don't know what sort of block this is. - if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) - return true; - - // If the block ends with a B and a Bcc, handle it. - if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { - TBB = SecondLastInst->getOperand(0).getMBB(); - Cond.push_back(SecondLastInst->getOperand(1)); - Cond.push_back(SecondLastInst->getOperand(2)); - FBB = LastInst->getOperand(0).getMBB(); - return false; - } + if (CantAnalyze) + return true; - // If the block ends with two unconditional branches, handle it. The second - // one is not executed, so remove it. - if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { - TBB = SecondLastInst->getOperand(0).getMBB(); - I = LastInst; - if (AllowModify) - I->eraseFromParent(); - return false; - } + if (I == MBB.begin()) + return false; - // ...likewise if it ends with a branch table followed by an unconditional - // branch. The branch folder can create these, and we must get rid of them for - // correctness of Thumb constant islands. - if ((isJumpTableBranchOpcode(SecondLastOpc) || - isIndirectBranchOpcode(SecondLastOpc)) && - isUncondBranchOpcode(LastOpc)) { - I = LastInst; - if (AllowModify) - I->eraseFromParent(); - return true; + --I; } - // Otherwise, can't handle this. 
- return true; + // We made it past the terminators without bailing out - we must have + // analyzed this branch successfully. + return false; } @@ -535,11 +521,17 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const { if (!MI->isPredicable()) return false; - if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) { - ARMFunctionInfo *AFI = - MI->getParent()->getParent()->getInfo<ARMFunctionInfo>(); - return AFI->isThumb2Function(); + ARMFunctionInfo *AFI = + MI->getParent()->getParent()->getInfo<ARMFunctionInfo>(); + + if (AFI->isThumb2Function()) { + if (getSubtarget().restrictIT()) + return isV8EligibleForIT(MI); + } else { // non-Thumb + if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) + return false; } + return true; } @@ -660,16 +652,16 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { bool GPRDest = ARM::GPRRegClass.contains(DestReg); - bool GPRSrc = ARM::GPRRegClass.contains(SrcReg); + bool GPRSrc = ARM::GPRRegClass.contains(SrcReg); if (GPRDest && GPRSrc) { AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)))); + .addReg(SrcReg, getKillRegState(KillSrc)))); return; } bool SPRDest = ARM::SPRRegClass.contains(DestReg); - bool SPRSrc = ARM::SPRRegClass.contains(SrcReg); + bool SPRSrc = ARM::SPRRegClass.contains(SrcReg); unsigned Opc = 0; if (SPRDest && SPRSrc) @@ -698,26 +690,47 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, int Spacing = 1; // Use VORRq when possible. - if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) - Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 2; - else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) - Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 4; + if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) { + Opc = ARM::VORRq; + BeginIdx = ARM::qsub_0; + SubRegs = 2; + } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) { + Opc = ARM::VORRq; + BeginIdx = ARM::qsub_0; + SubRegs = 4; // Fall back to VMOVD. - else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) - Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2; - else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) - Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3; - else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) - Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4; - else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) - Opc = ARM::MOVr, BeginIdx = ARM::gsub_0, SubRegs = 2; - - else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) - Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2, Spacing = 2; - else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) - Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3, Spacing = 2; - else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) - Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4, Spacing = 2; + } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) { + Opc = ARM::VMOVD; + BeginIdx = ARM::dsub_0; + SubRegs = 2; + } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) { + Opc = ARM::VMOVD; + BeginIdx = ARM::dsub_0; + SubRegs = 3; + } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) { + Opc = ARM::VMOVD; + BeginIdx = ARM::dsub_0; + SubRegs = 4; + } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) { + Opc = Subtarget.isThumb2() ? 
ARM::tMOVr : ARM::MOVr; + BeginIdx = ARM::gsub_0; + SubRegs = 2; + } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) { + Opc = ARM::VMOVD; + BeginIdx = ARM::dsub_0; + SubRegs = 2; + Spacing = 2; + } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) { + Opc = ARM::VMOVD; + BeginIdx = ARM::dsub_0; + SubRegs = 3; + Spacing = 2; + } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) { + Opc = ARM::VMOVD; + BeginIdx = ARM::dsub_0; + SubRegs = 4; + Spacing = 2; + } assert(Opc && "Impossible reg-to-reg copy"); @@ -726,26 +739,28 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, // Copy register tuples backward when the first Dest reg overlaps with SrcReg. if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) { - BeginIdx = BeginIdx + ((SubRegs-1)*Spacing); + BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing); Spacing = -Spacing; } #ifndef NDEBUG SmallSet<unsigned, 4> DstRegs; #endif for (unsigned i = 0; i != SubRegs; ++i) { - unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i*Spacing); - unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i*Spacing); + unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing); + unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing); assert(Dst && Src && "Bad sub-register"); #ifndef NDEBUG assert(!DstRegs.count(Src) && "destructive vector copy"); DstRegs.insert(Dst); #endif - Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst) - .addReg(Src); + Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src); // VORR takes two source operands. if (Opc == ARM::VORRq) Mov.addReg(Src); Mov = AddDefaultPred(Mov); + // MOVr can set CC. + if (Opc == ARM::MOVr) + Mov = AddDefaultCC(Mov); } // Add implicit super-register defs and kills to the last instruction. Mov->addRegisterDefined(DestReg, TRI); @@ -1214,16 +1229,6 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{ return true; } -MachineInstr* -ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, - int FrameIx, uint64_t Offset, - const MDNode *MDPtr, - DebugLoc DL) const { - MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM::DBG_VALUE)) - .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr); - return &*MIB; -} - /// Create a copy of a const pool value. Update CPI to the new index and return /// the label UID. static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) { @@ -1426,9 +1431,11 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, case ARM::VLDRD: case ARM::VLDRS: case ARM::t2LDRi8: + case ARM::t2LDRBi8: case ARM::t2LDRDi8: case ARM::t2LDRSHi8: case ARM::t2LDRi12: + case ARM::t2LDRBi12: case ARM::t2LDRSHi12: break; } @@ -1445,8 +1452,10 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, case ARM::VLDRD: case ARM::VLDRS: case ARM::t2LDRi8: + case ARM::t2LDRBi8: case ARM::t2LDRSHi8: case ARM::t2LDRi12: + case ARM::t2LDRBi12: case ARM::t2LDRSHi12: break; } @@ -1493,7 +1502,16 @@ bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, if ((Offset2 - Offset1) / 8 > 64) return false; - if (Load1->getMachineOpcode() != Load2->getMachineOpcode()) + // Check if the machine opcodes are different. If they are different + // then we consider them to not be of the same base address, + // EXCEPT in the case of Thumb2 byte loads where one is LDRBi8 and the other LDRBi12. + // In this case, they are considered to be the same because they are different + // encoding forms of the same basic instruction. 
+ if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) && + !((Load1->getMachineOpcode() == ARM::t2LDRBi8 && + Load2->getMachineOpcode() == ARM::t2LDRBi12) || + (Load1->getMachineOpcode() == ARM::t2LDRBi12 && + Load2->getMachineOpcode() == ARM::t2LDRBi8))) return false; // FIXME: overly conservative? // Four loads in a row should be sufficient. @@ -1708,7 +1726,7 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, bool PreferFalse) const { assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) && "Unknown select instruction"); - const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); MachineInstr *DefMI = canFoldIntoMOVCC(MI->getOperand(2).getReg(), MRI, this); bool Invert = !DefMI; if (!DefMI) @@ -1716,11 +1734,17 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, if (!DefMI) return 0; + // Find new register class to use. + MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1); + unsigned DestReg = MI->getOperand(0).getReg(); + const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg()); + if (!MRI.constrainRegClass(DestReg, PreviousClass)) + return 0; + // Create a new predicated version of DefMI. // Rfalse is the first use. MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), - DefMI->getDesc(), - MI->getOperand(0).getReg()); + DefMI->getDesc(), DestReg); // Copy all the DefMI operands, excluding its (null) predicate. const MCInstrDesc &DefDesc = DefMI->getDesc(); @@ -1743,7 +1767,6 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, // register operand tied to the first def. // The tie makes the register allocator ensure the FalseReg is allocated the // same register as operand 0. - MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1); FalseReg.setImplicit(); NewMI.addOperand(FalseReg); NewMI->tieOperands(0, NewMI->getNumOperands() - 1); @@ -1803,6 +1826,14 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, unsigned DestReg, unsigned BaseReg, int NumBytes, ARMCC::CondCodes Pred, unsigned PredReg, const ARMBaseInstrInfo &TII, unsigned MIFlags) { + if (NumBytes == 0 && DestReg != BaseReg) { + BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg) + .addReg(BaseReg, RegState::Kill) + .addImm((unsigned)Pred).addReg(PredReg).addReg(0) + .setMIFlags(MIFlags); + return; + } + bool isSub = NumBytes < 0; if (isSub) NumBytes = -NumBytes; @@ -1826,6 +1857,115 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, } } +bool llvm::tryFoldSPUpdateIntoPushPop(MachineFunction &MF, + MachineInstr *MI, + unsigned NumBytes) { + // This optimisation potentially adds lots of load and store + // micro-operations, it's only really a great benefit to code-size. + if (!MF.getFunction()->hasFnAttribute(Attribute::MinSize)) + return false; + + // If only one register is pushed/popped, LLVM can use an LDR/STR + // instead. We can't modify those so make sure we're dealing with an + // instruction we understand. 
+ bool IsPop = isPopOpcode(MI->getOpcode()); + bool IsPush = isPushOpcode(MI->getOpcode()); + if (!IsPush && !IsPop) + return false; + + bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD || + MI->getOpcode() == ARM::VLDMDIA_UPD; + bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH || + MI->getOpcode() == ARM::tPOP || + MI->getOpcode() == ARM::tPOP_RET; + + assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP && + MI->getOperand(1).getReg() == ARM::SP)) && + "trying to fold sp update into non-sp-updating push/pop"); + + // The VFP push & pop act on D-registers, so we can only fold an adjustment + // by a multiple of 8 bytes in correctly. Similarly rN is 4-bytes. Don't try + // if this is violated. + if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0) + return false; + + // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+ + // pred) so the list starts at 4. Thumb1 starts after the predicate. + int RegListIdx = IsT1PushPop ? 2 : 4; + + // Calculate the space we'll need in terms of registers. + unsigned FirstReg = MI->getOperand(RegListIdx).getReg(); + unsigned RD0Reg, RegsNeeded; + if (IsVFPPushPop) { + RD0Reg = ARM::D0; + RegsNeeded = NumBytes / 8; + } else { + RD0Reg = ARM::R0; + RegsNeeded = NumBytes / 4; + } + + // We're going to have to strip all list operands off before + // re-adding them since the order matters, so save the existing ones + // for later. + SmallVector<MachineOperand, 4> RegList; + for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) + RegList.push_back(MI->getOperand(i)); + + MachineBasicBlock *MBB = MI->getParent(); + const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo(); + const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF); + + // Now try to find enough space in the reglist to allocate NumBytes. + for (unsigned CurReg = FirstReg - 1; CurReg >= RD0Reg && RegsNeeded; + --CurReg) { + if (!IsPop) { + // Pushing any register is completely harmless, mark the + // register involved as undef since we don't care about it in + // the slightest. + RegList.push_back(MachineOperand::CreateReg(CurReg, false, false, + false, false, true)); + --RegsNeeded; + continue; + } + + // However, we can only pop an extra register if it's not live. For + // registers live within the function we might clobber a return value + // register; the other way a register can be live here is if it's + // callee-saved. + if (isCalleeSavedRegister(CurReg, CSRegs) || + MBB->computeRegisterLiveness(TRI, CurReg, MI) != + MachineBasicBlock::LQR_Dead) { + // VFP pops don't allow holes in the register list, so any skip is fatal + // for our transformation. GPR pops do, so we should just keep looking. + if (IsVFPPushPop) + return false; + else + continue; + } + + // Mark the unimportant registers as <def,dead> in the POP. + RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false, + true)); + --RegsNeeded; + } + + if (RegsNeeded > 0) + return false; + + // Finally we know we can profitably perform the optimisation so go + // ahead: strip all existing registers off and add them back again + // in the right order. + for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) + MI->RemoveOperand(i); + + // Add the complete list back in. 
+ MachineInstrBuilder MIB(MF, &*MI); + for (int i = RegList.size() - 1; i >= 0; --i) + MIB.addOperand(RegList[i]); + + return true; +} + bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, int &Offset, const ARMBaseInstrInfo &TII) { @@ -2232,8 +2372,32 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, isSafe = true; break; } - // Condition code is after the operand before CPSR. - ARMCC::CondCodes CC = (ARMCC::CondCodes)Instr.getOperand(IO-1).getImm(); + // Condition code is after the operand before CPSR except for VSELs. + ARMCC::CondCodes CC; + bool IsInstrVSel = true; + switch (Instr.getOpcode()) { + default: + IsInstrVSel = false; + CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm(); + break; + case ARM::VSELEQD: + case ARM::VSELEQS: + CC = ARMCC::EQ; + break; + case ARM::VSELGTD: + case ARM::VSELGTS: + CC = ARMCC::GT; + break; + case ARM::VSELGED: + case ARM::VSELGES: + CC = ARMCC::GE; + break; + case ARM::VSELVSS: + case ARM::VSELVSD: + CC = ARMCC::VS; + break; + } + if (Sub) { ARMCC::CondCodes NewCC = getSwappedCondition(CC); if (NewCC == ARMCC::AL) @@ -2244,11 +2408,14 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // If it is safe to remove CmpInstr, the condition code of these // operands will be modified. if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 && - Sub->getOperand(2).getReg() == SrcReg) - OperandsToUpdate.push_back(std::make_pair(&((*I).getOperand(IO-1)), - NewCC)); - } - else + Sub->getOperand(2).getReg() == SrcReg) { + // VSel doesn't support condition code update. + if (IsInstrVSel) + return false; + OperandsToUpdate.push_back( + std::make_pair(&((*I).getOperand(IO - 1)), NewCC)); + } + } else switch (CC) { default: // CPSR can be used multiple times; we should continue. @@ -3604,6 +3771,24 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, return Latency; } +unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr *MI) const { + if (MI->isCopyLike() || MI->isInsertSubreg() || + MI->isRegSequence() || MI->isImplicitDef()) + return 0; + + if (MI->isBundle()) + return 0; + + const MCInstrDesc &MCID = MI->getDesc(); + + if (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR)) { + // When predicated, CPSR is an additional source operand for CPSR-updating + // instructions; this apparently increases their latencies. + return 1; + } + return 0; +} + unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr *MI, unsigned *PredCost) const { @@ -3685,8 +3870,7 @@ hasHighOperandLatency(const InstrItineraryData *ItinData, return true; // Hoist VFP / NEON instructions with 4 or higher latency. - int Latency = computeOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx, - /*FindMin=*/false); + int Latency = computeOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx); if (Latency < 0) Latency = getInstrLatency(ItinData, DefMI); if (Latency <= 3) @@ -4137,7 +4321,7 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines // the full D-register by loading the same value to both lanes. The // instruction is micro-coded with 2 uops, so don't do this until we can - // properly schedule micro-coded instuctions. The dispatcher stalls cause + // properly schedule micro-coded instructions. The dispatcher stalls cause // too big regressions. // Insert the dependency-breaking FCONSTD before MI.
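The getSwappedCondition call in the hunk above captures a standard identity: when CMP(a, b) is replaced by an already-present SUB(b, a), each user's condition code must be swapped because the operand order is reversed, and VSELs are rejected outright since their condition is baked into the opcode rather than held in a rewritable operand. A standalone sketch of the swap with an illustrative enum (the real code uses ARMCC::CondCodes and also handles the unsigned conditions):

    enum Cond { EQ, NE, GT, GE, LT, LE, AL };

    // AL doubles as "cannot swap", matching the bail-out in the pass above.
    inline Cond getSwappedCondition(Cond CC) {
      switch (CC) {
      case EQ: return EQ; // a - b == 0 iff b - a == 0
      case NE: return NE;
      case GT: return LT; // a - b > 0 iff b - a < 0
      case GE: return LE;
      case LT: return GT;
      case LE: return GE;
      default: return AL;
      }
    }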
@@ -4152,6 +4336,8 @@ bool ARMBaseInstrInfo::hasNOP() const { } bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const { + if (MI->getNumOperands() < 4) + return true; unsigned ShOpVal = MI->getOperand(3).getImm(); unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal); // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1. diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index 2ef659c23bd6..93e59647d220 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -46,7 +46,7 @@ public: MachineBasicBlock::iterator &MBBI, LiveVariables *LV) const; - virtual const ARMBaseRegisterInfo &getRegisterInfo() const =0; + virtual const ARMBaseRegisterInfo &getRegisterInfo() const = 0; const ARMSubtarget &getSubtarget() const { return Subtarget; } ScheduleHazardRecognizer * @@ -125,12 +125,6 @@ public: virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; - virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, - int FrameIx, - uint64_t Offset, - const MDNode *MDPtr, - DebugLoc DL) const; - virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SubIdx, @@ -270,6 +264,8 @@ private: const MCInstrDesc &UseMCID, unsigned UseIdx, unsigned UseAlign) const; + unsigned getPredicationCost(const MachineInstr *MI) const; + unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr *MI, unsigned *PredCost = 0) const; @@ -366,6 +362,17 @@ bool isIndirectBranchOpcode(int Opc) { return Opc == ARM::BX || Opc == ARM::MOVPCRX || Opc == ARM::tBRIND; } +static inline bool isPopOpcode(int Opc) { + return Opc == ARM::tPOP_RET || Opc == ARM::LDMIA_RET || + Opc == ARM::t2LDMIA_RET || Opc == ARM::tPOP || Opc == ARM::LDMIA_UPD || + Opc == ARM::t2LDMIA_UPD || Opc == ARM::VLDMDIA_UPD; +} + +static inline bool isPushOpcode(int Opc) { + return Opc == ARM::tPUSH || Opc == ARM::t2STMDB_UPD || + Opc == ARM::STMDB_UPD || Opc == ARM::VSTMDDB_UPD; +} + /// getInstrPredicate - If instruction is predicated, returns its predicate /// condition, otherwise returns AL. It also returns the condition code /// register by reference. @@ -405,6 +412,13 @@ void emitThumbRegPlusImmediate(MachineBasicBlock &MBB, const ARMBaseRegisterInfo& MRI, unsigned MIFlags = 0); +/// Tries to add registers to the reglist of a given base-updating +/// push/pop instruction to adjust the stack by an additional +/// NumBytes. This can save a few bytes per function in code-size, but +/// obviously generates more memory traffic. As such, it only takes +/// effect in functions being optimised for size. +bool tryFoldSPUpdateIntoPushPop(MachineFunction &MF, MachineInstr *MI, + unsigned NumBytes); /// rewriteARMFrameIndex / rewriteT2FrameIndex - /// Rewrite MI to access 'Offset' bytes from the FP. 
Return false if the diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp index b0d34a76b014..8717dc0cde90 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -43,46 +43,73 @@ using namespace llvm; -ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii, - const ARMSubtarget &sti) - : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), TII(tii), STI(sti), +ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMSubtarget &sti) + : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), STI(sti), FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11), BasePtr(ARM::R6) { } const uint16_t* ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { - bool ghcCall = false; - - if (MF) { - const Function *F = MF->getFunction(); - ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false); - } - - if (ghcCall) { - return CSR_GHC_SaveList; - } - else { - return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) - ? CSR_iOS_SaveList : CSR_AAPCS_SaveList; + const uint16_t *RegList = (STI.isTargetIOS() && !STI.isAAPCS_ABI()) + ? CSR_iOS_SaveList + : CSR_AAPCS_SaveList; + + if (!MF) return RegList; + + const Function *F = MF->getFunction(); + if (F->getCallingConv() == CallingConv::GHC) { + // The GHC set of callee-saved regs is empty, as all those regs are + // used for passing STG regs around. + return CSR_NoRegs_SaveList; + } else if (F->hasFnAttribute("interrupt")) { + if (STI.isMClass()) { + // M-class CPUs have hardware which saves the registers needed to allow a + // function conforming to the AAPCS to act as a handler. + return CSR_AAPCS_SaveList; + } else if (F->getFnAttribute("interrupt").getValueAsString() == "FIQ") { + // Fast interrupt mode gives the handler a private copy of R8-R14, so fewer + // registers need to be saved to restore user-mode state. + return CSR_FIQ_SaveList; + } else { + // Generally only R13-R14 (i.e. SP, LR) are automatically preserved by + // exception handling. + return CSR_GenericInt_SaveList; + } } + + return RegList; } const uint32_t* -ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID) const { +ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { + if (CC == CallingConv::GHC) + // This is academic because all GHC calls are (supposed to be) tail calls. + return CSR_NoRegs_RegMask; return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) ? CSR_iOS_RegMask : CSR_AAPCS_RegMask; } const uint32_t* -ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const { - return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) - ?
CSR_iOS_ThisReturn_RegMask : CSR_AAPCS_ThisReturn_RegMask; +ARMBaseRegisterInfo::getNoPreservedMask() const { + return CSR_NoRegs_RegMask; } const uint32_t* -ARMBaseRegisterInfo::getNoPreservedMask() const { - return CSR_NoRegs_RegMask; +ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const { + // This should return a register mask that is the same as that returned by + // getCallPreservedMask but that additionally preserves the register used for + // the first i32 argument (which must also be the register used to return a + // single i32 return value). + // + // In case the calling convention does not use the same register for + // both, or otherwise does not want to enable this optimization, the function + // should return NULL. + if (CC == CallingConv::GHC) + // This is academic because all GHC calls are (supposed to be) tail calls. + return NULL; + return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) + ? CSR_iOS_ThisReturn_RegMask : CSR_AAPCS_ThisReturn_RegMask; } BitVector ARMBaseRegisterInfo:: @@ -94,6 +121,7 @@ getReservedRegs(const MachineFunction &MF) const { Reserved.set(ARM::SP); Reserved.set(ARM::PC); Reserved.set(ARM::FPSCR); + Reserved.set(ARM::APSR_NZCV); if (TFI->hasFP(MF)) Reserved.set(FramePtr); if (hasBasePointer(MF)) @@ -309,7 +337,7 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { // 1. Dynamic stack realignment is explicitly disabled, // 2. This is a Thumb1 function (it's not useful, so we don't bother), or // 3. There are VLAs in the function and the base pointer is disabled. - if (!MF.getTarget().Options.RealignStack) + if (MF.getFunction()->hasFnAttribute("no-realign-stack")) return false; if (AFI->isThumb1OnlyFunction()) return false; @@ -357,14 +385,6 @@ ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const { return ARM::SP; } -unsigned ARMBaseRegisterInfo::getEHExceptionRegister() const { - llvm_unreachable("What is the exception register"); -} - -unsigned ARMBaseRegisterInfo::getEHHandlerRegister() const { - llvm_unreachable("What is the exception handler register"); -} - /// emitLoadConstPool - Emits a load from constpool to materialize the /// specified immediate.
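emitLoadConstPool, whose body follows, ultimately asks the MachineConstantPool for an index, reusing a suitably aligned existing entry when one matches; the same reuse-or-append behaviour appears in the getExistingMachineCPValue deduplication loops replaced later in this diff. A standalone sketch of that behaviour with deliberately simplified types (no alignment handling):

    #include <cstdint>
    #include <vector>

    struct SimpleConstantPool {
      std::vector<uint32_t> Entries;
      // Return the index of Val, appending a new entry only when no
      // existing one can be reused.
      unsigned getIndex(uint32_t Val) {
        for (unsigned i = 0, e = Entries.size(); i != e; ++i)
          if (Entries[i] == Val)
            return i;
        Entries.push_back(Val);
        return Entries.size() - 1;
      }
    };

The real pool additionally checks alignment compatibility before reusing an entry.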
void ARMBaseRegisterInfo:: @@ -375,6 +395,7 @@ emitLoadConstPool(MachineBasicBlock &MBB, ARMCC::CondCodes Pred, unsigned PredReg, unsigned MIFlags) const { MachineFunction &MF = *MBB.getParent(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); MachineConstantPool *ConstantPool = MF.getConstantPool(); const Constant *C = ConstantInt::get(Type::getInt32Ty(MF.getFunction()->getContext()), Val); @@ -556,9 +577,10 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB, if (Ins != MBB->end()) DL = Ins->getDebugLoc(); - const MCInstrDesc &MCID = TII.get(ADDriOpc); - MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); const MachineFunction &MF = *MBB->getParent(); + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const MCInstrDesc &MCID = TII.get(ADDriOpc); MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF)); MachineInstrBuilder MIB = AddDefaultPred(BuildMI(*MBB, Ins, DL, MCID, BaseReg) @@ -574,6 +596,8 @@ ARMBaseRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I, MachineInstr &MI = *I; MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); + const ARMBaseInstrInfo &TII = + *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); int Off = Offset; // ARM doesn't need the general 64-bit offsets unsigned i = 0; @@ -671,6 +695,8 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); + const ARMBaseInstrInfo &TII = + *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); const ARMFrameLowering *TFI = static_cast<const ARMFrameLowering*>(MF.getTarget().getFrameLowering()); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); @@ -696,12 +722,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } #endif // NDEBUG - // Special handling of dbg_value instructions. - if (MI.isDebugValue()) { - MI.getOperand(FIOperandNum). ChangeToRegister(FrameReg, false /*isDef*/); - MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); - return; - } + assert(!MI.isDebugValue() && "DBG_VALUEs should be handled in target-independent code"); // Modify MI as necessary to handle as much of 'Offset' as possible bool Done = false; diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h index 0679919152c0..e28fff68f4e2 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -72,9 +72,16 @@ static inline bool isARMArea3Register(unsigned Reg, bool isIOS) { } } +static inline bool isCalleeSavedRegister(unsigned Reg, + const MCPhysReg *CSRegs) { + for (unsigned i = 0; CSRegs[i]; ++i) + if (Reg == CSRegs[i]) + return true; + return false; +} + class ARMBaseRegisterInfo : public ARMGenRegisterInfo { protected: - const ARMBaseInstrInfo &TII; const ARMSubtarget &STI; /// FramePtr - ARM physical register used as frame ptr. @@ -86,8 +93,7 @@ protected: unsigned BasePtr; // Can be only subclassed. - explicit ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii, - const ARMSubtarget &STI); + explicit ARMBaseRegisterInfo(const ARMSubtarget &STI); // Return the opcode that implements 'Op', or 0 if no opcode unsigned getOpcode(int Op) const; @@ -96,9 +102,18 @@ public: /// Code Generation virtual methods... 
const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const; const uint32_t *getCallPreservedMask(CallingConv::ID) const; - const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const; const uint32_t *getNoPreservedMask() const; + /// getThisReturnPreservedMask - Returns a call preserved mask specific to the + /// case that 'returned' is on an i32 first argument if the calling convention + /// is one that can (partially) model this attribute with a preserved mask + /// (i.e. it is a calling convention that uses the same register for the first + /// i32 argument and an i32 return value) + /// + /// Should return NULL in the case that the calling convention does not have + /// this property + const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const; + BitVector getReservedRegs(const MachineFunction &MF) const; const TargetRegisterClass* @@ -142,10 +157,6 @@ public: unsigned getFrameRegister(const MachineFunction &MF) const; unsigned getBaseRegister() const { return BasePtr; } - // Exception handling queries. - unsigned getEHExceptionRegister() const; - unsigned getEHHandlerRegister() const; - bool isLowRegister(unsigned Reg) const; diff --git a/contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h b/contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h index 11bd6a4a8dbc..b16d4ef54b6d 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h +++ b/contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h @@ -15,11 +15,13 @@ #ifndef __TARGET_ARMBUILDATTRS_H__ #define __TARGET_ARMBUILDATTRS_H__ +namespace llvm { namespace ARMBuildAttrs { + enum SpecialAttr { // This is for the .cpu asm attr. It translates into one or more // AttrType (below) entries in the .ARM.attributes section in the ELF. - SEL_CPU + SEL_CPU }; enum AttrType { @@ -57,7 +59,7 @@ namespace ARMBuildAttrs { ABI_FP_optimization_goals = 31, compatibility = 32, CPU_unaligned_access = 34, - VFP_HP_extension = 36, + FP_HP_extension = 36, ABI_FP_16bit_format = 38, MPextension_use = 42, // was 70, 2.08 ABI DIV_use = 44, @@ -89,10 +91,11 @@ namespace ARMBuildAttrs { v7 = 10, // e.g. Cortex A8, Cortex M3 v6_M = 11, // e.g. Cortex M1 v6S_M = 12, // v6_M with the System extensions - v7E_M = 13 // v7_M with DSP extensions + v7E_M = 13, // v7_M with DSP extensions + v8 = 14 // v8, AArch32 }; - enum CPUArchProfile { // (=7), uleb128 + enum CPUArchProfile { // (=7), uleb128 Not_Applicable = 0, // pre v7, or cross-profile code ApplicationProfile = (0x41), // 'A' (e.g. for Cortex A8) RealTimeProfile = (0x52), // 'R' (e.g. 
for Cortex R4) @@ -101,31 +104,67 @@ namespace ARMBuildAttrs { }; // The following have a lot of common use cases - enum { - //ARMISAUse (=8), uleb128 and THUMBISAUse (=9), uleb128 + enum { Not_Allowed = 0, Allowed = 1, - // FP_arch (=10), uleb128 (formerly Tag_VFP_arch = 10) + // Tag_ARM_ISA_use (=8), uleb128 + + // Tag_THUMB_ISA_use, (=9), uleb128 + AllowThumb32 = 2, // 32-bit Thumb (implies 16-bit instructions) + + // Tag_FP_arch (=10), uleb128 (formerly Tag_VFP_arch = 10) AllowFPv2 = 2, // v2 FP ISA permitted (implies use of the v1 FP ISA) AllowFPv3A = 3, // v3 FP ISA permitted (implies use of the v2 FP ISA) - AllowFPv3B = 4, // v3 FP ISA permitted, but only D0-D15, S0-S31 - AllowFPv4A = 5, // v4 FP ISA permitted (implies use of v3 FP ISA) + AllowFPv3B = 4, // v3 FP ISA permitted, but only D0-D15, S0-S31 + AllowFPv4A = 5, // v4 FP ISA permitted (implies use of v3 FP ISA) AllowFPv4B = 6, // v4 FP ISA was permitted, but only D0-D15, S0-S31 + AllowFPARMv8A = 7, // Use of the ARM v8-A FP ISA was permitted + AllowFPARMv8B = 8, // Use of the ARM v8-A FP ISA was permitted, but only D0-D15, S0-S31 // Tag_WMMX_arch, (=11), uleb128 - AllowThumb32 = 2, // 32-bit Thumb (implies 16-bit instructions) - - // Tag_WMMX_arch, (=11), uleb128 - AllowWMMXv1 = 2, // The user permitted this entity to use WMMX v2 + AllowWMMXv1 = 1, // The user permitted this entity to use WMMX v1 + AllowWMMXv2 = 2, // The user permitted this entity to use WMMX v2 + + // Tag_Advanced_SIMD_arch, (=12), uleb128 + AllowNeon = 1, // SIMDv1 was permitted + AllowNeon2 = 2, // SIMDv2 was permitted (Half-precision FP, MAC operations) + AllowNeonARMv8 = 3, // ARM v8-A SIMD was permitted - // Tag_ABI_FP_denormal, (=20), uleb128 + // Tag_ABI_FP_denormal, (=20), uleb128 PreserveFPSign = 2, // sign when flushed-to-zero is preserved // Tag_ABI_FP_number_model, (=23), uleb128 AllowRTABI = 2, // numbers, infinities, and one quiet NaN (see [RTABI]) - AllowIEE754 = 3 // this code to use all the IEEE 754-defined FP encodings + AllowIEE754 = 3, // this code to use all the IEEE 754-defined FP encodings + + // Tag_ABI_HardFP_use, (=27), uleb128 + HardFPImplied = 0, // FP use should be implied by Tag_FP_arch + HardFPSinglePrecision = 1, // Single-precision only + + // Tag_ABI_VFP_args, (=28), uleb128 + BaseAAPCS = 0, + HardFPAAPCS = 1, + + // Tag_FP_HP_extension, (=36), uleb128 + AllowHPFP = 1, // Allow use of Half Precision FP + + // Tag_MPextension_use, (=42), uleb128 + AllowMP = 1, // Allow use of MP extensions + + // Tag_DIV_use, (=44), uleb128 + AllowDIVIfExists = 0, // Allow hardware divide if available in arch, or no info exists. + DisallowDIV = 1, // Hardware divide explicitly disallowed + AllowDIVExt = 2, // Allow hardware divide as optional architecture extension above + // the base arch specified by Tag_CPU_arch and Tag_CPU_arch_profile. 
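As the comments note, each of these build-attribute values is stored as uleb128: .ARM.attributes holds (tag, value) pairs in the standard ULEB128 variable-length encoding. For reference, a standalone sketch of that encoding (the helper name is hypothetical):

    #include <cstdint>
    #include <vector>

    // Standard ULEB128: 7 value bits per byte, high bit set on all but the
    // last byte.
    inline void appendULEB128(std::vector<uint8_t> &Out, uint64_t V) {
      do {
        uint8_t Byte = V & 0x7f;
        V >>= 7;
        if (V != 0)
          Byte |= 0x80; // more bytes follow
        Out.push_back(Byte);
      } while (V != 0);
    }

    // e.g. Tag_DIV_use (=44) with value AllowDIVExt (=2):
    //   appendULEB128(Attrs, 44); appendULEB128(Attrs, 2);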
+ + // Tag_Virtualization_use, (=68), uleb128 + AllowTZ = 1, + AllowVirtualization = 2, + AllowTZVirtualization = 3 }; -} + +} // namespace ARMBuildAttrs +} // namespace llvm #endif // __TARGET_ARMBUILDATTRS_H__ diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td index 8ff666ed2844..9bea4b2d68e9 100644 --- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td +++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td @@ -207,10 +207,24 @@ def CSR_AAPCS_ThisReturn : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>; def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4, - (sub CSR_AAPCS_ThisReturn, R9))>; + (sub CSR_AAPCS_ThisReturn, R9))>; + +// The "interrupt" attribute is used to generate code that is acceptable in +// exception-handlers of various kinds. It makes us use a different return +// instruction (handled elsewhere) and affects which registers we must return to +// our "caller" in the same state as we receive them. + +// For most interrupts, all registers except SP and LR are shared with +// user-space. We mark LR to be saved anyway, since this is what the ARM backend +// generally does rather than tracking its liveness as a normal register. +def CSR_GenericInt : CalleeSavedRegs<(add LR, (sequence "R%u", 12, 0))>; + +// The fast interrupt handlers have more private state and get their own copies +// of R8-R12, in addition to SP and LR. As before, mark LR for saving too. + +// FIXME: we mark R11 as callee-saved since it's often the frame-pointer, and +// current frame lowering expects to encounter it while processing callee-saved +// registers. +def CSR_FIQ : CalleeSavedRegs<(add LR, R11, (sequence "R%u", 7, 0))>; + -// GHC set of callee saved regs is empty as all those regs are -// used for passing STG regs around -// add is a workaround for not being able to compile empty list: -// def CSR_GHC : CalleeSavedRegs<()>; -def CSR_GHC : CalleeSavedRegs<(add)>; diff --git a/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp b/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp index 5e8e1739a984..568ca858c4d2 100644 --- a/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp @@ -167,6 +167,8 @@ namespace { const { return 0; } unsigned NEONThumb2DupPostEncoder(const MachineInstr &MI,unsigned Val) const { return 0; } + unsigned NEONThumb2V8PostEncoder(const MachineInstr &MI,unsigned Val) + const { return 0; } unsigned VFPThumb2PostEncoder(const MachineInstr&MI, unsigned Val) const { return 0; } unsigned getAdrLabelOpValue(const MachineInstr &MI, unsigned Op) @@ -1044,8 +1046,8 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, return; } else if ((MCID.Opcode == ARM::BFC) || (MCID.Opcode == ARM::BFI)) { uint32_t v = ~MI.getOperand(2).getImm(); - int32_t lsb = CountTrailingZeros_32(v); - int32_t msb = (32 - CountLeadingZeros_32(v)) - 1; + int32_t lsb = countTrailingZeros(v); + int32_t msb = (32 - countLeadingZeros(v)) - 1; // Instr{20-16} = msb, Instr{11-7} = lsb Binary |= (msb & 0x1F) << 16; Binary |= (lsb & 0x1F) << 7; diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp index 4891609b336f..cff5ce27bca6 100644 --- a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -128,7 +128,7 @@ namespace { // If the block size isn't a multiple of the known bits, assume the 
// worst case padding. if (Size & ((1u << Bits) - 1)) - Bits = CountTrailingZeros_32(Size); + Bits = countTrailingZeros(Size); return Bits; } @@ -753,6 +753,7 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) { Scale = 4; break; + case ARM::LDRBi12: case ARM::LDRi12: case ARM::LDRcp: case ARM::t2LDRpci: diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp index 4e703ec3c1a8..7d41c69f08b8 100644 --- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp @@ -163,21 +163,7 @@ const BlockAddress *ARMConstantPoolConstant::getBlockAddress() const { int ARMConstantPoolConstant::getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) { - unsigned AlignMask = Alignment - 1; - const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants(); - for (unsigned i = 0, e = Constants.size(); i != e; ++i) { - if (Constants[i].isMachineConstantPoolEntry() && - (Constants[i].getAlignment() & AlignMask) == 0) { - ARMConstantPoolValue *CPV = - (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal; - ARMConstantPoolConstant *APC = dyn_cast<ARMConstantPoolConstant>(CPV); - if (!APC) continue; - if (APC->CVal == CVal && equals(APC)) - return i; - } - } - - return -1; + return getExistingMachineCPValueImpl<ARMConstantPoolConstant>(CP, Alignment); } bool ARMConstantPoolConstant::hasSameValue(ARMConstantPoolValue *ACPV) { @@ -216,22 +202,7 @@ ARMConstantPoolSymbol::Create(LLVMContext &C, const char *s, int ARMConstantPoolSymbol::getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) { - unsigned AlignMask = Alignment - 1; - const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants(); - for (unsigned i = 0, e = Constants.size(); i != e; ++i) { - if (Constants[i].isMachineConstantPoolEntry() && - (Constants[i].getAlignment() & AlignMask) == 0) { - ARMConstantPoolValue *CPV = - (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal; - ARMConstantPoolSymbol *APS = dyn_cast<ARMConstantPoolSymbol>(CPV); - if (!APS) continue; - - if (APS->S == S && equals(APS)) - return i; - } - } - - return -1; + return getExistingMachineCPValueImpl<ARMConstantPoolSymbol>(CP, Alignment); } bool ARMConstantPoolSymbol::hasSameValue(ARMConstantPoolValue *ACPV) { @@ -271,22 +242,7 @@ ARMConstantPoolMBB *ARMConstantPoolMBB::Create(LLVMContext &C, int ARMConstantPoolMBB::getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) { - unsigned AlignMask = Alignment - 1; - const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants(); - for (unsigned i = 0, e = Constants.size(); i != e; ++i) { - if (Constants[i].isMachineConstantPoolEntry() && - (Constants[i].getAlignment() & AlignMask) == 0) { - ARMConstantPoolValue *CPV = - (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal; - ARMConstantPoolMBB *APMBB = dyn_cast<ARMConstantPoolMBB>(CPV); - if (!APMBB) continue; - - if (APMBB->MBB == MBB && equals(APMBB)) - return i; - } - } - - return -1; + return getExistingMachineCPValueImpl<ARMConstantPoolMBB>(CP, Alignment); } bool ARMConstantPoolMBB::hasSameValue(ARMConstantPoolValue *ACPV) { diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h index 93812fe6bb37..7ae7bf46f19d 100644 --- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h +++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h @@ -15,6 +15,7 @@ #define LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H #include 
"llvm/CodeGen/MachineConstantPool.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include <cstddef> @@ -64,6 +65,26 @@ protected: ARMConstantPoolValue(LLVMContext &C, unsigned id, ARMCP::ARMCPKind Kind, unsigned char PCAdj, ARMCP::ARMCPModifier Modifier, bool AddCurrentAddress); + + template <typename Derived> + int getExistingMachineCPValueImpl(MachineConstantPool *CP, + unsigned Alignment) { + unsigned AlignMask = Alignment - 1; + const std::vector<MachineConstantPoolEntry> &Constants = CP->getConstants(); + for (unsigned i = 0, e = Constants.size(); i != e; ++i) { + if (Constants[i].isMachineConstantPoolEntry() && + (Constants[i].getAlignment() & AlignMask) == 0) { + ARMConstantPoolValue *CPV = + (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal; + if (Derived *APC = dyn_cast<Derived>(CPV)) + if (cast<Derived>(this)->equals(APC)) + return i; + } + } + + return -1; + } + public: virtual ~ARMConstantPoolValue(); @@ -156,6 +177,10 @@ public: static bool classof(const ARMConstantPoolValue *APV) { return APV->isGlobalValue() || APV->isBlockAddress() || APV->isLSDA(); } + + bool equals(const ARMConstantPoolConstant *A) const { + return CVal == A->CVal && ARMConstantPoolValue::equals(A); + } }; /// ARMConstantPoolSymbol - ARM-specific constantpool values for external @@ -187,6 +212,10 @@ public: static bool classof(const ARMConstantPoolValue *ACPV) { return ACPV->isExtSymbol(); } + + bool equals(const ARMConstantPoolSymbol *A) const { + return S == A->S && ARMConstantPoolValue::equals(A); + } }; /// ARMConstantPoolMBB - ARM-specific constantpool value of a machine basic @@ -219,6 +248,10 @@ public: static bool classof(const ARMConstantPoolValue *ACPV) { return ACPV->isMachineBasicBlock(); } + + bool equals(const ARMConstantPoolMBB *A) const { + return MBB == A->MBB && ARMConstantPoolValue::equals(A); + } }; } // End llvm namespace diff --git a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index beb843ca9aa8..e6f7f86c5587 100644 --- a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -692,10 +692,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, unsigned newOpc = Opcode == ARM::VMOVScc ? ARM::VMOVS : ARM::VMOVD; BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(newOpc), MI.getOperand(1).getReg()) - .addReg(MI.getOperand(2).getReg(), - getKillRegState(MI.getOperand(2).isKill())) + .addOperand(MI.getOperand(2)) .addImm(MI.getOperand(3).getImm()) // 'pred' - .addReg(MI.getOperand(4).getReg()); + .addOperand(MI.getOperand(4)); MI.eraseFromParent(); return true; @@ -705,10 +704,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, unsigned Opc = AFI->isThumbFunction() ? 
ARM::t2MOVr : ARM::MOVr; BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc), MI.getOperand(1).getReg()) - .addReg(MI.getOperand(2).getReg(), - getKillRegState(MI.getOperand(2).isKill())) + .addOperand(MI.getOperand(2)) .addImm(MI.getOperand(3).getImm()) // 'pred' - .addReg(MI.getOperand(4).getReg()) + .addOperand(MI.getOperand(4)) .addReg(0); // 's' bit MI.eraseFromParent(); @@ -717,39 +715,36 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::MOVCCsi: { BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi), (MI.getOperand(1).getReg())) - .addReg(MI.getOperand(2).getReg(), - getKillRegState(MI.getOperand(2).isKill())) + .addOperand(MI.getOperand(2)) .addImm(MI.getOperand(3).getImm()) .addImm(MI.getOperand(4).getImm()) // 'pred' - .addReg(MI.getOperand(5).getReg()) + .addOperand(MI.getOperand(5)) .addReg(0); // 's' bit MI.eraseFromParent(); return true; } - case ARM::MOVCCsr: { BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsr), (MI.getOperand(1).getReg())) - .addReg(MI.getOperand(2).getReg(), - getKillRegState(MI.getOperand(2).isKill())) - .addReg(MI.getOperand(3).getReg(), - getKillRegState(MI.getOperand(3).isKill())) + .addOperand(MI.getOperand(2)) + .addOperand(MI.getOperand(3)) .addImm(MI.getOperand(4).getImm()) .addImm(MI.getOperand(5).getImm()) // 'pred' - .addReg(MI.getOperand(6).getReg()) + .addOperand(MI.getOperand(6)) .addReg(0); // 's' bit MI.eraseFromParent(); return true; } + case ARM::t2MOVCCi16: case ARM::MOVCCi16: { - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi16), + unsigned NewOpc = AFI->isThumbFunction() ? ARM::t2MOVi16 : ARM::MOVi16; + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc), MI.getOperand(1).getReg()) .addImm(MI.getOperand(2).getImm()) .addImm(MI.getOperand(3).getImm()) // 'pred' - .addReg(MI.getOperand(4).getReg()); - + .addOperand(MI.getOperand(4)); MI.eraseFromParent(); return true; } @@ -760,23 +755,47 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MI.getOperand(1).getReg()) .addImm(MI.getOperand(2).getImm()) .addImm(MI.getOperand(3).getImm()) // 'pred' - .addReg(MI.getOperand(4).getReg()) + .addOperand(MI.getOperand(4)) .addReg(0); // 's' bit MI.eraseFromParent(); return true; } + case ARM::t2MVNCCi: case ARM::MVNCCi: { - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MVNi), + unsigned Opc = AFI->isThumbFunction() ? 
ARM::t2MVNi : ARM::MVNi; + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc), MI.getOperand(1).getReg()) .addImm(MI.getOperand(2).getImm()) .addImm(MI.getOperand(3).getImm()) // 'pred' - .addReg(MI.getOperand(4).getReg()) + .addOperand(MI.getOperand(4)) .addReg(0); // 's' bit MI.eraseFromParent(); return true; } + case ARM::t2MOVCClsl: + case ARM::t2MOVCClsr: + case ARM::t2MOVCCasr: + case ARM::t2MOVCCror: { + unsigned NewOpc; + switch (Opcode) { + case ARM::t2MOVCClsl: NewOpc = ARM::t2LSLri; break; + case ARM::t2MOVCClsr: NewOpc = ARM::t2LSRri; break; + case ARM::t2MOVCCasr: NewOpc = ARM::t2ASRri; break; + case ARM::t2MOVCCror: NewOpc = ARM::t2RORri; break; + default: llvm_unreachable("unexpected conditional move"); + } + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc), + MI.getOperand(1).getReg()) + .addOperand(MI.getOperand(2)) + .addImm(MI.getOperand(3).getImm()) + .addImm(MI.getOperand(4).getImm()) // 'pred' + .addOperand(MI.getOperand(5)) + .addReg(0); // 's' bit + MI.eraseFromParent(); + return true; + } case ARM::Int_eh_sjlj_dispatchsetup: { MachineFunction &MF = *MI.getParent()->getParent(); const ARMBaseInstrInfo *AII = @@ -823,7 +842,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::MOVsrl_flag: case ARM::MOVsra_flag: { - // These are just fancy MOVs insructions. + // These are just fancy MOVs instructions. AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi), MI.getOperand(0).getReg()) .addOperand(MI.getOperand(1)) @@ -938,6 +957,18 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, ExpandMOV32BitImm(MBB, MBBI); return true; + case ARM::SUBS_PC_LR: { + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri), ARM::PC) + .addReg(ARM::LR) + .addOperand(MI.getOperand(0)) + .addOperand(MI.getOperand(1)) + .addOperand(MI.getOperand(2)) + .addReg(ARM::CPSR, RegState::Undef); + TransferImpOps(MI, MIB, MIB); + MI.eraseFromParent(); + return true; + } case ARM::VLDMQIA: { unsigned NewOpc = ARM::VLDMDIA; MachineInstrBuilder MIB = diff --git a/contrib/llvm/lib/Target/ARM/ARMFPUName.def b/contrib/llvm/lib/Target/ARM/ARMFPUName.def new file mode 100644 index 000000000000..9a1bbe703d99 --- /dev/null +++ b/contrib/llvm/lib/Target/ARM/ARMFPUName.def @@ -0,0 +1,32 @@ +//===-- ARMFPUName.def - List of the ARM FPU names --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the list of the supported ARM FPU names. +// +//===----------------------------------------------------------------------===// + +// NOTE: NO INCLUDE GUARD DESIRED!
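The `NO INCLUDE GUARD DESIRED!` note marks this as an X-macro file: the guard and entries that follow are designed to be included repeatedly, each time with a different definition of ARM_FPU_NAME. A hypothetical consumer, not part of the patch, that stamps out a name-to-enum lookup from the list below:

    #include "ARMFPUName.h"
    #include <cstring>

    // Each inclusion of the .def expands ARM_FPU_NAME once per FPU entry.
    static llvm::ARM::FPUKind parseFPUName(const char *Name) {
    #define ARM_FPU_NAME(NAME, ID)                                             \
      if (std::strcmp(Name, NAME) == 0)                                        \
        return llvm::ARM::ID;
    #include "ARMFPUName.def"
      return llvm::ARM::INVALID_FPU;
    }

The consumer does not need its own #undef: as the list below shows, the .def file undefines ARM_FPU_NAME itself after the entries.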
+ +#ifndef ARM_FPU_NAME +#error "You must define ARM_FPU_NAME(NAME, ID) before including ARMFPUName.def" +#endif + +ARM_FPU_NAME("vfp", VFP) +ARM_FPU_NAME("vfpv2", VFPV2) +ARM_FPU_NAME("vfpv3", VFPV3) +ARM_FPU_NAME("vfpv3-d16", VFPV3_D16) +ARM_FPU_NAME("vfpv4", VFPV4) +ARM_FPU_NAME("vfpv4-d16", VFPV4_D16) +ARM_FPU_NAME("fp-armv8", FP_ARMV8) +ARM_FPU_NAME("neon", NEON) +ARM_FPU_NAME("neon-vfpv4", NEON_VFPV4) +ARM_FPU_NAME("neon-fp-armv8", NEON_FP_ARMV8) +ARM_FPU_NAME("crypto-neon-fp-armv8", CRYPTO_NEON_FP_ARMV8) + +#undef ARM_FPU_NAME diff --git a/contrib/llvm/lib/Target/ARM/ARMFPUName.h b/contrib/llvm/lib/Target/ARM/ARMFPUName.h new file mode 100644 index 000000000000..2a64cce4880d --- /dev/null +++ b/contrib/llvm/lib/Target/ARM/ARMFPUName.h @@ -0,0 +1,26 @@ +//===-- ARMFPUName.h - List of the ARM FPU names ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef ARMFPUNAME_H +#define ARMFPUNAME_H + +namespace llvm { +namespace ARM { + +enum FPUKind { + INVALID_FPU = 0 + +#define ARM_FPU_NAME(NAME, ID) , ID +#include "ARMFPUName.def" +}; + +} // namespace ARM +} // namespace llvm + +#endif // ARMFPUNAME_H diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp index 5d45f6491240..a4004f32db37 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp @@ -20,6 +20,7 @@ #include "ARMSubtarget.h" #include "ARMTargetMachine.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" @@ -175,6 +176,8 @@ class ARMFastISel : public FastISel { // Utility routines. private: + unsigned constrainOperandRegClass(const MCInstrDesc &II, unsigned Op, + unsigned OpNum); bool isTypeLegal(Type *Ty, MVT &VT); bool isLoadTypeLegal(Type *Ty, MVT &VT); bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, @@ -251,10 +254,10 @@ bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) { bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) { const MCInstrDesc &MCID = MI->getDesc(); - // If we're a thumb2 or not NEON function we were handled via isPredicable. + // If we're a Thumb2 function or not a NEON function, we'll be handled via isPredicable. if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON || AFI->isThumb2Function()) - return false; + return MI->isPredicable(); for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) if (MCID.OpInfo[i].isPredicate()) @@ -275,7 +278,7 @@ ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) { // Do we use a predicate? or... // Are we NEON in ARM mode and have a predicate operand? If so, I know // we're not predicable but add it anyways. - if (TII.isPredicable(MI) || isARMNEONPred(MI)) + if (isARMNEONPred(MI)) AddDefaultPred(MIB); // Do we optionally set a predicate?
Preds is size > 0 iff the predicate @@ -290,6 +293,23 @@ ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) { return MIB; } +unsigned ARMFastISel::constrainOperandRegClass(const MCInstrDesc &II, + unsigned Op, unsigned OpNum) { + if (TargetRegisterInfo::isVirtualRegister(Op)) { + const TargetRegisterClass *RegClass = + TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF); + if (!MRI.constrainRegClass(Op, RegClass)) { + // If it's not legal to COPY between the register classes, something + // has gone very wrong before we got here. + unsigned NewOp = createResultReg(RegClass); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), NewOp).addReg(Op)); + return NewOp; + } + } + return Op; +} + unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode, const TargetRegisterClass* RC) { unsigned ResultReg = createResultReg(RC); @@ -305,6 +325,9 @@ unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode, unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); + // Make sure the input operand is sufficiently constrained to be legal + // for this instruction. + Op0 = constrainOperandRegClass(II, Op0, 1); if (II.getNumDefs() >= 1) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill)); @@ -325,6 +348,11 @@ unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode, unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); + // Make sure the input operands are sufficiently constrained to be legal + // for this instruction. + Op0 = constrainOperandRegClass(II, Op0, 1); + Op1 = constrainOperandRegClass(II, Op1, 2); + if (II.getNumDefs() >= 1) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) @@ -348,6 +376,12 @@ unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode, unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); + // Make sure the input operands are sufficiently constrained to be legal + // for this instruction. + Op0 = constrainOperandRegClass(II, Op0, 1); + Op1 = constrainOperandRegClass(II, Op1, 2); + Op2 = constrainOperandRegClass(II, Op2, 3); + if (II.getNumDefs() >= 1) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) @@ -372,6 +406,9 @@ unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode, unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); + // Make sure the input operand is sufficiently constrained to be legal + // for this instruction. + Op0 = constrainOperandRegClass(II, Op0, 1); if (II.getNumDefs() >= 1) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) @@ -394,6 +431,9 @@ unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode, unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); + // Make sure the input operand is sufficiently constrained to be legal + // for this instruction.
+ Op0 = constrainOperandRegClass(II, Op0, 1); if (II.getNumDefs() >= 1) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) @@ -417,6 +457,10 @@ unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode, unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); + // Make sure the input operands are sufficiently constrained to be legal + // for this instruction. + Op0 = constrainOperandRegClass(II, Op0, 1); + Op1 = constrainOperandRegClass(II, Op1, 2); if (II.getNumDefs() >= 1) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) @@ -609,6 +653,7 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) { .addConstantPoolIndex(Idx)); else // The extra immediate is for addrmode2. + DestReg = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg, 0); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp), DestReg) .addConstantPoolIndex(Idx) @@ -628,6 +673,11 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { (const TargetRegisterClass*)&ARM::GPRRegClass; unsigned DestReg = createResultReg(RC); + // FastISel TLS support on non-Darwin is broken; punt to SelectionDAG. + const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV); + bool IsThreadLocal = GVar && GVar->isThreadLocal(); + if (!Subtarget->isTargetDarwin() && IsThreadLocal) return 0; + // Use movw+movt when possible; it avoids constant pool entries. // Darwin targets don't support movt with Reloc::Static, see // ARMTargetLowering::LowerGlobalAddressDarwin. Other targets only support @@ -679,6 +729,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { AddOptionalDefs(MIB); } else { // The extra immediate is for addrmode2. + DestReg = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg, 0); MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp), DestReg) .addConstantPoolIndex(Idx) .addImm(0); @@ -814,22 +865,19 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) { switch (Opcode) { default: break; - case Instruction::BitCast: { + case Instruction::BitCast: // Look through bitcasts. return ARMComputeAddress(U->getOperand(0), Addr); - } - case Instruction::IntToPtr: { + case Instruction::IntToPtr: // Look past no-op inttoptrs. if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) return ARMComputeAddress(U->getOperand(0), Addr); break; - } - case Instruction::PtrToInt: { + case Instruction::PtrToInt: // Look past no-op ptrtoints. if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) return ARMComputeAddress(U->getOperand(0), Addr); break; - } case Instruction::GetElementPtr: { Address SavedAddr = Addr; int TmpOffset = Addr.Offset; @@ -852,13 +900,8 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) { TmpOffset += CI->getSExtValue() * S; break; } - if (isa<AddOperator>(Op) && - (!isa<Instruction>(Op) || - FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()] - == FuncInfo.MBB) && - isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) { - // An add (in the same block) with a constant operand. Fold the - // constant. + if (canFoldAddIntoGEP(U, Op)) { + // A compatible add with a constant operand. Fold the constant.
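The GetElementPtr case above folds every constant index, and via canFoldAddIntoGEP any compatible add-with-constant, into a single running byte offset. A standalone sketch of that accumulation with simplified stand-in types (each folded index contributes its value times the element size in bytes):

    #include <cstdint>
    #include <vector>

    struct FoldedIndex { int64_t Value; int64_t ElemSize; };

    inline int64_t accumulateGEPOffset(const std::vector<FoldedIndex> &Idxs) {
      int64_t Offset = 0;
      for (const FoldedIndex &I : Idxs)
        Offset += I.Value * I.ElemSize; // e.g. index 3 into i32[] adds 12
      return Offset;
    }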
ConstantInt *CI = cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1)); TmpOffset += CI->getSExtValue() * S; @@ -1025,7 +1068,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, useAM3 = true; } } - RC = &ARM::GPRRegClass; + RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass; break; case MVT::i16: if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem()) @@ -1040,7 +1083,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, Opc = isZExt ? ARM::LDRH : ARM::LDRSH; useAM3 = true; } - RC = &ARM::GPRRegClass; + RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass; break; case MVT::i32: if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem()) @@ -1054,7 +1097,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, } else { Opc = ARM::LDRi12; } - RC = &ARM::GPRRegClass; + RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass; break; case MVT::f32: if (!Subtarget->hasVFP2()) return false; @@ -1063,7 +1106,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, needVMOV = true; VT = MVT::i32; Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12; - RC = &ARM::GPRRegClass; + RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass; } else { Opc = ARM::VLDRS; RC = TLI.getRegClassFor(VT); @@ -1136,6 +1179,7 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr, (const TargetRegisterClass*)&ARM::tGPRRegClass : (const TargetRegisterClass*)&ARM::GPRRegClass); unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri; + SrcReg = constrainOperandRegClass(TII.get(Opc), SrcReg, 1); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), Res) .addReg(SrcReg).addImm(1)); @@ -1207,6 +1251,7 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr, ARMSimplifyAddress(Addr, VT, useAM3); // Create the base instruction, then add the operands. + SrcReg = constrainOperandRegClass(TII.get(StrOpc), SrcReg, 0); MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(StrOpc)) .addReg(SrcReg); @@ -1330,6 +1375,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { (isLoadTypeLegal(TI->getOperand(0)->getType(), SourceVT))) { unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri; unsigned OpReg = getRegForValue(TI->getOperand(0)); + OpReg = constrainOperandRegClass(TII.get(TstOpc), OpReg, 0); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TstOpc)) .addReg(OpReg).addImm(1)); @@ -1367,6 +1413,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { // and it left a value for us in a virtual register. Ergo, we test // the one-bit value left in the virtual register. unsigned TstOpc = isThumb2 ? 
ARM::t2TSTri : ARM::TSTri; + CmpReg = constrainOperandRegClass(TII.get(TstOpc), CmpReg, 0); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TstOpc)) .addReg(CmpReg).addImm(1)); @@ -1491,13 +1538,15 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, } } + const MCInstrDesc &II = TII.get(CmpOpc); + SrcReg1 = constrainOperandRegClass(II, SrcReg1, 0); if (!UseImm) { - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(CmpOpc)) + SrcReg2 = constrainOperandRegClass(II, SrcReg2, 1); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addReg(SrcReg1).addReg(SrcReg2)); } else { MachineInstrBuilder MIB; - MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addReg(SrcReg1); // Only add immediate for icmp as the immediate for fcmp is an implicit 0.0. @@ -1696,6 +1745,7 @@ bool ARMFastISel::SelectSelect(const Instruction *I) { } unsigned CmpOpc = isThumb2 ? ARM::t2CMPri : ARM::CMPri; + CondReg = constrainOperandRegClass(TII.get(CmpOpc), CondReg, 0); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) .addReg(CondReg).addImm(0)); @@ -1712,12 +1762,16 @@ bool ARMFastISel::SelectSelect(const Instruction *I) { MovCCOpc = isThumb2 ? ARM::t2MVNCCi : ARM::MVNCCi; } unsigned ResultReg = createResultReg(RC); - if (!UseImm) + if (!UseImm) { + Op2Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op2Reg, 1); + Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 2); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg) .addReg(Op2Reg).addReg(Op1Reg).addImm(ARMCC::NE).addReg(ARM::CPSR); - else + } else { + Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 1); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg) .addReg(Op1Reg).addImm(Imm).addImm(ARMCC::EQ).addReg(ARM::CPSR); + } UpdateValueMap(I, ResultReg); return true; } @@ -1802,7 +1856,9 @@ bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { unsigned SrcReg2 = getRegForValue(I->getOperand(1)); if (SrcReg2 == 0) return false; - unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32)); + unsigned ResultReg = createResultReg(&ARM::GPRnopcRegClass); + SrcReg1 = constrainOperandRegClass(TII.get(Opc), SrcReg1, 1); + SrcReg2 = constrainOperandRegClass(TII.get(Opc), SrcReg2, 2); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) .addReg(SrcReg1).addReg(SrcReg2)); @@ -1930,7 +1986,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, !VA.isRegLoc() || !ArgLocs[++i].isRegLoc()) return false; } else { - switch (static_cast<EVT>(ArgVT).getSimpleVT().SimpleTy) { + switch (ArgVT.SimpleTy) { default: return false; case MVT::i1: @@ -1985,7 +2041,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, case CCValAssign::ZExt: { MVT DestVT = VA.getLocVT(); Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true); - assert (Arg != 0 && "Failed to emit a sext"); + assert (Arg != 0 && "Failed to emit a zext"); ArgVT = DestVT; break; } @@ -2182,10 +2238,14 @@ unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) { } unsigned ARMFastISel::getLibcallReg(const Twine &Name) { + // Manually compute the global's type to avoid building it when unnecessary. 
+ Type *GVTy = Type::getInt32PtrTy(*Context, /*AS=*/0); + EVT LCREVT = TLI.getValueType(GVTy); + if (!LCREVT.isSimple()) return 0; + GlobalValue *GV = new GlobalVariable(Type::getInt32Ty(*Context), false, GlobalValue::ExternalLinkage, 0, Name); - EVT LCREVT = TLI.getValueType(GV->getType()); - if (!LCREVT.isSimple()) return 0; + assert(GV->getType() == GVTy && "We miscomputed the type for the global!"); return ARMMaterializeGV(GV, LCREVT.getSimpleVT()); } @@ -2403,15 +2463,22 @@ bool ARMFastISel::SelectCall(const Instruction *I, MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc)); + unsigned char OpFlags = 0; + + // Add MO_PLT for global address or external symbol in the PIC relocation + // model. + if (Subtarget->isTargetELF() && TM.getRelocationModel() == Reloc::PIC_) + OpFlags = ARMII::MO_PLT; + // ARM calls don't take a predicate, but tBL / tBLX do. if(isThumb2) AddDefaultPred(MIB); if (UseReg) MIB.addReg(CalleeReg); else if (!IntrMemName) - MIB.addGlobalAddress(GV, 0, 0); + MIB.addGlobalAddress(GV, 0, OpFlags); else - MIB.addExternalSymbol(IntrMemName, 0); + MIB.addExternalSymbol(IntrMemName, OpFlags); // Add implicit physical register uses to the call. for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) @@ -2602,47 +2669,136 @@ unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt) { if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8) return 0; + if (SrcVT != MVT::i16 && SrcVT != MVT::i8 && SrcVT != MVT::i1) + return 0; - unsigned Opc; - bool isBoolZext = false; - const TargetRegisterClass *RC; - switch (SrcVT.SimpleTy) { - default: return 0; - case MVT::i16: - if (!Subtarget->hasV6Ops()) return 0; - RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass; - if (isZExt) - Opc = isThumb2 ? ARM::t2UXTH : ARM::UXTH; - else - Opc = isThumb2 ? ARM::t2SXTH : ARM::SXTH; - break; - case MVT::i8: - if (!Subtarget->hasV6Ops()) return 0; - RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass; - if (isZExt) - Opc = isThumb2 ? ARM::t2UXTB : ARM::UXTB; - else - Opc = isThumb2 ? ARM::t2SXTB : ARM::SXTB; - break; - case MVT::i1: - if (isZExt) { - RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass; - Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri; - isBoolZext = true; - break; + // Table of which combinations can be emitted as a single instruction, + // and which will require two. + static const uint8_t isSingleInstrTbl[3][2][2][2] = { + // ARM Thumb + // !hasV6Ops hasV6Ops !hasV6Ops hasV6Ops + // ext: s z s z s z s z + /* 1 */ { { { 0, 1 }, { 0, 1 } }, { { 0, 0 }, { 0, 1 } } }, + /* 8 */ { { { 0, 1 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } }, + /* 16 */ { { { 0, 0 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } } + }; + + // Target registers: + // - for ARM, never PC. + // - for 16-bit Thumb, restricted to the lower 8 registers. + // - for 32-bit Thumb, restricted to non-SP and non-PC. + static const TargetRegisterClass *RCTbl[2][2] = { + // Instructions: Two Single + /* ARM */ { &ARM::GPRnopcRegClass, &ARM::GPRnopcRegClass }, + /* Thumb */ { &ARM::tGPRRegClass, &ARM::rGPRRegClass } + }; + + // Table governing the instruction(s) to be emitted. + static const struct InstructionTable { + uint32_t Opc : 16; + uint32_t hasS : 1; // Some instructions have an S bit; always set it to 0. + uint32_t Shift : 7; // For shift operand addressing mode, used by MOVsi. + uint32_t Imm : 8; // All instructions have either a shift or a mask.
+ } IT[2][2][3][2] = { + { // Two instructions (first is left shift, second is in this table). + { // ARM Opc S Shift Imm + /* 1 bit sext */ { { ARM::MOVsi , 1, ARM_AM::asr , 31 }, + /* 1 bit zext */ { ARM::MOVsi , 1, ARM_AM::lsr , 31 } }, + /* 8 bit sext */ { { ARM::MOVsi , 1, ARM_AM::asr , 24 }, + /* 8 bit zext */ { ARM::MOVsi , 1, ARM_AM::lsr , 24 } }, + /* 16 bit sext */ { { ARM::MOVsi , 1, ARM_AM::asr , 16 }, + /* 16 bit zext */ { ARM::MOVsi , 1, ARM_AM::lsr , 16 } } + }, + { // Thumb Opc S Shift Imm + /* 1 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift, 31 }, + /* 1 bit zext */ { ARM::tLSRri , 0, ARM_AM::no_shift, 31 } }, + /* 8 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift, 24 }, + /* 8 bit zext */ { ARM::tLSRri , 0, ARM_AM::no_shift, 24 } }, + /* 16 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift, 16 }, + /* 16 bit zext */ { ARM::tLSRri , 0, ARM_AM::no_shift, 16 } } + } + }, + { // Single instruction. + { // ARM Opc S Shift Imm + /* 1 bit sext */ { { ARM::KILL , 0, ARM_AM::no_shift, 0 }, + /* 1 bit zext */ { ARM::ANDri , 1, ARM_AM::no_shift, 1 } }, + /* 8 bit sext */ { { ARM::SXTB , 0, ARM_AM::no_shift, 0 }, + /* 8 bit zext */ { ARM::ANDri , 1, ARM_AM::no_shift, 255 } }, + /* 16 bit sext */ { { ARM::SXTH , 0, ARM_AM::no_shift, 0 }, + /* 16 bit zext */ { ARM::UXTH , 0, ARM_AM::no_shift, 0 } } + }, + { // Thumb Opc S Shift Imm + /* 1 bit sext */ { { ARM::KILL , 0, ARM_AM::no_shift, 0 }, + /* 1 bit zext */ { ARM::t2ANDri, 1, ARM_AM::no_shift, 1 } }, + /* 8 bit sext */ { { ARM::t2SXTB , 0, ARM_AM::no_shift, 0 }, + /* 8 bit zext */ { ARM::t2ANDri, 1, ARM_AM::no_shift, 255 } }, + /* 16 bit sext */ { { ARM::t2SXTH , 0, ARM_AM::no_shift, 0 }, + /* 16 bit zext */ { ARM::t2UXTH , 0, ARM_AM::no_shift, 0 } } + } } - return 0; + }; + + unsigned SrcBits = SrcVT.getSizeInBits(); + unsigned DestBits = DestVT.getSizeInBits(); + (void) DestBits; + assert((SrcBits < DestBits) && "can only extend to larger types"); + assert((DestBits == 32 || DestBits == 16 || DestBits == 8) && + "other sizes unimplemented"); + assert((SrcBits == 16 || SrcBits == 8 || SrcBits == 1) && + "other sizes unimplemented"); + + bool hasV6Ops = Subtarget->hasV6Ops(); + unsigned Bitness = SrcBits / 8; // {1,8,16}=>{0,1,2} + assert((Bitness < 3) && "sanity-check table bounds"); + + bool isSingleInstr = isSingleInstrTbl[Bitness][isThumb2][hasV6Ops][isZExt]; + const TargetRegisterClass *RC = RCTbl[isThumb2][isSingleInstr]; + const InstructionTable *ITP = &IT[isSingleInstr][isThumb2][Bitness][isZExt]; + unsigned Opc = ITP->Opc; + assert(ARM::KILL != Opc && "Invalid table entry"); + unsigned hasS = ITP->hasS; + ARM_AM::ShiftOpc Shift = (ARM_AM::ShiftOpc) ITP->Shift; + assert(((Shift == ARM_AM::no_shift) == (Opc != ARM::MOVsi)) && + "only MOVsi has shift operand addressing mode"); + unsigned Imm = ITP->Imm; + + // 16-bit Thumb instructions always set CPSR (unless they're in an IT block). + bool setsCPSR = &ARM::tGPRRegClass == RC; + unsigned LSLOpc = isThumb2 ? ARM::tLSLri : ARM::MOVsi; + unsigned ResultReg; + // MOVsi encodes shift and immediate in shift operand addressing mode. + // The following condition has the same value when emitting two + // instruction sequences: both are shifts. + bool ImmIsSO = (Shift != ARM_AM::no_shift); + + // Either one or two instructions are emitted. + // They're always of the form: + // dst = in OP imm + // CPSR is set only by 16-bit Thumb instructions. + // Predicate, if any, is AL. + // S bit, if available, is always 0. 
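The two-instruction rows in these tables implement extension as a shift pair, for the combinations where no single UXTB/SXTH-style instruction is available (pre-v6 ARM, and the 16-bit Thumb encodings). A standalone sketch of the idiom for the 8-bit case, relying on arithmetic right shift of signed values as ARM provides:

    #include <cstdint>

    // i8 -> i32: move the byte to the top, then shift back down.
    inline uint32_t zext8(uint32_t In) {
      return (In << 24) >> 24; // lsl #24; lsr #24
    }
    inline int32_t sext8(uint32_t In) {
      return static_cast<int32_t>(In << 24) >> 24; // lsl #24; asr #24
    }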
+ // When two are emitted, the first's result feeds the second's input; + // that value is then dead. + unsigned NumInstrsEmitted = isSingleInstr ? 1 : 2; + for (unsigned Instr = 0; Instr != NumInstrsEmitted; ++Instr) { + ResultReg = createResultReg(RC); + bool isLsl = (0 == Instr) && !isSingleInstr; + unsigned Opcode = isLsl ? LSLOpc : Opc; + ARM_AM::ShiftOpc ShiftAM = isLsl ? ARM_AM::lsl : Shift; + unsigned ImmEnc = ImmIsSO ? ARM_AM::getSORegOpc(ShiftAM, Imm) : Imm; + bool isKill = 1 == Instr; + MachineInstrBuilder MIB = BuildMI( + *FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opcode), ResultReg); + if (setsCPSR) + MIB.addReg(ARM::CPSR, RegState::Define); + SrcReg = constrainOperandRegClass(TII.get(Opcode), SrcReg, 1 + setsCPSR); + AddDefaultPred(MIB.addReg(SrcReg, isKill * RegState::Kill).addImm(ImmEnc)); + if (hasS) + AddDefaultCC(MIB); + // Second instruction consumes the first's result. + SrcReg = ResultReg; } - unsigned ResultReg = createResultReg(RC); - MachineInstrBuilder MIB; - MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) - .addReg(SrcReg); - if (isBoolZext) - MIB.addImm(1); - else - MIB.addImm(0); - AddOptionalDefs(MIB); return ResultReg; } @@ -2707,7 +2863,7 @@ bool ARMFastISel::SelectShift(const Instruction *I, if (Reg2 == 0) return false; } - unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32)); + unsigned ResultReg = createResultReg(&ARM::GPRnopcRegClass); if(ResultReg == 0) return false; MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, @@ -2797,6 +2953,25 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) { return false; } +namespace { +// This table describes sign- and zero-extend instructions which can be +// folded into a preceding load. All of these extends have an immediate +// (sometimes a mask and sometimes a shift) that's applied after +// extension. +const struct FoldableLoadExtendsStruct { + uint16_t Opc[2]; // ARM, Thumb. + uint8_t ExpectedImm; + uint8_t isZExt : 1; + uint8_t ExpectedVT : 7; +} FoldableLoadExtends[] = { + { { ARM::SXTH, ARM::t2SXTH }, 0, 0, MVT::i16 }, + { { ARM::UXTH, ARM::t2UXTH }, 0, 1, MVT::i16 }, + { { ARM::ANDri, ARM::t2ANDri }, 255, 1, MVT::i8 }, + { { ARM::SXTB, ARM::t2SXTB }, 0, 0, MVT::i8 }, + { { ARM::UXTB, ARM::t2UXTB }, 0, 1, MVT::i8 } +}; +} + /// \brief The specified machine instr operand is a vreg, and that /// vreg is being provided by the specified load instruction.
If possible, /// try to fold the load as an operand to the instruction, returning true if @@ -2812,26 +2987,23 @@ bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, // ldrb r1, [r0] ldrb r1, [r0] // uxtb r2, r1 => // mov r3, r2 mov r3, r1 - bool isZExt = true; - switch(MI->getOpcode()) { - default: return false; - case ARM::SXTH: - case ARM::t2SXTH: - isZExt = false; - case ARM::UXTH: - case ARM::t2UXTH: - if (VT != MVT::i16) - return false; - break; - case ARM::SXTB: - case ARM::t2SXTB: - isZExt = false; - case ARM::UXTB: - case ARM::t2UXTB: - if (VT != MVT::i8) - return false; - break; + if (MI->getNumOperands() < 3 || !MI->getOperand(2).isImm()) + return false; + const uint64_t Imm = MI->getOperand(2).getImm(); + + bool Found = false; + bool isZExt; + for (unsigned i = 0, e = array_lengthof(FoldableLoadExtends); + i != e; ++i) { + if (FoldableLoadExtends[i].Opc[isThumb2] == MI->getOpcode() && + (uint64_t)FoldableLoadExtends[i].ExpectedImm == Imm && + MVT((MVT::SimpleValueType)FoldableLoadExtends[i].ExpectedVT) == VT) { + Found = true; + isZExt = FoldableLoadExtends[i].isZExt; + } } + if (!Found) return false; + // See if we can handle this address. Address Addr; if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false; @@ -2854,12 +3026,14 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, unsigned DestReg1 = createResultReg(TLI.getRegClassFor(VT)); // Load value. if (isThumb2) { + DestReg1 = constrainOperandRegClass(TII.get(ARM::t2LDRpci), DestReg1, 0); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::t2LDRpci), DestReg1) .addConstantPoolIndex(Idx)); Opc = UseGOTOFF ? ARM::t2ADDrr : ARM::t2LDRs; } else { // The extra immediate is for addrmode2. + DestReg1 = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg1, 0); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp), DestReg1) .addConstantPoolIndex(Idx).addImm(0)); @@ -2873,6 +3047,9 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, } unsigned DestReg2 = createResultReg(TLI.getRegClassFor(VT)); + DestReg2 = constrainOperandRegClass(TII.get(Opc), DestReg2, 0); + DestReg1 = constrainOperandRegClass(TII.get(Opc), DestReg1, 1); + GlobalBaseReg = constrainOperandRegClass(TII.get(Opc), GlobalBaseReg, 2); MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg2) .addReg(DestReg1) @@ -2938,12 +3115,10 @@ bool ARMFastISel::FastLowerArguments() { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; - const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::i32); + const TargetRegisterClass *RC = &ARM::rGPRRegClass; Idx = 0; for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++Idx) { - if (I->use_empty()) - continue; unsigned SrcReg = GPRArgRegs[Idx]; unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. @@ -2961,13 +3136,23 @@ bool ARMFastISel::FastLowerArguments() { namespace llvm { FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) { - // Completely untested on non-iOS. const TargetMachine &TM = funcInfo.MF->getTarget(); - // Darwin and thumb1 only for now. const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>(); - if (Subtarget->isTargetIOS() && !Subtarget->isThumb1Only()) + // Thumb2 support on iOS; ARM support on iOS, Linux and NaCl. 
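// A minimal standalone sketch (hypothetical row type and names; not part of
// the patch) of the table-driven test tryToFoldLoadIntoMI now uses above: an
// extend instruction folds into the preceding load only when its opcode, its
// trailing immediate, and the loaded value type all match one
// FoldableLoadExtends-style row.
#include <cstddef>
#include <cstdint>

struct ExtendRow {
  unsigned Opc[2];      // indexed by isThumb2
  uint64_t ExpectedImm; // mask (ANDri) or shift (UXT*/SXT*) operand
  bool isZExt;
  int ExpectedVT;       // stand-in for an MVT::SimpleValueType
};

static bool matchFoldableExtend(const ExtendRow *Rows, size_t NumRows,
                                bool isThumb2, unsigned Opcode, uint64_t Imm,
                                int VT, bool &isZExt) {
  for (size_t i = 0; i != NumRows; ++i) {
    if (Rows[i].Opc[isThumb2] == Opcode && Rows[i].ExpectedImm == Imm &&
        Rows[i].ExpectedVT == VT) {
      isZExt = Rows[i].isZExt;
      return true; // the load itself can then be emitted in extending form
    }
  }
  return false;
}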
+ bool UseFastISel = false; + UseFastISel |= Subtarget->isTargetIOS() && !Subtarget->isThumb1Only(); + UseFastISel |= Subtarget->isTargetLinux() && !Subtarget->isThumb(); + UseFastISel |= Subtarget->isTargetNaCl() && !Subtarget->isThumb(); + + if (UseFastISel) { + // iOS always has a FP for backtracking, force other targets + // to keep their FP when doing FastISel. The emitted code is + // currently superior, and in cases like test-suite's lencod + // FastISel isn't quite correct when FP is eliminated. + TM.Options.NoFramePointerElim = true; return new ARMFastISel(funcInfo, libInfo); + } return 0; } } diff --git a/contrib/llvm/lib/Target/ARM/ARMFeatures.h b/contrib/llvm/lib/Target/ARM/ARMFeatures.h new file mode 100644 index 000000000000..dafc4b3a82bd --- /dev/null +++ b/contrib/llvm/lib/Target/ARM/ARMFeatures.h @@ -0,0 +1,93 @@ +//===-- ARMFeatures.h - Checks for ARM instruction features ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the code shared between ARM CodeGen and ARM MC +// +//===----------------------------------------------------------------------===// + +#ifndef TARGET_ARM_FEATURES_H +#define TARGET_ARM_FEATURES_H + +#include "ARM.h" + +namespace llvm { + +template<typename InstrType> // could be MachineInstr or MCInst +inline bool isV8EligibleForIT(InstrType *Instr, int BLXOperandIndex = 0) { + switch (Instr->getOpcode()) { + default: + return false; + case ARM::tADC: + case ARM::tADDi3: + case ARM::tADDi8: + case ARM::tADDrSPi: + case ARM::tADDrr: + case ARM::tAND: + case ARM::tASRri: + case ARM::tASRrr: + case ARM::tBIC: + case ARM::tCMNz: + case ARM::tCMPi8: + case ARM::tCMPr: + case ARM::tEOR: + case ARM::tLDRBi: + case ARM::tLDRBr: + case ARM::tLDRHi: + case ARM::tLDRHr: + case ARM::tLDRSB: + case ARM::tLDRSH: + case ARM::tLDRi: + case ARM::tLDRr: + case ARM::tLDRspi: + case ARM::tLSLri: + case ARM::tLSLrr: + case ARM::tLSRri: + case ARM::tLSRrr: + case ARM::tMOVi8: + case ARM::tMUL: + case ARM::tMVN: + case ARM::tORR: + case ARM::tROR: + case ARM::tRSB: + case ARM::tSBC: + case ARM::tSTRBi: + case ARM::tSTRBr: + case ARM::tSTRHi: + case ARM::tSTRHr: + case ARM::tSTRi: + case ARM::tSTRr: + case ARM::tSTRspi: + case ARM::tSUBi3: + case ARM::tSUBi8: + case ARM::tSUBrr: + case ARM::tTST: + return true; +// there are some "conditionally deprecated" opcodes + case ARM::tADDspr: + return Instr->getOperand(2).getReg() != ARM::PC; + // ADD PC, SP and BLX PC were always unpredictable, + // now on top of it they're deprecated + case ARM::tADDrSP: + case ARM::tBX: + return Instr->getOperand(0).getReg() != ARM::PC; + case ARM::tBLXr: + return Instr->getOperand(BLXOperandIndex).getReg() != ARM::PC; + case ARM::tADDhirr: + return Instr->getOperand(0).getReg() != ARM::PC && + Instr->getOperand(2).getReg() != ARM::PC; + case ARM::tCMPhir: + case ARM::tMOVr: + return Instr->getOperand(0).getReg() != ARM::PC && + Instr->getOperand(1).getReg() != ARM::PC; + } +} + +} + +#endif diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp index 483802b130b5..d32bdbc58939 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -82,22 +82,11 @@ ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const { return 
hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects(); } -static bool isCalleeSavedRegister(unsigned Reg, const uint16_t *CSRegs) { - for (unsigned i = 0; CSRegs[i]; ++i) - if (Reg == CSRegs[i]) - return true; - return false; -} - static bool isCSRestore(MachineInstr *MI, const ARMBaseInstrInfo &TII, const uint16_t *CSRegs) { // Integer spill area is handled with "pop". - if (MI->getOpcode() == ARM::LDMIA_RET || - MI->getOpcode() == ARM::t2LDMIA_RET || - MI->getOpcode() == ARM::LDMIA_UPD || - MI->getOpcode() == ARM::t2LDMIA_UPD || - MI->getOpcode() == ARM::VLDMDIA_UPD) { + if (isPopOpcode(MI->getOpcode())) { // The first two operands are predicates. The last two are // imp-def and imp-use of SP. Check everything in between. for (int i = 5, e = MI->getNumOperands(); i != e; ++i) @@ -115,20 +104,31 @@ static bool isCSRestore(MachineInstr *MI, return false; } -static void -emitSPUpdate(bool isARM, - MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, - DebugLoc dl, const ARMBaseInstrInfo &TII, - int NumBytes, unsigned MIFlags = MachineInstr::NoFlags, - ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) { +static void emitRegPlusImmediate(bool isARM, MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, DebugLoc dl, + const ARMBaseInstrInfo &TII, unsigned DestReg, + unsigned SrcReg, int NumBytes, + unsigned MIFlags = MachineInstr::NoFlags, + ARMCC::CondCodes Pred = ARMCC::AL, + unsigned PredReg = 0) { if (isARM) - emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, + emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes, Pred, PredReg, TII, MIFlags); else - emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, + emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes, Pred, PredReg, TII, MIFlags); } +static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, DebugLoc dl, + const ARMBaseInstrInfo &TII, int NumBytes, + unsigned MIFlags = MachineInstr::NoFlags, + ARMCC::CondCodes Pred = ARMCC::AL, + unsigned PredReg = 0) { + emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes, + MIFlags, Pred, PredReg); +} + void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); MachineBasicBlock::iterator MBBI = MBB.begin(); @@ -141,7 +141,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { assert(!AFI->isThumb1OnlyFunction() && "This emitPrologue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); - unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); + unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); unsigned NumBytes = MFI->getStackSize(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); DebugLoc dl = MBBI != MBB.end() ? 
MBBI->getDebugLoc() : DebugLoc(); @@ -174,6 +175,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { unsigned Reg = CSI[i].getReg(); int FI = CSI[i].getFrameIdx(); switch (Reg) { + case ARM::R0: + case ARM::R1: + case ARM::R2: + case ARM::R3: case ARM::R4: case ARM::R5: case ARM::R6: @@ -181,73 +186,61 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { case ARM::LR: if (Reg == FramePtr) FramePtrSpillFI = FI; - AFI->addGPRCalleeSavedArea1Frame(FI); GPRCS1Size += 4; break; case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11: + case ARM::R12: if (Reg == FramePtr) FramePtrSpillFI = FI; - if (STI.isTargetIOS()) { - AFI->addGPRCalleeSavedArea2Frame(FI); + if (STI.isTargetIOS()) GPRCS2Size += 4; - } else { - AFI->addGPRCalleeSavedArea1Frame(FI); + else GPRCS1Size += 4; - } break; default: // This is a DPR. Exclude the aligned DPRCS2 spills. if (Reg == ARM::D8) D8SpillFI = FI; - if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs()) { - AFI->addDPRCalleeSavedAreaFrame(FI); + if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs()) DPRCSSize += 8; - } } } // Move past area 1. - if (GPRCS1Size > 0) MBBI++; - - // Set FP to point to the stack slot that contains the previous FP. - // For iOS, FP is R7, which has now been stored in spill area 1. - // Otherwise, if this is not iOS, all the callee-saved registers go - // into spill area 1, including the FP in R11. In either case, it is - // now safe to emit this assignment. - bool HasFP = hasFP(MF); - if (HasFP) { - unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : ARM::t2ADDri; - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, dl, TII.get(ADDriOpc), FramePtr) - .addFrameIndex(FramePtrSpillFI).addImm(0) - .setMIFlag(MachineInstr::FrameSetup); - AddDefaultCC(AddDefaultPred(MIB)); - } - - // Move past area 2. - if (GPRCS2Size > 0) MBBI++; + MachineBasicBlock::iterator LastPush = MBB.end(), FramePtrPush; + if (GPRCS1Size > 0) + FramePtrPush = LastPush = MBBI++; // Determine starting offsets of spill areas. + bool HasFP = hasFP(MF); unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; - if (HasFP) + int FramePtrOffsetInPush = 0; + if (HasFP) { + FramePtrOffsetInPush = MFI->getObjectOffset(FramePtrSpillFI) + GPRCS1Size; AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes); + } AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); + // Move past area 2. + if (GPRCS2Size > 0) { + LastPush = MBBI++; + } + // Move past area 3. if (DPRCSSize > 0) { - MBBI++; + LastPush = MBBI++; // Since vpush register list cannot have gaps, there may be multiple vpush // instructions in the prologue. while (MBBI->getOpcode() == ARM::VSTMDDB_UPD) - MBBI++; + LastPush = MBBI++; } // Move past the aligned DPRCS2 area. @@ -263,8 +256,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { if (NumBytes) { // Adjust SP after all the callee-save spills. - emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes, - MachineInstr::FrameSetup); + if (tryFoldSPUpdateIntoPushPop(MF, LastPush, NumBytes)) { + if (LastPush == FramePtrPush) + FramePtrOffsetInPush += NumBytes; + } else + emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes, + MachineInstr::FrameSetup); + if (HasFP && isARM) // Restore from fp only in ARM mode: e.g. 
sub sp, r7, #24 // Note it's not safe to do this in Thumb2 mode because it would have @@ -277,6 +275,18 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { AFI->setShouldRestoreSPFromFP(true); } + // Set FP to point to the stack slot that contains the previous FP. + // For iOS, FP is R7, which has now been stored in spill area 1. + // Otherwise, if this is not iOS, all the callee-saved registers go + // into spill area 1, including the FP in R11. In either case, it + // is in area one and the adjustment needs to take place just after + // that push. + if (HasFP) + emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, ++FramePtrPush, dl, TII, + FramePtr, ARM::SP, FramePtrOffsetInPush, + MachineInstr::FrameSetup); + + if (STI.isTargetELF() && hasFP(MF)) MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - AFI->getFramePtrSpillOffset()); @@ -357,7 +367,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, "This emitEpilogue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); - unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); + unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); int NumBytes = (int)MFI->getStackSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); @@ -371,11 +382,11 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); } else { // Unwind MBBI to point to first LDR / VLDRD. - const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(); + const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF); if (MBBI != MBB.begin()) { - do + do { --MBBI; - while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs)); + } while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs)); if (!isCSRestore(MBBI, TII, CSRegs)) ++MBBI; } @@ -419,8 +430,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, ARM::SP) .addReg(FramePtr)); } - } else if (NumBytes) - emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); + } else if (NumBytes && !tryFoldSPUpdateIntoPushPop(MF, MBBI, NumBytes)) + emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); // Increment past our save areas. if (AFI->getDPRCalleeSavedAreaSize()) { @@ -499,12 +510,6 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF, FrameReg = ARM::SP; Offset += SPAdj; - if (AFI->isGPRCalleeSavedArea1Frame(FI)) - return Offset - AFI->getGPRCalleeSavedArea1Offset(); - else if (AFI->isGPRCalleeSavedArea2Frame(FI)) - return Offset - AFI->getGPRCalleeSavedArea2Offset(); - else if (AFI->isDPRCalleeSavedAreaFrame(FI)) - return Offset - AFI->getDPRCalleeSavedAreaOffset(); // SP can move around if there are allocas. We may also lose track of SP // when emergency spilling inside a non-reserved call frame setup. @@ -656,6 +661,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, unsigned RetOpcode = MI->getOpcode(); bool isTailCall = (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri); + bool isInterrupt = + RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR; SmallVector<unsigned, 4> Regs; unsigned i = CSI.size(); @@ -670,7 +677,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs) continue; - if (Reg == ARM::LR && !isTailCall && !isVarArg && STI.hasV5TOps()) { + if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt && + STI.hasV5TOps()) { Reg = ARM::PC; LdmOpc = AFI->isThumbFunction() ? 
ARM::t2LDMIA_RET : ARM::LDMIA_RET; // Fold the return instruction into the LDM. @@ -1197,7 +1205,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Don't spill FP if the frame can be eliminated. This is determined // by scanning the callee-save registers to see if any is used. - const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(); + const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF); for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; bool Spilled = false; @@ -1224,6 +1232,8 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, case ARM::LR: LRSpilled = true; // Fallthrough + case ARM::R0: case ARM::R1: + case ARM::R2: case ARM::R3: case ARM::R4: case ARM::R5: case ARM::R6: case ARM::R7: CS1Spilled = true; @@ -1238,6 +1248,8 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } switch (Reg) { + case ARM::R0: case ARM::R1: + case ARM::R2: case ARM::R3: case ARM::R4: case ARM::R5: case ARM::R6: case ARM::R7: case ARM::LR: @@ -1293,8 +1305,12 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, if (!LRSpilled && CS1Spilled) { MRI.setPhysRegUsed(ARM::LR); NumGPRSpills++; - UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(), - UnspilledCS1GPRs.end(), (unsigned)ARM::LR)); + SmallVectorImpl<unsigned>::iterator LRPos; + LRPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(), + (unsigned)ARM::LR); + if (LRPos != UnspilledCS1GPRs.end()) + UnspilledCS1GPRs.erase(LRPos); + ForceLRSpill = false; ExtraCSSpill = true; } diff --git a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp index 1240169e84ed..c69d313fd9ce 100644 --- a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp @@ -44,10 +44,16 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { if (LastMI && (MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainGeneral) { MachineInstr *DefMI = LastMI; const MCInstrDesc &LastMCID = LastMI->getDesc(); + const TargetMachine &TM = + MI->getParent()->getParent()->getTarget(); + const ARMBaseInstrInfo &TII = + *static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo()); + // Skip over one non-VFP / NEON instruction. if (!LastMI->isBarrier() && // On A9, AGU and NEON/FPU are muxed. - !(STI.isLikeA9() && (LastMI->mayLoad() || LastMI->mayStore())) && + !(TII.getSubtarget().isLikeA9() && + (LastMI->mayLoad() || LastMI->mayStore())) && (LastMCID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) { MachineBasicBlock::iterator I = LastMI; if (I != LastMI->getParent()->begin()) { @@ -58,7 +64,7 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { if (TII.isFpMLxInstruction(DefMI->getOpcode()) && (TII.canCauseFpMLxStall(MI->getOpcode()) || - hasRAWHazard(DefMI, MI, TRI))) { + hasRAWHazard(DefMI, MI, TII.getRegisterInfo()))) { // Try to schedule another instruction for the next 4 cycles. if (FpMLxStalls == 0) FpMLxStalls = 4; diff --git a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h index 98bfc4cf0cc5..e1dcec3d1cc8 100644 --- a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h +++ b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h @@ -28,21 +28,14 @@ class MachineInstr; /// ARM preRA scheduler uses an unspecialized instance of the /// ScoreboardHazardRecognizer. 
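// A minimal sketch (hypothetical mini-types; illustrative only, not part of
// the patch) of the refactor visible in the hazard-recognizer hunks above:
// instead of caching TII/TRI/STI references at construction time, the
// recognizer now walks from the instruction in hand to its function's target
// to fetch them on demand, so the class below keeps only per-schedule state
// (LastMI, FpMLxStalls).
struct SketchTarget { int InstrInfo; };
struct SketchFunction { SketchTarget Target; };
struct SketchBlock { SketchFunction *Parent; };
struct SketchInstr { SketchBlock *Parent; };

static int instrInfoFor(const SketchInstr &MI) {
  // MI -> basic block -> function -> target, as in getHazardType() above.
  return MI.Parent->Parent->Target.InstrInfo;
}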
class ARMHazardRecognizer : public ScoreboardHazardRecognizer { - const ARMBaseInstrInfo &TII; - const ARMBaseRegisterInfo &TRI; - const ARMSubtarget &STI; - MachineInstr *LastMI; unsigned FpMLxStalls; public: ARMHazardRecognizer(const InstrItineraryData *ItinData, - const ARMBaseInstrInfo &tii, - const ARMBaseRegisterInfo &tri, - const ARMSubtarget &sti, - const ScheduleDAG *DAG) : - ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched"), TII(tii), - TRI(tri), STI(sti), LastMI(0) {} + const ScheduleDAG *DAG) + : ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched"), + LastMI(0) {} virtual HazardType getHazardType(SUnit *SU, int Stalls); virtual void Reset(); diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 5e88e95d6162..87d15226947a 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -61,7 +61,6 @@ enum AddrMode2Type { class ARMDAGToDAGISel : public SelectionDAGISel { ARMBaseTargetMachine &TM; - const ARMBaseInstrInfo *TII; /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can /// make the right decision when generating code for different targets. @@ -71,7 +70,6 @@ public: explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel) : SelectionDAGISel(tm, OptLevel), TM(tm), - TII(static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo())), Subtarget(&TM.getSubtarget<ARMSubtarget>()) { } @@ -132,6 +130,13 @@ public: return true; } + bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) { + const ConstantSDNode *CN = cast<ConstantSDNode>(N); + Pred = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32); + Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32); + return true; + } + bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N, SDValue &Offset, SDValue &Opc); bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N, @@ -177,6 +182,7 @@ public: SDValue &OffImm); bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base, SDValue &OffReg, SDValue &ShImm); + bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm); inline bool is_so_imm(unsigned Imm) const { return ARM_AM::getSOImmVal(Imm) != -1; @@ -240,21 +246,6 @@ private: /// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM. SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned); - /// SelectCMOVOp - Select CMOV instructions for ARM. - SDNode *SelectCMOVOp(SDNode *N); - SDNode *SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, - ARMCC::CondCodes CCVal, SDValue CCR, - SDValue InFlag); - SDNode *SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, - ARMCC::CondCodes CCVal, SDValue CCR, - SDValue InFlag); - SDNode *SelectT2CMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, - ARMCC::CondCodes CCVal, SDValue CCR, - SDValue InFlag); - SDNode *SelectARMCMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, - ARMCC::CondCodes CCVal, SDValue CCR, - SDValue InFlag); - // Select special operations if node forms integer ABS pattern SDNode *SelectABSOp(SDNode *N); @@ -262,7 +253,7 @@ private: SDNode *SelectConcatVector(SDNode *N); - SDNode *SelectAtomic64(SDNode *Node, unsigned Opc); + SDNode *SelectAtomic(SDNode *N, unsigned Op8, unsigned Op16, unsigned Op32, unsigned Op64); /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. 
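// A minimal standalone sketch (hypothetical opcode numbers; not part of the
// patch) of the width dispatch behind the new SelectAtomic(N, Op8, Op16,
// Op32, Op64) declared above: one selector chooses among four pseudo opcodes
// by the atomic operation's memory width, replacing one hand-written case
// per 64-bit ARMISD::ATOM*_DAG node.
static unsigned selectAtomicOpcode(unsigned MemBits, unsigned Op8,
                                   unsigned Op16, unsigned Op32,
                                   unsigned Op64) {
  switch (MemBits) {
  case 8:  return Op8;
  case 16: return Op16;
  case 32: return Op32;
  case 64: return Op64; // 64-bit results are returned as two i32 values
  default: return 0;    // unexpected atomic width
  }
}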
@@ -364,7 +355,7 @@ void ARMDAGToDAGISel::PreprocessISelDAG() { continue; // Check if the AND mask is an immediate of the form: 000.....1111111100 - unsigned TZ = CountTrailingZeros_32(And_imm); + unsigned TZ = countTrailingZeros(And_imm); if (TZ != 1 && TZ != 2) // Be conservative here. Shifter operands aren't always free. e.g. On // Swift, left shifter operand of 1 / 2 for free but others are not. @@ -402,12 +393,12 @@ void ARMDAGToDAGISel::PreprocessISelDAG() { } // Now make the transformation. - Srl = CurDAG->getNode(ISD::SRL, Srl.getDebugLoc(), MVT::i32, + Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32, Srl.getOperand(0), CurDAG->getConstant(Srl_imm+TZ, MVT::i32)); - N1 = CurDAG->getNode(ISD::AND, N1.getDebugLoc(), MVT::i32, + N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32, Srl, CurDAG->getConstant(And_imm, MVT::i32)); - N1 = CurDAG->getNode(ISD::SHL, N1.getDebugLoc(), MVT::i32, + N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32, N1, CurDAG->getConstant(TZ, MVT::i32)); CurDAG->UpdateNodeOperands(N, N0, N1); } @@ -423,7 +414,7 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const { if (!CheckVMLxHazard) return true; - if (!Subtarget->isCortexA8() && !Subtarget->isLikeA9() && + if (!Subtarget->isCortexA8() && !Subtarget->isCortexA9() && !Subtarget->isSwift()) return true; @@ -434,6 +425,9 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const { if (Use->getOpcode() == ISD::CopyToReg) return true; if (Use->isMachineOpcode()) { + const ARMBaseInstrInfo *TII = + static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo()); + const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode()); if (MCID.mayStore()) return true; @@ -533,7 +527,8 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, if (N.getOpcode() == ISD::FrameIndex) { // Match frame index. 
int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); OffImm = CurDAG->getTargetConstant(0, MVT::i32); return true; } @@ -557,7 +552,8 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); } OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); return true; @@ -703,7 +699,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, Base = N; if (N.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); } else if (N.getOpcode() == ARMISD::Wrapper && !(Subtarget->useMovt() && N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { @@ -724,7 +721,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); } Offset = CurDAG->getRegister(0, MVT::i32); @@ -901,7 +899,8 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N, Base = N; if (N.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); } Offset = CurDAG->getRegister(0, MVT::i32); Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0),MVT::i32); @@ -915,7 +914,8 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); } Offset = CurDAG->getRegister(0, MVT::i32); @@ -960,7 +960,8 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, Base = N; if (N.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); } else if (N.getOpcode() == ARMISD::Wrapper && !(Subtarget->useMovt() && N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { @@ -978,7 +979,8 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); } ARM_AM::AddrOpc AddSub = ARM_AM::add; @@ -1202,7 +1204,8 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm) { if (N.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); OffImm = CurDAG->getTargetConstant(0, MVT::i32); return true; } @@ -1219,7 +1222,8 @@ 
bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); } OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); return true; @@ -1267,7 +1271,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, if (N.getOpcode() == ISD::FrameIndex) { // Match frame index. int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); OffImm = CurDAG->getTargetConstant(0, MVT::i32); return true; } @@ -1297,7 +1302,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); } OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); return true; @@ -1326,7 +1332,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); } OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); return true; @@ -1403,6 +1410,34 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N, return true; } +bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base, + SDValue &OffImm) { + // This *must* succeed since it's used for the irreplaceable ldrex and strex + // instructions. + Base = N; + OffImm = CurDAG->getTargetConstant(0, MVT::i32); + + if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N)) + return true; + + ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); + if (!RHS) + return true; + + uint32_t RHSC = (int)RHS->getZExtValue(); + if (RHSC > 1020 || RHSC % 4 != 0) + return true; + + Base = N.getOperand(0); + if (Base.getOpcode() == ISD::FrameIndex) { + int FI = cast<FrameIndexSDNode>(Base)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, getTargetLowering()->getPointerTy()); + } + + OffImm = CurDAG->getTargetConstant(RHSC / 4, MVT::i32); + return true; +} + //===--------------------------------------------------------------------===// /// getAL - Returns an ARMCC::AL immediate node.
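// A minimal standalone sketch (plain C++; not part of the patch) of the
// encoding rule SelectT2AddrModeExclusive applies above: the Thumb-2
// LDREX/STREX immediate field holds the offset in words, so only
// word-aligned byte offsets up to 1020 fold into the instruction; anything
// else falls back to the bare base register with a zero offset.
#include <cassert>
#include <cstdint>

static bool encodeExclusiveOffset(uint32_t ByteOff, uint32_t &Enc) {
  if (ByteOff > 1020 || ByteOff % 4 != 0)
    return false;    // not representable; keep offset 0, add it separately
  Enc = ByteOff / 4; // the instruction scales the immediate by 4
  return true;
}

int main() {
  uint32_t Enc;
  assert(encodeExclusiveOffset(1020, Enc) && Enc == 255);
  assert(!encodeExclusiveOffset(2, Enc));    // not word aligned
  assert(!encodeExclusiveOffset(1024, Enc)); // out of range
  return 0;
}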
@@ -1468,14 +1503,14 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) { SDValue Base = LD->getBasePtr(); SDValue Ops[]= { Base, AMOpc, getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), Chain }; - return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, + return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, MVT::Other, Ops); } else { SDValue Chain = LD->getChain(); SDValue Base = LD->getBasePtr(); SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), Chain }; - return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, + return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, MVT::Other, Ops); } } @@ -1524,7 +1559,7 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) { SDValue Base = LD->getBasePtr(); SDValue Ops[]= { Base, Offset, getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), Chain }; - return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, MVT::i32, + return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, MVT::Other, Ops); } @@ -1533,7 +1568,7 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) { /// \brief Form a GPRPair pseudo register from a pair of GPR regs. SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) { - DebugLoc dl = V0.getNode()->getDebugLoc(); + SDLoc dl(V0.getNode()); SDValue RegClass = CurDAG->getTargetConstant(ARM::GPRPairRegClassID, MVT::i32); SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32); @@ -1544,7 +1579,7 @@ SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) { /// \brief Form a D register from a pair of S registers. SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) { - DebugLoc dl = V0.getNode()->getDebugLoc(); + SDLoc dl(V0.getNode()); SDValue RegClass = CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, MVT::i32); SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32); @@ -1555,7 +1590,7 @@ SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) { /// \brief Form a quad register from a pair of D registers. SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) { - DebugLoc dl = V0.getNode()->getDebugLoc(); + SDLoc dl(V0.getNode()); SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, MVT::i32); SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32); SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32); @@ -1565,7 +1600,7 @@ SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) { /// \brief Form 4 consecutive D registers from a pair of Q registers. SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) { - DebugLoc dl = V0.getNode()->getDebugLoc(); + SDLoc dl(V0.getNode()); SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32); SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32); SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32); @@ -1576,7 +1611,7 @@ SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) { /// \brief Form 4 consecutive S registers. 
SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3) { - DebugLoc dl = V0.getNode()->getDebugLoc(); + SDLoc dl(V0.getNode()); SDValue RegClass = CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, MVT::i32); SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32); @@ -1591,7 +1626,7 @@ SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, /// \brief Form 4 consecutive D registers. SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3) { - DebugLoc dl = V0.getNode()->getDebugLoc(); + SDLoc dl(V0.getNode()); SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32); SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32); SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32); @@ -1605,7 +1640,7 @@ SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, /// \brief Form 4 consecutive Q registers. SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3) { - DebugLoc dl = V0.getNode()->getDebugLoc(); + SDLoc dl(V0.getNode()); SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, MVT::i32); SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32); SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32); @@ -1689,7 +1724,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, const uint16_t *QOpcodes0, const uint16_t *QOpcodes1) { assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range"); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue MemAddr, Align; unsigned AddrOpIdx = isUpdating ? 1 : 2; @@ -1821,7 +1856,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, const uint16_t *QOpcodes0, const uint16_t *QOpcodes1) { assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range"); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue MemAddr, Align; unsigned AddrOpIdx = isUpdating ? 1 : 2; @@ -1966,7 +2001,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, const uint16_t *DOpcodes, const uint16_t *QOpcodes) { assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range"); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue MemAddr, Align; unsigned AddrOpIdx = isUpdating ? 1 : 2; @@ -2084,7 +2119,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs, const uint16_t *Opcodes) { assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range"); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue MemAddr, Align; if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align)) @@ -2166,7 +2201,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc) { assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range"); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT VT = N->getValueType(0); unsigned FirstTblReg = IsExt ? 
2 : 1; @@ -2278,204 +2313,6 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, return NULL; } -SDNode *ARMDAGToDAGISel:: -SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, - ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) { - SDValue CPTmp0; - SDValue CPTmp1; - if (SelectT2ShifterOperandReg(TrueVal, CPTmp0, CPTmp1)) { - unsigned SOVal = cast<ConstantSDNode>(CPTmp1)->getZExtValue(); - unsigned SOShOp = ARM_AM::getSORegShOp(SOVal); - unsigned Opc = 0; - switch (SOShOp) { - case ARM_AM::lsl: Opc = ARM::t2MOVCClsl; break; - case ARM_AM::lsr: Opc = ARM::t2MOVCClsr; break; - case ARM_AM::asr: Opc = ARM::t2MOVCCasr; break; - case ARM_AM::ror: Opc = ARM::t2MOVCCror; break; - default: - llvm_unreachable("Unknown so_reg opcode!"); - } - SDValue SOShImm = - CurDAG->getTargetConstant(ARM_AM::getSORegOffset(SOVal), MVT::i32); - SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); - SDValue Ops[] = { FalseVal, CPTmp0, SOShImm, CC, CCR, InFlag }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32,Ops, 6); - } - return 0; -} - -SDNode *ARMDAGToDAGISel:: -SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, - ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) { - SDValue CPTmp0; - SDValue CPTmp1; - SDValue CPTmp2; - if (SelectImmShifterOperand(TrueVal, CPTmp0, CPTmp2)) { - SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); - SDValue Ops[] = { FalseVal, CPTmp0, CPTmp2, CC, CCR, InFlag }; - return CurDAG->SelectNodeTo(N, ARM::MOVCCsi, MVT::i32, Ops, 6); - } - - if (SelectRegShifterOperand(TrueVal, CPTmp0, CPTmp1, CPTmp2)) { - SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); - SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, InFlag }; - return CurDAG->SelectNodeTo(N, ARM::MOVCCsr, MVT::i32, Ops, 7); - } - return 0; -} - -SDNode *ARMDAGToDAGISel:: -SelectT2CMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, - ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) { - ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal); - if (!T) - return 0; - - unsigned Opc = 0; - unsigned TrueImm = T->getZExtValue(); - if (is_t2_so_imm(TrueImm)) { - Opc = ARM::t2MOVCCi; - } else if (TrueImm <= 0xffff) { - Opc = ARM::t2MOVCCi16; - } else if (is_t2_so_imm_not(TrueImm)) { - TrueImm = ~TrueImm; - Opc = ARM::t2MVNCCi; - } else if (TrueVal.getNode()->hasOneUse() && Subtarget->hasV6T2Ops()) { - // Large immediate. - Opc = ARM::t2MOVCCi32imm; - } - - if (Opc) { - SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32); - SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); - SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); - } - - return 0; -} - -SDNode *ARMDAGToDAGISel:: -SelectARMCMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, - ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) { - ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal); - if (!T) - return 0; - - unsigned Opc = 0; - unsigned TrueImm = T->getZExtValue(); - bool isSoImm = is_so_imm(TrueImm); - if (isSoImm) { - Opc = ARM::MOVCCi; - } else if (Subtarget->hasV6T2Ops() && TrueImm <= 0xffff) { - Opc = ARM::MOVCCi16; - } else if (is_so_imm_not(TrueImm)) { - TrueImm = ~TrueImm; - Opc = ARM::MVNCCi; - } else if (TrueVal.getNode()->hasOneUse() && - (Subtarget->hasV6T2Ops() || ARM_AM::isSOImmTwoPartVal(TrueImm))) { - // Large immediate. 
- Opc = ARM::MOVCCi32imm; - } - - if (Opc) { - SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32); - SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); - SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); - } - - return 0; -} - -SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) { - EVT VT = N->getValueType(0); - SDValue FalseVal = N->getOperand(0); - SDValue TrueVal = N->getOperand(1); - SDValue CC = N->getOperand(2); - SDValue CCR = N->getOperand(3); - SDValue InFlag = N->getOperand(4); - assert(CC.getOpcode() == ISD::Constant); - assert(CCR.getOpcode() == ISD::Register); - ARMCC::CondCodes CCVal = - (ARMCC::CondCodes)cast<ConstantSDNode>(CC)->getZExtValue(); - - if (!Subtarget->isThumb1Only() && VT == MVT::i32) { - // Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc) - // Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc) - // Pattern complexity = 18 cost = 1 size = 0 - if (Subtarget->isThumb()) { - SDNode *Res = SelectT2CMOVShiftOp(N, FalseVal, TrueVal, - CCVal, CCR, InFlag); - if (!Res) - Res = SelectT2CMOVShiftOp(N, TrueVal, FalseVal, - ARMCC::getOppositeCondition(CCVal), CCR, InFlag); - if (Res) - return Res; - } else { - SDNode *Res = SelectARMCMOVShiftOp(N, FalseVal, TrueVal, - CCVal, CCR, InFlag); - if (!Res) - Res = SelectARMCMOVShiftOp(N, TrueVal, FalseVal, - ARMCC::getOppositeCondition(CCVal), CCR, InFlag); - if (Res) - return Res; - } - - // Pattern: (ARMcmov:i32 GPR:i32:$false, - // (imm:i32)<<P:Pred_so_imm>>:$true, - // (imm:i32):$cc) - // Emits: (MOVCCi:i32 GPR:i32:$false, - // (so_imm:i32 (imm:i32):$true), (imm:i32):$cc) - // Pattern complexity = 10 cost = 1 size = 0 - if (Subtarget->isThumb()) { - SDNode *Res = SelectT2CMOVImmOp(N, FalseVal, TrueVal, - CCVal, CCR, InFlag); - if (!Res) - Res = SelectT2CMOVImmOp(N, TrueVal, FalseVal, - ARMCC::getOppositeCondition(CCVal), CCR, InFlag); - if (Res) - return Res; - } else { - SDNode *Res = SelectARMCMOVImmOp(N, FalseVal, TrueVal, - CCVal, CCR, InFlag); - if (!Res) - Res = SelectARMCMOVImmOp(N, TrueVal, FalseVal, - ARMCC::getOppositeCondition(CCVal), CCR, InFlag); - if (Res) - return Res; - } - } - - // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) - // Emits: (MOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) - // Pattern complexity = 6 cost = 1 size = 0 - // - // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) - // Emits: (tMOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) - // Pattern complexity = 6 cost = 11 size = 0 - // - // Also VMOVScc and VMOVDcc. - SDValue Tmp2 = CurDAG->getTargetConstant(CCVal, MVT::i32); - SDValue Ops[] = { FalseVal, TrueVal, Tmp2, CCR, InFlag }; - unsigned Opc = 0; - switch (VT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("Illegal conditional move type!"); - case MVT::i32: - Opc = Subtarget->isThumb() - ? (Subtarget->hasThumb2() ? ARM::t2MOVCCr : ARM::tMOVCCr_pseudo) - : ARM::MOVCCr; - break; - case MVT::f32: - Opc = ARM::VMOVScc; - break; - case MVT::f64: - Opc = ARM::VMOVDcc; - break; - } - return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5); -} - /// Target-specific DAG combining for ISD::XOR. 
/// Target-independent combining lowers SELECT_CC nodes of the form /// select_cc setg[ge] X, 0, X, -X @@ -2524,27 +2361,40 @@ SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) { return createDRegPairNode(VT, N->getOperand(0), N->getOperand(1)); } -SDNode *ARMDAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { +SDNode *ARMDAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8, + unsigned Op16,unsigned Op32, + unsigned Op64) { + // Mostly direct translation to the given operations, except that we preserve + // the AtomicOrdering for use later on. + AtomicSDNode *AN = cast<AtomicSDNode>(Node); + EVT VT = AN->getMemoryVT(); + + unsigned Op; + SDVTList VTs = CurDAG->getVTList(AN->getValueType(0), MVT::Other); + if (VT == MVT::i8) + Op = Op8; + else if (VT == MVT::i16) + Op = Op16; + else if (VT == MVT::i32) + Op = Op32; + else if (VT == MVT::i64) { + Op = Op64; + VTs = CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other); + } else + llvm_unreachable("Unexpected atomic operation"); + SmallVector<SDValue, 6> Ops; - Ops.push_back(Node->getOperand(1)); // Ptr - Ops.push_back(Node->getOperand(2)); // Low part of Val1 - Ops.push_back(Node->getOperand(3)); // High part of Val1 - if (Opc == ARM::ATOMCMPXCHG6432) { - Ops.push_back(Node->getOperand(4)); // Low part of Val2 - Ops.push_back(Node->getOperand(5)); // High part of Val2 - } - Ops.push_back(Node->getOperand(0)); // Chain - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = cast<MemSDNode>(Node)->getMemOperand(); - SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(), - MVT::i32, MVT::i32, MVT::Other, - Ops); - cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1); - return ResNode; + for (unsigned i = 1; i < AN->getNumOperands(); ++i) + Ops.push_back(AN->getOperand(i)); + + Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32)); + Ops.push_back(AN->getOperand(0)); // Chain moves to the end + + return CurDAG->SelectNodeTo(Node, Op, VTs, &Ops[0], Ops.size()); } SDNode *ARMDAGToDAGISel::Select(SDNode *N) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); if (N->isMachineOpcode()) { N->setNodeId(-1); @@ -2589,7 +2439,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue CPIdx = CurDAG->getTargetConstantPool(ConstantInt::get( Type::getInt32Ty(*CurDAG->getContext()), Val), - TLI.getPointerTy()); + getTargetLowering()->getPointerTy()); SDNode *ResNode; if (Subtarget->isThumb1Only()) { @@ -2619,7 +2469,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case ISD::FrameIndex: { // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm. 
int FI = cast<FrameIndexSDNode>(N)->getIndex(); - SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + SDValue TFI = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); if (Subtarget->isThumb1Only()) { SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) }; @@ -2840,8 +2691,6 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue(Chain.getNode(), Chain.getResNo())); return NULL; } - case ARMISD::CMOV: - return SelectCMOVOp(N); case ARMISD::VZIP: { unsigned Opc = 0; EVT VT = N->getValueType(0); @@ -3123,7 +2972,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case Intrinsic::arm_ldrexd: { SDValue MemAddr = N->getOperand(2); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue Chain = N->getOperand(0); bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2(); @@ -3181,7 +3030,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } case Intrinsic::arm_strexd: { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue Chain = N->getOperand(0); SDValue Val0 = N->getOperand(2); SDValue Val1 = N->getOperand(3); @@ -3385,7 +3234,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } case ARMISD::VTBL1: { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT VT = N->getValueType(0); SmallVector<SDValue, 6> Ops; @@ -3396,7 +3245,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops); } case ARMISD::VTBL2: { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT VT = N->getValueType(0); // Form a REG_SEQUENCE to force register allocation. @@ -3415,31 +3264,90 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case ISD::CONCAT_VECTORS: return SelectConcatVector(N); - case ARMISD::ATOMOR64_DAG: - return SelectAtomic64(N, ARM::ATOMOR6432); - case ARMISD::ATOMXOR64_DAG: - return SelectAtomic64(N, ARM::ATOMXOR6432); - case ARMISD::ATOMADD64_DAG: - return SelectAtomic64(N, ARM::ATOMADD6432); - case ARMISD::ATOMSUB64_DAG: - return SelectAtomic64(N, ARM::ATOMSUB6432); - case ARMISD::ATOMNAND64_DAG: - return SelectAtomic64(N, ARM::ATOMNAND6432); - case ARMISD::ATOMAND64_DAG: - return SelectAtomic64(N, ARM::ATOMAND6432); - case ARMISD::ATOMSWAP64_DAG: - return SelectAtomic64(N, ARM::ATOMSWAP6432); - case ARMISD::ATOMCMPXCHG64_DAG: - return SelectAtomic64(N, ARM::ATOMCMPXCHG6432); - - case ARMISD::ATOMMIN64_DAG: - return SelectAtomic64(N, ARM::ATOMMIN6432); - case ARMISD::ATOMUMIN64_DAG: - return SelectAtomic64(N, ARM::ATOMUMIN6432); - case ARMISD::ATOMMAX64_DAG: - return SelectAtomic64(N, ARM::ATOMMAX6432); - case ARMISD::ATOMUMAX64_DAG: - return SelectAtomic64(N, ARM::ATOMUMAX6432); + case ISD::ATOMIC_LOAD: + if (cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i64) + return SelectAtomic(N, 0, 0, 0, ARM::ATOMIC_LOAD_I64); + else + break; + + case ISD::ATOMIC_STORE: + if (cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i64) + return SelectAtomic(N, 0, 0, 0, ARM::ATOMIC_STORE_I64); + else + break; + + case ISD::ATOMIC_LOAD_ADD: + return SelectAtomic(N, + ARM::ATOMIC_LOAD_ADD_I8, + ARM::ATOMIC_LOAD_ADD_I16, + ARM::ATOMIC_LOAD_ADD_I32, + ARM::ATOMIC_LOAD_ADD_I64); + case ISD::ATOMIC_LOAD_SUB: + return SelectAtomic(N, + ARM::ATOMIC_LOAD_SUB_I8, + ARM::ATOMIC_LOAD_SUB_I16, + ARM::ATOMIC_LOAD_SUB_I32, + ARM::ATOMIC_LOAD_SUB_I64); + case ISD::ATOMIC_LOAD_AND: + return SelectAtomic(N, + ARM::ATOMIC_LOAD_AND_I8, + ARM::ATOMIC_LOAD_AND_I16, + ARM::ATOMIC_LOAD_AND_I32, + ARM::ATOMIC_LOAD_AND_I64); + case ISD::ATOMIC_LOAD_OR: + return SelectAtomic(N, + ARM::ATOMIC_LOAD_OR_I8, + 
ARM::ATOMIC_LOAD_OR_I16, + ARM::ATOMIC_LOAD_OR_I32, + ARM::ATOMIC_LOAD_OR_I64); + case ISD::ATOMIC_LOAD_XOR: + return SelectAtomic(N, + ARM::ATOMIC_LOAD_XOR_I8, + ARM::ATOMIC_LOAD_XOR_I16, + ARM::ATOMIC_LOAD_XOR_I32, + ARM::ATOMIC_LOAD_XOR_I64); + case ISD::ATOMIC_LOAD_NAND: + return SelectAtomic(N, + ARM::ATOMIC_LOAD_NAND_I8, + ARM::ATOMIC_LOAD_NAND_I16, + ARM::ATOMIC_LOAD_NAND_I32, + ARM::ATOMIC_LOAD_NAND_I64); + case ISD::ATOMIC_LOAD_MIN: + return SelectAtomic(N, + ARM::ATOMIC_LOAD_MIN_I8, + ARM::ATOMIC_LOAD_MIN_I16, + ARM::ATOMIC_LOAD_MIN_I32, + ARM::ATOMIC_LOAD_MIN_I64); + case ISD::ATOMIC_LOAD_MAX: + return SelectAtomic(N, + ARM::ATOMIC_LOAD_MAX_I8, + ARM::ATOMIC_LOAD_MAX_I16, + ARM::ATOMIC_LOAD_MAX_I32, + ARM::ATOMIC_LOAD_MAX_I64); + case ISD::ATOMIC_LOAD_UMIN: + return SelectAtomic(N, + ARM::ATOMIC_LOAD_UMIN_I8, + ARM::ATOMIC_LOAD_UMIN_I16, + ARM::ATOMIC_LOAD_UMIN_I32, + ARM::ATOMIC_LOAD_UMIN_I64); + case ISD::ATOMIC_LOAD_UMAX: + return SelectAtomic(N, + ARM::ATOMIC_LOAD_UMAX_I8, + ARM::ATOMIC_LOAD_UMAX_I16, + ARM::ATOMIC_LOAD_UMAX_I32, + ARM::ATOMIC_LOAD_UMAX_I64); + case ISD::ATOMIC_SWAP: + return SelectAtomic(N, + ARM::ATOMIC_SWAP_I8, + ARM::ATOMIC_SWAP_I16, + ARM::ATOMIC_SWAP_I32, + ARM::ATOMIC_SWAP_I64); + case ISD::ATOMIC_CMP_SWAP: + return SelectAtomic(N, + ARM::ATOMIC_CMP_SWAP_I8, + ARM::ATOMIC_CMP_SWAP_I16, + ARM::ATOMIC_CMP_SWAP_I32, + ARM::ATOMIC_CMP_SWAP_I64); } return SelectCode(N); @@ -3451,24 +3359,20 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ bool Changed = false; unsigned NumOps = N->getNumOperands(); - ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>( - N->getOperand(InlineAsm::Op_AsmString)); - StringRef AsmString = StringRef(S->getSymbol()); - // Normally, i64 data is bound to two arbitrary GPRs for "%r" constraint. // However, some instructions (e.g. ldrexd/strexd in ARM mode) require // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs // respectively. Since there is no constraint to explicitly specify a - // reg pair, we search %H operand inside the asm string. If it is found, the - // transformation below enforces a GPRPair reg class for "%r" for 64-bit data. - if (AsmString.find(":H}") == StringRef::npos) - return NULL; + // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb, + // the 64-bit data may be referred to by H, Q, R modifiers, so we still pack + // them into a GPRPair. - DebugLoc dl = N->getDebugLoc(); - SDValue Glue = N->getOperand(NumOps-1); + SDLoc dl(N); + SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1) : SDValue(0,0); + SmallVector<bool, 8> OpChanged; // Glue node will be appended later. - for(unsigned i = 0; i < NumOps -1; ++i) { + for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) { SDValue op = N->getOperand(i); AsmNodeOperands.push_back(op); @@ -3482,17 +3386,38 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ else continue; + // Immediate operands to inline asm in the SelectionDAG are modeled with + // two operands. The first is a constant of value InlineAsm::Kind_Imm, and + // the second is a constant with the value of the immediate. If we get here + // and we have a Kind_Imm, skip the next operand, and continue.
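// A minimal standalone sketch (hypothetical flag encoding; not part of the
// patch) of the operand walk described above: each inline-asm immediate
// occupies two adjacent operands, a flag word whose kind is Kind_Imm
// followed by the constant itself, so the scan copies both and advances past
// them in one step.
#include <cstddef>
#include <vector>

enum SketchKind { K_Imm = 1, K_RegUse = 2 };

struct SketchOp {
  SketchKind Kind;
  int Value;
};

static void copyAsmOperands(const std::vector<SketchOp> &In,
                            std::vector<SketchOp> &Out) {
  for (size_t i = 0; i < In.size(); ++i) {
    Out.push_back(In[i]);
    if (In[i].Kind == K_Imm && i + 1 < In.size())
      Out.push_back(In[++i]); // the immediate's value rides along unchanged
  }
}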
+ if (Kind == InlineAsm::Kind_Imm) { + SDValue op = N->getOperand(++i); + AsmNodeOperands.push_back(op); + continue; + } + + unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag); + if (NumRegs) + OpChanged.push_back(false); + + unsigned DefIdx = 0; + bool IsTiedToChangedOp = false; + // If it's a use that is tied with a previous def, it has no + // reg class constraint. + if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx)) + IsTiedToChangedOp = OpChanged[DefIdx]; + if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef && Kind != InlineAsm::Kind_RegDefEarlyClobber) continue; - unsigned RegNum = InlineAsm::getNumOperandRegisters(Flag); unsigned RC; bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC); - if (!HasRC || RC != ARM::GPRRegClassID || RegNum != 2) + if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID)) + || NumRegs != 2) continue; - assert((i+2 < NumOps-1) && "Invalid number of operands in inline asm"); + assert((i+2 < NumOps) && "Invalid number of operands in inline asm"); SDValue V0 = N->getOperand(i+1); SDValue V1 = N->getOperand(i+2); unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg(); @@ -3553,8 +3478,12 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ Changed = true; if(PairedReg.getNode()) { + OpChanged[OpChanged.size() -1 ] = true; Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/); - Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID); + if (IsTiedToChangedOp) + Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx); + else + Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID); // Replace the current flag. AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant( Flag, MVT::i32); @@ -3565,11 +3494,12 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ } } - AsmNodeOperands.push_back(Glue); + if (Glue.getNode()) + AsmNodeOperands.push_back(Glue); if (!Changed) return NULL; - SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(), + SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N), CurDAG->getVTList(MVT::Other, MVT::Glue), &AsmNodeOperands[0], AsmNodeOperands.size()); New->setNodeId(-1); diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp index e49cfc49854a..76a0a831f695 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -48,6 +48,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" +#include <utility> using namespace llvm; STATISTIC(NumTailCalls, "Number of tail calls"); @@ -74,7 +75,7 @@ namespace { class ARMCCState : public CCState { public: ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF, - const TargetMachine &TM, SmallVector<CCValAssign, 16> &locs, + const TargetMachine &TM, SmallVectorImpl<CCValAssign> &locs, LLVMContext &C, ParmContext PC) : CCState(CC, isVarArg, MF, TM, locs, C) { assert(((PC == Call) || (PC == Prologue)) && @@ -174,9 +175,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); - if (Subtarget->isTargetDarwin()) { + if (Subtarget->isTargetIOS()) { // Uses VFP for Thumb libfuncs if available. - if (Subtarget->isThumb() && Subtarget->hasVFP2()) { + if (Subtarget->isThumb() && Subtarget->hasVFP2() && + Subtarget->hasARMOps()) { // Single-precision floating-point arithmetic. 
setLibcallName(RTLIB::ADD_F32, "__addsf3vfp"); setLibcallName(RTLIB::SUB_F32, "__subsf3vfp"); @@ -421,7 +423,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } // Use divmod compiler-rt calls for iOS 5.0 and later. - if (Subtarget->getTargetTriple().getOS() == Triple::IOS && + if (Subtarget->getTargetTriple().isiOS() && !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) { setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4"); setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4"); @@ -452,6 +454,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } setOperationAction(ISD::ConstantFP, MVT::f32, Custom); + setOperationAction(ISD::ConstantFP, MVT::f64, Custom); if (Subtarget->hasNEON()) { addDRTypeForNEON(MVT::v2f32); @@ -564,16 +567,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand); setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand); - // Custom expand long extensions to vectors. - setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom); - // NEON does not have single instruction CTPOP for vectors with element // types wider than 8-bits. However, custom lowering can leverage the // v8i8/v16i8 vcnt instruction. @@ -681,6 +674,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i32 , Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i32 , Expand); + setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom); + // Only ARMv6 has BSWAP. 
if (!Subtarget->hasV6Ops()) setOperationAction(ISD::BSWAP, MVT::i32, Expand); @@ -691,10 +686,36 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::SDIV, MVT::i32, Expand); setOperationAction(ISD::UDIV, MVT::i32, Expand); } + + // FIXME: Also set divmod for SREM on EABI setOperationAction(ISD::SREM, MVT::i32, Expand); setOperationAction(ISD::UREM, MVT::i32, Expand); - setOperationAction(ISD::SDIVREM, MVT::i32, Expand); - setOperationAction(ISD::UDIVREM, MVT::i32, Expand); + // Register based DivRem for AEABI (RTABI 4.2) + if (Subtarget->isTargetAEABI()) { + setLibcallName(RTLIB::SDIVREM_I8, "__aeabi_idivmod"); + setLibcallName(RTLIB::SDIVREM_I16, "__aeabi_idivmod"); + setLibcallName(RTLIB::SDIVREM_I32, "__aeabi_idivmod"); + setLibcallName(RTLIB::SDIVREM_I64, "__aeabi_ldivmod"); + setLibcallName(RTLIB::UDIVREM_I8, "__aeabi_uidivmod"); + setLibcallName(RTLIB::UDIVREM_I16, "__aeabi_uidivmod"); + setLibcallName(RTLIB::UDIVREM_I32, "__aeabi_uidivmod"); + setLibcallName(RTLIB::UDIVREM_I64, "__aeabi_uldivmod"); + + setLibcallCallingConv(RTLIB::SDIVREM_I8, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::SDIVREM_I16, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::SDIVREM_I32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::SDIVREM_I64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UDIVREM_I8, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UDIVREM_I16, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UDIVREM_I32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UDIVREM_I64, CallingConv::ARM_AAPCS); + + setOperationAction(ISD::SDIVREM, MVT::i32, Custom); + setOperationAction(ISD::UDIVREM, MVT::i32, Custom); + } else { + setOperationAction(ISD::SDIVREM, MVT::i32, Expand); + setOperationAction(ISD::UDIVREM, MVT::i32, Expand); + } setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::ConstantPool, MVT::i32, Custom); @@ -715,8 +736,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) if (!Subtarget->isTargetDarwin()) { // Non-Darwin platforms may return values in these registers via the // personality function. - setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); - setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); setExceptionPointerRegister(ARM::R0); setExceptionSelectorRegister(ARM::R1); } @@ -724,12 +743,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use // the default expansion. - // FIXME: This should be checking for v6k, not just v6. - if (Subtarget->hasDataBarrier() || - (Subtarget->hasV6Ops() && !Subtarget->isThumb())) { - // membarrier needs custom lowering; the rest are legal and handled - // normally. - setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); + if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) { + // ATOMIC_FENCE needs custom lowering; the other 32-bit ones are legal and + // handled normally. 
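As a reference point for the libcall names registered above: under the ARM RTABI, __aeabi_idivmod and __aeabi_uidivmod return the quotient and remainder together (r0/r1 under AAPCS), equivalent to a two-field struct return. A hedged C++ model of that contract (the _model name is illustrative, not the runtime symbol):

```cpp
#include <cstdint>

struct IDivModResult { int32_t quot; int32_t rem; };

// Models __aeabi_idivmod: quotient comes back in r0, remainder in r1,
// i.e. a {quot, rem} pair under AAPCS. Division truncates toward zero,
// matching C semantics and the hardware SDIV instruction.
IDivModResult aeabi_idivmod_model(int32_t num, int32_t den) {
  IDivModResult r;
  r.quot = num / den;
  r.rem = num - r.quot * den;
  return r;
}
```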
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); // Custom lowering for 64-bit ops setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom); setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom); @@ -742,11 +759,20 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom); setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom); setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom); - // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc. - setInsertFencesForAtomic(true); + // On v8, we have particularly efficient implementations of atomic fences + // if they can be combined with nearby atomic loads and stores. + if (!Subtarget->hasV8Ops()) { + // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc. + setInsertFencesForAtomic(true); + } + setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom); } else { + // If there's anything we can use as a barrier, go through custom lowering + // for ATOMIC_FENCE. + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, + Subtarget->hasAnyDataBarrier() ? Custom : Expand); + // Set them all for expansion, which will force libcalls. - setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand); @@ -843,6 +869,18 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand); } } + + // Combine sin / cos into one node or libcall if possible. + if (Subtarget->hasSinCos()) { + setLibcallName(RTLIB::SINCOS_F32, "sincosf"); + setLibcallName(RTLIB::SINCOS_F64, "sincos"); + if (Subtarget->getTargetTriple().getOS() == Triple::IOS) { + // For iOS, we don't want the normal expansion of a libcall to + // sincos. We want to issue a libcall to __sincos_stret. + setOperationAction(ISD::FSINCOS, MVT::f64, Custom); + setOperationAction(ISD::FSINCOS, MVT::f32, Custom); + } + } // We have target-specific dag combine patterns for the following nodes: // ARMISD::VMOVRRD - No need to call setTargetDAGCombine @@ -882,6 +920,44 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setMinFunctionAlignment(Subtarget->isThumb() ?
1 : 2); } +static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord, + bool isThumb2, unsigned &LdrOpc, + unsigned &StrOpc) { + static const unsigned LoadBares[4][2] = {{ARM::LDREXB, ARM::t2LDREXB}, + {ARM::LDREXH, ARM::t2LDREXH}, + {ARM::LDREX, ARM::t2LDREX}, + {ARM::LDREXD, ARM::t2LDREXD}}; + static const unsigned LoadAcqs[4][2] = {{ARM::LDAEXB, ARM::t2LDAEXB}, + {ARM::LDAEXH, ARM::t2LDAEXH}, + {ARM::LDAEX, ARM::t2LDAEX}, + {ARM::LDAEXD, ARM::t2LDAEXD}}; + static const unsigned StoreBares[4][2] = {{ARM::STREXB, ARM::t2STREXB}, + {ARM::STREXH, ARM::t2STREXH}, + {ARM::STREX, ARM::t2STREX}, + {ARM::STREXD, ARM::t2STREXD}}; + static const unsigned StoreRels[4][2] = {{ARM::STLEXB, ARM::t2STLEXB}, + {ARM::STLEXH, ARM::t2STLEXH}, + {ARM::STLEX, ARM::t2STLEX}, + {ARM::STLEXD, ARM::t2STLEXD}}; + + const unsigned (*LoadOps)[2], (*StoreOps)[2]; + if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent) + LoadOps = LoadAcqs; + else + LoadOps = LoadBares; + + if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent) + StoreOps = StoreRels; + else + StoreOps = StoreBares; + + assert(isPowerOf2_32(Size) && Size <= 8 && + "unsupported size for atomic binary op!"); + + LdrOpc = LoadOps[Log2_32(Size)][isThumb2]; + StrOpc = StoreOps[Log2_32(Size)][isThumb2]; +} + // FIXME: It might make sense to define the representative register class as the // nearest super-register that has a non-null superset. For example, DPR_VFP2 is // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently, @@ -944,6 +1020,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::BR_JT: return "ARMISD::BR_JT"; case ARMISD::BR2_JT: return "ARMISD::BR2_JT"; case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG"; + case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG"; case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD"; case ARMISD::CMP: return "ARMISD::CMP"; case ARMISD::CMN: return "ARMISD::CMN"; @@ -983,7 +1060,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC"; - case ARMISD::MEMBARRIER: return "ARMISD::MEMBARRIER"; case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR"; case ARMISD::PRELOAD: return "ARMISD::PRELOAD"; @@ -1042,6 +1118,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; case ARMISD::FMAX: return "ARMISD::FMAX"; case ARMISD::FMIN: return "ARMISD::FMIN"; + case ARMISD::VMAXNM: return "ARMISD::VMAXNM"; + case ARMISD::VMINNM: return "ARMISD::VMINNM"; case ARMISD::BFI: return "ARMISD::BFI"; case ARMISD::VORRIMM: return "ARMISD::VORRIMM"; case ARMISD::VBICIMM: return "ARMISD::VBICIMM"; @@ -1069,7 +1147,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { } } -EVT ARMTargetLowering::getSetCCResultType(EVT VT) const { +EVT ARMTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { if (!VT.isVector()) return getPointerTy(); return VT.changeVectorElementTypeToInteger(); } @@ -1233,7 +1311,7 @@ SDValue ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn, SDValue ThisVal) const { @@ -1314,7 +1392,7 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, SDValue ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
SDValue StackPtr, SDValue Arg, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, const CCValAssign &VA, ISD::ArgFlagsTy Flags) const { unsigned LocMemOffset = VA.getLocMemOffset(); @@ -1325,12 +1403,12 @@ ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, false, false, 0); } -void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG, +void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG, SDValue Chain, SDValue &Arg, RegsToPassVector &RegsToPass, CCValAssign &VA, CCValAssign &NextVA, SDValue &StackPtr, - SmallVector<SDValue, 8> &MemOpChains, + SmallVectorImpl<SDValue> &MemOpChains, ISD::ArgFlagsTy Flags) const { SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, @@ -1357,10 +1435,10 @@ SDValue ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const { SelectionDAG &DAG = CLI.DAG; - DebugLoc &dl = CLI.DL; - SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs; - SmallVector<SDValue, 32> &OutVals = CLI.OutVals; - SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins; + SDLoc &dl = CLI.DL; + SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; + SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; + SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; bool &isTailCall = CLI.IsTailCall; @@ -1406,7 +1484,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass if (!isSibCall) - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), + dl); SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); @@ -1496,7 +1575,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(), - false, false, false, 0); + false, false, false, + DAG.InferPtrAlignment(AddArg)); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(j, Load)); } @@ -1705,17 +1785,26 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, RegsToPass[i].second.getValueType())); // Add a register mask operand representing the call-preserved registers. 
- const uint32_t *Mask; - const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); - const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI); - if (isThisReturn) - // For 'this' returns, use the R0-preserving mask - Mask = ARI->getThisReturnPreservedMask(CallConv); - else - Mask = ARI->getCallPreservedMask(CallConv); + if (!isTailCall) { + const uint32_t *Mask; + const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI); + if (isThisReturn) { + // For 'this' returns, use the R0-preserving mask if applicable + Mask = ARI->getThisReturnPreservedMask(CallConv); + if (!Mask) { + // Set isThisReturn to false if the calling convention is not one that + // allows 'returned' to be modeled in this way, so LowerCallResult does + // not try to pass 'this' straight through + isThisReturn = false; + Mask = ARI->getCallPreservedMask(CallConv); + } + } else + Mask = ARI->getCallPreservedMask(CallConv); - assert(Mask && "Missing call preserved mask for calling convention"); - Ops.push_back(DAG.getRegisterMask(Mask)); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); + } if (InFlag.getNode()) Ops.push_back(InFlag); @@ -1729,7 +1818,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, InFlag = Chain.getValue(1); Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), - DAG.getIntPtrConstant(0, true), InFlag); + DAG.getIntPtrConstant(0, true), InFlag, dl); if (!Ins.empty()) InFlag = Chain.getValue(1); @@ -1795,7 +1884,7 @@ ARMTargetLowering::HandleByVal( // else parameter would be split between registers and stack, // end register would be r4 in this case. unsigned ByValRegBegin = reg; - unsigned ByValRegEnd = (size < excess) ? reg + size/4 : ARM::R4; + unsigned ByValRegEnd = (size < excess) ? reg + size/4 : (unsigned)ARM::R4; State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd); // Note: the first register is already allocated at the beginning of the function; // allocate the remaining registers we need. @@ -1886,6 +1975,12 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, if (isVarArg && !Outs.empty()) return false; + // Exception-handling functions need a special set of instructions to indicate + // a return to the hardware. Tail-calling another function would probably + // break this. + if (CallerF->hasFnAttribute("interrupt")) + return false; + // Also avoid sibcall optimization if either caller or callee uses struct // return semantics. if (isCalleeStructRet || isCallerStructRet) @@ -2014,12 +2109,45 @@ ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv, isVarArg)); } +static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps, + SDLoc DL, SelectionDAG &DAG) { + const MachineFunction &MF = DAG.getMachineFunction(); + const Function *F = MF.getFunction(); + + StringRef IntKind = F->getFnAttribute("interrupt").getValueAsString(); + + // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset + // version of the "preferred return address". These offsets affect the return + // instruction if this is a return from PL1 without hypervisor extensions. + // IRQ/FIQ: +4 "subs pc, lr, #4" + // SWI: 0 "subs pc, lr, #0" + // ABORT: +4 "subs pc, lr, #4" + // UNDEF: +4/+2 "subs pc, lr, #0" + // UNDEF varies depending on whether the exception came from ARM or Thumb + // mode.
Alongside GCC, we throw our hands up in disgust and pretend it's 0. + + int64_t LROffset; + if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" || + IntKind == "ABORT") + LROffset = 4; + else if (IntKind == "SWI" || IntKind == "UNDEF") + LROffset = 0; + else + report_fatal_error("Unsupported interrupt attribute. If present, value " + "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF"); + + RetOps.insert(RetOps.begin() + 1, DAG.getConstant(LROffset, MVT::i32, false)); + + return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, + RetOps.data(), RetOps.size()); +} + SDValue ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, - DebugLoc dl, SelectionDAG &DAG) const { + SDLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of the return value to a location. SmallVector<CCValAssign, 16> RVLocs; @@ -2099,6 +2227,19 @@ ARMTargetLowering::LowerReturn(SDValue Chain, if (Flag.getNode()) RetOps.push_back(Flag); + // CPUs which aren't M-class use a special sequence to return from + // exceptions (roughly, any instruction setting pc and cpsr simultaneously, + // though we use "subs pc, lr, #N"). + // + // M-class CPUs actually use a normal return sequence with a special + // (hardware-provided) value in LR, so the normal code path works. + if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt") && + !Subtarget->isMClass()) { + if (Subtarget->isThumb1Only()) + report_fatal_error("interrupt attribute is not supported in Thumb1"); + return LowerInterruptReturn(RetOps, dl, DAG); + } + return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps.data(), RetOps.size()); } @@ -2147,7 +2288,7 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { Copy = *Copy->use_begin(); if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0)) return false; - Chain = Copy->getOperand(0); + TCChain = Copy->getOperand(0); } else { return false; } @@ -2155,7 +2296,8 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { bool HasRet = false; for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end(); UI != UE; ++UI) { - if (UI->getOpcode() != ARMISD::RET_FLAG) + if (UI->getOpcode() != ARMISD::RET_FLAG && + UI->getOpcode() != ARMISD::INTRET_FLAG) return false; HasRet = true; } @@ -2186,7 +2328,7 @@ bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) { EVT PtrVT = Op.getValueType(); // FIXME there is no actual debug info here - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); SDValue Res; if (CP->isMachineConstantPoolEntry()) @@ -2207,7 +2349,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned ARMPCLabelIndex = 0; - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); EVT PtrVT = getPointerTy(); const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); @@ -2236,7 +2378,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SDValue ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG) const { - DebugLoc dl = GA->getDebugLoc(); + SDLoc dl(GA); EVT PtrVT = getPointerTy(); unsigned char PCAdj = Subtarget->isThumb() ? 
4 : 8; MachineFunction &MF = DAG.getMachineFunction(); @@ -2279,7 +2421,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, SelectionDAG &DAG, TLSModel::Model model) const { const GlobalValue *GV = GA->getGlobal(); - DebugLoc dl = GA->getDebugLoc(); + SDLoc dl(GA); SDValue Offset; SDValue Chain = DAG.getEntryNode(); EVT PtrVT = getPointerTy(); @@ -2349,7 +2491,7 @@ ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = getPointerTy(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); @@ -2392,7 +2534,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = getPointerTy(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); @@ -2457,7 +2599,7 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); EVT PtrVT = getPointerTy(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_", @@ -2473,7 +2615,7 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SDValue ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SDValue Val = DAG.getConstant(0, MVT::i32); return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0), @@ -2482,7 +2624,7 @@ ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { SDValue ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0), Op.getOperand(1), DAG.getConstant(0, MVT::i32)); } @@ -2491,7 +2633,7 @@ SDValue ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const { unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); switch (IntNo) { default: return SDValue(); // Don't custom lower most intrinsics. case Intrinsic::arm_thread_pointer: { @@ -2527,7 +2669,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, case Intrinsic::arm_neon_vmullu: { unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls) ? ARMISD::VMULLs : ARMISD::VMULLu; - return DAG.getNode(NewOpc, Op.getDebugLoc(), Op.getValueType(), + return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); } } @@ -2536,19 +2678,33 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) { // FIXME: handle "fence singlethread" more efficiently. - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); if (!Subtarget->hasDataBarrier()) { // Some ARMv6 cpus can support data barriers with an mcr instruction. 
// Thumb1 and pre-v6 ARM mode use a libcall instead and should never get // here. assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() && - "Unexpected ISD::MEMBARRIER encountered. Should be libcall!"); + "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!"); return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0), DAG.getConstant(0, MVT::i32)); } - return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0), - DAG.getConstant(ARM_MB::ISH, MVT::i32)); + ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1)); + AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue()); + unsigned Domain = ARM_MB::ISH; + if (Subtarget->isMClass()) { + // Only a full system barrier exists in the M-class architectures. + Domain = ARM_MB::SY; + } else if (Subtarget->isSwift() && Ord == Release) { + // Swift happens to implement ISHST barriers in a way that's compatible with + // Release semantics but weaker than ISH so we'd be fools not to use + // it. Beware: other processors probably don't! + Domain = ARM_MB::ISHST; + } + + return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0), + DAG.getConstant(Intrinsic::arm_dmb, MVT::i32), + DAG.getConstant(Domain, MVT::i32)); } static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, @@ -2559,7 +2715,7 @@ static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, // Just preserve the chain. return Op.getOperand(0); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1; if (!isRead && (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension())) @@ -2584,7 +2740,7 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); @@ -2595,7 +2751,7 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, SDValue &Root, SelectionDAG &DAG, - DebugLoc dl) const { + SDLoc dl) const { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); @@ -2630,6 +2786,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, void ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, unsigned InRegsParamRecordIdx, + unsigned ArgSize, unsigned &ArgRegsSize, unsigned &ArgRegsSaveSize) const { @@ -2648,7 +2805,29 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); ArgRegsSize = NumGPRs * 4; - ArgRegsSaveSize = (ArgRegsSize + Align - 1) & ~(Align - 1); + + // If parameter is split between stack and GPRs... + if (NumGPRs && Align == 8 && + (ArgRegsSize < ArgSize || + InRegsParamRecordIdx >= CCInfo.getInRegsParamsCount())) { + // Add padding for the part of the param recovered from GPRs, so that + // its last byte falls at address K*8 - 1. + // We need to do this, since the remaining (stack) part of the parameter has + // stack alignment, and we need to "attach" the "GPRs head" to it without + // gaps: + // Stack: + // |---- 8 bytes block ----| |---- 8 bytes block ----| |---- 8 bytes... + // [ [padding] [GPRs head] ] [ Tail passed via stack ....
+ // + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + unsigned Padding = + ((ArgRegsSize + AFI->getArgRegsSaveSize() + Align - 1) & ~(Align-1)) - + (ArgRegsSize + AFI->getArgRegsSaveSize()); + ArgRegsSaveSize = ArgRegsSize + Padding; + } else + // We don't need to extend regs save size for byval parameters if they + // are passed via GPRs only. + ArgRegsSaveSize = ArgRegsSize; } // The remaining GPRs hold either the beginning of variable-argument @@ -2661,11 +2840,12 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, // Return: The frame index registers were stored into. int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, - DebugLoc dl, SDValue &Chain, + SDLoc dl, SDValue &Chain, const Value *OrigArg, unsigned InRegsParamRecordIdx, unsigned OffsetFromOrigArg, unsigned ArgOffset, + unsigned ArgSize, bool ForceMutable) const { // Currently, two use-cases possible: @@ -2690,12 +2870,13 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, lastRegToSaveIndex = REnd - ARM::R0; } else { firstRegToSaveIndex = CCInfo.getFirstUnallocated - (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0])); + (GPRArgRegs, array_lengthof(GPRArgRegs)); lastRegToSaveIndex = 4; } unsigned ArgRegsSize, ArgRegsSaveSize; - computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgRegsSize, ArgRegsSaveSize); + computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgSize, + ArgRegsSize, ArgRegsSaveSize); // Store any by-val regs to their spots on the stack so that they may be // loaded by dereferencing the result of the formal parameter pointer or va_next. // Note: once stack area for byval/varargs registers // was initialized, it can't be initialized again. if (ArgRegsSaveSize) { + unsigned Padding = ArgRegsSaveSize - ArgRegsSize; + + if (Padding) { + assert(AFI->getStoredByValParamsPadding() == 0 && + "The only parameter may be padded."); + AFI->setStoredByValParamsPadding(Padding); + } + int FrameIndex = MFI->CreateFixedObject( ArgRegsSaveSize, - ArgOffset + ArgRegsSaveSize - ArgRegsSize, + Padding + ArgOffset, false); SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy()); @@ -2737,13 +2926,14 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, return FrameIndex; } else // This will point to the next argument passed via stack. - return MFI->CreateFixedObject(4, ArgOffset, !ForceMutable); + return MFI->CreateFixedObject( + 4, AFI->getStoredByValParamsPadding() + ArgOffset, !ForceMutable); } // Set up the stack frame that the va_list pointer will start from. void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, - DebugLoc dl, SDValue &Chain, + SDLoc dl, SDValue &Chain, unsigned ArgOffset, bool ForceMutable) const { MachineFunction &MF = DAG.getMachineFunction(); @@ -2756,7 +2946,7 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, // argument passed via stack.
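To make the padding arithmetic above concrete, here is a small standalone check of the formula under assumed numbers (8-byte stack alignment, 4-byte GPRs); the function name is illustrative:

```cpp
#include <cassert>

// Mirrors computeRegArea's padding step: round (bytes already saved +
// this argument's GPR head) up to the alignment and pad by the
// difference, so the GPR head sits flush against the stack tail.
unsigned argRegsSaveSize(unsigned ArgRegsSize, unsigned AlreadySaved,
                         unsigned Align) {
  unsigned Total = ArgRegsSize + AlreadySaved;
  unsigned Padding = ((Total + Align - 1) & ~(Align - 1)) - Total;
  return ArgRegsSize + Padding;
}

int main() {
  // 12 bytes of a split byval live in r1-r3: the save area grows to 16
  // bytes, i.e. 4 bytes of padding in front of the GPR head.
  assert(argRegsSaveSize(12, 0, 8) == 16);
  // A 16-byte head is already 8-byte aligned and needs no padding.
  assert(argRegsSaveSize(16, 0, 8) == 16);
  return 0;
}
```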
int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, 0, CCInfo.getInRegsParamsCount(), - 0, ArgOffset, ForceMutable); + 0, ArgOffset, 0, ForceMutable); AFI->setVarArgsFrameIndex(FrameIndex); } @@ -2766,7 +2956,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); @@ -2896,12 +3086,15 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, CurByValIndex, Ins[VA.getValNo()].PartOffset, VA.getLocMemOffset(), + Flags.getByValSize(), true /*force mutable frames*/); InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy())); CCInfo.nextInRegsParam(); } else { + unsigned FIOffset = VA.getLocMemOffset() + + AFI->getStoredByValParamsPadding(); int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8, - VA.getLocMemOffset(), true); + FIOffset, true); // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); @@ -2943,7 +3136,7 @@ static bool isFloatingPointZero(SDValue Op) { SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &ARMcc, SelectionDAG &DAG, - DebugLoc dl) const { + SDLoc dl) const { if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { unsigned C = RHSC->getZExtValue(); if (!isLegalICmpImmediate(C)) { @@ -3001,7 +3194,7 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, - DebugLoc dl) const { + SDLoc dl) const { SDValue Cmp; if (!isFloatingPointZero(RHS)) Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS); @@ -3015,7 +3208,7 @@ ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, SDValue ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const { unsigned Opc = Cmp.getOpcode(); - DebugLoc DL = Cmp.getDebugLoc(); + SDLoc DL(Cmp); if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ) return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); @@ -3035,7 +3228,7 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Cond = Op.getOperand(0); SDValue SelectTrue = Op.getOperand(1); SDValue SelectFalse = Op.getOperand(2); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); // Convert: // @@ -3083,6 +3276,61 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SelectTrue, SelectFalse, ISD::SETNE); } +static ISD::CondCode getInverseCCForVSEL(ISD::CondCode CC) { + if (CC == ISD::SETNE) + return ISD::SETEQ; + return ISD::getSetCCSwappedOperands(CC); +} + +static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, + bool &swpCmpOps, bool &swpVselOps) { + // Start by selecting the GE condition code for opcodes that return true for + // 'equality' + if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE || + CC == ISD::SETULE) + CondCode = ARMCC::GE; + + // and GT for opcodes that return false for 'equality'. + else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT || + CC == ISD::SETULT) + CondCode = ARMCC::GT; + + // Since we are constrained to GE/GT, if the opcode contains 'less', we need + // to swap the compare operands.
+ if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT || + CC == ISD::SETULT) + swpCmpOps = true; + + // Both GT and GE are ordered comparisons, and return false for 'unordered'. + // If we have an unordered opcode, we need to swap the operands to the VSEL + // instruction (effectively negating the condition). + // + // This also has the effect of swapping which one of 'less' or 'greater' + // returns true, so we also swap the compare operands. It also switches + // whether we return true for 'equality', so we compensate by picking the + // opposite condition code to our original choice. + if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE || + CC == ISD::SETUGT) { + swpCmpOps = !swpCmpOps; + swpVselOps = !swpVselOps; + CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT; + } + + // 'ordered' is 'anything but unordered', so use the VS condition code and + // swap the VSEL operands. + if (CC == ISD::SETO) { + CondCode = ARMCC::VS; + swpVselOps = true; + } + + // 'unordered or not equal' is 'anything but equal', so use the EQ condition + // code and swap the VSEL operands. + if (CC == ISD::SETUNE) { + CondCode = ARMCC::EQ; + swpVselOps = true; + } +} + SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDValue LHS = Op.getOperand(0); @@ -3090,18 +3338,69 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); SDValue TrueVal = Op.getOperand(2); SDValue FalseVal = Op.getOperand(3); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); if (LHS.getValueType() == MVT::i32) { + // Try to generate VSEL on ARMv8. + // The VSEL instruction can't use all the usual ARM condition + // codes: it only has two bits to select the condition code, so it's + // constrained to use only GE, GT, VS and EQ. + // + // To implement all the various ISD::SETXXX opcodes, we sometimes need to + // swap the operands of the previous compare instruction (effectively + // inverting the compare condition, swapping 'less' and 'greater') and + // sometimes need to swap the operands to the VSEL (which inverts the + // condition in the sense of firing whenever the previous condition didn't) + if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 || + TrueVal.getValueType() == MVT::f64)) { + ARMCC::CondCodes CondCode = IntCCToARMCC(CC); + if (CondCode == ARMCC::LT || CondCode == ARMCC::LE || + CondCode == ARMCC::VC || CondCode == ARMCC::NE) { + CC = getInverseCCForVSEL(CC); + std::swap(TrueVal, FalseVal); + } + } + SDValue ARMcc; SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); - return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp); + return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, + Cmp); } ARMCC::CondCodes CondCode, CondCode2; FPCCToARMCC(CC, CondCode, CondCode2); + // Try to generate VSEL on ARMv8. + if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 || + TrueVal.getValueType() == MVT::f64)) { + // We can select VMAXNM/VMINNM from a compare followed by a select with the + // same operands, as follows: + // c = fcmp [ogt, olt, ugt, ult] a, b + // select c, a, b + // We only do this in unsafe-fp-math, because signed zeros and NaNs are + // handled differently than the original code sequence. 
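The source pattern this combine matches looks roughly like the function below; the transformation is only sound under unsafe-fp-math because VMAXNM/VMINNM implement IEEE-style maxNum/minNum semantics (treating +0.0 as greater than -0.0), which differs from the raw compare-plus-select:

```cpp
// With -ffast-math on an ARMv8 FP target this may select to vmaxnm.
// Without it the transform would be wrong: for a = +0.0f, b = -0.0f the
// compare is false and the select yields -0.0f, but vmaxnm yields +0.0f.
float max_candidate(float a, float b) {
  return a > b ? a : b; // c = fcmp ogt a, b ; select c, a, b
}
```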
+ if (getTargetMachine().Options.UnsafeFPMath && LHS == TrueVal && + RHS == FalseVal) { + if (CC == ISD::SETOGT || CC == ISD::SETUGT) + return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal); + if (CC == ISD::SETOLT || CC == ISD::SETULT) + return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal); + } + + bool swpCmpOps = false; + bool swpVselOps = false; + checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps); + + if (CondCode == ARMCC::GT || CondCode == ARMCC::GE || + CondCode == ARMCC::VS || CondCode == ARMCC::EQ) { + if (swpCmpOps) + std::swap(LHS, RHS); + if (swpVselOps) + std::swap(TrueVal, FalseVal); + } + } + SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32); SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); @@ -3145,7 +3444,7 @@ static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) { return DAG.getConstant(0, MVT::i32); if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) - return DAG.getLoad(MVT::i32, Op.getDebugLoc(), + return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), Ld->isVolatile(), Ld->isNonTemporal(), Ld->isInvariant(), Ld->getAlignment()); @@ -3163,7 +3462,7 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) { SDValue Ptr = Ld->getBasePtr(); - RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(), + RetVal1 = DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ptr, Ld->getPointerInfo(), Ld->isVolatile(), Ld->isNonTemporal(), @@ -3171,9 +3470,9 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, EVT PtrType = Ptr.getValueType(); unsigned NewAlign = MinAlign(Ld->getAlignment(), 4); - SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(), + SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(Op), PtrType, Ptr, DAG.getConstant(4, PtrType)); - RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(), + RetVal2 = DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), NewPtr, Ld->getPointerInfo().getWithOffset(4), Ld->isVolatile(), Ld->isNonTemporal(), @@ -3193,7 +3492,7 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(2); SDValue RHS = Op.getOperand(3); SDValue Dest = Op.getOperand(4); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); bool LHSSeenZero = false; bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget); @@ -3243,7 +3542,7 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(2); SDValue RHS = Op.getOperand(3); SDValue Dest = Op.getOperand(4); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); if (LHS.getValueType() == MVT::i32) { SDValue ARMcc; @@ -3284,7 +3583,7 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Table = Op.getOperand(1); SDValue Index = Op.getOperand(2); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); EVT PTy = getPointerTy(); JumpTableSDNode *JT = cast<JumpTableSDNode>(Table); @@ -3320,7 +3619,7 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); if (Op.getValueType().getVectorElementType() == MVT::i32) { if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32) @@ -3342,7 +3641,7 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { if (VT.isVector()) return LowerVectorFP_TO_INT(Op, DAG); - DebugLoc dl = 
Op.getDebugLoc(); + SDLoc dl(Op); unsigned Opc; switch (Op.getOpcode()) { @@ -3360,7 +3659,7 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) { if (VT.getVectorElementType() == MVT::f32) @@ -3396,7 +3695,7 @@ static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { if (VT.isVector()) return LowerVectorINT_TO_FP(Op, DAG); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); unsigned Opc; switch (Op.getOpcode()) { @@ -3417,7 +3716,7 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { // Implement fcopysign with a fabs and a conditional fneg. SDValue Tmp0 = Op.getOperand(0); SDValue Tmp1 = Op.getOperand(1); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); EVT VT = Op.getValueType(); EVT SrcVT = Tmp1.getValueType(); bool InGPR = Tmp0.getOpcode() == ISD::BITCAST || @@ -3501,7 +3800,7 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ MFI->setReturnAddressIsTaken(true); EVT VT = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); if (Depth) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); @@ -3521,7 +3820,7 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { MFI->setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful + SDLoc dl(Op); // FIXME probably not meaningful unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin()) ? ARM::R7 : ARM::R11; @@ -3533,47 +3832,6 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { return FrameAddr; } -/// Custom Expand long vector extensions, where size(DestVec) > 2*size(SrcVec), -/// and size(DestVec) > 128-bits. -/// This is achieved by doing the one extension from the SrcVec, splitting the -/// result, extending these parts, and then concatenating these into the -/// destination. -static SDValue ExpandVectorExtension(SDNode *N, SelectionDAG &DAG) { - SDValue Op = N->getOperand(0); - EVT SrcVT = Op.getValueType(); - EVT DestVT = N->getValueType(0); - - assert(DestVT.getSizeInBits() > 128 && - "Custom sext/zext expansion needs >128-bit vector."); - // If this is a normal length extension, use the default expansion. 
- if (SrcVT.getSizeInBits()*4 != DestVT.getSizeInBits() && - SrcVT.getSizeInBits()*8 != DestVT.getSizeInBits()) - return SDValue(); - - DebugLoc dl = N->getDebugLoc(); - unsigned SrcEltSize = SrcVT.getVectorElementType().getSizeInBits(); - unsigned DestEltSize = DestVT.getVectorElementType().getSizeInBits(); - unsigned NumElts = SrcVT.getVectorNumElements(); - LLVMContext &Ctx = *DAG.getContext(); - SDValue Mid, SplitLo, SplitHi, ExtLo, ExtHi; - - EVT MidVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2), - NumElts); - EVT SplitVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2), - NumElts/2); - EVT ExtVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, DestEltSize), - NumElts/2); - - Mid = DAG.getNode(N->getOpcode(), dl, MidVT, Op); - SplitLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid, - DAG.getIntPtrConstant(0)); - SplitHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid, - DAG.getIntPtrConstant(NumElts/2)); - ExtLo = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitLo); - ExtHi = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitHi); - return DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, ExtLo, ExtHi); -} - /// ExpandBITCAST - If the target supports VFP, this function is called to /// expand a bit convert where either the source or destination type is i64 to /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 @@ -3581,7 +3839,7 @@ static SDValue ExpandVectorExtension(SDNode *N, SelectionDAG &DAG) { /// vectors), since the legalizer won't know what to do with that. static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue Op = N->getOperand(0); // This function is only supposed to be called for i64 types, either as the @@ -3618,7 +3876,7 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) { /// not support i64 elements, so sometimes the zero vectors will need to be /// explicitly constructed. Regardless, use a canonical VMOV to create the /// zero vector. -static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { +static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, SDLoc dl) { assert(VT.isVector() && "Expected a vector type"); // The canonical modified immediate encoding of a zero vector is....0! SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32); @@ -3634,7 +3892,7 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, assert(Op.getNumOperands() == 3 && "Not a double-shift!"); EVT VT = Op.getValueType(); unsigned VTBits = VT.getSizeInBits(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SDValue ShOpLo = Op.getOperand(0); SDValue ShOpHi = Op.getOperand(1); SDValue ShAmt = Op.getOperand(2); @@ -3670,7 +3928,7 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, assert(Op.getNumOperands() == 3 && "Not a double-shift!"); EVT VT = Op.getValueType(); unsigned VTBits = VT.getSizeInBits(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SDValue ShOpLo = Op.getOperand(0); SDValue ShOpHi = Op.getOperand(1); SDValue ShAmt = Op.getOperand(2); @@ -3703,7 +3961,7 @@ SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0 // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3) // so that the shift + and get folded into a bitfield extract. 
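A quick sanity check of that formula: FPSCR[23:22] encodes the VFP rounding mode as 0=nearest, 1=+infinity, 2=-infinity, 3=zero, while FLT_ROUNDS numbers the same modes 1, 2, 3, 0, so the lowering just computes (RMode + 1) mod 4:

```cpp
#include <cassert>

// (((FPSCR + (1 << 22)) >> 22) & 3) adds 1 (mod 4) to the RMode field.
unsigned fltRoundsFromFPSCR(unsigned fpscr) {
  return ((fpscr + (1u << 22)) >> 22) & 3;
}

int main() {
  assert(fltRoundsFromFPSCR(0u << 22) == 1); // round to nearest
  assert(fltRoundsFromFPSCR(1u << 22) == 2); // round toward +infinity
  assert(fltRoundsFromFPSCR(2u << 22) == 3); // round toward -infinity
  assert(fltRoundsFromFPSCR(3u << 22) == 0); // round toward zero
  return 0;
}
```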
- DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32, DAG.getConstant(Intrinsic::arm_get_fpscr, MVT::i32)); @@ -3718,7 +3976,7 @@ SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); if (!ST->hasV6T2Ops()) return SDValue(); @@ -3742,7 +4000,7 @@ static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, /// vuzp: = [k0 k1 k2 k3 k0 k1 k2 k3] each ki is 8-bits) static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8; SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0)); @@ -3764,7 +4022,7 @@ static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) { /// v4i16:Extracted = [k0 k1 k2 k3 ] static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); SDValue BitCounts = getCTPOP16BitCounts(N, DAG); if (VT.is64BitVector()) { @@ -3799,7 +4057,7 @@ static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) { /// static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16; @@ -3838,7 +4096,7 @@ static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG, static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); if (!VT.isVector()) return SDValue(); @@ -3873,7 +4131,7 @@ static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // We can get here for a node like i32 = ISD::SHL i32, i64 if (VT != MVT::i64) @@ -3919,7 +4177,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { SDValue CC = Op.getOperand(2); EVT VT = Op.getValueType(); ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); if (Op.getOperand(1).getValueType().isFloatingPoint()) { switch (SetCCOpcode) { @@ -4177,18 +4435,26 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST) const { - if (!ST->useNEONForSinglePrecisionFP() || !ST->hasVFP3() || ST->hasD16()) + if (!ST->hasVFP3()) return SDValue(); + bool IsDouble = Op.getValueType() == MVT::f64; ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op); - assert(Op.getValueType() == MVT::f32 && - "ConstantFP custom lowering should only occur for f32."); // Try splatting with a VMOV.f32... APFloat FPVal = CFP->getValueAPF(); - int ImmVal = ARM_AM::getFP32Imm(FPVal); + int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal); + if (ImmVal != -1) { - DebugLoc DL = Op.getDebugLoc(); + if (IsDouble || !ST->useNEONForSinglePrecisionFP()) { + // We have code in place to select a valid ConstantFP already, no need to + // do any mangling. + return Op; + } + + // It's a float and we are trying to use NEON operations where + // possible. Lower it to a splat followed by an extract. 
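For reference, the immediates that getFP32Imm/getFP64Imm accept are the VFP "fconst" family: ±(1 + n/16) × 2^e with n in [0, 15] and e in [-3, 4] (equivalently ±m × 2^-r with 16 ≤ m ≤ 31, 0 ≤ r ≤ 7). A standalone predicate sketching that check (an approximation for illustration, not the LLVM routine):

```cpp
#include <cmath>

// True if v is exactly a VFP immediate: (-1)^s * (1 + n/16) * 2^e with
// n in [0,15] and e in [-3,4]. 1.0, 0.5 and 31.0 qualify; 0.1 does not.
bool isVFPImmediate(double v) {
  if (v == 0.0 || !std::isfinite(v))
    return false;
  double m = std::fabs(v);
  for (int e = -3; e <= 4; ++e)
    for (int n = 0; n <= 15; ++n)
      if (m == (1.0 + n / 16.0) * std::ldexp(1.0, e))
        return true;
  return false;
}
```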
+ SDLoc DL(Op); SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32); SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32, NewVal); @@ -4196,15 +4462,31 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, DAG.getConstant(0, MVT::i32)); } - // If that fails, try a VMOV.i32 + // The rest of our options are NEON only, make sure that's allowed before + // proceeding. + if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP())) + return SDValue(); + EVT VMovVT; - unsigned iVal = FPVal.bitcastToAPInt().getZExtValue(); - SDValue NewVal = isNEONModifiedImm(iVal, 0, 32, DAG, VMovVT, false, - VMOVModImm); + uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue(); + + // It wouldn't really be worth bothering for doubles except for one very + // important value, which does happen to match: 0.0. So make sure we don't do + // anything stupid. + if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32)) + return SDValue(); + + // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too). + SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, VMovVT, + false, VMOVModImm); if (NewVal != SDValue()) { - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT, NewVal); + if (IsDouble) + return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant); + + // It's a float: cast and extract a vector element. SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, VecConstant); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant, @@ -4212,11 +4494,16 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, } // Finally, try a VMVN.i32 - NewVal = isNEONModifiedImm(~iVal & 0xffffffff, 0, 32, DAG, VMovVT, false, - VMVNModImm); + NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, VMovVT, + false, VMVNModImm); if (NewVal != SDValue()) { - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal); + + if (IsDouble) + return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant); + + // It's a float: cast and extract a vector element. SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, VecConstant); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant, @@ -4475,7 +4762,7 @@ static bool isReverseMask(ArrayRef<int> M, EVT VT) { // instruction, return an SDValue of such a constant (will become a MOV // instruction). Otherwise return null. static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, - const ARMSubtarget *ST, DebugLoc dl) { + const ARMSubtarget *ST, SDLoc dl) { uint64_t Val; if (!isa<ConstantSDNode>(N)) return SDValue(); @@ -4496,7 +4783,7 @@ static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST) const { BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode()); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); EVT VT = Op.getValueType(); APInt SplatBits, SplatUndef; @@ -4580,7 +4867,9 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, if (ValueCounts.size() == 0) return DAG.getUNDEF(VT); - if (isOnlyLowElement) + // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR. + // Keep going when we hit this case.
+ if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode())) return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value); unsigned EltSize = VT.getVectorElementType().getSizeInBits(); @@ -4679,6 +4968,24 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, return DAG.getNode(ISD::BITCAST, dl, VT, Val); } + // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we + // know the default expansion would otherwise fall back on something even + // worse. For a vector with one or two non-undef values, that's + // scalar_to_vector for the elements followed by a shuffle (provided the + // shuffle is valid for the target) and materialization element by element + // on the stack followed by a load for everything else. + if (!isConstant && !usesOnlyOneValue) { + SDValue Vec = DAG.getUNDEF(VT); + for (unsigned i = 0 ; i < NumElts; ++i) { + SDValue V = Op.getOperand(i); + if (V.getOpcode() == ISD::UNDEF) + continue; + SDValue LaneIdx = DAG.getConstant(i, MVT::i32); + Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx); + } + return Vec; + } + return SDValue(); } @@ -4686,7 +4993,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, // shuffle in combination with VEXTs. SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); EVT VT = Op.getValueType(); unsigned NumElts = VT.getVectorNumElements(); @@ -4875,7 +5182,7 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, /// the specified operations to build the shuffle. static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, - DebugLoc dl) { + SDLoc dl) { unsigned OpNum = (PFEntry >> 26) & 0x0F; unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); @@ -4955,7 +5262,7 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, // Check to see if we can use the VTBL instruction. SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); SmallVector<SDValue, 8> VTBLMask; for (ArrayRef<int>::iterator @@ -4974,7 +5281,7 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op, SelectionDAG &DAG) { - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); SDValue OpLHS = Op.getOperand(0); EVT VT = OpLHS.getValueType(); @@ -4992,7 +5299,7 @@ static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op, static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); EVT VT = Op.getValueType(); ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode()); @@ -5156,7 +5463,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { SDValue Vec = Op.getOperand(0); if (Op.getValueType() == MVT::i32 && Vec.getValueType().getVectorElementType().getSizeInBits() < 32) { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); } @@ -5168,7 +5475,7 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { // two 64-bit vectors are concatenated to a 128-bit vector. 
assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 && "unexpected CONCAT_VECTORS"); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SDValue Val = DAG.getUNDEF(MVT::v2f64); SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); @@ -5291,7 +5598,7 @@ static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG, // Must extend size to at least 64 bits to be used as an operand for VMULL. EVT NewVT = getExtensionTo64Bits(OrigTy); - return DAG.getNode(ExtOpcode, N->getDebugLoc(), NewVT, N); + return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N); } /// SkipLoadExtensionForVMULL - return a load of the original vector size that @@ -5304,7 +5611,7 @@ static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) { // The load already has the right type. if (ExtendedTy == LD->getMemoryVT()) - return DAG.getLoad(LD->getMemoryVT(), LD->getDebugLoc(), LD->getChain(), + return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), LD->getAlignment()); @@ -5312,7 +5619,7 @@ static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) { // We need to create a zextload/sextload. We cannot just create a load // followed by a zext/zext node because LowerMUL is also run during normal // operation legalization where we can't create illegal types. - return DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(), ExtendedTy, + return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy, LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); @@ -5341,7 +5648,7 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) { assert(BVN->getOpcode() == ISD::BUILD_VECTOR && BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR"); unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0; - return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), MVT::v2i32, + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), MVT::v2i32, BVN->getOperand(LowElt), BVN->getOperand(LowElt+2)); } // Construct a new BUILD_VECTOR with elements truncated to half the size. @@ -5358,7 +5665,7 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) { // The values are implicitly truncated so sext vs. zext doesn't matter. Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32)); } - return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts); } @@ -5430,7 +5737,7 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { } // Legalize to a VMULL instruction. - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); SDValue Op0; SDValue Op1 = SkipExtensionForVMULL(N1, DAG); if (!isMLA) { @@ -5460,7 +5767,7 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { } static SDValue -LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) { +LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) { // Convert to float // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo)); // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo)); @@ -5489,7 +5796,7 @@ LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) { } static SDValue -LowerSDIV_v4i16(SDValue N0, SDValue N1, DebugLoc dl, SelectionDAG &DAG) { +LowerSDIV_v4i16(SDValue N0, SDValue N1, SDLoc dl, SelectionDAG &DAG) { SDValue N2; // Convert to float. 
// float4 yf = vcvt_f32_s32(vmovl_s16(y)); @@ -5530,7 +5837,7 @@ static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) { assert((VT == MVT::v4i16 || VT == MVT::v8i8) && "unexpected type for custom-lowering ISD::SDIV"); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SDValue N0 = Op.getOperand(0); SDValue N1 = Op.getOperand(1); SDValue N2, N3; @@ -5565,7 +5872,7 @@ static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) { assert((VT == MVT::v4i16 || VT == MVT::v8i8) && "unexpected type for custom-lowering ISD::UDIV"); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SDValue N0 = Op.getOperand(0); SDValue N1 = Op.getOperand(1); SDValue N2, N3; @@ -5649,12 +5956,76 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { } if (!ExtraOp) - return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0), + return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1)); - return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0), + return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1), Op.getOperand(2)); } +SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { + assert(Subtarget->isTargetDarwin()); + + // For iOS, we want to call an alternative entry point: __sincos_stret; + // return values are passed via sret. + SDLoc dl(Op); + SDValue Arg = Op.getOperand(0); + EVT ArgVT = Arg.getValueType(); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + + MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // Pair of floats / doubles used to pass the result. + StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL); + + // Create stack object for sret. + const uint64_t ByteSize = TLI.getDataLayout()->getTypeAllocSize(RetTy); + const unsigned StackAlign = TLI.getDataLayout()->getPrefTypeAlignment(RetTy); + int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false); + SDValue SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy()); + + ArgListTy Args; + ArgListEntry Entry; + + Entry.Node = SRet; + Entry.Ty = RetTy->getPointerTo(); + Entry.isSExt = false; + Entry.isZExt = false; + Entry.isSRet = true; + Args.push_back(Entry); + + Entry.Node = Arg; + Entry.Ty = ArgTy; + Entry.isSExt = false; + Entry.isZExt = false; + Args.push_back(Entry); + + const char *LibcallName = (ArgVT == MVT::f64) + ? "__sincos_stret" : "__sincosf_stret"; + SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy()); + + TargetLowering:: + CallLoweringInfo CLI(DAG.getEntryNode(), Type::getVoidTy(*DAG.getContext()), + false, false, false, false, 0, + CallingConv::C, /*isTailCall=*/false, + /*doesNotRet=*/false, /*isReturnValueUsed=*/false, + Callee, Args, DAG, dl); + std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); + + SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet, + MachinePointerInfo(), false, false, false, 0); + + // Address of cos field.
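The cos field addressed just below sits at SRet plus the element's store size, i.e. the second member of the returned pair. In C terms, the call contract this lowering targets looks like the following sketch; sincos_stret here is a local stand-in for the Darwin-only __sincos_stret / __sincosf_stret entry points, an assumption about the platform runtime rather than something defined in this file:

    #include <cmath>
    #include <cstdio>

    // Shape of the sret result: sin first, cos at offset sizeof(double),
    // matching the two loads emitted by LowerFSINCOS.
    struct SinCosD { double Sin, Cos; };

    // Local emulation of the __sincos_stret contract: one call fills both
    // fields through the sret pointer.
    static void sincos_stret(SinCosD *SRet, double X) {
      SRet->Sin = std::sin(X);
      SRet->Cos = std::cos(X);
    }

    int main() {
      SinCosD R;             // the "stack object for sret"
      sincos_stret(&R, 1.0); // a single call produces both results
      std::printf("sin=%f cos=%f\n", R.Sin, R.Cos);
    }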
+ SDValue Add = DAG.getNode(ISD::ADD, dl, getPointerTy(), SRet, + DAG.getIntPtrConstant(ArgVT.getStoreSize())); + SDValue LoadCos = DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, + MachinePointerInfo(), false, false, false, 0); + + SDVTList Tys = DAG.getVTList(ArgVT, ArgVT); + return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, + LoadSin.getValue(0), LoadCos.getValue(0)); +} + static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) { // Monotonic load/store is legal for all targets if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic) @@ -5665,40 +6036,73 @@ static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) { return SDValue(); } - static void ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl<SDValue>& Results, - SelectionDAG &DAG, unsigned NewOp) { - DebugLoc dl = Node->getDebugLoc(); + SelectionDAG &DAG) { + SDLoc dl(Node); assert (Node->getValueType(0) == MVT::i64 && "Only know how to expand i64 atomics"); + AtomicSDNode *AN = cast<AtomicSDNode>(Node); SmallVector<SDValue, 6> Ops; Ops.push_back(Node->getOperand(0)); // Chain Ops.push_back(Node->getOperand(1)); // Ptr - // Low part of Val1 - Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, - Node->getOperand(2), DAG.getIntPtrConstant(0))); - // High part of Val1 - Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, - Node->getOperand(2), DAG.getIntPtrConstant(1))); - if (NewOp == ARMISD::ATOMCMPXCHG64_DAG) { - // High part of Val1 + for(unsigned i=2; i<Node->getNumOperands(); i++) { + // Low part Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, - Node->getOperand(3), DAG.getIntPtrConstant(0))); - // High part of Val2 + Node->getOperand(i), DAG.getIntPtrConstant(0))); + // High part Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, - Node->getOperand(3), DAG.getIntPtrConstant(1))); + Node->getOperand(i), DAG.getIntPtrConstant(1))); } SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); SDValue Result = - DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops.data(), Ops.size(), MVT::i64, - cast<MemSDNode>(Node)->getMemOperand()); + DAG.getAtomic(Node->getOpcode(), dl, MVT::i64, Tys, Ops.data(), Ops.size(), + cast<MemSDNode>(Node)->getMemOperand(), AN->getOrdering(), + AN->getSynchScope()); SDValue OpsF[] = { Result.getValue(0), Result.getValue(1) }; Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2)); Results.push_back(Result.getValue(2)); } +static void ReplaceREADCYCLECOUNTER(SDNode *N, + SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG, + const ARMSubtarget *Subtarget) { + SDLoc DL(N); + SDValue Cycles32, OutChain; + + if (Subtarget->hasPerfMon()) { + // Under Power Management extensions, the cycle-count is: + // mrc p15, #0, <Rt>, c9, c13, #0 + SDValue Ops[] = { N->getOperand(0), // Chain + DAG.getConstant(Intrinsic::arm_mrc, MVT::i32), + DAG.getConstant(15, MVT::i32), + DAG.getConstant(0, MVT::i32), + DAG.getConstant(9, MVT::i32), + DAG.getConstant(13, MVT::i32), + DAG.getConstant(0, MVT::i32) + }; + + Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, + DAG.getVTList(MVT::i32, MVT::Other), &Ops[0], + array_lengthof(Ops)); + OutChain = Cycles32.getValue(1); + } else { + // Intrinsic is defined to return 0 on unsupported platforms. Technically + // there are older ARM CPUs that have implementation-specific ways of + // obtaining this information (FIXME!). 
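The coprocessor operands above (p15, c9, c13, 0) name the 32-bit cycle counter, and the BUILD_PAIR just below widens it with a zero high word. A hedged sketch of the same read using GCC-style inline assembly; it assumes an ARM target where user-mode access to the counter has been enabled (a kernel-side detail not visible in this hunk), and otherwise falls back to the documented value 0:

    #include <cstdint>
    #include <cstdio>

    static uint64_t readCycleCounter() {
    #if defined(__arm__)
      uint32_t Cycles32;
      // mrc p15, #0, <Rt>, c9, c13, #0 -- the encoding built above.
      asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(Cycles32));
      return (uint64_t)Cycles32; // high word is zero, as in the BUILD_PAIR
    #else
      return 0; // unsupported platform: the intrinsic is defined to return 0
    #endif
    }

    int main() {
      std::printf("%llu\n", (unsigned long long)readCycleCounter());
    }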
+ Cycles32 = DAG.getConstant(0, MVT::i32); + OutChain = DAG.getEntryNode(); + } + + + SDValue Cycles64 = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, + Cycles32, DAG.getConstant(0, MVT::i32)); + Results.push_back(Cycles64); + Results.push_back(OutChain); +} + SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Don't know how to custom lower this!"); @@ -5753,6 +6157,9 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); case ISD::ATOMIC_LOAD: case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG); + case ISD::FSINCOS: return LowerFSINCOS(Op, DAG); + case ISD::SDIVREM: + case ISD::UDIVREM: return LowerDivRem(Op, DAG); } } @@ -5768,49 +6175,28 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, case ISD::BITCAST: Res = ExpandBITCAST(N, DAG); break; - case ISD::SIGN_EXTEND: - case ISD::ZERO_EXTEND: - Res = ExpandVectorExtension(N, DAG); - break; case ISD::SRL: case ISD::SRA: Res = Expand64BitShift(N, DAG, Subtarget); break; - case ISD::ATOMIC_LOAD_ADD: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMADD64_DAG); + case ISD::READCYCLECOUNTER: + ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget); return; + case ISD::ATOMIC_STORE: + case ISD::ATOMIC_LOAD: + case ISD::ATOMIC_LOAD_ADD: case ISD::ATOMIC_LOAD_AND: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMAND64_DAG); - return; case ISD::ATOMIC_LOAD_NAND: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMNAND64_DAG); - return; case ISD::ATOMIC_LOAD_OR: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMOR64_DAG); - return; case ISD::ATOMIC_LOAD_SUB: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSUB64_DAG); - return; case ISD::ATOMIC_LOAD_XOR: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMXOR64_DAG); - return; case ISD::ATOMIC_SWAP: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSWAP64_DAG); - return; case ISD::ATOMIC_CMP_SWAP: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMCMPXCHG64_DAG); - return; case ISD::ATOMIC_LOAD_MIN: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMIN64_DAG); - return; case ISD::ATOMIC_LOAD_UMIN: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMIN64_DAG); - return; case ISD::ATOMIC_LOAD_MAX: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMAX64_DAG); - return; case ISD::ATOMIC_LOAD_UMAX: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMAX64_DAG); + ReplaceATOMIC_OP_64(N, Results, DAG); return; } if (Res.getNode()) @@ -5830,6 +6216,7 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, unsigned oldval = MI->getOperand(2).getReg(); unsigned newval = MI->getOperand(3).getReg(); const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(4).getImm()); DebugLoc dl = MI->getDebugLoc(); bool isThumb2 = Subtarget->isThumb2(); @@ -5845,21 +6232,7 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, } unsigned ldrOpc, strOpc; - switch (Size) { - default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); - case 1: - ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; - strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; - break; - case 2: - ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; - strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; - break; - case 4: - ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; - strOpc = isThumb2 ? 
ARM::t2STREX : ARM::STREX; - break; - } + getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc); MachineFunction *MF = BB->getParent(); const BasicBlock *LLVM_BB = BB->getBasicBlock(); @@ -5939,6 +6312,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, unsigned dest = MI->getOperand(0).getReg(); unsigned ptr = MI->getOperand(1).getReg(); unsigned incr = MI->getOperand(2).getReg(); + AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm()); DebugLoc dl = MI->getDebugLoc(); bool isThumb2 = Subtarget->isThumb2(); @@ -5946,24 +6320,11 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, if (isThumb2) { MRI.constrainRegClass(dest, &ARM::rGPRRegClass); MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); + MRI.constrainRegClass(incr, &ARM::rGPRRegClass); } unsigned ldrOpc, strOpc; - switch (Size) { - default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); - case 1: - ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; - strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; - break; - case 2: - ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; - strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; - break; - case 4: - ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; - strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX; - break; - } + getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc); MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); @@ -6047,6 +6408,7 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, unsigned ptr = MI->getOperand(1).getReg(); unsigned incr = MI->getOperand(2).getReg(); unsigned oldval = dest; + AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm()); DebugLoc dl = MI->getDebugLoc(); bool isThumb2 = Subtarget->isThumb2(); @@ -6054,24 +6416,20 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, if (isThumb2) { MRI.constrainRegClass(dest, &ARM::rGPRRegClass); MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); + MRI.constrainRegClass(incr, &ARM::rGPRRegClass); } unsigned ldrOpc, strOpc, extendOpc; + getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc); switch (Size) { - default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); + default: llvm_unreachable("unsupported size for AtomicBinaryMinMax!"); case 1: - ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; - strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; extendOpc = isThumb2 ? ARM::t2SXTB : ARM::SXTB; break; case 2: - ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; - strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; extendOpc = isThumb2 ? ARM::t2SXTH : ARM::SXTH; break; case 4: - ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; - strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX; extendOpc = 0; break; } @@ -6115,7 +6473,10 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, // Sign extend the value, if necessary. if (signExtend && extendOpc) { - oldval = MRI.createVirtualRegister(&ARM::GPRRegClass); + oldval = MRI.createVirtualRegister(isThumb2 ? &ARM::rGPRRegClass + : &ARM::GPRnopcRegClass); + if (!isThumb2) + MRI.constrainRegClass(dest, &ARM::GPRnopcRegClass); AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval) .addReg(dest) .addImm(0)); @@ -6153,7 +6514,7 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, unsigned Op1, unsigned Op2, bool NeedsCarry, bool IsCmpxchg, bool IsMinMax, ARMCC::CondCodes CC) const { - // This also handles ATOMIC_SWAP, indicated by Op1==0. 
+ // This also handles ATOMIC_SWAP and ATOMIC_STORE, indicated by Op1==0. const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); const BasicBlock *LLVM_BB = BB->getBasicBlock(); @@ -6161,11 +6522,15 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, MachineFunction::iterator It = BB; ++It; + bool isStore = (MI->getOpcode() == ARM::ATOMIC_STORE_I64); + unsigned offset = (isStore ? -2 : 0); unsigned destlo = MI->getOperand(0).getReg(); unsigned desthi = MI->getOperand(1).getReg(); - unsigned ptr = MI->getOperand(2).getReg(); - unsigned vallo = MI->getOperand(3).getReg(); - unsigned valhi = MI->getOperand(4).getReg(); + unsigned ptr = MI->getOperand(offset+2).getReg(); + unsigned vallo = MI->getOperand(offset+3).getReg(); + unsigned valhi = MI->getOperand(offset+4).getReg(); + unsigned OrdIdx = offset + (IsCmpxchg ? 7 : 5); + AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(OrdIdx).getImm()); DebugLoc dl = MI->getDebugLoc(); bool isThumb2 = Subtarget->isThumb2(); @@ -6174,8 +6539,13 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, MRI.constrainRegClass(destlo, &ARM::rGPRRegClass); MRI.constrainRegClass(desthi, &ARM::rGPRRegClass); MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); + MRI.constrainRegClass(vallo, &ARM::rGPRRegClass); + MRI.constrainRegClass(valhi, &ARM::rGPRRegClass); } + unsigned ldrOpc, strOpc; + getExclusiveOperation(8, Ord, isThumb2, ldrOpc, strOpc); + MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *contBB = 0, *cont2BB = 0; if (IsCmpxchg || IsMinMax) @@ -6215,21 +6585,23 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, // fallthrough --> exitMBB BB = loopMBB; - // Load - if (isThumb2) { - AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2LDREXD)) - .addReg(destlo, RegState::Define) - .addReg(desthi, RegState::Define) - .addReg(ptr)); - } else { - unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDREXD)) - .addReg(GPRPair0, RegState::Define).addReg(ptr)); - // Copy r2/r3 into dest. (This copy will normally be coalesced.) - BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo) - .addReg(GPRPair0, 0, ARM::gsub_0); - BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi) - .addReg(GPRPair0, 0, ARM::gsub_1); + if (!isStore) { + // Load + if (isThumb2) { + AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc)) + .addReg(destlo, RegState::Define) + .addReg(desthi, RegState::Define) + .addReg(ptr)); + } else { + unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc)) + .addReg(GPRPair0, RegState::Define).addReg(ptr)); + // Copy r2/r3 into dest. (This copy will normally be coalesced.) + BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo) + .addReg(GPRPair0, 0, ARM::gsub_0); + BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi) + .addReg(GPRPair0, 0, ARM::gsub_1); + } } unsigned StoreLo, StoreHi; @@ -6281,7 +6653,9 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, // Store if (isThumb2) { - AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2STREXD), storesuccess) + MRI.constrainRegClass(StoreLo, &ARM::rGPRRegClass); + MRI.constrainRegClass(StoreHi, &ARM::rGPRRegClass); + AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess) .addReg(StoreLo).addReg(StoreHi).addReg(ptr)); } else { // Marshal a pair... 
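Every per-size ldrex/strex switch in these expansion routines now funnels through getExclusiveOperation, whose body lies outside this hunk. Given its new AtomicOrdering parameter and the acquire/release exclusives that ARMv8 adds, a plausible shape for the helper is sketched below; the mnemonic strings stand in for the ARM::* opcode enums, and the whole mapping is an assumption about the helper, not a copy of it:

    #include <cstdio>
    #include <string>

    enum AtomicOrdering { Monotonic, Acquire, Release, AcquireRelease, SeqCst };

    // Pick a load-exclusive/store-exclusive pair by access size, upgrading
    // to the acquire/release forms for stronger orderings when available.
    static void getExclusiveOps(unsigned Size, AtomicOrdering Ord, bool HasV8,
                                std::string &Ld, std::string &St) {
      const char *Suffix =
          Size == 1 ? "B" : Size == 2 ? "H" : Size == 8 ? "D" : "";
      bool NeedsAcquire = HasV8 && Ord != Monotonic && Ord != Release;
      bool NeedsRelease = HasV8 && Ord != Monotonic && Ord != Acquire;
      Ld = std::string(NeedsAcquire ? "LDAEX" : "LDREX") + Suffix;
      St = std::string(NeedsRelease ? "STLEX" : "STREX") + Suffix;
    }

    int main() {
      std::string Ld, St;
      getExclusiveOps(4, SeqCst, /*HasV8=*/true, Ld, St);
      std::printf("%s / %s\n", Ld.c_str(), St.c_str()); // LDAEX / STLEX
    }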
@@ -6299,7 +6673,7 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, .addImm(ARM::gsub_1); // ...and store it - AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::STREXD), storesuccess) + AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess) .addReg(StorePair).addReg(ptr)); } // Cmp+jump @@ -6320,6 +6694,51 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, return BB; } +MachineBasicBlock * +ARMTargetLowering::EmitAtomicLoad64(MachineInstr *MI, MachineBasicBlock *BB) const { + + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + unsigned destlo = MI->getOperand(0).getReg(); + unsigned desthi = MI->getOperand(1).getReg(); + unsigned ptr = MI->getOperand(2).getReg(); + AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm()); + DebugLoc dl = MI->getDebugLoc(); + bool isThumb2 = Subtarget->isThumb2(); + + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + if (isThumb2) { + MRI.constrainRegClass(destlo, &ARM::rGPRRegClass); + MRI.constrainRegClass(desthi, &ARM::rGPRRegClass); + MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); + } + unsigned ldrOpc, strOpc; + getExclusiveOperation(8, Ord, isThumb2, ldrOpc, strOpc); + + MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(ldrOpc)); + + if (isThumb2) { + MIB.addReg(destlo, RegState::Define) + .addReg(desthi, RegState::Define) + .addReg(ptr); + + } else { + unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + MIB.addReg(GPRPair0, RegState::Define).addReg(ptr); + + // Copy GPRPair0 into dest. (This copy will normally be coalesced.) + BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), destlo) + .addReg(GPRPair0, 0, ARM::gsub_0); + BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), desthi) + .addReg(GPRPair0, 0, ARM::gsub_1); + } + AddDefaultPred(MIB); + + MI->eraseFromParent(); // The instruction is gone now. + + return BB; +} + /// SetupEntryBlockForSjLj - Insert code into the entry block that creates and /// registers the function context. void ARMTargetLowering:: @@ -6851,8 +7270,109 @@ MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) { llvm_unreachable("Expecting a BB with two successors!"); } -MachineBasicBlock *ARMTargetLowering:: -EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { +/// Return the load opcode for a given load size. If load size >= 8, +/// neon opcode will be returned. +static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) { + if (LdSize >= 8) + return LdSize == 16 ? ARM::VLD1q32wb_fixed + : LdSize == 8 ? ARM::VLD1d32wb_fixed : 0; + if (IsThumb1) + return LdSize == 4 ? ARM::tLDRi + : LdSize == 2 ? ARM::tLDRHi + : LdSize == 1 ? ARM::tLDRBi : 0; + if (IsThumb2) + return LdSize == 4 ? ARM::t2LDR_POST + : LdSize == 2 ? ARM::t2LDRH_POST + : LdSize == 1 ? ARM::t2LDRB_POST : 0; + return LdSize == 4 ? ARM::LDR_POST_IMM + : LdSize == 2 ? ARM::LDRH_POST + : LdSize == 1 ? ARM::LDRB_POST_IMM : 0; +} + +/// Return the store opcode for a given store size. If store size >= 8, +/// neon opcode will be returned. +static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) { + if (StSize >= 8) + return StSize == 16 ? ARM::VST1q32wb_fixed + : StSize == 8 ? ARM::VST1d32wb_fixed : 0; + if (IsThumb1) + return StSize == 4 ? ARM::tSTRi + : StSize == 2 ? ARM::tSTRHi + : StSize == 1 ? ARM::tSTRBi : 0; + if (IsThumb2) + return StSize == 4 ? ARM::t2STR_POST + : StSize == 2 ? ARM::t2STRH_POST + : StSize == 1 ? 
ARM::t2STRB_POST : 0; + return StSize == 4 ? ARM::STR_POST_IMM + : StSize == 2 ? ARM::STRH_POST + : StSize == 1 ? ARM::STRB_POST_IMM : 0; +} + +/// Emit a post-increment load operation with given size. The instructions +/// will be added to BB at Pos. +static void emitPostLd(MachineBasicBlock *BB, MachineInstr *Pos, + const TargetInstrInfo *TII, DebugLoc dl, + unsigned LdSize, unsigned Data, unsigned AddrIn, + unsigned AddrOut, bool IsThumb1, bool IsThumb2) { + unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2); + assert(LdOpc != 0 && "Should have a load opcode"); + if (LdSize >= 8) { + AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) + .addReg(AddrOut, RegState::Define).addReg(AddrIn) + .addImm(0)); + } else if (IsThumb1) { + // load + update AddrIn + AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) + .addReg(AddrIn).addImm(0)); + MachineInstrBuilder MIB = + BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut); + MIB = AddDefaultT1CC(MIB); + MIB.addReg(AddrIn).addImm(LdSize); + AddDefaultPred(MIB); + } else if (IsThumb2) { + AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) + .addReg(AddrOut, RegState::Define).addReg(AddrIn) + .addImm(LdSize)); + } else { // arm + AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) + .addReg(AddrOut, RegState::Define).addReg(AddrIn) + .addReg(0).addImm(LdSize)); + } +} + +/// Emit a post-increment store operation with given size. The instructions +/// will be added to BB at Pos. +static void emitPostSt(MachineBasicBlock *BB, MachineInstr *Pos, + const TargetInstrInfo *TII, DebugLoc dl, + unsigned StSize, unsigned Data, unsigned AddrIn, + unsigned AddrOut, bool IsThumb1, bool IsThumb2) { + unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2); + assert(StOpc != 0 && "Should have a store opcode"); + if (StSize >= 8) { + AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut) + .addReg(AddrIn).addImm(0).addReg(Data)); + } else if (IsThumb1) { + // store + update AddrIn + AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc)).addReg(Data) + .addReg(AddrIn).addImm(0)); + MachineInstrBuilder MIB = + BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut); + MIB = AddDefaultT1CC(MIB); + MIB.addReg(AddrIn).addImm(StSize); + AddDefaultPred(MIB); + } else if (IsThumb2) { + AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut) + .addReg(Data).addReg(AddrIn).addImm(StSize)); + } else { // arm + AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut) + .addReg(Data).addReg(AddrIn).addReg(0) + .addImm(StSize)); + } +} + +MachineBasicBlock * +ARMTargetLowering::EmitStructByval(MachineInstr *MI, + MachineBasicBlock *BB) const { // This pseudo instruction has 3 operands: dst, src, size // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold(). // Otherwise, we will generate unrolled scalar copies. @@ -6867,23 +7387,18 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { unsigned Align = MI->getOperand(3).getImm(); DebugLoc dl = MI->getDebugLoc(); - bool isThumb2 = Subtarget->isThumb2(); MachineFunction *MF = BB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); - unsigned ldrOpc, strOpc, UnitSize = 0; + unsigned UnitSize = 0; + const TargetRegisterClass *TRC = 0; + const TargetRegisterClass *VecTRC = 0; - const TargetRegisterClass *TRC = isThumb2 ? 
- (const TargetRegisterClass*)&ARM::tGPRRegClass : - (const TargetRegisterClass*)&ARM::GPRRegClass; - const TargetRegisterClass *TRC_Vec = 0; + bool IsThumb1 = Subtarget->isThumb1Only(); + bool IsThumb2 = Subtarget->isThumb2(); if (Align & 1) { - ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM; - strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM; UnitSize = 1; } else if (Align & 2) { - ldrOpc = isThumb2 ? ARM::t2LDRH_POST : ARM::LDRH_POST; - strOpc = isThumb2 ? ARM::t2STRH_POST : ARM::STRH_POST; UnitSize = 2; } else { // Check whether we can use NEON instructions. @@ -6891,27 +7406,27 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat) && Subtarget->hasNEON()) { - if ((Align % 16 == 0) && SizeVal >= 16) { - ldrOpc = ARM::VLD1q32wb_fixed; - strOpc = ARM::VST1q32wb_fixed; + if ((Align % 16 == 0) && SizeVal >= 16) UnitSize = 16; - TRC_Vec = (const TargetRegisterClass*)&ARM::DPairRegClass; - } - else if ((Align % 8 == 0) && SizeVal >= 8) { - ldrOpc = ARM::VLD1d32wb_fixed; - strOpc = ARM::VST1d32wb_fixed; + else if ((Align % 8 == 0) && SizeVal >= 8) UnitSize = 8; - TRC_Vec = (const TargetRegisterClass*)&ARM::DPRRegClass; - } } // Can't use NEON instructions. - if (UnitSize == 0) { - ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM; - strOpc = isThumb2 ? ARM::t2STR_POST : ARM::STR_POST_IMM; + if (UnitSize == 0) UnitSize = 4; - } } + // Select the correct opcode and register class for unit size load/store + bool IsNeon = UnitSize >= 8; + TRC = (IsThumb1 || IsThumb2) ? (const TargetRegisterClass *)&ARM::tGPRRegClass + : (const TargetRegisterClass *)&ARM::GPRRegClass; + if (IsNeon) + VecTRC = UnitSize == 16 + ? (const TargetRegisterClass *)&ARM::DPairRegClass + : UnitSize == 8 + ? (const TargetRegisterClass *)&ARM::DPRRegClass + : 0; + unsigned BytesLeft = SizeVal % UnitSize; unsigned LoopSize = SizeVal - BytesLeft; @@ -6922,34 +7437,13 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { unsigned srcIn = src; unsigned destIn = dest; for (unsigned i = 0; i < LoopSize; i+=UnitSize) { - unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC); unsigned srcOut = MRI.createVirtualRegister(TRC); unsigned destOut = MRI.createVirtualRegister(TRC); - if (UnitSize >= 8) { - AddDefaultPred(BuildMI(*BB, MI, dl, - TII->get(ldrOpc), scratch) - .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(0)); - - AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) - .addReg(destIn).addImm(0).addReg(scratch)); - } else if (isThumb2) { - AddDefaultPred(BuildMI(*BB, MI, dl, - TII->get(ldrOpc), scratch) - .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(UnitSize)); - - AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) - .addReg(scratch).addReg(destIn) - .addImm(UnitSize)); - } else { - AddDefaultPred(BuildMI(*BB, MI, dl, - TII->get(ldrOpc), scratch) - .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0) - .addImm(UnitSize)); - - AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) - .addReg(scratch).addReg(destIn) - .addReg(0).addImm(UnitSize)); - } + unsigned scratch = MRI.createVirtualRegister(IsNeon ? 
VecTRC : TRC); + emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut, + IsThumb1, IsThumb2); + emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut, + IsThumb1, IsThumb2); srcIn = srcOut; destIn = destOut; } @@ -6957,30 +7451,14 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { // Handle the leftover bytes with LDRB and STRB. // [scratch, srcOut] = LDRB_POST(srcIn, 1) // [destOut] = STRB_POST(scratch, destIn, 1) - ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM; - strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM; for (unsigned i = 0; i < BytesLeft; i++) { - unsigned scratch = MRI.createVirtualRegister(TRC); unsigned srcOut = MRI.createVirtualRegister(TRC); unsigned destOut = MRI.createVirtualRegister(TRC); - if (isThumb2) { - AddDefaultPred(BuildMI(*BB, MI, dl, - TII->get(ldrOpc),scratch) - .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1)); - - AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) - .addReg(scratch).addReg(destIn) - .addReg(0).addImm(1)); - } else { - AddDefaultPred(BuildMI(*BB, MI, dl, - TII->get(ldrOpc),scratch) - .addReg(srcOut, RegState::Define).addReg(srcIn) - .addReg(0).addImm(1)); - - AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) - .addReg(scratch).addReg(destIn) - .addReg(0).addImm(1)); - } + unsigned scratch = MRI.createVirtualRegister(TRC); + emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut, + IsThumb1, IsThumb2); + emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut, + IsThumb1, IsThumb2); srcIn = srcOut; destIn = destOut; } @@ -7021,17 +7499,16 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { // Load an immediate to varEnd. unsigned varEnd = MRI.createVirtualRegister(TRC); - if (isThumb2) { - unsigned VReg1 = varEnd; + if (IsThumb2) { + unsigned Vtmp = varEnd; if ((LoopSize & 0xFFFF0000) != 0) - VReg1 = MRI.createVirtualRegister(TRC); - AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), VReg1) - .addImm(LoopSize & 0xFFFF)); + Vtmp = MRI.createVirtualRegister(TRC); + AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), Vtmp) + .addImm(LoopSize & 0xFFFF)); if ((LoopSize & 0xFFFF0000) != 0) AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd) - .addReg(VReg1) - .addImm(LoopSize >> 16)); + .addReg(Vtmp).addImm(LoopSize >> 16)); } else { MachineConstantPool *ConstantPool = MF->getConstantPool(); Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); @@ -7043,10 +7520,12 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { Align = getDataLayout()->getTypeAllocSize(C->getType()); unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); - AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDRcp)) - .addReg(varEnd, RegState::Define) - .addConstantPoolIndex(Idx) - .addImm(0)); + if (IsThumb1) + AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci)).addReg( + varEnd, RegState::Define).addConstantPoolIndex(Idx)); + else + AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp)).addReg( + varEnd, RegState::Define).addConstantPoolIndex(Idx).addImm(0)); } BB->addSuccessor(loopMBB); @@ -7075,39 +7554,30 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize) // [destLoop] = STR_POST(scratch, destPhi, UnitSiz) - unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? 
TRC_Vec:TRC); - if (UnitSize >= 8) { - AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch) - .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(0)); - - AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop) - .addReg(destPhi).addImm(0).addReg(scratch)); - } else if (isThumb2) { - AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch) - .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(UnitSize)); - - AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop) - .addReg(scratch).addReg(destPhi) - .addImm(UnitSize)); - } else { - AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch) - .addReg(srcLoop, RegState::Define).addReg(srcPhi).addReg(0) - .addImm(UnitSize)); - - AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop) - .addReg(scratch).addReg(destPhi) - .addReg(0).addImm(UnitSize)); - } + unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC); + emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop, + IsThumb1, IsThumb2); + emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop, + IsThumb1, IsThumb2); // Decrement loop variable by UnitSize. - MachineInstrBuilder MIB = BuildMI(BB, dl, - TII->get(isThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop); - AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize))); - MIB->getOperand(5).setReg(ARM::CPSR); - MIB->getOperand(5).setIsDef(true); - - BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) - .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); + if (IsThumb1) { + MachineInstrBuilder MIB = + BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop); + MIB = AddDefaultT1CC(MIB); + MIB.addReg(varPhi).addImm(UnitSize); + AddDefaultPred(MIB); + } else { + MachineInstrBuilder MIB = + BuildMI(*BB, BB->end(), dl, + TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop); + AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize))); + MIB->getOperand(5).setReg(ARM::CPSR); + MIB->getOperand(5).setIsDef(true); + } + BuildMI(*BB, BB->end(), dl, + TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc)) + .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); // loopMBB can loop back to loopMBB or fall through to exitMBB. BB->addSuccessor(loopMBB); @@ -7116,34 +7586,19 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { // Add epilogue to handle BytesLeft. BB = exitMBB; MachineInstr *StartOfExit = exitMBB->begin(); - ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM; - strOpc = isThumb2 ? 
ARM::t2STRB_POST : ARM::STRB_POST_IMM; // [scratch, srcOut] = LDRB_POST(srcLoop, 1) // [destOut] = STRB_POST(scratch, destLoop, 1) unsigned srcIn = srcLoop; unsigned destIn = destLoop; for (unsigned i = 0; i < BytesLeft; i++) { - unsigned scratch = MRI.createVirtualRegister(TRC); unsigned srcOut = MRI.createVirtualRegister(TRC); unsigned destOut = MRI.createVirtualRegister(TRC); - if (isThumb2) { - AddDefaultPred(BuildMI(*BB, StartOfExit, dl, - TII->get(ldrOpc),scratch) - .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1)); - - AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut) - .addReg(scratch).addReg(destIn) - .addImm(1)); - } else { - AddDefaultPred(BuildMI(*BB, StartOfExit, dl, - TII->get(ldrOpc),scratch) - .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0).addImm(1)); - - AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut) - .addReg(scratch).addReg(destIn) - .addReg(0).addImm(1)); - } + unsigned scratch = MRI.createVirtualRegister(TRC); + emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut, + IsThumb1, IsThumb2); + emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut, + IsThumb1, IsThumb2); srcIn = srcOut; destIn = destOut; } @@ -7293,46 +7748,49 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2); case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4); + case ARM::ATOMIC_LOAD_I64: + return EmitAtomicLoad64(MI, BB); - case ARM::ATOMADD6432: + case ARM::ATOMIC_LOAD_ADD_I64: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr, isThumb2 ? ARM::t2ADCrr : ARM::ADCrr, /*NeedsCarry*/ true); - case ARM::ATOMSUB6432: + case ARM::ATOMIC_LOAD_SUB_I64: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, /*NeedsCarry*/ true); - case ARM::ATOMOR6432: + case ARM::ATOMIC_LOAD_OR_I64: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); - case ARM::ATOMXOR6432: + case ARM::ATOMIC_LOAD_XOR_I64: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2EORrr : ARM::EORrr, isThumb2 ? ARM::t2EORrr : ARM::EORrr); - case ARM::ATOMAND6432: + case ARM::ATOMIC_LOAD_AND_I64: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); - case ARM::ATOMSWAP6432: + case ARM::ATOMIC_STORE_I64: + case ARM::ATOMIC_SWAP_I64: return EmitAtomicBinary64(MI, BB, 0, 0, false); - case ARM::ATOMCMPXCHG6432: + case ARM::ATOMIC_CMP_SWAP_I64: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, /*NeedsCarry*/ false, /*IsCmpxchg*/true); - case ARM::ATOMMIN6432: + case ARM::ATOMIC_LOAD_MIN_I64: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, /*NeedsCarry*/ true, /*IsCmpxchg*/false, /*IsMinMax*/ true, ARMCC::LT); - case ARM::ATOMMAX6432: + case ARM::ATOMIC_LOAD_MAX_I64: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, /*NeedsCarry*/ true, /*IsCmpxchg*/false, /*IsMinMax*/ true, ARMCC::GE); - case ARM::ATOMUMIN6432: + case ARM::ATOMIC_LOAD_UMIN_I64: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, isThumb2 ? 
ARM::t2SBCrr : ARM::SBCrr, /*NeedsCarry*/ true, /*IsCmpxchg*/false, /*IsMinMax*/ true, ARMCC::LO); - case ARM::ATOMUMAX6432: + case ARM::ATOMIC_LOAD_UMAX_I64: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, /*NeedsCarry*/ true, /*IsCmpxchg*/false, @@ -7710,13 +8168,13 @@ SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, // Slct is now know to be the desired identity constant when CC is true. SDValue TrueVal = OtherOp; - SDValue FalseVal = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT, + SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal); // Unless SwapSelectOps says CC should be false. if (SwapSelectOps) std::swap(TrueVal, FalseVal); - return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SELECT, SDLoc(N), VT, CCOp, TrueVal, FalseVal); } @@ -7823,9 +8281,9 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, llvm_unreachable("Invalid vector element type for padd optimization."); } - SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(), + SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), widenType, &Ops[0], Ops.size()); - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, tmp); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, tmp); } static SDValue findMUL_LOHI(SDValue V) { @@ -7868,8 +8326,11 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, assert(AddcNode->getNumValues() == 2 && AddcNode->getValueType(0) == MVT::i32 && - AddcNode->getValueType(1) == MVT::Glue && - "Expect ADDC with two result values: i32, glue"); + "Expect ADDC with two result values. First: i32"); + + // Check that we have a glued ADDC node. + if (AddcNode->getValueType(1) != MVT::Glue) + return SDValue(); // Check that the ADDC adds the low result of the S/UMUL_LOHI. 
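The combine hardened above fuses a 32x32-to-64 multiply with a 64-bit accumulate carried through an ADDC/ADDE pair: the low words add producing a carry, the high words add consuming it, which is exactly what one UMLAL performs. A standalone check of that identity:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t A = 0xDEADBEEF, B = 0x12345678;
      uint64_t Acc = 0x0123456789ABCDEFULL;

      // What a single UMLAL computes.
      uint64_t Fused = Acc + (uint64_t)A * B;

      // What the ADDC/ADDE chain computes, one 32-bit half at a time.
      uint64_t Mul = (uint64_t)A * B;
      uint32_t Lo = (uint32_t)Acc + (uint32_t)Mul;  // ADDC
      uint32_t CarryOut = Lo < (uint32_t)Mul;       // the carry flag
      uint32_t Hi =                                 // ADDE
          (uint32_t)(Acc >> 32) + (uint32_t)(Mul >> 32) + CarryOut;
      uint64_t Split = ((uint64_t)Hi << 32) | Lo;

      std::printf("%d\n", Fused == Split); // 1
    }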
if (AddcOp0->getOpcode() != ISD::UMUL_LOHI && @@ -7950,7 +8411,7 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, Ops.push_back(*LowAdd); Ops.push_back(*HiAdd); - SDValue MLALNode = DAG.getNode(FinalOpc, AddcNode->getDebugLoc(), + SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcNode), DAG.getVTList(MVT::i32, MVT::i32), &Ops[0], Ops.size()); @@ -8038,6 +8499,13 @@ static SDValue PerformSUBCombine(SDNode *N, /// is faster than /// vadd d3, d0, d1 /// vmul d3, d3, d2 +// However, for (A + B) * (A + B), +// vadd d2, d0, d1 +// vmul d3, d0, d2 +// vmla d3, d1, d2 +// is slower than +// vadd d2, d0, d1 +// vmul d3, d2, d2 static SDValue PerformVMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { @@ -8057,8 +8525,11 @@ static SDValue PerformVMULCombine(SDNode *N, std::swap(N0, N1); } + if (N0 == N1) + return SDValue(); + EVT VT = N->getValueType(0); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); SDValue N00 = N0->getOperand(0); SDValue N01 = N0->getOperand(1); return DAG.getNode(Opcode, DL, VT, @@ -8088,11 +8559,11 @@ static SDValue PerformMULCombine(SDNode *N, return SDValue(); int64_t MulAmt = C->getSExtValue(); - unsigned ShiftAmt = CountTrailingZeros_64(MulAmt); + unsigned ShiftAmt = countTrailingZeros<uint64_t>(MulAmt); ShiftAmt = ShiftAmt & (32 - 1); SDValue V = N->getOperand(0); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); SDValue Res; MulAmt >>= ShiftAmt; @@ -8156,7 +8627,7 @@ static SDValue PerformANDCombine(SDNode *N, // Attempt to use immediate-form VBIC BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT VT = N->getValueType(0); SelectionDAG &DAG = DCI.DAG; @@ -8199,7 +8670,7 @@ static SDValue PerformORCombine(SDNode *N, const ARMSubtarget *Subtarget) { // Attempt to use immediate-form VORR BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT VT = N->getValueType(0); SelectionDAG &DAG = DCI.DAG; @@ -8248,22 +8719,29 @@ static SDValue PerformORCombine(SDNode *N, unsigned SplatBitSize; bool HasAnyUndefs; + APInt SplatBits0, SplatBits1; BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1)); - APInt SplatBits0; + BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1)); + // Ensure that the second operand of both ands are constants if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize, - HasAnyUndefs) && !HasAnyUndefs) { - BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1)); - APInt SplatBits1; - if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize, - HasAnyUndefs) && !HasAnyUndefs && - SplatBits0 == ~SplatBits1) { - // Canonicalize the vector type to make instruction selection simpler. - EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; - SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT, - N0->getOperand(1), N0->getOperand(0), - N1->getOperand(0)); - return DAG.getNode(ISD::BITCAST, dl, VT, Result); - } + HasAnyUndefs) && !HasAnyUndefs) { + if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize, + HasAnyUndefs) && !HasAnyUndefs) { + // Ensure that the bit width of the constants are the same and that + // the splat arguments are logical inverses as per the pattern we + // are trying to simplify. 
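The inverse-splat requirement stated above is what makes the fold a pure bit-select: (x & C) | (y & ~C) takes each bit from x where the mask is set and from y where it is clear, which is VBSL's semantics. Standalone:

    #include <cstdint>
    #include <cstdio>

    // The guarded pattern: both AND masks must be exact logical inverses.
    static uint32_t bitSelect(uint32_t Mask, uint32_t X, uint32_t Y) {
      return (X & Mask) | (Y & ~Mask); // what VBSL computes per bit
    }

    int main() {
      // Bytes where the mask is 0xFF come from X, the rest from Y.
      std::printf("0x%08x\n", bitSelect(0x00FF00FF, 0xAAAAAAAA, 0x55555555));
      // prints 0x55aa55aa
    }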
+ if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() && + SplatBits0 == ~SplatBits1) { + // Canonicalize the vector type to make instruction selection + // simpler. + EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; + SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT, + N0->getOperand(1), + N0->getOperand(0), + N1->getOperand(0)); + return DAG.getNode(ISD::BITCAST, dl, VT, Result); + } + } } } @@ -8274,7 +8752,7 @@ static SDValue PerformORCombine(SDNode *N, if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops()) return SDValue(); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); // 1) or (and A, mask), val => ARMbfi A, val, mask // iff (val & mask) == val // @@ -8309,7 +8787,7 @@ static SDValue PerformORCombine(SDNode *N, return SDValue(); if (ARM::isBitFieldInvertedMask(Mask)) { - Val >>= CountTrailingZeros_32(~Mask); + Val >>= countTrailingZeros(~Mask); Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, DAG.getConstant(Val, MVT::i32), @@ -8336,7 +8814,7 @@ static SDValue PerformORCombine(SDNode *N, (Mask == 0xffff || Mask == 0xffff0000)) return SDValue(); // 2a - unsigned amt = CountTrailingZeros_32(Mask2); + unsigned amt = countTrailingZeros(Mask2); Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0), DAG.getConstant(amt, MVT::i32)); Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res, @@ -8352,7 +8830,7 @@ static SDValue PerformORCombine(SDNode *N, (Mask2 == 0xffff || Mask2 == 0xffff0000)) return SDValue(); // 2b - unsigned lsb = CountTrailingZeros_32(Mask); + unsigned lsb = countTrailingZeros(Mask); Res = DAG.getNode(ISD::SRL, DL, VT, N00, DAG.getConstant(lsb, MVT::i32)); Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res, @@ -8370,7 +8848,7 @@ static SDValue PerformORCombine(SDNode *N, // where lsb(mask) == #shamt and masked bits of B are known zero. 
SDValue ShAmt = N00.getOperand(1); unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue(); - unsigned LSB = CountTrailingZeros_32(Mask); + unsigned LSB = countTrailingZeros(Mask); if (ShAmtC != LSB) return SDValue(); @@ -8413,12 +8891,12 @@ static SDValue PerformBFICombine(SDNode *N, if (!N11C) return SDValue(); unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); - unsigned LSB = CountTrailingZeros_32(~InvMask); - unsigned Width = (32 - CountLeadingZeros_32(~InvMask)) - LSB; + unsigned LSB = countTrailingZeros(~InvMask); + unsigned Width = (32 - countLeadingZeros(~InvMask)) - LSB; unsigned Mask = (1 << Width)-1; unsigned Mask2 = N11C->getZExtValue(); if ((Mask & (~Mask2)) == 0) - return DCI.DAG.getNode(ARMISD::BFI, N->getDebugLoc(), N->getValueType(0), + return DCI.DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0), N->getOperand(0), N1.getOperand(0), N->getOperand(2)); } @@ -8444,7 +8922,7 @@ static SDValue PerformVMOVRRDCombine(SDNode *N, LoadSDNode *LD = cast<LoadSDNode>(InNode); SelectionDAG &DAG = DCI.DAG; - DebugLoc DL = LD->getDebugLoc(); + SDLoc DL(LD); SDValue BasePtr = LD->getBasePtr(); SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(), LD->isVolatile(), @@ -8481,7 +8959,7 @@ static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) { if (Op0.getOpcode() == ARMISD::VMOVRRD && Op0.getNode() == Op1.getNode() && Op0.getResNo() == 0 && Op1.getResNo() == 1) - return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), + return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op0.getOperand(0)); return SDValue(); } @@ -8523,7 +9001,7 @@ static SDValue PerformSTORECombine(SDNode *N, NumElems*SizeRatio); assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); - DebugLoc DL = St->getDebugLoc(); + SDLoc DL(St); SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal); SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1); for (unsigned i = 0; i < NumElems; ++i) ShuffleVec[i] = i * SizeRatio; @@ -8584,7 +9062,7 @@ static SDValue PerformSTORECombine(SDNode *N, if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR && StVal.getNode()->hasOneUse()) { SelectionDAG &DAG = DCI.DAG; - DebugLoc DL = St->getDebugLoc(); + SDLoc DL(St); SDValue BasePtr = St->getBasePtr(); SDValue NewST1 = DAG.getStore(St->getChain(), DL, StVal.getNode()->getOperand(0), BasePtr, @@ -8606,14 +9084,14 @@ static SDValue PerformSTORECombine(SDNode *N, // Bitcast an i64 store extracted from a vector to f64. // Otherwise, the i64 value will be legalized to a pair of i32 values. SelectionDAG &DAG = DCI.DAG; - DebugLoc dl = StVal.getDebugLoc(); + SDLoc dl(StVal); SDValue IntVec = StVal.getOperand(0); EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, IntVec.getValueType().getVectorNumElements()); SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec); SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Vec, StVal.getOperand(1)); - dl = N->getDebugLoc(); + dl = SDLoc(N); SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt); // Make the DAGCombiner fold the bitcasts. 
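PerformBFICombine recovers the written field's geometry from the inverted mask it carries: ~InvMask is the set of bits the BFI writes, its trailing-zero count gives the lsb, and its highest set bit bounds the width, exactly the arithmetic above. The same computation standalone, using the GCC/Clang count-leading/trailing-zero builtins:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // A BFI writing an 8-bit field at bit 4 carries InvMask = ~0x00000FF0.
      uint32_t InvMask = ~0x00000FF0u;
      uint32_t Field = ~InvMask; // the bits actually written

      unsigned LSB = __builtin_ctz(Field);                // 4
      unsigned Width = (32 - __builtin_clz(Field)) - LSB; // 8
      uint32_t Mask = (1u << Width) - 1;                  // 0xff

      std::printf("lsb=%u width=%u mask=0x%x\n", LSB, Width, Mask);
    }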
DCI.AddToWorklist(Vec.getNode()); @@ -8659,7 +9137,7 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N, EVT VT = N->getValueType(0); if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N)) return SDValue(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SmallVector<SDValue, 8> Ops; unsigned NumElts = VT.getVectorNumElements(); for (unsigned i = 0; i < NumElts; ++i) { @@ -8673,6 +9151,98 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N, return DAG.getNode(ISD::BITCAST, dl, VT, BV); } +/// \brief Target-specific dag combine xforms for ARMISD::BUILD_VECTOR. +static SDValue +PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { + // ARMISD::BUILD_VECTOR is introduced when legalizing ISD::BUILD_VECTOR. + // At that time, we may have inserted bitcasts from integer to float. + // If these bitcasts have survived DAGCombine, change the lowering of this + // BUILD_VECTOR into something more vector friendly, i.e., that does not + // force the use of floating point types. + + // Make sure we can change the type of the vector. + // This is possible iff: + // 1. The vector is only used in a bitcast to an integer type. I.e., + // 1.1. Vector is used only once. + // 1.2. Use is a bit convert to an integer type. + // 2. The size of its operands is 32 bits (64 bits are not legal). + EVT VT = N->getValueType(0); + EVT EltVT = VT.getVectorElementType(); + + // Check 1.1. and 2. + if (EltVT.getSizeInBits() != 32 || !N->hasOneUse()) + return SDValue(); + + // By construction, the input type must be float. + assert(EltVT == MVT::f32 && "Unexpected type!"); + + // Check 1.2. + SDNode *Use = *N->use_begin(); + if (Use->getOpcode() != ISD::BITCAST || + Use->getValueType(0).isFloatingPoint()) + return SDValue(); + + // Check profitability. + // The model is: if more than half of the relevant operands are bitcast from + // i32, turn the build_vector into a sequence of insert_vector_elt. + // Relevant operands are everything that is not statically + // (i.e., at compile time) bitcasted. + unsigned NumOfBitCastedElts = 0; + unsigned NumElts = VT.getVectorNumElements(); + unsigned NumOfRelevantElts = NumElts; + for (unsigned Idx = 0; Idx < NumElts; ++Idx) { + SDValue Elt = N->getOperand(Idx); + if (Elt->getOpcode() == ISD::BITCAST) { + // Assume only bit cast to i32 will go away. + if (Elt->getOperand(0).getValueType() == MVT::i32) + ++NumOfBitCastedElts; + } else if (Elt.getOpcode() == ISD::UNDEF || isa<ConstantSDNode>(Elt)) + // Constants are statically casted, thus do not count them as + // relevant operands. + --NumOfRelevantElts; + } + + // Check if more than half of the elements require a non-free bitcast. + if (NumOfBitCastedElts <= NumOfRelevantElts / 2) + return SDValue(); + + SelectionDAG &DAG = DCI.DAG; + // Create the new vector type. + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); + // Check if the type is legal. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!TLI.isTypeLegal(VecVT)) + return SDValue(); + + // Combine: + // ARMISD::BUILD_VECTOR E1, E2, ..., EN. + // => BITCAST INSERT_VECTOR_ELT + // (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1), + // (BITCAST EN), N. + SDValue Vec = DAG.getUNDEF(VecVT); + SDLoc dl(N); + for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) { + SDValue V = N->getOperand(Idx); + if (V.getOpcode() == ISD::UNDEF) + continue; + if (V.getOpcode() == ISD::BITCAST && + V->getOperand(0).getValueType() == MVT::i32) + // Fold obvious case.
+ V = V.getOperand(0); + else { + V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V); + // Make the DAGCombiner fold the bitcasts. + DCI.AddToWorklist(V.getNode()); + } + SDValue LaneIdx = DAG.getConstant(Idx, MVT::i32); + Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Vec, V, LaneIdx); + } + Vec = DAG.getNode(ISD::BITCAST, dl, VT, Vec); + // Make the DAGCombiner fold the bitcasts. + DCI.AddToWorklist(Vec.getNode()); + return Vec; +} + /// PerformInsertEltCombine - Target-specific dag combine xforms for /// ISD::INSERT_VECTOR_ELT. static SDValue PerformInsertEltCombine(SDNode *N, @@ -8686,7 +9256,7 @@ static SDValue PerformInsertEltCombine(SDNode *N, return SDValue(); SelectionDAG &DAG = DCI.DAG; - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, VT.getVectorNumElements()); SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0)); @@ -8732,7 +9302,7 @@ static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) { !TLI.isTypeLegal(Concat1Op1.getValueType())) return SDValue(); - SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, + SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Op0.getOperand(0), Op1.getOperand(0)); // Translate the shuffle mask. SmallVector<int, 16> NewMask; @@ -8748,7 +9318,7 @@ static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) { NewElt = HalfElts + MaskElt - NumElts; NewMask.push_back(NewElt); } - return DAG.getVectorShuffle(VT, N->getDebugLoc(), NewConcat, + return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat, DAG.getUNDEF(VT), NewMask.data()); } @@ -8865,7 +9435,7 @@ static SDValue CombineBaseUpdate(SDNode *N, Ops.push_back(N->getOperand(i)); } MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N); - SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, N->getDebugLoc(), SDTys, + SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, Ops.data(), Ops.size(), MemInt->getMemoryVT(), MemInt->getMemOperand()); @@ -8939,7 +9509,7 @@ static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1); SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) }; MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD); - SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, VLD->getDebugLoc(), SDTys, + SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys, Ops, 2, VLDMemInt->getMemoryVT(), VLDMemInt->getMemOperand()); @@ -8994,7 +9564,7 @@ static SDValue PerformVDUPLANECombine(SDNode *N, if (EltSize > VT.getVectorElementType().getSizeInBits()) return SDValue(); - return DCI.DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op); + return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op); } // isConstVecPow2 - Return true if each vector element is a power of 2, all @@ -9051,12 +9621,27 @@ static SDValue PerformVCVTCombine(SDNode *N, !isConstVecPow2(ConstVec, isSigned, C)) return SDValue(); + MVT FloatTy = Op.getSimpleValueType().getVectorElementType(); + MVT IntTy = N->getSimpleValueType(0).getVectorElementType(); + if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32) { + // These instructions only exist converting from f32 to i32. We can handle + // smaller integers by generating an extra truncate, but larger ones would + // be lossy. + return SDValue(); + } + unsigned IntrinsicOpcode = isSigned ? 
Intrinsic::arm_neon_vcvtfp2fxs : Intrinsic::arm_neon_vcvtfp2fxu; - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(), - N->getValueType(0), - DAG.getConstant(IntrinsicOpcode, MVT::i32), N0, - DAG.getConstant(Log2_64(C), MVT::i32)); + unsigned NumLanes = Op.getValueType().getVectorNumElements(); + SDValue FixConv = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), + NumLanes == 2 ? MVT::v2i32 : MVT::v4i32, + DAG.getConstant(IntrinsicOpcode, MVT::i32), N0, + DAG.getConstant(Log2_64(C), MVT::i32)); + + if (IntTy.getSizeInBits() < FloatTy.getSizeInBits()) + FixConv = DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), FixConv); + + return FixConv; } /// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD) @@ -9087,12 +9672,28 @@ static SDValue PerformVDIVCombine(SDNode *N, !isConstVecPow2(ConstVec, isSigned, C)) return SDValue(); + MVT FloatTy = N->getSimpleValueType(0).getVectorElementType(); + MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType(); + if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32) { + // These instructions only exist converting from i32 to f32. We can handle + // smaller integers by generating an extra extend, but larger ones would + // be lossy. + return SDValue(); + } + + SDValue ConvInput = Op.getOperand(0); + unsigned NumLanes = Op.getValueType().getVectorNumElements(); + if (IntTy.getSizeInBits() < FloatTy.getSizeInBits()) + ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, + SDLoc(N), NumLanes == 2 ? MVT::v2i32 : MVT::v4i32, + ConvInput); + unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp : Intrinsic::arm_neon_vcvtfxu2fp; - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(), + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), Op.getValueType(), DAG.getConstant(IntrinsicOpcode, MVT::i32), - Op.getOperand(0), DAG.getConstant(Log2_64(C), MVT::i32)); + ConvInput, DAG.getConstant(Log2_64(C), MVT::i32)); } /// Getvshiftimm - Check if this is a valid build_vector for the immediate @@ -9273,7 +9874,7 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { VShiftOpc = ARMISD::VQRSHRNsu; break; } - return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0), + return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0), N->getOperand(1), DAG.getConstant(Cnt, MVT::i32)); } @@ -9290,7 +9891,7 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { llvm_unreachable("invalid shift count for vsli/vsri intrinsic"); } - return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0), + return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2), DAG.getConstant(Cnt, MVT::i32)); } @@ -9321,7 +9922,7 @@ static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG, if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP && DAG.MaskedValueIsZero(N0.getOperand(0), APInt::getHighBitsSet(32, 16))) - return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::ROTR, SDLoc(N), VT, N0, N1); } } @@ -9338,7 +9939,7 @@ static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG, case ISD::SHL: if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) - return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0), + return DAG.getNode(ARMISD::VSHL, SDLoc(N), VT, N->getOperand(0), DAG.getConstant(Cnt, MVT::i32)); break; @@ -9347,7 +9948,7 @@ static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG, if (isVShiftRImm(N->getOperand(1), VT, false, false, 
Cnt)) { unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ? ARMISD::VSHRs : ARMISD::VSHRu); - return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0), + return DAG.getNode(VShiftOpc, SDLoc(N), VT, N->getOperand(0), DAG.getConstant(Cnt, MVT::i32)); } } @@ -9387,7 +9988,7 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, Opc = ARMISD::VGETLANEu; break; } - return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane); + return DAG.getNode(Opc, SDLoc(N), VT, Vec, Lane); } } @@ -9476,7 +10077,7 @@ static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, if (!Opcode) return SDValue(); - return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS); + return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), LHS, RHS); } /// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV. @@ -9488,7 +10089,7 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { return SDValue(); EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue LHS = Cmp.getOperand(0); SDValue RHS = Cmp.getOperand(1); SDValue FalseVal = N->getOperand(0); @@ -9578,6 +10179,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ARMISD::VLD3DUP: case ARMISD::VLD4DUP: return CombineBaseUpdate(N, DCI); + case ARMISD::BUILD_VECTOR: + return PerformARMBUILD_VECTORCombine(N, DCI); case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { @@ -9702,6 +10305,21 @@ bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { return false; } +bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const { + if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) + return false; + + if (!isTypeLegal(EVT::getEVT(Ty1))) + return false; + + assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop"); + + // Assuming the caller doesn't have a zeroext or signext return parameter, + // truncation all the way down to i1 is valid. + return true; +} + + static bool isLegalT1AddressImmediate(int64_t V, EVT VT) { if (V < 0) return false; @@ -10101,9 +10719,19 @@ void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth) const { - KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); + unsigned BitWidth = KnownOne.getBitWidth(); + KnownZero = KnownOne = APInt(BitWidth, 0); switch (Op.getOpcode()) { default: break; + case ARMISD::ADDC: + case ARMISD::ADDE: + case ARMISD::SUBC: + case ARMISD::SUBE: + // These nodes' second result is a boolean + if (Op.getResNo() == 0) + break; + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); + break; case ARMISD::CMOV: { // Bits are known zero/one if known on the LHS and RHS. 
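    // A small worked example with illustrative values: if operand 0 has
    //   KnownZero = 0xffff0000, KnownOne = 0x00000001
    // and operand 1 has
    //   KnownZero = 0xffffff00, KnownOne = 0x00000001,
    // then after the CMOV only the intersection survives:
    //   KnownZero = 0xffff0000, KnownOne = 0x00000001,
    // since a bit is known only when both inputs agree on it.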
DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); @@ -10217,7 +10845,7 @@ ARMTargetLowering::getSingleConstraintMatchWeight( typedef std::pair<unsigned, const TargetRegisterClass*> RCPair; RCPair ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const { + MVT VT) const { if (Constraint.size() == 1) { // GCC ARM Constraint Letters switch (Constraint[0]) { @@ -10232,6 +10860,8 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, case 'r': return RCPair(0U, &ARM::GPRRegClass); case 'w': + if (VT == MVT::Other) + break; if (VT == MVT::f32) return RCPair(0U, &ARM::SPRRegClass); if (VT.getSizeInBits() == 64) @@ -10240,6 +10870,8 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, return RCPair(0U, &ARM::QPRRegClass); break; case 'x': + if (VT == MVT::Other) + break; if (VT == MVT::f32) return RCPair(0U, &ARM::SPR_8RegClass); if (VT.getSizeInBits() == 64) @@ -10426,6 +11058,54 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); } +SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { + assert(Subtarget->isTargetAEABI() && "Register-based DivRem lowering only"); + unsigned Opcode = Op->getOpcode(); + assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) && + "Invalid opcode for Div/Rem lowering"); + bool isSigned = (Opcode == ISD::SDIVREM); + EVT VT = Op->getValueType(0); + Type *Ty = VT.getTypeForEVT(*DAG.getContext()); + + RTLIB::Libcall LC; + switch (VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected request for libcall!"); + case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; + case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; + case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; + case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; + } + + SDValue InChain = DAG.getEntryNode(); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) { + EVT ArgVT = Op->getOperand(i).getValueType(); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = Op->getOperand(i); + Entry.Ty = ArgTy; + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + + SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), + getPointerTy()); + + Type *RetTy = (Type*)StructType::get(Ty, Ty, NULL); + + SDLoc dl(Op); + TargetLowering:: + CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, true, + 0, getLibcallCallingConv(LC), /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, + Callee, Args, DAG, dl); + std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI); + + return CallInfo.first; +} + bool ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The ARM target isn't yet aware of offsets. 
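// A minimal C sketch of the call the LowerDivRem hunk above builds, assuming
// the AEABI convention that __aeabi_idivmod returns the quotient in r0 and
// the remainder in r1 (the struct and helper below are illustrative, not a
// real header):
//
//   typedef struct { int quot; int rem; } idiv_return;
//   extern idiv_return __aeabi_idivmod(int num, int denom);
//
//   int div_and_mod(int n, int d, int *rem) {
//     idiv_return qr = __aeabi_idivmod(n, d);  // one call, both results
//     *rem = qr.rem;                           // remainder (from r1)
//     return qr.quot;                          // quotient (from r0)
//   }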
@@ -10434,17 +11114,15 @@ ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { bool ARM::isBitFieldInvertedMask(unsigned v) { if (v == 0xffffffff) - return 0; + return false; + // there can be 1's on either or both "outsides", all the "inside" // bits must be 0's - unsigned int lsb = 0, msb = 31; - while (v & (1 << msb)) --msb; - while (v & (1 << lsb)) ++lsb; - for (unsigned int i = lsb; i <= msb; ++i) { - if (v & (1 << i)) - return 0; - } - return 1; + unsigned TO = CountTrailingOnes_32(v); + unsigned LO = CountLeadingOnes_32(v); + v = (v >> TO) << TO; + v = (v << LO) >> LO; + return v == 0; } /// isFPImmLegal - Returns true if the target can instruction select the @@ -10513,6 +11191,30 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.writeMem = true; return true; } + case Intrinsic::arm_ldrex: { + PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType()); + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::getVT(PtrTy->getElementType()); + Info.ptrVal = I.getArgOperand(0); + Info.offset = 0; + Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType()); + Info.vol = true; + Info.readMem = true; + Info.writeMem = false; + return true; + } + case Intrinsic::arm_strex: { + PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType()); + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::getVT(PtrTy->getElementType()); + Info.ptrVal = I.getArgOperand(1); + Info.offset = 0; + Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType()); + Info.vol = true; + Info.readMem = false; + Info.writeMem = true; + return true; + } case Intrinsic::arm_strexd: { Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i64; diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h index 426010e295d7..90facddeb02b 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h @@ -52,6 +52,7 @@ namespace llvm { BR_JT, // Jumptable branch. BR2_JT, // Jumptable branch (2 level - jumptable entry is a jump). RET_FLAG, // Return with a flag operand. + INTRET_FLAG, // Interrupt return with an LR-offset and a flag operand. PIC_ADD, // Add with a PC operand and a PIC label. @@ -94,7 +95,6 @@ namespace llvm { DYN_ALLOC, // Dynamic allocation on the stack. - MEMBARRIER, // Memory barrier (DMB) MEMBARRIER_MCR, // Memory barrier (MCR) PRELOAD, // Preload @@ -186,6 +186,8 @@ namespace llvm { // Floating-point max and min: FMAX, FMIN, + VMAXNM, + VMINNM, // Bit-field insert BFI, @@ -222,21 +224,7 @@ namespace llvm { VST4_UPD, VST2LN_UPD, VST3LN_UPD, - VST4LN_UPD, - - // 64-bit atomic ops (value split into two registers) - ATOMADD64_DAG, - ATOMSUB64_DAG, - ATOMOR64_DAG, - ATOMXOR64_DAG, - ATOMAND64_DAG, - ATOMNAND64_DAG, - ATOMSWAP64_DAG, - ATOMCMPXCHG64_DAG, - ATOMMIN64_DAG, - ATOMUMIN64_DAG, - ATOMMAX64_DAG, - ATOMUMAX64_DAG + VST4LN_UPD }; } @@ -270,7 +258,7 @@ namespace llvm { } /// getSetCCResultType - Return the value type to use for ISD::SETCC. 
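    /// (ARM's override matters mostly for vectors: a vector setcc yields a
    /// vector whose integer elements match the compared element width, while
    /// a scalar compare still yields i32.)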
- virtual EVT getSetCCResultType(EVT VT) const; + virtual EVT getSetCCResultType(LLVMContext &Context, EVT VT) const; virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, @@ -298,6 +286,9 @@ namespace llvm { using TargetLowering::isZExtFree; virtual bool isZExtFree(SDValue Val, EVT VT2) const; + virtual bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const; + + /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const; @@ -349,7 +340,7 @@ namespace llvm { std::pair<unsigned, const TargetRegisterClass*> getRegForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const; + MVT VT) const; /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. If hasMemory is @@ -372,6 +363,12 @@ namespace llvm { /// be used for loads / stores from the global. virtual unsigned getMaximalGlobalOffset() const; + /// Returns true if a cast between SrcAS and DestAS is a noop. + virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { + // Addrspacecasts are always noops. + return true; + } + /// createFastISel - This method returns a target specific FastISel object, /// or null if the target does not support "fast" ISel. virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo, @@ -412,21 +409,21 @@ namespace llvm { void addQRTypeForNEON(MVT VT); typedef SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPassVector; - void PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG, + void PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG, SDValue Chain, SDValue &Arg, RegsToPassVector &RegsToPass, CCValAssign &VA, CCValAssign &NextVA, SDValue &StackPtr, - SmallVector<SDValue, 8> &MemOpChains, + SmallVectorImpl<SDValue> &MemOpChains, ISD::ArgFlagsTy Flags) const; SDValue GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, SDValue &Root, SelectionDAG &DAG, - DebugLoc dl) const; + SDLoc dl) const; CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return, bool isVarArg) const; SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, const CCValAssign &VA, ISD::ArgFlagsTy Flags) const; SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; @@ -457,13 +454,26 @@ namespace llvm { const ARMSubtarget *ST) const; SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST) const; + SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const; + + /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster + /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be + /// expanded to FMAs when this method returns true, otherwise fmuladd is + /// expanded to fmul + fadd. + /// + /// ARM supports both fused and unfused multiply-add operations; we already + /// lower a pair of fmul and fadd to the latter so it's not clear that there + /// would be a gain or that the gain would be worthwhile enough to risk + /// correctness bugs. 
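    /// (Returning false here only affects @llvm.fmuladd expansion; an
    /// explicit @llvm.fma intrinsic is still selected to VFMA and friends on
    /// VFPv4 targets.)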
+ virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const { return false; } SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn, SDValue ThisVal) const; @@ -471,24 +481,26 @@ namespace llvm { LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; int StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, - DebugLoc dl, SDValue &Chain, + SDLoc dl, SDValue &Chain, const Value *OrigArg, unsigned InRegsParamRecordIdx, unsigned OffsetFromOrigArg, unsigned ArgOffset, + unsigned ArgSize, bool ForceMutable) const; void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, - DebugLoc dl, SDValue &Chain, + SDLoc dl, SDValue &Chain, unsigned ArgOffset, bool ForceMutable = false) const; void computeRegArea(CCState &CCInfo, MachineFunction &MF, unsigned InRegsParamRecordIdx, + unsigned ArgSize, unsigned &ArgRegsSize, unsigned &ArgRegsSaveSize) const; @@ -522,16 +534,16 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, - DebugLoc dl, SelectionDAG &DAG) const; + SDLoc dl, SelectionDAG &DAG) const; virtual bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const; virtual bool mayBeEmittedAsTailCall(CallInst *CI) const; SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, - SDValue &ARMcc, SelectionDAG &DAG, DebugLoc dl) const; + SDValue &ARMcc, SelectionDAG &DAG, SDLoc dl) const; SDValue getVFPCmp(SDValue LHS, SDValue RHS, - SelectionDAG &DAG, DebugLoc dl) const; + SelectionDAG &DAG, SDLoc dl) const; SDValue duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const; SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const; @@ -556,6 +568,8 @@ namespace llvm { unsigned Size, bool signExtend, ARMCC::CondCodes Cond) const; + MachineBasicBlock *EmitAtomicLoad64(MachineInstr *MI, + MachineBasicBlock *BB) const; void SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td index 67a6820932fc..f93504fddb8e 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td @@ -155,6 +155,16 @@ def pred : PredicateOperand<OtherVT, (ops i32imm, i32imm), let DecoderMethod = "DecodePredicateOperand"; } +// Selectable predicate operand for CMOV instructions. We can't use a normal +// predicate because the default values interfere with instruction selection. In +// all other respects it is identical though: pseudo-instruction expansion +// relies on the MachineOperands being compatible. +def cmovpred : Operand<i32>, PredicateOp, + ComplexPattern<i32, 2, "SelectCMOVPred"> { + let MIOperandInfo = (ops i32imm, i32imm); + let PrintMethod = "printPredicateOperand"; +} + // Conditional code result for instructions whose 's' bit is set, e.g. subs. 
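// (The optional def below is either CPSR, for the flag-setting "subs" form,
// or zero_reg for plain "sub"; the asm matcher picks between the two forms
// through this operand.)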
def CCOutOperand : AsmOperandClass { let Name = "CCOut"; } def cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 zero_reg))> { @@ -237,6 +247,8 @@ class t2InstAlias<string Asm, dag Result, bit Emit = 0b1> : InstAlias<Asm, Result, Emit>, Requires<[IsThumb2]>; class VFP2InstAlias<string Asm, dag Result, bit Emit = 0b1> : InstAlias<Asm, Result, Emit>, Requires<[HasVFP2]>; +class VFP2DPInstAlias<string Asm, dag Result, bit Emit = 0b1> + : InstAlias<Asm, Result, Emit>, Requires<[HasVFP2,HasDPVFP]>; class VFP3InstAlias<string Asm, dag Result, bit Emit = 0b1> : InstAlias<Asm, Result, Emit>, Requires<[HasVFP3]>; class NEONInstAlias<string Asm, dag Result, bit Emit = 0b1> @@ -490,8 +502,7 @@ class JTI<dag oops, dag iops, InstrItinClass itin, : XI<oops, iops, AddrModeNone, 0, IndexModeNone, BrMiscFrm, itin, asm, "", pattern>; -// Atomic load/store instructions -class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, +class AIldr_ex_or_acq<bits<2> opcod, bits<2> opcod2, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> : I<oops, iops, AddrModeNone, 4, IndexModeNone, LdStExFrm, itin, opc, asm, "", pattern> { @@ -502,23 +513,52 @@ class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, let Inst{20} = 1; let Inst{19-16} = addr; let Inst{15-12} = Rt; - let Inst{11-0} = 0b111110011111; + let Inst{11-10} = 0b11; + let Inst{9-8} = opcod2; + let Inst{7-0} = 0b10011111; } -class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, +class AIstr_ex_or_rel<bits<2> opcod, bits<2> opcod2, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> : I<oops, iops, AddrModeNone, 4, IndexModeNone, LdStExFrm, itin, opc, asm, "", pattern> { - bits<4> Rd; bits<4> Rt; bits<4> addr; let Inst{27-23} = 0b00011; let Inst{22-21} = opcod; let Inst{20} = 0; let Inst{19-16} = addr; - let Inst{15-12} = Rd; - let Inst{11-4} = 0b11111001; + let Inst{11-10} = 0b11; + let Inst{9-8} = opcod2; + let Inst{7-4} = 0b1001; let Inst{3-0} = Rt; } +// Atomic load/store instructions +class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : AIldr_ex_or_acq<opcod, 0b11, oops, iops, itin, opc, asm, pattern>; + +class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : AIstr_ex_or_rel<opcod, 0b11, oops, iops, itin, opc, asm, pattern> { + bits<4> Rd; + let Inst{15-12} = Rd; +} + +// Exclusive load/store instructions + +class AIldaex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : AIldr_ex_or_acq<opcod, 0b10, oops, iops, itin, opc, asm, pattern>, + Requires<[IsARM, HasV8]>; + +class AIstlex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : AIstr_ex_or_rel<opcod, 0b10, oops, iops, itin, opc, asm, pattern>, + Requires<[IsARM, HasV8]> { + bits<4> Rd; + let Inst{15-12} = Rd; +} + class AIswp<bit b, dag oops, dag iops, string opc, list<dag> pattern> : AI<oops, iops, MiscFrm, NoItinerary, opc, "\t$Rt, $Rt2, $addr", pattern> { bits<4> Rt; @@ -535,6 +575,18 @@ class AIswp<bit b, dag oops, dag iops, string opc, list<dag> pattern> let Unpredictable{11-8} = 0b1111; let DecoderMethod = "DecodeSwap"; } +// Acquire/Release load/store instructions +class AIldracq<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : AIldr_ex_or_acq<opcod, 0b00, oops, iops, itin, opc, asm, 
pattern>, + Requires<[IsARM, HasV8]>; + +class AIstrrel<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : AIstr_ex_or_rel<opcod, 0b00, oops, iops, itin, opc, asm, pattern>, + Requires<[IsARM, HasV8]> { + let Inst{15-12} = 0b1111; +} // addrmode1 instructions class AI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin, @@ -1230,8 +1282,9 @@ class T2JTI<dag oops, dag iops, InstrItinClass itin, : Thumb2XI<oops, iops, AddrModeNone, 0, itin, asm, "", pattern>; // Move to/from coprocessor instructions -class T2Cop<bits<4> opc, dag oops, dag iops, string asm, list<dag> pattern> - : T2XI <oops, iops, NoItinerary, asm, pattern>, Requires<[IsThumb2]> { +class T2Cop<bits<4> opc, dag oops, dag iops, string opcstr, string asm, + list<dag> pattern> + : T2I <oops, iops, NoItinerary, opcstr, asm, pattern>, Requires<[IsThumb2]> { let Inst{31-28} = opc; } @@ -1389,7 +1442,6 @@ class ADI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops, let Inst{15-12} = Dd{3-0}; let Inst{7-0} = addr{7-0}; // imm8 - // TODO: Mark the instructions with the appropriate subtarget info. let Inst{27-24} = opcod1; let Inst{21-20} = opcod2; let Inst{11-9} = 0b101; @@ -1415,7 +1467,6 @@ class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops, let Inst{15-12} = Sd{4-1}; let Inst{7-0} = addr{7-0}; // imm8 - // TODO: Mark the instructions with the appropriate subtarget info. let Inst{27-24} = opcod1; let Inst{21-20} = opcod2; let Inst{11-9} = 0b101; @@ -1437,6 +1488,28 @@ class PseudoVFPLdStM<dag oops, dag iops, InstrItinClass itin, string cstr, } // Load / store multiple + +// Unknown precision +class AXXI4<dag oops, dag iops, IndexMode im, + string asm, string cstr, list<dag> pattern> + : VFPXI<oops, iops, AddrMode4, 4, im, + VFPLdStFrm, NoItinerary, asm, cstr, pattern> { + // Instruction operands. + bits<4> Rn; + bits<13> regs; + + // Encode instruction operands. + let Inst{19-16} = Rn; + let Inst{22} = 0; + let Inst{15-12} = regs{11-8}; + let Inst{7-1} = regs{7-1}; + + let Inst{27-25} = 0b110; + let Inst{11-8} = 0b1011; + let Inst{0} = 1; +} + +// Double precision class AXDI4<dag oops, dag iops, IndexMode im, InstrItinClass itin, string asm, string cstr, list<dag> pattern> : VFPXI<oops, iops, AddrMode4, 4, im, @@ -1449,14 +1522,15 @@ class AXDI4<dag oops, dag iops, IndexMode im, InstrItinClass itin, let Inst{19-16} = Rn; let Inst{22} = regs{12}; let Inst{15-12} = regs{11-8}; - let Inst{7-0} = regs{7-0}; + let Inst{7-1} = regs{7-1}; - // TODO: Mark the instructions with the appropriate subtarget info. let Inst{27-25} = 0b110; let Inst{11-9} = 0b101; let Inst{8} = 1; // Double precision + let Inst{0} = 0; } +// Single Precision class AXSI4<dag oops, dag iops, IndexMode im, InstrItinClass itin, string asm, string cstr, list<dag> pattern> : VFPXI<oops, iops, AddrMode4, 4, im, @@ -1471,7 +1545,6 @@ class AXSI4<dag oops, dag iops, IndexMode im, InstrItinClass itin, let Inst{15-12} = regs{12-9}; let Inst{7-0} = regs{7-0}; - // TODO: Mark the instructions with the appropriate subtarget info. 
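  // Fixed fields: bits 27-25 = 0b110 put these in the coprocessor load/store
  // space, bits 11-9 = 0b101 select the VFP coprocessors (cp10/cp11), and
  // bit 8 picks double (1) versus single (0) precision.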
let Inst{27-25} = 0b110; let Inst{11-9} = 0b101; let Inst{8} = 0; // Single precision @@ -1499,6 +1572,34 @@ class ADuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, let Inst{8} = 1; // Double precision let Inst{7-6} = opcod4; let Inst{4} = opcod5; + + let Predicates = [HasVFP2, HasDPVFP]; +} + +// Double precision, unary, not-predicated +class ADuInp<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, + bit opcod5, dag oops, dag iops, InstrItinClass itin, + string asm, list<dag> pattern> + : VFPXI<oops, iops, AddrModeNone, 4, IndexModeNone, VFPUnaryFrm, itin, asm, "", pattern> { + // Instruction operands. + bits<5> Dd; + bits<5> Dm; + + let Inst{31-28} = 0b1111; + + // Encode instruction operands. + let Inst{3-0} = Dm{3-0}; + let Inst{5} = Dm{4}; + let Inst{15-12} = Dd{3-0}; + let Inst{22} = Dd{4}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{19-16} = opcod3; + let Inst{11-9} = 0b101; + let Inst{8} = 1; // Double precision + let Inst{7-6} = opcod4; + let Inst{4} = opcod5; } // Double precision, binary @@ -1525,9 +1626,42 @@ class ADbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, let Inst{8} = 1; // Double precision let Inst{6} = op6; let Inst{4} = op4; + + let Predicates = [HasVFP2, HasDPVFP]; +} + +// FP, binary, not predicated +class ADbInp<bits<5> opcod1, bits<2> opcod2, bit opcod3, dag oops, dag iops, + InstrItinClass itin, string asm, list<dag> pattern> + : VFPXI<oops, iops, AddrModeNone, 4, IndexModeNone, VFPBinaryFrm, itin, + asm, "", pattern> +{ + // Instruction operands. + bits<5> Dd; + bits<5> Dn; + bits<5> Dm; + + let Inst{31-28} = 0b1111; + + // Encode instruction operands. + let Inst{3-0} = Dm{3-0}; + let Inst{5} = Dm{4}; + let Inst{19-16} = Dn{3-0}; + let Inst{7} = Dn{4}; + let Inst{15-12} = Dd{3-0}; + let Inst{22} = Dd{4}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{11-9} = 0b101; + let Inst{8} = 1; // double precision + let Inst{6} = opcod3; + let Inst{4} = 0; + + let Predicates = [HasVFP2, HasDPVFP]; } -// Single precision, unary +// Single precision, unary, predicated class ASuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> @@ -1551,6 +1685,33 @@ class ASuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, let Inst{4} = opcod5; } +// Single precision, unary, non-predicated +class ASuInp<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, + bit opcod5, dag oops, dag iops, InstrItinClass itin, + string asm, list<dag> pattern> + : VFPXI<oops, iops, AddrModeNone, 4, IndexModeNone, + VFPUnaryFrm, itin, asm, "", pattern> { + // Instruction operands. + bits<5> Sd; + bits<5> Sm; + + let Inst{31-28} = 0b1111; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{19-16} = opcod3; + let Inst{11-9} = 0b101; + let Inst{8} = 0; // Single precision + let Inst{7-6} = opcod4; + let Inst{4} = opcod5; +} + // Single precision unary, if no NEON. Same as ASuI except not available if // NEON is enabled. 
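// (On cores that prefer to run single-precision FP in the NEON unit these
// patterns are deliberately unavailable, so the NEON equivalents match
// instead.)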
class ASuIn<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, @@ -1586,6 +1747,35 @@ class ASbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops, let Inst{4} = op4; } +// Single precision, binary, not predicated +class ASbInp<bits<5> opcod1, bits<2> opcod2, bit opcod3, dag oops, dag iops, + InstrItinClass itin, string asm, list<dag> pattern> + : VFPXI<oops, iops, AddrModeNone, 4, IndexModeNone, + VFPBinaryFrm, itin, asm, "", pattern> +{ + // Instruction operands. + bits<5> Sd; + bits<5> Sn; + bits<5> Sm; + + let Inst{31-28} = 0b1111; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{19-16} = Sn{4-1}; + let Inst{7} = Sn{0}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{11-9} = 0b101; + let Inst{8} = 0; // Single precision + let Inst{6} = opcod3; + let Inst{4} = 0; +} + // Single precision binary, if no NEON. Same as ASbI except not available if // NEON is enabled. class ASbIn<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, @@ -1698,6 +1888,21 @@ class NeonXI<dag oops, dag iops, AddrMode am, IndexMode im, Format f, let DecoderNamespace = "NEON"; } +// Same as NeonI except it is not predicated +class NeonInp<dag oops, dag iops, AddrMode am, IndexMode im, Format f, + InstrItinClass itin, string opc, string dt, string asm, string cstr, + list<dag> pattern> + : InstARM<am, 4, im, f, NeonDomain, cstr, itin> { + let OutOperandList = oops; + let InOperandList = iops; + let AsmString = !strconcat(opc, ".", dt, "\t", asm); + let Pattern = pattern; + list<Predicate> Predicates = [HasNEON]; + let DecoderNamespace = "NEON"; + + let Inst{31-28} = 0b1111; +} + class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4, dag oops, dag iops, InstrItinClass itin, string opc, string dt, string asm, string cstr, list<dag> pattern> @@ -1817,6 +2022,35 @@ class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, let Inst{5} = Vm{4}; } +// Same as N2V but not predicated. +class N2Vnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6, + dag oops, dag iops, InstrItinClass itin, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, list<dag> pattern> + : NeonInp<oops, iops, AddrModeNone, IndexModeNone, N2RegFrm, itin, + OpcodeStr, Dt, "$Vd, $Vm", "", pattern> { + bits<5> Vd; + bits<5> Vm; + + // Encode instruction operands + let Inst{22} = Vd{4}; + let Inst{15-12} = Vd{3-0}; + let Inst{5} = Vm{4}; + let Inst{3-0} = Vm{3-0}; + + // Encode constant bits + let Inst{27-23} = 0b00111; + let Inst{21-20} = 0b11; + let Inst{19-18} = op19_18; + let Inst{17-16} = op17_16; + let Inst{11} = 0; + let Inst{10-8} = op10_8; + let Inst{7} = op7; + let Inst{6} = op6; + let Inst{4} = 0; + + let DecoderNamespace = "NEON"; +} + // Same as N2V except it doesn't have a datatype suffix. 
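// (VSWP is a typical user: it prints as plain "vswp", with no ".i32"-style
// data type marker after the mnemonic.)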
class N2VX<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, @@ -1898,6 +2132,32 @@ class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4, let Inst{5} = Vm{4}; } +class N3Vnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, + bit op4, dag oops, dag iops,Format f, InstrItinClass itin, + string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, + SDPatternOperator IntOp, bit Commutable, list<dag> pattern> + : NeonInp<oops, iops, AddrModeNone, IndexModeNone, f, itin, OpcodeStr, + Dt, "$Vd, $Vn, $Vm", "", pattern> { + bits<5> Vd; + bits<5> Vn; + bits<5> Vm; + + // Encode instruction operands + let Inst{22} = Vd{4}; + let Inst{15-12} = Vd{3-0}; + let Inst{19-16} = Vn{3-0}; + let Inst{7} = Vn{4}; + let Inst{5} = Vm{4}; + let Inst{3-0} = Vm{3-0}; + + // Encode constant bits + let Inst{27-23} = op27_23; + let Inst{21-20} = op21_20; + let Inst{11-8} = op11_8; + let Inst{6} = op6; + let Inst{4} = op4; +} + class N3VLane32<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4, dag oops, dag iops, Format f, InstrItinClass itin, string opc, string dt, string asm, string cstr, diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp index 80f0ec74376a..df867b49ab57 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp @@ -22,6 +22,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/MC/MCAsmInfo.h" @@ -29,7 +30,7 @@ using namespace llvm; ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) - : ARMBaseInstrInfo(STI), RI(*this, STI) { + : ARMBaseInstrInfo(STI), RI(STI) { } /// getNoopForMachoTarget - Return the noop instruction to use for a noop. @@ -106,29 +107,42 @@ namespace { if (TM->getRelocationModel() != Reloc::PIC_) return false; - LLVMContext* Context = &MF.getFunction()->getContext(); - GlobalValue *GV = new GlobalVariable(Type::getInt32Ty(*Context), false, - GlobalValue::ExternalLinkage, 0, - "_GLOBAL_OFFSET_TABLE_"); - unsigned Id = AFI->createPICLabelUId(); - ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id); - unsigned Align = TM->getDataLayout()->getPrefTypeAlignment(GV->getType()); + LLVMContext *Context = &MF.getFunction()->getContext(); + unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); + unsigned PCAdj = TM->getSubtarget<ARMSubtarget>().isThumb() ? 4 : 8; + ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create( + *Context, "_GLOBAL_OFFSET_TABLE_", ARMPCLabelIndex, PCAdj); + + unsigned Align = TM->getDataLayout() + ->getPrefTypeAlignment(Type::getInt32PtrTy(*Context)); unsigned Idx = MF.getConstantPool()->getConstantPoolIndex(CPV, Align); MachineBasicBlock &FirstMBB = MF.front(); MachineBasicBlock::iterator MBBI = FirstMBB.begin(); DebugLoc DL = FirstMBB.findDebugLoc(MBBI); - unsigned GlobalBaseReg = AFI->getGlobalBaseReg(); + unsigned TempReg = + MF.getRegInfo().createVirtualRegister(&ARM::rGPRRegClass); unsigned Opc = TM->getSubtarget<ARMSubtarget>().isThumb2() ? 
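          // Thumb2 reaches the GOT address with a PC-relative literal load;
          // ARM mode uses LDRcp, which takes the extra zero immediate added
          // just below.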
ARM::t2LDRpci : ARM::LDRcp; const TargetInstrInfo &TII = *TM->getInstrInfo(); MachineInstrBuilder MIB = BuildMI(FirstMBB, MBBI, DL, - TII.get(Opc), GlobalBaseReg) + TII.get(Opc), TempReg) .addConstantPoolIndex(Idx); if (Opc == ARM::LDRcp) MIB.addImm(0); AddDefaultPred(MIB); + // Fix the GOT address by adding pc. + unsigned GlobalBaseReg = AFI->getGlobalBaseReg(); + Opc = TM->getSubtarget<ARMSubtarget>().isThumb2() ? ARM::tPICADD + : ARM::PICADD; + MIB = BuildMI(FirstMBB, MBBI, DL, TII.get(Opc), GlobalBaseReg) + .addReg(TempReg) + .addImm(ARMPCLabelIndex); + if (Opc == ARM::PICADD) + AddDefaultPred(MIB); + + return true; } diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td index 1bd174e34162..2042c0460932 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -71,6 +71,9 @@ def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; +def SDT_ARMVMAXNM : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisFP<1>, SDTCisFP<2>]>; +def SDT_ARMVMINNM : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisFP<1>, SDTCisFP<2>]>; + def SDTBinaryArithWithFlags : SDTypeProfile<2, 2, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, @@ -118,7 +121,8 @@ def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall, def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; - +def ARMintretflag : SDNode<"ARMISD::INTRET_FLAG", SDT_ARMcall, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov, [SDNPInGlue]>; @@ -162,8 +166,6 @@ def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP", SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain, SDNPSideEffect]>; -def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER, - [SDNPHasChain, SDNPSideEffect]>; def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER, [SDNPHasChain, SDNPSideEffect]>; def ARMPreload : SDNode<"ARMISD::PRELOAD", SDT_ARMPREFETCH, @@ -174,9 +176,11 @@ def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>; def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; - def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>; +def ARMvmaxnm : SDNode<"ARMISD::VMAXNM", SDT_ARMVMAXNM, []>; +def ARMvminnm : SDNode<"ARMISD::VMINNM", SDT_ARMVMINNM, []>; + //===----------------------------------------------------------------------===// // ARM Instruction Predicate Definitions. 
// @@ -189,11 +193,18 @@ def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">, def HasV6 : Predicate<"Subtarget->hasV6Ops()">, AssemblerPredicate<"HasV6Ops", "armv6">; def NoV6 : Predicate<"!Subtarget->hasV6Ops()">; +def HasV6M : Predicate<"Subtarget->hasV6MOps()">, + AssemblerPredicate<"HasV6MOps", + "armv6m or armv6t2">; def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">, AssemblerPredicate<"HasV6T2Ops", "armv6t2">; def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">; def HasV7 : Predicate<"Subtarget->hasV7Ops()">, AssemblerPredicate<"HasV7Ops", "armv7">; +def HasV8 : Predicate<"Subtarget->hasV8Ops()">, + AssemblerPredicate<"HasV8Ops", "armv8">; +def PreV8 : Predicate<"!Subtarget->hasV8Ops()">, + AssemblerPredicate<"!HasV8Ops", "armv7 or earlier">; def NoVFP : Predicate<"!Subtarget->hasVFP2()">; def HasVFP2 : Predicate<"Subtarget->hasVFP2()">, AssemblerPredicate<"FeatureVFP2", "VFP2">; @@ -201,14 +212,23 @@ def HasVFP3 : Predicate<"Subtarget->hasVFP3()">, AssemblerPredicate<"FeatureVFP3", "VFP3">; def HasVFP4 : Predicate<"Subtarget->hasVFP4()">, AssemblerPredicate<"FeatureVFP4", "VFP4">; +def HasDPVFP : Predicate<"!Subtarget->isFPOnlySP()">, + AssemblerPredicate<"!FeatureVFPOnlySP", + "double precision VFP">; +def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">, + AssemblerPredicate<"FeatureFPARMv8", "FPARMv8">; def HasNEON : Predicate<"Subtarget->hasNEON()">, AssemblerPredicate<"FeatureNEON", "NEON">; +def HasCrypto : Predicate<"Subtarget->hasCrypto()">, + AssemblerPredicate<"FeatureCrypto", "crypto">; +def HasCRC : Predicate<"Subtarget->hasCRC()">, + AssemblerPredicate<"FeatureCRC", "crc">; def HasFP16 : Predicate<"Subtarget->hasFP16()">, AssemblerPredicate<"FeatureFP16","half-float">; def HasDivide : Predicate<"Subtarget->hasDivide()">, - AssemblerPredicate<"FeatureHWDiv", "divide">; + AssemblerPredicate<"FeatureHWDiv", "divide in THUMB">; def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">, - AssemblerPredicate<"FeatureHWDivARM">; + AssemblerPredicate<"FeatureHWDivARM", "divide in ARM">; def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">, AssemblerPredicate<"FeatureT2XtPk", "pack/extract">; @@ -233,10 +253,10 @@ def IsThumb2 : Predicate<"Subtarget->isThumb2()">, AssemblerPredicate<"ModeThumb,FeatureThumb2", "thumb2">; def IsMClass : Predicate<"Subtarget->isMClass()">, - AssemblerPredicate<"FeatureMClass", "armv7m">; -def IsARClass : Predicate<"!Subtarget->isMClass()">, + AssemblerPredicate<"FeatureMClass", "armv*m">; +def IsNotMClass : Predicate<"!Subtarget->isMClass()">, AssemblerPredicate<"!FeatureMClass", - "armv7a/r">; + "!armv*m">; def IsARM : Predicate<"!Subtarget->isThumb()">, AssemblerPredicate<"!ModeThumb", "arm-mode">; def IsIOS : Predicate<"Subtarget->isTargetIOS()">; @@ -258,7 +278,9 @@ def UseMulOps : Predicate<"Subtarget->useMulOps()">; def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion ==" " FPOpFusion::Fast) && " "!Subtarget->isTargetDarwin()">; -def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4() || " +def DontUseFusedMAC : Predicate<"!(TM.Options.AllowFPOpFusion ==" + " FPOpFusion::Fast &&" + " Subtarget->hasVFP4()) || " "Subtarget->isTargetDarwin()">; // VGETLNi32 is microcoded on Swift - prefer VMOV. 
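// (Throughout these predicate hunks the second AssemblerPredicate argument is
// the feature name surfaced in assembler diagnostics, so an instruction gated
// on HasDivideInARM now reports "divide in ARM" when matched on a core
// without that feature.)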
@@ -275,8 +297,8 @@ def HasSlowVDUP32 : Predicate<"Subtarget->isSwift()">; def UseVMOVSR : Predicate<"Subtarget->isCortexA9() || !Subtarget->useNEONForSinglePrecisionFP()">; def DontUseVMOVSR : Predicate<"!Subtarget->isCortexA9() && Subtarget->useNEONForSinglePrecisionFP()">; -def IsLE : Predicate<"TLI.isLittleEndian()">; -def IsBE : Predicate<"TLI.isBigEndian()">; +def IsLE : Predicate<"getTargetLowering()->isLittleEndian()">; +def IsBE : Predicate<"getTargetLowering()->isBigEndian()">; //===----------------------------------------------------------------------===// // ARM Flag Definitions. @@ -456,7 +478,7 @@ def AdrLabelAsmOperand : AsmOperandClass { let Name = "AdrLabel"; } def adrlabel : Operand<i32> { let EncoderMethod = "getAdrLabelOpValue"; let ParserMatchClass = AdrLabelAsmOperand; - let PrintMethod = "printAdrLabelOperand"; + let PrintMethod = "printAdrLabelOperand<0>"; } def neon_vcvt_imm32 : Operand<i32> { @@ -581,17 +603,6 @@ def imm0_1 : Operand<i32> { let ParserMatchClass = Imm0_1AsmOperand; } def Imm0_3AsmOperand: ImmAsmOperand { let Name = "Imm0_3"; } def imm0_3 : Operand<i32> { let ParserMatchClass = Imm0_3AsmOperand; } -/// imm0_4 predicate - Immediate in the range [0,4]. -def Imm0_4AsmOperand : ImmAsmOperand -{ - let Name = "Imm0_4"; - let DiagnosticType = "ImmRange0_4"; -} -def imm0_4 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 5; }]> { - let ParserMatchClass = Imm0_4AsmOperand; - let DecoderMethod = "DecodeImm0_4"; -} - /// imm0_7 predicate - Immediate in the range [0,7]. def Imm0_7AsmOperand: ImmAsmOperand { let Name = "Imm0_7"; } def imm0_7 : Operand<i32>, ImmLeaf<i32, [{ @@ -671,6 +682,15 @@ def imm0_63 : Operand<i32>, ImmLeaf<i32, [{ let ParserMatchClass = Imm0_63AsmOperand; } +/// imm0_239 predicate - Immediate in the range [0,239]. +def Imm0_239AsmOperand : ImmAsmOperand { + let Name = "Imm0_239"; + let DiagnosticType = "ImmRange0_239"; +} +def imm0_239 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 240; }]> { + let ParserMatchClass = Imm0_239AsmOperand; +} + /// imm0_255 predicate - Immediate in the range [0,255]. def Imm0_255AsmOperand : ImmAsmOperand { let Name = "Imm0_255"; } def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> { @@ -702,6 +722,11 @@ def imm0_65535_expr : Operand<i32> { let ParserMatchClass = Imm0_65535ExprAsmOperand; } +def Imm256_65535ExprAsmOperand: ImmAsmOperand { let Name = "Imm256_65535Expr"; } +def imm256_65535_expr : Operand<i32> { + let ParserMatchClass = Imm256_65535ExprAsmOperand; +} + /// imm24b - True if the 32-bit immediate is encodable in 24 bits. 
def Imm24bitAsmOperand: ImmAsmOperand { let Name = "Imm24bit"; } def imm24b : Operand<i32>, ImmLeaf<i32, [{ @@ -1007,11 +1032,6 @@ def p_imm : Operand<i32> { let DecoderMethod = "DecodeCoprocessor"; } -def pf_imm : Operand<i32> { - let PrintMethod = "printPImmediate"; - let ParserMatchClass = CoprocNumAsmOperand; -} - def CoprocRegAsmOperand : AsmOperandClass { let Name = "CoprocReg"; let ParserMethod = "parseCoprocRegOperand"; @@ -1327,7 +1347,7 @@ class AI_ext_rrot<bits<8> opcod, string opc, PatFrag opnode> : AExtI<opcod, (outs GPRnopc:$Rd), (ins GPRnopc:$Rm, rot_imm:$rot), IIC_iEXTr, opc, "\t$Rd, $Rm$rot", [(set GPRnopc:$Rd, (opnode (rotr GPRnopc:$Rm, rot_imm:$rot)))]>, - Requires<[IsARM, HasV6]> { + Requires<[IsARM, HasV6]>, Sched<[WriteALUsi]> { bits<4> Rd; bits<4> Rm; bits<2> rot; @@ -1340,11 +1360,11 @@ class AI_ext_rrot<bits<8> opcod, string opc, PatFrag opnode> class AI_ext_rrot_np<bits<8> opcod, string opc> : AExtI<opcod, (outs GPRnopc:$Rd), (ins GPRnopc:$Rm, rot_imm:$rot), IIC_iEXTr, opc, "\t$Rd, $Rm$rot", []>, - Requires<[IsARM, HasV6]> { + Requires<[IsARM, HasV6]>, Sched<[WriteALUsi]> { bits<2> rot; let Inst{19-16} = 0b1111; let Inst{11-10} = rot; -} + } /// AI_exta_rrot - A binary operation with two forms: one whose operand is a /// register and one whose operand is a register rotated by 8/16/24. @@ -1353,7 +1373,7 @@ class AI_exta_rrot<bits<8> opcod, string opc, PatFrag opnode> IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm$rot", [(set GPRnopc:$Rd, (opnode GPR:$Rn, (rotr GPRnopc:$Rm, rot_imm:$rot)))]>, - Requires<[IsARM, HasV6]> { + Requires<[IsARM, HasV6]>, Sched<[WriteALUsr]> { bits<4> Rd; bits<4> Rm; bits<4> Rn; @@ -1368,7 +1388,7 @@ class AI_exta_rrot<bits<8> opcod, string opc, PatFrag opnode> class AI_exta_rrot_np<bits<8> opcod, string opc> : AExtI<opcod, (outs GPRnopc:$Rd), (ins GPR:$Rn, GPRnopc:$Rm, rot_imm:$rot), IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm$rot", []>, - Requires<[IsARM, HasV6]> { + Requires<[IsARM, HasV6]>, Sched<[WriteALUsr]> { bits<4> Rn; bits<2> rot; let Inst{19-16} = Rn; @@ -1664,53 +1684,11 @@ PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary, [(ARMcallseq_start timm:$amt)]>; } -// Atomic pseudo-insts which will be lowered to ldrexd/strexd loops. -// (These pseudos use a hand-written selection code). 
-let usesCustomInserter = 1, Defs = [CPSR], mayLoad = 1, mayStore = 1 in { -def ATOMOR6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -def ATOMXOR6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -def ATOMADD6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -def ATOMSUB6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -def ATOMNAND6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -def ATOMAND6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -def ATOMSWAP6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -def ATOMCMPXCHG6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$cmp1, GPR:$cmp2, - GPR:$set1, GPR:$set2), - NoItinerary, []>; -def ATOMMIN6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -def ATOMUMIN6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -def ATOMMAX6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -def ATOMUMAX6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -} - -def HINT : AI<(outs), (ins imm0_4:$imm), MiscFrm, NoItinerary, +def HINT : AI<(outs), (ins imm0_239:$imm), MiscFrm, NoItinerary, "hint", "\t$imm", []>, Requires<[IsARM, HasV6]> { - bits<3> imm; - let Inst{27-3} = 0b0011001000001111000000000; - let Inst{2-0} = imm; + bits<8> imm; + let Inst{27-8} = 0b00110010000011110000; + let Inst{7-0} = imm; } def : InstAlias<"nop$p", (HINT 0, pred:$p)>, Requires<[IsARM, HasV6T2]>; @@ -1718,6 +1696,9 @@ def : InstAlias<"yield$p", (HINT 1, pred:$p)>, Requires<[IsARM, HasV6T2]>; def : InstAlias<"wfe$p", (HINT 2, pred:$p)>, Requires<[IsARM, HasV6T2]>; def : InstAlias<"wfi$p", (HINT 3, pred:$p)>, Requires<[IsARM, HasV6T2]>; def : InstAlias<"sev$p", (HINT 4, pred:$p)>, Requires<[IsARM, HasV6T2]>; +def : InstAlias<"sevl$p", (HINT 5, pred:$p)>, Requires<[IsARM, HasV8]>; + +def : Pat<(int_arm_sevl), (HINT 5)>; def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel", "\t$Rd, $Rn, $Rm", []>, Requires<[IsARM, HasV6]> { @@ -1735,12 +1716,23 @@ def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel", // The 16-bit operand $val can be used by a debugger to store more information // about the breakpoint. 
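// (BKPT is architecturally required to be unconditional, which is why the
// rewrite below moves it to AInoP and hard-codes the AL condition field.)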
-def BKPT : AI<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary, - "bkpt", "\t$val", []>, Requires<[IsARM]> { +def BKPT : AInoP<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary, + "bkpt", "\t$val", []>, Requires<[IsARM]> { bits<16> val; let Inst{3-0} = val{3-0}; let Inst{19-8} = val{15-4}; let Inst{27-20} = 0b00010010; + let Inst{31-28} = 0xe; // AL + let Inst{7-4} = 0b0111; +} + +def HLT : AInoP<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary, + "hlt", "\t$val", []>, Requires<[IsARM, HasV8]> { + bits<16> val; + let Inst{3-0} = val{3-0}; + let Inst{19-8} = val{15-4}; + let Inst{27-20} = 0b00010000; + let Inst{31-28} = 0xe; // AL let Inst{7-4} = 0b0111; } @@ -1780,7 +1772,8 @@ multiclass APreLoad<bits<1> read, bits<1> data, string opc> { def i12 : AXI<(outs), (ins addrmode_imm12:$addr), MiscFrm, IIC_Preload, !strconcat(opc, "\t$addr"), - [(ARMPreload addrmode_imm12:$addr, (i32 read), (i32 data))]> { + [(ARMPreload addrmode_imm12:$addr, (i32 read), (i32 data))]>, + Sched<[WritePreLd]> { bits<4> Rt; bits<17> addr; let Inst{31-26} = 0b111101; @@ -1796,7 +1789,8 @@ multiclass APreLoad<bits<1> read, bits<1> data, string opc> { def rs : AXI<(outs), (ins ldst_so_reg:$shift), MiscFrm, IIC_Preload, !strconcat(opc, "\t$shift"), - [(ARMPreload ldst_so_reg:$shift, (i32 read), (i32 data))]> { + [(ARMPreload ldst_so_reg:$shift, (i32 read), (i32 data))]>, + Sched<[WritePreLd]> { bits<17> shift; let Inst{31-26} = 0b111101; let Inst{25} = 1; // 1 for register form @@ -1816,7 +1810,7 @@ defm PLDW : APreLoad<0, 1, "pldw">, Requires<[IsARM,HasV7,HasMP]>; defm PLI : APreLoad<1, 0, "pli">, Requires<[IsARM,HasV7]>; def SETEND : AXI<(outs), (ins setend_op:$end), MiscFrm, NoItinerary, - "setend\t$end", []>, Requires<[IsARM]> { + "setend\t$end", []>, Requires<[IsARM]>, Deprecated<HasV8Ops> { bits<1> end; let Inst{31-10} = 0b1111000100000001000000; let Inst{9} = end; @@ -1863,7 +1857,8 @@ def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary, let isNotDuplicable = 1 in { def PICADD : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p), 4, IIC_iALUr, - [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>; + [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>, + Sched<[WriteALU, ReadALU]>; let AddedComplexity = 10 in { def PICLDR : ARMPseudoInst<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), @@ -1923,11 +1918,11 @@ def ADR : AI1<{0,?,?,0}, (outs GPR:$Rd), (ins adrlabel:$label), let hasSideEffects = 1 in { def LEApcrel : ARMPseudoInst<(outs GPR:$Rd), (ins i32imm:$label, pred:$p), - 4, IIC_iALUi, []>; + 4, IIC_iALUi, []>, Sched<[WriteALU, ReadALU]>; def LEApcrelJT : ARMPseudoInst<(outs GPR:$Rd), (ins i32imm:$label, nohash_imm:$id, pred:$p), - 4, IIC_iALUi, []>; + 4, IIC_iALUi, []>, Sched<[WriteALU, ReadALU]>; } //===----------------------------------------------------------------------===// @@ -1938,16 +1933,22 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in { // ARMV4T and above def BX_RET : AI<(outs), (ins), BrMiscFrm, IIC_Br, "bx", "\tlr", [(ARMretflag)]>, - Requires<[IsARM, HasV4T]> { + Requires<[IsARM, HasV4T]>, Sched<[WriteBr]> { let Inst{27-0} = 0b0001001011111111111100011110; } // ARMV4 only def MOVPCLR : AI<(outs), (ins), BrMiscFrm, IIC_Br, "mov", "\tpc, lr", [(ARMretflag)]>, - Requires<[IsARM, NoV4T]> { + Requires<[IsARM, NoV4T]>, Sched<[WriteBr]> { let Inst{27-0} = 0b0001101000001111000000001110; } + + // Exception return: N.b. doesn't set CPSR as far as we're concerned (it sets + // the user-space one). 
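  // (Architecturally, "subs pc, lr, #imm" copies SPSR into CPSR as it
  // branches, which is how the interrupted mode's state comes back.)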
+ def SUBS_PC_LR : ARMPseudoInst<(outs), (ins i32imm:$offset, pred:$p), + 4, IIC_Br, + [(ARMintretflag imm:$offset)]>; } // Indirect branches @@ -1955,7 +1956,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { // ARMV4T and above def BX : AXI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br, "bx\t$dst", [(brind GPR:$dst)]>, - Requires<[IsARM, HasV4T]> { + Requires<[IsARM, HasV4T]>, Sched<[WriteBr]> { bits<4> dst; let Inst{31-4} = 0b1110000100101111111111110001; let Inst{3-0} = dst; @@ -1963,7 +1964,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def BX_pred : AI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br, "bx", "\t$dst", [/* pattern left blank */]>, - Requires<[IsARM, HasV4T]> { + Requires<[IsARM, HasV4T]>, Sched<[WriteBr]> { bits<4> dst; let Inst{27-4} = 0b000100101111111111110001; let Inst{3-0} = dst; @@ -1980,7 +1981,7 @@ let isCall = 1, def BL : ABXI<0b1011, (outs), (ins bl_target:$func), IIC_Br, "bl\t$func", [(ARMcall tglobaladdr:$func)]>, - Requires<[IsARM]> { + Requires<[IsARM]>, Sched<[WriteBrL]> { let Inst{31-28} = 0b1110; bits<24> func; let Inst{23-0} = func; @@ -1990,7 +1991,7 @@ let isCall = 1, def BL_pred : ABI<0b1011, (outs), (ins bl_target:$func), IIC_Br, "bl", "\t$func", [(ARMcall_pred tglobaladdr:$func)]>, - Requires<[IsARM]> { + Requires<[IsARM]>, Sched<[WriteBrL]> { bits<24> func; let Inst{23-0} = func; let DecoderMethod = "DecodeBranchImmInstruction"; @@ -2000,7 +2001,7 @@ let isCall = 1, def BLX : AXI<(outs), (ins GPR:$func), BrMiscFrm, IIC_Br, "blx\t$func", [(ARMcall GPR:$func)]>, - Requires<[IsARM, HasV5T]> { + Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]> { bits<4> func; let Inst{31-4} = 0b1110000100101111111111110011; let Inst{3-0} = func; @@ -2009,7 +2010,7 @@ let isCall = 1, def BLX_pred : AI<(outs), (ins GPR:$func), BrMiscFrm, IIC_Br, "blx", "\t$func", [(ARMcall_pred GPR:$func)]>, - Requires<[IsARM, HasV5T]> { + Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]> { bits<4> func; let Inst{27-4} = 0b000100101111111111110011; let Inst{3-0} = func; @@ -2019,18 +2020,18 @@ let isCall = 1, // Note: Restrict $func to the tGPR regclass to prevent it being in LR. def BX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func), 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsARM, HasV4T]>; + Requires<[IsARM, HasV4T]>, Sched<[WriteBr]>; // ARMv4 def BMOVPCRX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func), 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsARM, NoV4T]>; + Requires<[IsARM, NoV4T]>, Sched<[WriteBr]>; // mov lr, pc; b if callee is marked noreturn to avoid confusing the // return stack predictor. def BMOVPCB_CALL : ARMPseudoInst<(outs), (ins bl_target:$func), 8, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>, - Requires<[IsARM]>; + Requires<[IsARM]>, Sched<[WriteBr]>; } let isBranch = 1, isTerminator = 1 in { @@ -2038,7 +2039,8 @@ let isBranch = 1, isTerminator = 1 in { // a two-value operand where a dag node expects two operands. :( def Bcc : ABI<0b1010, (outs), (ins br_target:$target), IIC_Br, "b", "\t$target", - [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]> { + [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]>, + Sched<[WriteBr]> { bits<24> target; let Inst{23-0} = target; let DecoderMethod = "DecodeBranchImmInstruction"; @@ -2051,25 +2053,27 @@ let isBranch = 1, isTerminator = 1 in { // should be sufficient. // FIXME: Is B really a Barrier? That doesn't seem right. 
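  // (The expansion target is Bcc with the "always" predicate, written as
  // (ops 14, zero_reg): condition code AL plus no CPSR use.)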
def B : ARMPseudoExpand<(outs), (ins br_target:$target), 4, IIC_Br, - [(br bb:$target)], (Bcc br_target:$target, (ops 14, zero_reg))>; + [(br bb:$target)], (Bcc br_target:$target, (ops 14, zero_reg))>, + Sched<[WriteBr]>; let isNotDuplicable = 1, isIndirectBranch = 1 in { def BR_JTr : ARMPseudoInst<(outs), (ins GPR:$target, i32imm:$jt, i32imm:$id), 0, IIC_Br, - [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]>; + [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]>, + Sched<[WriteBr]>; // FIXME: This shouldn't use the generic "addrmode2," but rather be split // into i12 and rs suffixed versions. def BR_JTm : ARMPseudoInst<(outs), (ins addrmode2:$target, i32imm:$jt, i32imm:$id), 0, IIC_Br, [(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt, - imm:$id)]>; + imm:$id)]>, Sched<[WriteBrTbl]>; def BR_JTadd : ARMPseudoInst<(outs), (ins GPR:$target, GPR:$idx, i32imm:$jt, i32imm:$id), 0, IIC_Br, [(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt, - imm:$id)]>; + imm:$id)]>, Sched<[WriteBrTbl]>; } // isNotDuplicable = 1, isIndirectBranch = 1 } // isBarrier = 1 @@ -2078,7 +2082,7 @@ let isBranch = 1, isTerminator = 1 in { // BLX (immediate) def BLXi : AXI<(outs), (ins blx_target:$target), BrMiscFrm, NoItinerary, "blx\t$target", []>, - Requires<[IsARM, HasV5T]> { + Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]> { let Inst{31-25} = 0b1111101; bits<25> target; let Inst{23-0} = target{24-1}; @@ -2087,7 +2091,7 @@ def BLXi : AXI<(outs), (ins blx_target:$target), BrMiscFrm, NoItinerary, // Branch and Exchange Jazelle def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func", - [/* pattern left blank */]> { + [/* pattern left blank */]>, Sched<[WriteBr]> { bits<4> func; let Inst{23-20} = 0b0010; let Inst{19-8} = 0xfff; @@ -2098,18 +2102,20 @@ def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func", // Tail calls. 
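// (TCRETURNdi/ri carry the tail-call target until frame lowering is done; at
// emission they are rewritten to the TAILJMPd/r expansions defined here.)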
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in { - def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst), IIC_Br, []>; + def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst), IIC_Br, []>, + Sched<[WriteBr]>; - def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst), IIC_Br, []>; + def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst), IIC_Br, []>, + Sched<[WriteBr]>; def TAILJMPd : ARMPseudoExpand<(outs), (ins br_target:$dst), 4, IIC_Br, [], (Bcc br_target:$dst, (ops 14, zero_reg))>, - Requires<[IsARM]>; + Requires<[IsARM]>, Sched<[WriteBr]>; def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst), 4, IIC_Br, [], - (BX GPR:$dst)>, + (BX GPR:$dst)>, Sched<[WriteBr]>, Requires<[IsARM]>; } @@ -2123,7 +2129,8 @@ def SMC : ABI<0b0001, (outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt", // Supervisor Call (Software Interrupt) let isCall = 1, Uses = [SP] in { -def SVC : ABI<0b1111, (outs), (ins imm24b:$svc), IIC_Br, "svc", "\t$svc", []> { +def SVC : ABI<0b1111, (outs), (ins imm24b:$svc), IIC_Br, "svc", "\t$svc", []>, + Sched<[WriteBr]> { bits<24> svc; let Inst{23-0} = svc; } @@ -2272,6 +2279,13 @@ def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rd, GPR:$dst2), []>, Requires<[IsARM, HasV5TE]>; } +def LDA : AIldracq<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr), + NoItinerary, "lda", "\t$Rt, $addr", []>; +def LDAB : AIldracq<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr), + NoItinerary, "ldab", "\t$Rt, $addr", []>; +def LDAH : AIldracq<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr), + NoItinerary, "ldah", "\t$Rt, $addr", []>; + // Indexed loads multiclass AI2_ldridx<bit isByte, string opc, InstrItinClass iii, InstrItinClass iir> { @@ -2284,7 +2298,6 @@ multiclass AI2_ldridx<bit isByte, string opc, let Inst{19-16} = addr{16-13}; let Inst{11-0} = addr{11-0}; let DecoderMethod = "DecodeLDRPreImm"; - let AsmMatchConverter = "cvtLdWriteBackRegAddrModeImm12"; } def _PRE_REG : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb), @@ -2297,7 +2310,6 @@ multiclass AI2_ldridx<bit isByte, string opc, let Inst{11-0} = addr{11-0}; let Inst{4} = 0; let DecoderMethod = "DecodeLDRPreReg"; - let AsmMatchConverter = "cvtLdWriteBackRegAddrMode2"; } def _POST_REG : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb), @@ -2355,7 +2367,6 @@ multiclass AI3_ldridx<bits<4> op, string opc, InstrItinClass itin> { let Inst{19-16} = addr{12-9}; // Rn let Inst{11-8} = addr{7-4}; // imm7_4/zero let Inst{3-0} = addr{3-0}; // imm3_0/Rm - let AsmMatchConverter = "cvtLdWriteBackRegAddrMode3"; let DecoderMethod = "DecodeAddrMode3Instruction"; } def _POST : AI3ldstidx<op, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), @@ -2391,7 +2402,6 @@ def LDRD_PRE : AI3ldstidx<0b1101, 0, 1, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb), let Inst{11-8} = addr{7-4}; // imm7_4/zero let Inst{3-0} = addr{3-0}; // imm3_0/Rm let DecoderMethod = "DecodeAddrMode3Instruction"; - let AsmMatchConverter = "cvtLdrdPre"; } def LDRD_POST: AI3ldstidx<0b1101, 0, 0, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb), (ins addr_offset_none:$addr, am3offset:$offset), @@ -2494,7 +2504,6 @@ multiclass AI3ldrT<bits<4> op, string opc> { let Inst{22} = 1; let Inst{11-8} = offset{7-4}; let Inst{3-0} = offset{3-0}; - let AsmMatchConverter = "cvtLdExtTWriteBackImm"; } def r : AI3ldstidxT<op, 1, (outs GPRnopc:$Rt, GPRnopc:$base_wb), (ins addr_offset_none:$addr, postidx_reg:$Rm), @@ -2506,7 +2515,6 @@ multiclass AI3ldrT<bits<4> op, string opc> { let Inst{11-8} = 0; let Unpredictable{11-8} = 0b1111; let Inst{3-0} = Rm{3-0}; - let AsmMatchConverter = "cvtLdExtTWriteBackReg"; 
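    // (The cvt*WriteBack* AsmMatchConverters disappear throughout these
    // hunks; evidently the default generated operand conversion now suffices
    // for the writeback forms.)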
let DecoderMethod = "DecodeLDR"; } } @@ -2544,7 +2552,6 @@ multiclass AI2_stridx<bit isByte, string opc, let Inst{23} = addr{12}; // U (add = ('U' == 1)) let Inst{19-16} = addr{16-13}; // Rn let Inst{11-0} = addr{11-0}; // imm12 - let AsmMatchConverter = "cvtStWriteBackRegAddrModeImm12"; let DecoderMethod = "DecodeSTRPreImm"; } @@ -2558,7 +2565,6 @@ multiclass AI2_stridx<bit isByte, string opc, let Inst{19-16} = addr{16-13}; // Rn let Inst{11-0} = addr{11-0}; let Inst{4} = 0; // Inst{4} = 0 - let AsmMatchConverter = "cvtStWriteBackRegAddrMode2"; let DecoderMethod = "DecodeSTRPreReg"; } def _POST_REG : AI2ldstidx<0, isByte, 0, (outs GPR:$Rn_wb), @@ -2667,7 +2673,6 @@ def STRH_PRE : AI3ldstidx<0b1011, 0, 1, (outs GPR:$Rn_wb), let Inst{19-16} = addr{12-9}; // Rn let Inst{11-8} = addr{7-4}; // imm7_4/zero let Inst{3-0} = addr{3-0}; // imm3_0/Rm - let AsmMatchConverter = "cvtStWriteBackRegAddrMode3"; let DecoderMethod = "DecodeAddrMode3Instruction"; } @@ -2701,7 +2706,6 @@ def STRD_PRE : AI3ldstidx<0b1111, 0, 1, (outs GPR:$Rn_wb), let Inst{11-8} = addr{7-4}; // imm7_4/zero let Inst{3-0} = addr{3-0}; // imm3_0/Rm let DecoderMethod = "DecodeAddrMode3Instruction"; - let AsmMatchConverter = "cvtStrdPre"; } def STRD_POST: AI3ldstidx<0b1111, 0, 0, (outs GPR:$Rn_wb), @@ -2808,7 +2812,6 @@ multiclass AI3strT<bits<4> op, string opc> { let Inst{22} = 1; let Inst{11-8} = offset{7-4}; let Inst{3-0} = offset{3-0}; - let AsmMatchConverter = "cvtStExtTWriteBackImm"; } def r : AI3ldstidxT<op, 0, (outs GPR:$base_wb), (ins GPR:$Rt, addr_offset_none:$addr, postidx_reg:$Rm), @@ -2819,13 +2822,18 @@ multiclass AI3strT<bits<4> op, string opc> { let Inst{22} = 0; let Inst{11-8} = 0; let Inst{3-0} = Rm{3-0}; - let AsmMatchConverter = "cvtStExtTWriteBackReg"; } } defm STRHT : AI3strT<0b1011, "strht">; +def STL : AIstrrel<0b00, (outs), (ins GPR:$Rt, addr_offset_none:$addr), + NoItinerary, "stl", "\t$Rt, $addr", []>; +def STLB : AIstrrel<0b10, (outs), (ins GPR:$Rt, addr_offset_none:$addr), + NoItinerary, "stlb", "\t$Rt, $addr", []>; +def STLH : AIstrrel<0b11, (outs), (ins GPR:$Rt, addr_offset_none:$addr), + NoItinerary, "stlh", "\t$Rt, $addr", []>; //===----------------------------------------------------------------------===// // Load / store multiple Instructions. @@ -2955,7 +2963,7 @@ defm sysSTM : arm_ldst_mult<"stm", " ^", 0, 1, LdStMulFrm, IIC_iStore_m, let neverHasSideEffects = 1 in def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr, - "mov", "\t$Rd, $Rm", []>, UnaryDP { + "mov", "\t$Rd, $Rm", []>, UnaryDP, Sched<[WriteALU]> { bits<4> Rd; bits<4> Rm; @@ -2969,7 +2977,7 @@ def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr, // A version for the smaller set of tail call registers. 
 let neverHasSideEffects = 1 in
 def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm,
-                   IIC_iMOVr, "mov", "\t$Rd, $Rm", []>, UnaryDP {
+                   IIC_iMOVr, "mov", "\t$Rd, $Rm", []>, UnaryDP, Sched<[WriteALU]> {
   bits<4> Rd;
   bits<4> Rm;
@@ -2982,7 +2990,8 @@ def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm,
 def MOVsr : AsI1<0b1101, (outs GPRnopc:$Rd), (ins shift_so_reg_reg:$src),
                  DPSoRegRegFrm, IIC_iMOVsr,
                  "mov", "\t$Rd, $src",
-                 [(set GPRnopc:$Rd, shift_so_reg_reg:$src)]>, UnaryDP {
+                 [(set GPRnopc:$Rd, shift_so_reg_reg:$src)]>, UnaryDP,
+                 Sched<[WriteALU]> {
   bits<4> Rd;
   bits<12> src;
   let Inst{15-12} = Rd;
@@ -2998,7 +3007,7 @@ def MOVsr : AsI1<0b1101, (outs GPRnopc:$Rd), (ins shift_so_reg_reg:$src),
 def MOVsi : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg_imm:$src),
                  DPSoRegImmFrm, IIC_iMOVsr,
                  "mov", "\t$Rd, $src",
                  [(set GPR:$Rd, shift_so_reg_imm:$src)]>,
-                 UnaryDP {
+                 UnaryDP, Sched<[WriteALU]> {
   bits<4> Rd;
   bits<12> src;
   let Inst{15-12} = Rd;
@@ -3011,7 +3020,8 @@ def MOVsi : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg_imm:$src),
 let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
 def MOVi : AsI1<0b1101, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm, IIC_iMOVi,
-                "mov", "\t$Rd, $imm", [(set GPR:$Rd, so_imm:$imm)]>, UnaryDP {
+                "mov", "\t$Rd, $imm", [(set GPR:$Rd, so_imm:$imm)]>, UnaryDP,
+                Sched<[WriteALU]> {
   bits<4> Rd;
   bits<12> imm;
   let Inst{25} = 1;
@@ -3025,7 +3035,7 @@
 def MOVi16 : AI1<0b1000, (outs GPR:$Rd), (ins imm0_65535_expr:$imm),
                  DPFrm, IIC_iMOVi,
                  "movw", "\t$Rd, $imm",
                  [(set GPR:$Rd, imm0_65535:$imm)]>,
-                 Requires<[IsARM, HasV6T2]>, UnaryDP {
+                 Requires<[IsARM, HasV6T2]>, UnaryDP, Sched<[WriteALU]> {
   bits<4> Rd;
   bits<16> imm;
   let Inst{15-12} = Rd;
@@ -3041,7 +3051,8 @@ def : InstAlias<"mov${p} $Rd, $imm",
                 Requires<[IsARM]>;

 def MOVi16_ga_pcrel : PseudoInst<(outs GPR:$Rd),
-                                 (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
+                                 (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>,
+                                 Sched<[WriteALU]>;

 let Constraints = "$src = $Rd" in {
 def MOVTi16 : AI1<0b1010, (outs GPRnopc:$Rd),
@@ -3051,7 +3062,7 @@ def MOVTi16 : AI1<0b1010, (outs GPRnopc:$Rd),
                   [(set GPRnopc:$Rd,
                         (or (and GPR:$src, 0xffff), lo16AllZero:$imm))]>,
                   UnaryDP,
-                  Requires<[IsARM, HasV6T2]> {
+                  Requires<[IsARM, HasV6T2]>, Sched<[WriteALU]> {
   bits<4> Rd;
   bits<16> imm;
   let Inst{15-12} = Rd;
@@ -3063,7 +3074,8 @@ def MOVTi16 : AI1<0b1010, (outs GPRnopc:$Rd),
 }

 def MOVTi16_ga_pcrel : PseudoInst<(outs GPR:$Rd),
-                 (ins GPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
+                 (ins GPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>,
+                 Sched<[WriteALU]>;
 } // Constraints
@@ -3073,7 +3085,7 @@ def : ARMPat<(or GPR:$src, 0xffff0000), (MOVTi16 GPR:$src, 0xffff)>,
 let Uses = [CPSR] in
 def RRX: PseudoInst<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVsi,
                     [(set GPR:$Rd, (ARMrrx GPR:$Rm))]>, UnaryDP,
-                    Requires<[IsARM]>;
+                    Requires<[IsARM]>, Sched<[WriteALU]>;

 // These aren't really mov instructions, but we have to define them this way
 // due to flag operands.
@@ -3081,10 +3093,10 @@ def RRX: PseudoInst<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVsi,
 let Defs = [CPSR] in {
 def MOVsrl_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
                              [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP,
-                             Requires<[IsARM]>;
+                             Sched<[WriteALU]>, Requires<[IsARM]>;
 def MOVsra_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
                              [(set GPR:$dst, (ARMsra_flag GPR:$src))]>, UnaryDP,
-                             Requires<[IsARM]>;
+                             Sched<[WriteALU]>, Requires<[IsARM]>;
 }

 //===----------------------------------------------------------------------===//
@@ -3250,7 +3262,8 @@
 class AAI<bits<8> op27_20, bits<8> op11_4, string opc,
           list<dag> pattern = [],
          dag iops = (ins GPRnopc:$Rn, GPRnopc:$Rm),
          string asm = "\t$Rd, $Rn, $Rm">
-  : AI<(outs GPRnopc:$Rd), iops, DPFrm, IIC_iALUr, opc, asm, pattern> {
+  : AI<(outs GPRnopc:$Rd), iops, DPFrm, IIC_iALUr, opc, asm, pattern>,
+    Sched<[WriteALU, ReadALU, ReadALU]> {
   bits<4> Rn;
   bits<4> Rd;
   bits<4> Rm;
@@ -3265,9 +3278,11 @@ class AAI<bits<8> op27_20, bits<8> op11_4, string opc,

 // Saturating add/subtract

+let DecoderMethod = "DecodeQADDInstruction" in
 def QADD : AAI<0b00010000, 0b00000101, "qadd",
                [(set GPRnopc:$Rd, (int_arm_qadd GPRnopc:$Rm, GPRnopc:$Rn))],
                (ins GPRnopc:$Rm, GPRnopc:$Rn), "\t$Rd, $Rm, $Rn">;
+
 def QSUB : AAI<0b00010010, 0b00000101, "qsub",
                [(set GPRnopc:$Rd, (int_arm_qsub GPRnopc:$Rm, GPRnopc:$Rn))],
                (ins GPRnopc:$Rm, GPRnopc:$Rn), "\t$Rd, $Rm, $Rn">;
@@ -3326,7 +3341,7 @@ def UHSUB8 : AAI<0b01100111, 0b11111111, "uhsub8">;
 def USAD8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
                MulFrm /* for convenience */, NoItinerary, "usad8",
                "\t$Rd, $Rn, $Rm", []>,
-            Requires<[IsARM, HasV6]> {
+            Requires<[IsARM, HasV6]>, Sched<[WriteALU, ReadALU, ReadALU]> {
   bits<4> Rd;
   bits<4> Rn;
   bits<4> Rm;
@@ -3340,7 +3355,7 @@
 def USADA8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
                 MulFrm /* for convenience */, NoItinerary, "usada8",
                 "\t$Rd, $Rn, $Rm, $Ra", []>,
-             Requires<[IsARM, HasV6]> {
+             Requires<[IsARM, HasV6]>, Sched<[WriteALU, ReadALU, ReadALU]>{
   bits<4> Rd;
   bits<4> Rn;
   bits<4> Rm;
@@ -3473,7 +3488,7 @@ def BFI:I<(outs GPRnopc:$Rd), (ins GPRnopc:$src, GPR:$Rn, bf_inv_mask_imm:$imm),
 def MVNr : AsI1<0b1111, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMVNr,
                 "mvn", "\t$Rd, $Rm",
-                [(set GPR:$Rd, (not GPR:$Rm))]>, UnaryDP {
+                [(set GPR:$Rd, (not GPR:$Rm))]>, UnaryDP, Sched<[WriteALU]> {
   bits<4> Rd;
   bits<4> Rm;
   let Inst{25} = 0;
@@ -3484,7 +3499,8 @@
 }
 def MVNsi : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg_imm:$shift),
                  DPSoRegImmFrm, IIC_iMVNsr, "mvn", "\t$Rd, $shift",
-                 [(set GPR:$Rd, (not so_reg_imm:$shift))]>, UnaryDP {
+                 [(set GPR:$Rd, (not so_reg_imm:$shift))]>, UnaryDP,
+                 Sched<[WriteALU]> {
   bits<4> Rd;
   bits<12> shift;
   let Inst{25} = 0;
@@ -3496,7 +3512,8 @@
 }
 def MVNsr : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg_reg:$shift),
                  DPSoRegRegFrm, IIC_iMVNsr, "mvn", "\t$Rd, $shift",
-                 [(set GPR:$Rd, (not so_reg_reg:$shift))]>, UnaryDP {
+                 [(set GPR:$Rd, (not so_reg_reg:$shift))]>, UnaryDP,
+                 Sched<[WriteALU]> {
   bits<4> Rd;
   bits<12> shift;
   let Inst{25} = 0;
@@ -3511,7 +3528,7 @@
 let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
 def MVNi : AsI1<0b1111, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm,
                 IIC_iMVNi, "mvn", "\t$Rd, $imm",
-                [(set GPR:$Rd, so_imm_not:$imm)]>,UnaryDP {
+                [(set GPR:$Rd, so_imm_not:$imm)]>,UnaryDP, Sched<[WriteALU]> {
   bits<4> Rd;
   bits<12> imm;
   let Inst{25} = 1;
@@ -3993,14 +4010,58 @@ def PKHTB : APKHI<0b01101000, 1, (outs GPRnopc:$Rd),

 // Alternate cases for PKHTB where identities eliminate some nodes.  Note that
 // a shift amount of 0 is *not legal* here, it is PKHBT instead.
+// We also can not replace a srl (17..31) by an arithmetic shift we would use in
+// pkhtb src1, src2, asr (17..31).
 def : ARMV6Pat<(or (and GPRnopc:$src1, 0xFFFF0000),
-                   (srl GPRnopc:$src2, imm16_31:$sh)),
+                   (srl GPRnopc:$src2, imm16:$sh)),
+               (PKHTB GPRnopc:$src1, GPRnopc:$src2, imm16:$sh)>;
+def : ARMV6Pat<(or (and GPRnopc:$src1, 0xFFFF0000),
+                   (sra GPRnopc:$src2, imm16_31:$sh)),
                (PKHTB GPRnopc:$src1, GPRnopc:$src2, imm16_31:$sh)>;
 def : ARMV6Pat<(or (and GPRnopc:$src1, 0xFFFF0000),
                    (and (srl GPRnopc:$src2, imm1_15:$sh), 0xFFFF)),
                (PKHTB GPRnopc:$src1, GPRnopc:$src2, imm1_15:$sh)>;

 //===----------------------------------------------------------------------===//
+// CRC Instructions
+//
+// Polynomials:
+// + CRC32{B,H,W}       0x04C11DB7
+// + CRC32C{B,H,W}      0x1EDC6F41
+//
+
+class AI_crc32<bit C, bits<2> sz, string suffix, SDPatternOperator builtin>
+  : AInoP<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm), MiscFrm, NoItinerary,
+          !strconcat("crc32", suffix), "\t$Rd, $Rn, $Rm",
+          [(set GPRnopc:$Rd, (builtin GPRnopc:$Rn, GPRnopc:$Rm))]>,
+    Requires<[IsARM, HasV8, HasCRC]> {
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<4> Rm;
+
+  let Inst{31-28} = 0b1110;
+  let Inst{27-23} = 0b00010;
+  let Inst{22-21} = sz;
+  let Inst{20}    = 0;
+  let Inst{19-16} = Rn;
+  let Inst{15-12} = Rd;
+  let Inst{11-10} = 0b00;
+  let Inst{9}     = C;
+  let Inst{8}     = 0;
+  let Inst{7-4}   = 0b0100;
+  let Inst{3-0}   = Rm;
+
+  let Unpredictable{11-8} = 0b1101;
+}

+def CRC32B  : AI_crc32<0, 0b00, "b",  int_arm_crc32b>;
+def CRC32CB : AI_crc32<1, 0b00, "cb", int_arm_crc32cb>;
+def CRC32H  : AI_crc32<0, 0b01, "h",  int_arm_crc32h>;
+def CRC32CH : AI_crc32<1, 0b01, "ch", int_arm_crc32ch>;
+def CRC32W  : AI_crc32<0, 0b10, "w",  int_arm_crc32w>;
+def CRC32CW : AI_crc32<1, 0b10, "cw", int_arm_crc32cw>;
+
+//===----------------------------------------------------------------------===//
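Two notes on the hunks above, neither of which is part of the patch itself. First, the new PKHTB comment can be checked with a concrete value: for $Rm = 0x80000000, (srl $Rm, 17) leaves 0x4000 in the bottom half-word while (sra $Rm, 17) leaves 0xC000, so srl is only interchangeable with the asr that PKHTB encodes at a shift of exactly 16; that is why the srl pattern is narrowed to imm16 and a separate sra pattern covers 16..31. Second, a minimal sketch of how the new CRC32* definitions get exercised from C, assuming a target built with the CRC extension (the function name and loop are this example's own, not from the patch):

    #include <arm_acle.h>
    #include <stddef.h>
    #include <stdint.h>

    /* Sketch only: <arm_acle.h>'s __crc32w lowers to the int_arm_crc32w
     * intrinsic, which the CRC32W definition above selects when compiling
     * with something like -march=armv8-a+crc. */
    uint32_t crc32_words(uint32_t crc, const uint32_t *p, size_t n)
    {
        for (size_t i = 0; i < n; i++)
            crc = __crc32w(crc, p[i]);  /* one CRC32W instruction per word */
        return crc;
    }

The comparison-instruction hunks continue below.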
 // Comparison Instructions...
 //
@@ -4022,7 +4083,8 @@ def : ARMPat<(ARMcmpZ GPR:$src, so_reg_reg:$rhs),
 let isCompare = 1, Defs = [CPSR] in {
 def CMNri : AI1<0b1011, (outs), (ins GPR:$Rn, so_imm:$imm), DPFrm, IIC_iCMPi,
                 "cmn", "\t$Rn, $imm",
-                [(ARMcmn GPR:$Rn, so_imm:$imm)]> {
+                [(ARMcmn GPR:$Rn, so_imm:$imm)]>,
+                Sched<[WriteCMP, ReadALU]> {
   bits<4> Rn;
   bits<12> imm;
   let Inst{25} = 1;
@@ -4038,7 +4100,7 @@
 def CMNzrr : AI1<0b1011, (outs), (ins GPR:$Rn, GPR:$Rm), DPFrm, IIC_iCMPr,
                  "cmn", "\t$Rn, $Rm",
                  [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
-                  GPR:$Rn, GPR:$Rm)]> {
+                  GPR:$Rn, GPR:$Rm)]>, Sched<[WriteCMP, ReadALU, ReadALU]> {
   bits<4> Rn;
   bits<4> Rm;
   let isCommutable = 1;
@@ -4056,7 +4118,8 @@
 def CMNzrsi : AI1<0b1011, (outs), (ins GPR:$Rn, so_reg_imm:$shift),
                   DPSoRegImmFrm, IIC_iCMPsr,
                   "cmn", "\t$Rn, $shift",
                   [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
-                   GPR:$Rn, so_reg_imm:$shift)]> {
+                   GPR:$Rn, so_reg_imm:$shift)]>,
+                   Sched<[WriteCMPsi, ReadALU]> {
   bits<4> Rn;
   bits<12> shift;
   let Inst{25} = 0;
@@ -4074,7 +4137,8 @@
 def CMNzrsr : AI1<0b1011, (outs), (ins GPRnopc:$Rn, so_reg_reg:$shift),
                   DPSoRegRegFrm, IIC_iCMPsr,
                   "cmn", "\t$Rn, $shift",
                   [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>
-                   GPRnopc:$Rn, so_reg_reg:$shift)]> {
+                   GPRnopc:$Rn, so_reg_reg:$shift)]>,
+                   Sched<[WriteCMPsr, ReadALU]> {
   bits<4> Rn;
   bits<12> shift;
   let Inst{25} = 0;
@@ -4112,65 +4176,77 @@ let usesCustomInserter = 1, isBranch = 1, isTerminator = 1,
 def BCCi64 : PseudoInst<(outs),
     (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst),
     IIC_Br,
-    [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, bb:$dst)]>;
+    [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, bb:$dst)]>,
+    Sched<[WriteBr]>;

 def BCCZi64 : PseudoInst<(outs),
     (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, brtarget:$dst), IIC_Br,
-    [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, 0, 0, bb:$dst)]>;
+    [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, 0, 0, bb:$dst)]>,
+    Sched<[WriteBr]>;
 } // usesCustomInserter

 // Conditional moves
-// FIXME: should be able to write a pattern for ARMcmov, but can't use
-// a two-value operand where a dag node expects two operands. :(
 let neverHasSideEffects = 1 in {

 let isCommutable = 1, isSelect = 1 in
-def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, GPR:$Rm, pred:$p),
+def MOVCCr : ARMPseudoInst<(outs GPR:$Rd),
+                           (ins GPR:$false, GPR:$Rm, cmovpred:$p),
                            4, IIC_iCMOVr,
-                           [/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
-             RegConstraint<"$false = $Rd">;
+                           [(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm,
+                                                   cmovpred:$p))]>,
+             RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;

 def MOVCCsi : ARMPseudoInst<(outs GPR:$Rd),
-                            (ins GPR:$false, so_reg_imm:$shift, pred:$p),
-                            4, IIC_iCMOVsr,
-                            [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg_imm:$shift,
-                             imm:$cc, CCR:$ccr))*/]>,
-              RegConstraint<"$false = $Rd">;
+                            (ins GPR:$false, so_reg_imm:$shift, cmovpred:$p),
+                            4, IIC_iCMOVsr,
+                            [(set GPR:$Rd,
+                                  (ARMcmov GPR:$false, so_reg_imm:$shift,
+                                           cmovpred:$p))]>,
+              RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
 def MOVCCsr : ARMPseudoInst<(outs GPR:$Rd),
-                            (ins GPR:$false, so_reg_reg:$shift, pred:$p),
+                            (ins GPR:$false, so_reg_reg:$shift, cmovpred:$p),
                             4, IIC_iCMOVsr,
-                            [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg_reg:$shift,
-                             imm:$cc, CCR:$ccr))*/]>,
-              RegConstraint<"$false = $Rd">;
+                            [(set GPR:$Rd, (ARMcmov GPR:$false, so_reg_reg:$shift,
+                                                    cmovpred:$p))]>,
+              RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;

 let isMoveImm = 1 in
-def MOVCCi16 : ARMPseudoInst<(outs GPR:$Rd),
-                             (ins GPR:$false, imm0_65535_expr:$imm, pred:$p),
-                             4, IIC_iMOVi,
-                             []>,
-               RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>;
+def MOVCCi16
+    : ARMPseudoInst<(outs GPR:$Rd),
+                    (ins GPR:$false, imm0_65535_expr:$imm, cmovpred:$p),
+                    4, IIC_iMOVi,
+                    [(set GPR:$Rd, (ARMcmov GPR:$false, imm0_65535:$imm,
+                                            cmovpred:$p))]>,
+      RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>,
+      Sched<[WriteALU]>;

 let isMoveImm = 1 in
 def MOVCCi : ARMPseudoInst<(outs GPR:$Rd),
-                           (ins GPR:$false, so_imm:$imm, pred:$p),
+                           (ins GPR:$false, so_imm:$imm, cmovpred:$p),
                            4, IIC_iCMOVi,
-                           [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm:$imm, imm:$cc, CCR:$ccr))*/]>,
-             RegConstraint<"$false = $Rd">;
+                           [(set GPR:$Rd, (ARMcmov GPR:$false, so_imm:$imm,
+                                                   cmovpred:$p))]>,
+             RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
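With cmovpred carrying the condition and its CCR use together, the previously commented-out patterns become real ones, so ordinary conditional expressions now select these pseudos directly. A minimal sketch under that assumption (the function is this example's own, not from the patch):

    /* Sketch only: on ARM this typically compiles to cmp plus one
     * predicated mov, i.e. a MOVCCr pseudo before expansion. */
    int select_max(int a, int b)
    {
        return a > b ? a : b;
    }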
 // Two instruction predicate mov immediate.
 let isMoveImm = 1 in
-def MOVCCi32imm : ARMPseudoInst<(outs GPR:$Rd),
-                                (ins GPR:$false, i32imm:$src, pred:$p),
-                                8, IIC_iCMOVix2, []>, RegConstraint<"$false = $Rd">;
+def MOVCCi32imm
+    : ARMPseudoInst<(outs GPR:$Rd),
+                    (ins GPR:$false, i32imm:$src, cmovpred:$p),
+                    8, IIC_iCMOVix2,
+                    [(set GPR:$Rd, (ARMcmov GPR:$false, imm:$src,
+                                            cmovpred:$p))]>,
+      RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>;

 let isMoveImm = 1 in
 def MVNCCi : ARMPseudoInst<(outs GPR:$Rd),
-                           (ins GPR:$false, so_imm:$imm, pred:$p),
+                           (ins GPR:$false, so_imm:$imm, cmovpred:$p),
                            4, IIC_iCMOVi,
-                           [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>,
-             RegConstraint<"$false = $Rd">;
+                           [(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm,
+                                                   cmovpred:$p))]>,
+             RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
 } // neverHasSideEffects
@@ -4189,10 +4265,20 @@
 def memb_opt : Operand<i32> {
   let DecoderMethod = "DecodeMemBarrierOption";
 }

+def InstSyncBarrierOptOperand : AsmOperandClass {
+  let Name = "InstSyncBarrierOpt";
+  let ParserMethod = "parseInstSyncBarrierOptOperand";
+}
+def instsyncb_opt : Operand<i32> {
+  let PrintMethod = "printInstSyncBOption";
+  let ParserMatchClass = InstSyncBarrierOptOperand;
+  let DecoderMethod = "DecodeInstSyncBarrierOption";
+}
+
 // memory barriers protect the atomic sequences
 let hasSideEffects = 1 in {
 def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
-                "dmb", "\t$opt", [(ARMMemBarrier (i32 imm:$opt))]>,
+                "dmb", "\t$opt", [(int_arm_dmb (i32 imm0_15:$opt))]>,
                 Requires<[IsARM, HasDB]> {
   bits<4> opt;
   let Inst{31-4} = 0xf57ff05;
@@ -4201,7 +4287,7 @@ def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
 def DSB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
-                "dsb", "\t$opt", []>,
+                "dsb", "\t$opt", [(int_arm_dsb (i32 imm0_15:$opt))]>,
                 Requires<[IsARM, HasDB]> {
   bits<4> opt;
   let Inst{31-4} = 0xf57ff04;
 }

 // ISB has only full system option
-def ISB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
+def ISB : AInoP<(outs), (ins instsyncb_opt:$opt), MiscFrm, NoItinerary,
                 "isb", "\t$opt", []>,
                 Requires<[IsARM, HasDB]> {
   bits<4> opt;
@@ -4217,124 +4303,219 @@
   let Inst{3-0} = opt;
 }

+let usesCustomInserter = 1, Defs = [CPSR] in {
+
 // Pseudo instruction that combines movs + predicated rsbmi
 // to implement integer ABS
-let usesCustomInserter = 1, Defs = [CPSR] in
-def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>;
+  def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>;

-let usesCustomInserter = 1 in {
-  let Defs = [CPSR] in {
+// Atomic pseudo-insts which will be lowered to ldrex/strex loops.
+// (64-bit pseudos use a hand-written selection code).
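To make the new $ordering operand concrete, here is a rough sketch (not from the patch) of the loop a pseudo such as ATOMIC_LOAD_ADD_I32 is expanded into, driven from a C11 atomic; the function name is this example's own:

    #include <stdatomic.h>

    /* Sketch only: on an ARMv7 target the body becomes roughly
     *     dmb   ish
     * 1:  ldrex r2, [r0]        @ load-exclusive the current value
     *     add   r2, r2, r1      @ apply the operation
     *     strex r3, r2, [r0]    @ try to publish the new value
     *     cmp   r3, #0
     *     bne   1b              @ retry if the exclusive monitor was lost
     *     dmb   ish
     * where the leading/trailing barriers are chosen from the pseudo's
     * i32imm:$ordering operand rather than being hard-coded. */
    int fetch_add_seq_cst(_Atomic int *p, int v)
    {
        return atomic_fetch_add_explicit(p, v, memory_order_seq_cst);
    }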
+  let mayLoad = 1, mayStore = 1 in {
    def ATOMIC_LOAD_ADD_I8 : PseudoInst<
-     (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
-     [(set GPR:$dst, (atomic_load_add_8 GPR:$ptr, GPR:$incr))]>;
+     (outs GPR:$dst),
+     (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+     NoItinerary, []>;
    def ATOMIC_LOAD_SUB_I8 : PseudoInst<
-     (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
-     [(set GPR:$dst, (atomic_load_sub_8 GPR:$ptr, GPR:$incr))]>;
+     (outs GPR:$dst),
+     (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+     NoItinerary, []>;
    def ATOMIC_LOAD_AND_I8 : PseudoInst<
-     (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
-     [(set GPR:$dst, (atomic_load_and_8 GPR:$ptr, GPR:$incr))]>;
+     (outs GPR:$dst),
+     (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+     NoItinerary, []>;
    def ATOMIC_LOAD_OR_I8 : PseudoInst<
-     (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
-     [(set GPR:$dst, (atomic_load_or_8 GPR:$ptr, GPR:$incr))]>;
+     (outs GPR:$dst),
+     (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+     NoItinerary, []>;
    def ATOMIC_LOAD_XOR_I8 : PseudoInst<
-     (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
-     [(set GPR:$dst, (atomic_load_xor_8 GPR:$ptr, GPR:$incr))]>;
+     (outs GPR:$dst),
+     (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+     NoItinerary, []>;
    def ATOMIC_LOAD_NAND_I8 : PseudoInst<
-     (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
-     [(set GPR:$dst, (atomic_load_nand_8 GPR:$ptr, GPR:$incr))]>;
+     (outs GPR:$dst),
+     (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+     NoItinerary, []>;
    def ATOMIC_LOAD_MIN_I8 : PseudoInst<
-     (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
-     [(set GPR:$dst, (atomic_load_min_8 GPR:$ptr, GPR:$val))]>;
+     (outs GPR:$dst),
+     (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+     NoItinerary, []>;
    def ATOMIC_LOAD_MAX_I8 : PseudoInst<
-     (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
-     [(set GPR:$dst, (atomic_load_max_8 GPR:$ptr, GPR:$val))]>;
+     (outs GPR:$dst),
+     (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+     NoItinerary, []>;
    def ATOMIC_LOAD_UMIN_I8 : PseudoInst<
-     (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
-     [(set GPR:$dst, (atomic_load_umin_8 GPR:$ptr, GPR:$val))]>;
+     (outs GPR:$dst),
+     (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+     NoItinerary, []>;
    def ATOMIC_LOAD_UMAX_I8 : PseudoInst<
-     (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
-     [(set GPR:$dst, (atomic_load_umax_8 GPR:$ptr, GPR:$val))]>;
+     (outs GPR:$dst),
+     (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+     NoItinerary, []>;
+   def ATOMIC_SWAP_I8 : PseudoInst<
+     (outs GPR:$dst),
+     (ins GPR:$ptr, GPR:$new, i32imm:$ordering),
+     NoItinerary, []>;
+   def ATOMIC_CMP_SWAP_I8 : PseudoInst<
+     (outs GPR:$dst),
+     (ins GPR:$ptr, GPR:$old, GPR:$new, i32imm:$ordering),
+     NoItinerary, []>;
    def ATOMIC_LOAD_ADD_I16 : PseudoInst<
-     (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
-     [(set GPR:$dst, (atomic_load_add_16 GPR:$ptr, GPR:$incr))]>;
+     (outs GPR:$dst),
+     (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+     NoItinerary, []>;
    def ATOMIC_LOAD_SUB_I16 : PseudoInst<
-     (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
-     [(set GPR:$dst, (atomic_load_sub_16 GPR:$ptr, GPR:$incr))]>;
+     (outs GPR:$dst),
+     (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+     NoItinerary, []>;
    def ATOMIC_LOAD_AND_I16 : PseudoInst<
-     (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
-     [(set GPR:$dst, (atomic_load_and_16 GPR:$ptr, GPR:$incr))]>;
+     (outs GPR:$dst),
+     (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+     NoItinerary, []>;
    def ATOMIC_LOAD_OR_I16 : PseudoInst<
-     (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
-     [(set GPR:$dst, (atomic_load_or_16 GPR:$ptr, GPR:$incr))]>;
+     (outs GPR:$dst),
+     (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+     NoItinerary, []>;
    def ATOMIC_LOAD_XOR_I16 : PseudoInst<
-     (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
-     [(set GPR:$dst, (atomic_load_xor_16 GPR:$ptr, GPR:$incr))]>;
+     (outs GPR:$dst),
+     (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+     NoItinerary, []>;
    def ATOMIC_LOAD_NAND_I16 : PseudoInst<
-     (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
-     [(set GPR:$dst, (atomic_load_nand_16 GPR:$ptr, GPR:$incr))]>;
+     (outs GPR:$dst),
+     (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+     NoItinerary, []>;
    def ATOMIC_LOAD_MIN_I16 : PseudoInst<
-     (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
-     [(set GPR:$dst, (atomic_load_min_16 GPR:$ptr, GPR:$val))]>;
+     (outs GPR:$dst),
+     (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+     NoItinerary, []>;
    def ATOMIC_LOAD_MAX_I16 : PseudoInst<
-     (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
-     [(set GPR:$dst, (atomic_load_max_16 GPR:$ptr, GPR:$val))]>;
+     (outs GPR:$dst),
+     (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+     NoItinerary, []>;
    def ATOMIC_LOAD_UMIN_I16 : PseudoInst<
-     (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
-     [(set GPR:$dst, (atomic_load_umin_16 GPR:$ptr, GPR:$val))]>;
+     (outs GPR:$dst),
+     (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+     NoItinerary, []>;
    def ATOMIC_LOAD_UMAX_I16 : PseudoInst<
-     (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
-     [(set GPR:$dst, (atomic_load_umax_16 GPR:$ptr, GPR:$val))]>;
+     (outs GPR:$dst),
+     (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+     NoItinerary, []>;
+   def ATOMIC_SWAP_I16 : PseudoInst<
+     (outs GPR:$dst),
+     (ins GPR:$ptr, GPR:$new, i32imm:$ordering),
+     NoItinerary, []>;
+   def ATOMIC_CMP_SWAP_I16 : PseudoInst<
+     (outs GPR:$dst),
+     (ins GPR:$ptr, GPR:$old, GPR:$new, i32imm:$ordering),
+     NoItinerary, []>;
    def ATOMIC_LOAD_ADD_I32 : PseudoInst<
-     (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
-     [(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$incr))]>;
+     (outs GPR:$dst),
+     (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+     NoItinerary, []>;
    def ATOMIC_LOAD_SUB_I32 : PseudoInst<
-     (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
-     [(set GPR:$dst, (atomic_load_sub_32 GPR:$ptr, GPR:$incr))]>;
+     (outs GPR:$dst),
+     (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+     NoItinerary, []>;
    def ATOMIC_LOAD_AND_I32 : PseudoInst<
-     (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
-     [(set GPR:$dst, (atomic_load_and_32 GPR:$ptr, GPR:$incr))]>;
+     (outs GPR:$dst),
+     (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+     NoItinerary, []>;
    def ATOMIC_LOAD_OR_I32 : PseudoInst<
-     (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
-     [(set GPR:$dst, (atomic_load_or_32 GPR:$ptr, GPR:$incr))]>;
+     (outs GPR:$dst),
+     (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+     NoItinerary, []>;
    def ATOMIC_LOAD_XOR_I32 : PseudoInst<
-     (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
-     [(set GPR:$dst, (atomic_load_xor_32 GPR:$ptr, GPR:$incr))]>;
+     (outs GPR:$dst),
+     (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+     NoItinerary, []>;
    def ATOMIC_LOAD_NAND_I32 : PseudoInst<
-     (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
-     [(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$incr))]>;
+     (outs GPR:$dst),
+     (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+     NoItinerary, []>;
    def ATOMIC_LOAD_MIN_I32 : PseudoInst<
-     (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
-     [(set GPR:$dst, (atomic_load_min_32 GPR:$ptr, GPR:$val))]>;
+     (outs GPR:$dst),
+     (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+     NoItinerary, []>;
    def ATOMIC_LOAD_MAX_I32 : PseudoInst<
-     (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
-     [(set GPR:$dst, (atomic_load_max_32 GPR:$ptr, GPR:$val))]>;
+     (outs GPR:$dst),
+     (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+     NoItinerary, []>;
    def ATOMIC_LOAD_UMIN_I32 : PseudoInst<
-     (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
-     [(set GPR:$dst, (atomic_load_umin_32 GPR:$ptr, GPR:$val))]>;
+     (outs GPR:$dst),
+     (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+     NoItinerary, []>;
    def ATOMIC_LOAD_UMAX_I32 : PseudoInst<
-     (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
-     [(set GPR:$dst, (atomic_load_umax_32 GPR:$ptr, GPR:$val))]>;
-
-   def ATOMIC_SWAP_I8 : PseudoInst<
-     (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
-     [(set GPR:$dst, (atomic_swap_8 GPR:$ptr, GPR:$new))]>;
-   def ATOMIC_SWAP_I16 : PseudoInst<
-     (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
-     [(set GPR:$dst, (atomic_swap_16 GPR:$ptr, GPR:$new))]>;
+     (outs GPR:$dst),
+     (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+     NoItinerary, []>;
    def ATOMIC_SWAP_I32 : PseudoInst<
-     (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
-     [(set GPR:$dst, (atomic_swap_32 GPR:$ptr, GPR:$new))]>;
-
-   def ATOMIC_CMP_SWAP_I8 : PseudoInst<
-     (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
-     [(set GPR:$dst, (atomic_cmp_swap_8 GPR:$ptr, GPR:$old, GPR:$new))]>;
-   def ATOMIC_CMP_SWAP_I16 : PseudoInst<
-     (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
-     [(set GPR:$dst, (atomic_cmp_swap_16 GPR:$ptr, GPR:$old, GPR:$new))]>;
+     (outs GPR:$dst),
+     (ins GPR:$ptr, GPR:$new, i32imm:$ordering),
+     NoItinerary, []>;
    def ATOMIC_CMP_SWAP_I32 : PseudoInst<
-     (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
-     [(set GPR:$dst, (atomic_cmp_swap_32 GPR:$ptr, GPR:$old, GPR:$new))]>;
-}
+     (outs GPR:$dst),
+     (ins GPR:$ptr, GPR:$old, GPR:$new, i32imm:$ordering),
+     NoItinerary, []>;
+   def ATOMIC_LOAD_ADD_I64 : PseudoInst<
+     (outs GPR:$dst1, GPR:$dst2),
+     (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+     NoItinerary, []>;
+   def ATOMIC_LOAD_SUB_I64 : PseudoInst<
+     (outs GPR:$dst1, GPR:$dst2),
+     (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+     NoItinerary, []>;
+   def ATOMIC_LOAD_AND_I64 : PseudoInst<
+     (outs GPR:$dst1, GPR:$dst2),
+     (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+     NoItinerary, []>;
+   def ATOMIC_LOAD_OR_I64 : PseudoInst<
+     (outs GPR:$dst1, GPR:$dst2),
+     (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+     NoItinerary, []>;
+   def ATOMIC_LOAD_XOR_I64 : PseudoInst<
+     (outs GPR:$dst1, GPR:$dst2),
+     (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+     NoItinerary, []>;
+   def ATOMIC_LOAD_NAND_I64 : PseudoInst<
+     (outs GPR:$dst1, GPR:$dst2),
+     (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+     NoItinerary, []>;
+   def ATOMIC_LOAD_MIN_I64 : PseudoInst<
+     (outs GPR:$dst1, GPR:$dst2),
+     (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+     NoItinerary, []>;
+   def ATOMIC_LOAD_MAX_I64 : PseudoInst<
+     (outs GPR:$dst1, GPR:$dst2),
+     (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+     NoItinerary, []>;
+   def ATOMIC_LOAD_UMIN_I64 : PseudoInst<
+     (outs GPR:$dst1, GPR:$dst2),
+     (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+     NoItinerary, []>;
+   def ATOMIC_LOAD_UMAX_I64 : PseudoInst<
+     (outs GPR:$dst1, GPR:$dst2),
+     (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+     NoItinerary, []>;
+   def ATOMIC_SWAP_I64 : PseudoInst<
+     (outs GPR:$dst1, GPR:$dst2),
+     (ins GPR:$addr,
+          GPR:$src1, GPR:$src2, i32imm:$ordering),
+     NoItinerary, []>;
+   def ATOMIC_CMP_SWAP_I64 : PseudoInst<
+     (outs GPR:$dst1, GPR:$dst2),
+     (ins GPR:$addr, GPR:$cmp1, GPR:$cmp2,
+          GPR:$set1, GPR:$set2, i32imm:$ordering),
+     NoItinerary, []>;
+  }
+  let mayLoad = 1 in
+  def ATOMIC_LOAD_I64 : PseudoInst<
+    (outs GPR:$dst1, GPR:$dst2),
+    (ins GPR:$addr, i32imm:$ordering),
+    NoItinerary, []>;
+  let mayStore = 1 in
+  def ATOMIC_STORE_I64 : PseudoInst<
+    (outs GPR:$dst1, GPR:$dst2),
+    (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+    NoItinerary, []>;
 }

 let usesCustomInserter = 1 in {
@@ -4344,48 +4525,147 @@ let usesCustomInserter = 1 in {
             [(ARMcopystructbyval GPR:$dst, GPR:$src, imm:$size, imm:$alignment)]>;
 }

+def ldrex_1 : PatFrag<(ops node:$ptr), (int_arm_ldrex node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+
+def ldrex_2 : PatFrag<(ops node:$ptr), (int_arm_ldrex node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+
+def ldrex_4 : PatFrag<(ops node:$ptr), (int_arm_ldrex node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
+def strex_1 : PatFrag<(ops node:$val, node:$ptr),
+                      (int_arm_strex node:$val, node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+
+def strex_2 : PatFrag<(ops node:$val, node:$ptr),
+                      (int_arm_strex node:$val, node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+
+def strex_4 : PatFrag<(ops node:$val, node:$ptr),
+                      (int_arm_strex node:$val, node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
 let mayLoad = 1 in {
 def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr),
-                     NoItinerary,
-                     "ldrexb", "\t$Rt, $addr", []>;
+                     NoItinerary, "ldrexb", "\t$Rt, $addr",
+                     [(set GPR:$Rt, (ldrex_1 addr_offset_none:$addr))]>;
 def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr),
-                     NoItinerary, "ldrexh", "\t$Rt, $addr", []>;
+                     NoItinerary, "ldrexh", "\t$Rt, $addr",
+                     [(set GPR:$Rt, (ldrex_2 addr_offset_none:$addr))]>;
 def LDREX  : AIldrex<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr),
-                     NoItinerary, "ldrex", "\t$Rt, $addr", []>;
+                     NoItinerary, "ldrex", "\t$Rt, $addr",
+                     [(set GPR:$Rt, (ldrex_4 addr_offset_none:$addr))]>;
 let hasExtraDefRegAllocReq = 1 in
-def LDREXD: AIldrex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr),
+def LDREXD : AIldrex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr),
                       NoItinerary, "ldrexd", "\t$Rt, $addr", []> {
   let DecoderMethod = "DecodeDoubleRegLoad";
 }
+
+def LDAEXB : AIldaex<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+                     NoItinerary, "ldaexb", "\t$Rt, $addr", []>;
+def LDAEXH : AIldaex<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+                     NoItinerary, "ldaexh", "\t$Rt, $addr", []>;
+def LDAEX  : AIldaex<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+                     NoItinerary, "ldaex", "\t$Rt, $addr", []>;
+let hasExtraDefRegAllocReq = 1 in
+def LDAEXD : AIldaex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr),
                      NoItinerary, "ldaexd", "\t$Rt, $addr", []> {
+  let DecoderMethod = "DecodeDoubleRegLoad";
+}
 }

 let mayStore = 1, Constraints = "@earlyclobber $Rd" in {
 def STREXB: AIstrex<0b10, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
-                    NoItinerary, "strexb", "\t$Rd, $Rt, $addr", []>;
+                    NoItinerary, "strexb", "\t$Rd, $Rt, $addr",
+                    [(set GPR:$Rd, (strex_1 GPR:$Rt, addr_offset_none:$addr))]>;
 def STREXH: AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
"strexh", "\t$Rd, $Rt, $addr", []>; + NoItinerary, "strexh", "\t$Rd, $Rt, $addr", + [(set GPR:$Rd, (strex_2 GPR:$Rt, addr_offset_none:$addr))]>; def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), - NoItinerary, "strex", "\t$Rd, $Rt, $addr", []>; + NoItinerary, "strex", "\t$Rd, $Rt, $addr", + [(set GPR:$Rd, (strex_4 GPR:$Rt, addr_offset_none:$addr))]>; let hasExtraSrcRegAllocReq = 1 in def STREXD : AIstrex<0b01, (outs GPR:$Rd), (ins GPRPairOp:$Rt, addr_offset_none:$addr), NoItinerary, "strexd", "\t$Rd, $Rt, $addr", []> { let DecoderMethod = "DecodeDoubleRegStore"; } +def STLEXB: AIstlex<0b10, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), + NoItinerary, "stlexb", "\t$Rd, $Rt, $addr", + []>; +def STLEXH: AIstlex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), + NoItinerary, "stlexh", "\t$Rd, $Rt, $addr", + []>; +def STLEX : AIstlex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), + NoItinerary, "stlex", "\t$Rd, $Rt, $addr", + []>; +let hasExtraSrcRegAllocReq = 1 in +def STLEXD : AIstlex<0b01, (outs GPR:$Rd), + (ins GPRPairOp:$Rt, addr_offset_none:$addr), + NoItinerary, "stlexd", "\t$Rd, $Rt, $addr", []> { + let DecoderMethod = "DecodeDoubleRegStore"; +} } - -def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", []>, +def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", + [(int_arm_clrex)]>, Requires<[IsARM, HasV7]> { let Inst{31-0} = 0b11110101011111111111000000011111; } +def : ARMPat<(and (ldrex_1 addr_offset_none:$addr), 0xff), + (LDREXB addr_offset_none:$addr)>; +def : ARMPat<(and (ldrex_2 addr_offset_none:$addr), 0xffff), + (LDREXH addr_offset_none:$addr)>; +def : ARMPat<(strex_1 (and GPR:$Rt, 0xff), addr_offset_none:$addr), + (STREXB GPR:$Rt, addr_offset_none:$addr)>; +def : ARMPat<(strex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr), + (STREXH GPR:$Rt, addr_offset_none:$addr)>; + +class acquiring_load<PatFrag base> + : PatFrag<(ops node:$ptr), (base node:$ptr), [{ + AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering(); + return Ordering == Acquire || Ordering == SequentiallyConsistent; +}]>; + +def atomic_load_acquire_8 : acquiring_load<atomic_load_8>; +def atomic_load_acquire_16 : acquiring_load<atomic_load_16>; +def atomic_load_acquire_32 : acquiring_load<atomic_load_32>; + +class releasing_store<PatFrag base> + : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{ + AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering(); + return Ordering == Release || Ordering == SequentiallyConsistent; +}]>; + +def atomic_store_release_8 : releasing_store<atomic_store_8>; +def atomic_store_release_16 : releasing_store<atomic_store_16>; +def atomic_store_release_32 : releasing_store<atomic_store_32>; + +let AddedComplexity = 8 in { + def : ARMPat<(atomic_load_acquire_8 addr_offset_none:$addr), (LDAB addr_offset_none:$addr)>; + def : ARMPat<(atomic_load_acquire_16 addr_offset_none:$addr), (LDAH addr_offset_none:$addr)>; + def : ARMPat<(atomic_load_acquire_32 addr_offset_none:$addr), (LDA addr_offset_none:$addr)>; + def : ARMPat<(atomic_store_release_8 addr_offset_none:$addr, GPR:$val), (STLB GPR:$val, addr_offset_none:$addr)>; + def : ARMPat<(atomic_store_release_16 addr_offset_none:$addr, GPR:$val), (STLH GPR:$val, addr_offset_none:$addr)>; + def : ARMPat<(atomic_store_release_32 addr_offset_none:$addr, GPR:$val), (STL GPR:$val, addr_offset_none:$addr)>; +} + // SWP/SWPB are deprecated in V6/V7. 
 // SWP/SWPB are deprecated in V6/V7.
 let mayLoad = 1, mayStore = 1 in {
 def SWP : AIswp<0, (outs GPRnopc:$Rt),
-                (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swp", []>;
+                (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swp", []>,
+          Requires<[PreV8]>;
 def SWPB: AIswp<1, (outs GPRnopc:$Rt),
-                (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swpb", []>;
+                (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swpb", []>,
+          Requires<[PreV8]>;
 }

 //===----------------------------------------------------------------------===//
@@ -4396,7 +4676,8 @@
 def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
               c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
               NoItinerary, "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
               [(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
-                            imm:$CRm, imm:$opc2)]> {
+                            imm:$CRm, imm:$opc2)]>,
+              Requires<[PreV8]> {
   bits<4> opc1;
   bits<4> CRn;
   bits<4> CRd;
@@ -4413,11 +4694,12 @@ def CDP :
   let Inst{23-20} = opc1;
 }

-def CDP2 : ABXI<0b1110, (outs), (ins pf_imm:$cop, imm0_15:$opc1,
+def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
                 c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
                 NoItinerary, "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
                 [(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
-                               imm:$CRm, imm:$opc2)]> {
+                               imm:$CRm, imm:$opc2)]>,
+           Requires<[PreV8]> {
   let Inst{31-28} = 0b1111;
   bits<4> opc1;
   bits<4> CRn;
@@ -4595,10 +4877,10 @@
 defm LDC   : LdStCop <1, 0, "ldc">;
 defm LDCL  : LdStCop <1, 1, "ldcl">;
 defm STC   : LdStCop <0, 0, "stc">;
 defm STCL  : LdStCop <0, 1, "stcl">;
-defm LDC2  : LdSt2Cop<1, 0, "ldc2">;
-defm LDC2L : LdSt2Cop<1, 1, "ldc2l">;
-defm STC2  : LdSt2Cop<0, 0, "stc2">;
-defm STC2L : LdSt2Cop<0, 1, "stc2l">;
+defm LDC2  : LdSt2Cop<1, 0, "ldc2">,  Requires<[PreV8]>;
+defm LDC2L : LdSt2Cop<1, 1, "ldc2l">, Requires<[PreV8]>;
+defm STC2  : LdSt2Cop<0, 0, "stc2">,  Requires<[PreV8]>;
+defm STC2L : LdSt2Cop<0, 1, "stc2l">, Requires<[PreV8]>;

 //===----------------------------------------------------------------------===//
 // Move between coprocessor and ARM core register.
@@ -4631,16 +4913,17 @@ def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */,
                     (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
                          c_imm:$CRm, imm0_7:$opc2),
                     [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
-                                  imm:$CRm, imm:$opc2)]>;
+                                  imm:$CRm, imm:$opc2)]>,
+                    ComplexDeprecationPredicate<"MCR">;
 def : ARMInstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm",
                    (MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
                         c_imm:$CRm, 0, pred:$p)>;
 def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */,
-                    (outs GPR:$Rt),
+                    (outs GPRwithAPSR:$Rt),
                     (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm,
                          imm0_7:$opc2), []>;
 def : ARMInstAlias<"mrc${p} $cop, $opc1, $Rt, $CRn, $CRm",
-                   (MRC GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+                   (MRC GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
                         c_imm:$CRm, 0, pred:$p)>;

 def : ARMPat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
@@ -4650,7 +4933,7 @@
 class MovRCopro2<string opc, bit direction, dag oops, dag iops,
                  list<dag> pattern>
   : ABXI<0b1110, oops, iops, NoItinerary,
          !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"), pattern> {
-  let Inst{31-28} = 0b1111;
+  let Inst{31-24} = 0b11111110;
   let Inst{20} = direction;
   let Inst{4} = 1;
@@ -4674,16 +4957,18 @@
 def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */,
                       (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
                            c_imm:$CRm, imm0_7:$opc2),
                       [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
-                                     imm:$CRm, imm:$opc2)]>;
+                                     imm:$CRm, imm:$opc2)]>,
+                      Requires<[PreV8]>;
 def : ARMInstAlias<"mcr2$ $cop, $opc1, $Rt, $CRn, $CRm",
                    (MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
                          c_imm:$CRm, 0)>;
 def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */,
-                      (outs GPR:$Rt),
+                      (outs GPRwithAPSR:$Rt),
                       (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm,
-                           imm0_7:$opc2), []>;
+                           imm0_7:$opc2), []>,
+                      Requires<[PreV8]>;
 def : ARMInstAlias<"mrc2$ $cop, $opc1, $Rt, $CRn, $CRm",
-                   (MRC2 GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+                   (MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
                          c_imm:$CRm, 0)>;

 def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn,
@@ -4718,7 +5003,8 @@ def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */>;
 class MovRRCopro2<string opc, bit direction, list<dag> pattern = []>
   : ABXI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1,
          GPRnopc:$Rt, GPRnopc:$Rt2, c_imm:$CRm), NoItinerary,
-         !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern> {
+         !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern>,
+    Requires<[PreV8]> {
   let Inst{31-28} = 0b1111;
   let Inst{23-21} = 0b010;
   let Inst{20} = direction;
@@ -4820,7 +5106,7 @@ def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask,  so_imm:$a), NoItinerary,
 let isCall = 1,
     Defs = [R0, R12, LR, CPSR], Uses = [SP] in {
   def TPsoft : PseudoInst<(outs), (ins), IIC_Br,
-               [(set R0, ARMthread_pointer)]>;
+               [(set R0, ARMthread_pointer)]>, Sched<[WriteBr]>;
 }

 //===----------------------------------------------------------------------===//
@@ -4884,7 +5170,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in
 def MOVPCRX : ARMPseudoExpand<(outs), (ins GPR:$dst),
                     4, IIC_Br, [(brind GPR:$dst)],
                     (MOVr PC, GPR:$dst, (ops 14, zero_reg), zero_reg)>,
-                    Requires<[IsARM, NoV4T]>;
+                    Requires<[IsARM, NoV4T]>, Sched<[WriteBr]>;
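One effect of retyping the MRC/MRC2 destination as GPRwithAPSR is worth illustrating: Rt may now be the flags rather than a core register. A hedged sketch (the coprocessor and register operands here are illustrative, not taken from the patch):

    /* Sketch only: GPRwithAPSR lets the assembler accept apsr_nzcv as the
     * MRC destination, moving a coprocessor status word straight into the
     * condition flags. */
    void read_status_into_flags(void)
    {
        __asm__ volatile("mrc p14, 0, apsr_nzcv, c0, c1, 0" ::: "cc");
    }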
 // Large immediate handling.
@@ -5153,10 +5439,10 @@ def : MnemonicAlias<"rfeed", "rfeib">;
 def : MnemonicAlias<"rfe", "rfeia">;

 // SRS aliases
-def : MnemonicAlias<"srsfa", "srsda">;
-def : MnemonicAlias<"srsea", "srsdb">;
-def : MnemonicAlias<"srsfd", "srsia">;
-def : MnemonicAlias<"srsed", "srsib">;
+def : MnemonicAlias<"srsfa", "srsib">;
+def : MnemonicAlias<"srsea", "srsia">;
+def : MnemonicAlias<"srsfd", "srsdb">;
+def : MnemonicAlias<"srsed", "srsda">;
 def : MnemonicAlias<"srs", "srsia">;

 // QSAX == QSUBADDX
@@ -5233,7 +5519,7 @@ def RORi : ARMAsmPseudo<"ror${s}${p} $Rd, $Rm, $imm",
                         cc_out:$s)>;
 }
 def RRXi : ARMAsmPseudo<"rrx${s}${p} $Rd, $Rm",
-                        (ins GPRnopc:$Rd, GPRnopc:$Rm, pred:$p, cc_out:$s)>;
+                        (ins GPR:$Rd, GPR:$Rm, pred:$p, cc_out:$s)>;
 let TwoOperandAliasConstraint = "$Rn = $Rd" in {
 def ASRr : ARMAsmPseudo<"asr${s}${p} $Rd, $Rn, $Rm",
                         (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
@@ -5269,4 +5555,5 @@ def : InstAlias<"umull${s}${p} $RdLo, $RdHi, $Rn, $Rm",
 // 'it' blocks in ARM mode just validate the predicates. The IT itself
 // is discarded.
-def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>;
+def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>,
+            ComplexDeprecationPredicate<"IT">;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
index 896fd0f7850c..43bd4c21dc39 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -626,7 +626,7 @@ class VLD1D<bits<4> op7_4, string Dt>
            "vld1", Dt, "$Vd, $Rn", "", []> {
   let Rm = 0b1111;
   let Inst{4} = Rn{4};
-  let DecoderMethod = "DecodeVLDInstruction";
+  let DecoderMethod = "DecodeVLDST1Instruction";
 }
 class VLD1Q<bits<4> op7_4, string Dt>
   : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd),
@@ -634,7 +634,7 @@ class VLD1Q<bits<4> op7_4, string Dt>
           "vld1", Dt, "$Vd, $Rn", "", []> {
   let Rm = 0b1111;
   let Inst{5-4} = Rn{5-4};
-  let DecoderMethod = "DecodeVLDInstruction";
+  let DecoderMethod = "DecodeVLDST1Instruction";
 }

 def VLD1d8  : VLD1D<{0,0,0,?}, "8">;
@@ -655,16 +655,14 @@ multiclass VLD1DWB<bits<4> op7_4, string Dt> {
                  "$Rn.addr = $wb", []> {
     let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
     let Inst{4} = Rn{4};
-    let DecoderMethod = "DecodeVLDInstruction";
-    let AsmMatchConverter = "cvtVLDwbFixed";
+    let DecoderMethod = "DecodeVLDST1Instruction";
   }
   def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
                         (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1u,
                         "vld1", Dt, "$Vd, $Rn, $Rm",
                         "$Rn.addr = $wb", []> {
     let Inst{4} = Rn{4};
-    let DecoderMethod = "DecodeVLDInstruction";
-    let AsmMatchConverter = "cvtVLDwbRegister";
+    let DecoderMethod = "DecodeVLDST1Instruction";
   }
 }
 multiclass VLD1QWB<bits<4> op7_4, string Dt> {
@@ -674,16 +672,14 @@ multiclass VLD1QWB<bits<4> op7_4, string Dt> {
                  "$Rn.addr = $wb", []> {
     let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
     let Inst{5-4} = Rn{5-4};
-    let DecoderMethod = "DecodeVLDInstruction";
-    let AsmMatchConverter = "cvtVLDwbFixed";
+    let DecoderMethod = "DecodeVLDST1Instruction";
   }
   def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
                         (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                         "vld1", Dt, "$Vd, $Rn, $Rm",
                         "$Rn.addr = $wb", []> {
     let Inst{5-4} = Rn{5-4};
-    let DecoderMethod = "DecodeVLDInstruction";
-    let AsmMatchConverter = "cvtVLDwbRegister";
+    let DecoderMethod = "DecodeVLDST1Instruction";
   }
 }
@@ -703,7 +699,7 @@ class VLD1D3<bits<4> op7_4, string Dt>
           "$Vd, $Rn", "", []> {
   let Rm = 0b1111;
   let Inst{4} = Rn{4};
-  let DecoderMethod = "DecodeVLDInstruction";
+  let DecoderMethod = "DecodeVLDST1Instruction";
 }
 multiclass VLD1D3WB<bits<4> op7_4, string Dt> {
   def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
@@ -712,16 +708,14 @@ multiclass VLD1D3WB<bits<4> op7_4, string Dt> {
                  "$Rn.addr = $wb", []> {
     let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
     let Inst{4} = Rn{4};
-    let DecoderMethod = "DecodeVLDInstruction";
-    let AsmMatchConverter = "cvtVLDwbFixed";
+    let DecoderMethod = "DecodeVLDST1Instruction";
   }
   def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
                         (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                         "vld1", Dt, "$Vd, $Rn, $Rm",
                         "$Rn.addr = $wb", []> {
     let Inst{4} = Rn{4};
-    let DecoderMethod = "DecodeVLDInstruction";
-    let AsmMatchConverter = "cvtVLDwbRegister";
+    let DecoderMethod = "DecodeVLDST1Instruction";
   }
 }
@@ -744,7 +738,7 @@ class VLD1D4<bits<4> op7_4, string Dt>
           "$Vd, $Rn", "", []> {
   let Rm = 0b1111;
   let Inst{5-4} = Rn{5-4};
-  let DecoderMethod = "DecodeVLDInstruction";
+  let DecoderMethod = "DecodeVLDST1Instruction";
 }
 multiclass VLD1D4WB<bits<4> op7_4, string Dt> {
   def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb),
@@ -753,16 +747,14 @@ multiclass VLD1D4WB<bits<4> op7_4, string Dt> {
                  "$Rn.addr = $wb", []> {
     let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
     let Inst{5-4} = Rn{5-4};
-    let DecoderMethod = "DecodeVLDInstruction";
-    let AsmMatchConverter = "cvtVLDwbFixed";
+    let DecoderMethod = "DecodeVLDST1Instruction";
   }
   def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb),
                         (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                         "vld1", Dt, "$Vd, $Rn, $Rm",
                         "$Rn.addr = $wb", []> {
     let Inst{5-4} = Rn{5-4};
-    let DecoderMethod = "DecodeVLDInstruction";
-    let AsmMatchConverter = "cvtVLDwbRegister";
+    let DecoderMethod = "DecodeVLDST1Instruction";
   }
 }
@@ -786,7 +778,7 @@ class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
           "vld2", Dt, "$Vd, $Rn", "", []> {
   let Rm = 0b1111;
   let Inst{5-4} = Rn{5-4};
-  let DecoderMethod = "DecodeVLDInstruction";
+  let DecoderMethod = "DecodeVLDST2Instruction";
 }

 def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2>;
@@ -810,16 +802,14 @@ multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
                  "$Rn.addr = $wb", []> {
     let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
     let Inst{5-4} = Rn{5-4};
-    let DecoderMethod = "DecodeVLDInstruction";
-    let AsmMatchConverter = "cvtVLDwbFixed";
+    let DecoderMethod = "DecodeVLDST2Instruction";
   }
   def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
                         (ins addrmode6:$Rn, rGPR:$Rm), itin,
                         "vld2", Dt, "$Vd, $Rn, $Rm",
                         "$Rn.addr = $wb", []> {
     let Inst{5-4} = Rn{5-4};
-    let DecoderMethod = "DecodeVLDInstruction";
-    let AsmMatchConverter = "cvtVLDwbRegister";
+    let DecoderMethod = "DecodeVLDST2Instruction";
   }
 }
@@ -853,7 +843,7 @@ class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
           "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
   let Rm = 0b1111;
   let Inst{4} = Rn{4};
-  let DecoderMethod = "DecodeVLDInstruction";
+  let DecoderMethod = "DecodeVLDST3Instruction";
 }

 def VLD3d8  : VLD3D<0b0100, {0,0,0,?}, "8">;
@@ -872,7 +862,7 @@ class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
           "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
           "$Rn.addr = $wb", []> {
   let Inst{4} = Rn{4};
-  let DecoderMethod = "DecodeVLDInstruction";
+  let DecoderMethod = "DecodeVLDST3Instruction";
 }

 def VLD3d8_UPD  : VLD3DWB<0b0100, {0,0,0,?}, "8">;
@@ -912,7 +902,7 @@ class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
           "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
   let Rm = 0b1111;
   let Inst{5-4} = Rn{5-4};
-  let DecoderMethod = "DecodeVLDInstruction";
+  let DecoderMethod = "DecodeVLDST4Instruction";
 }

 def VLD4d8  : VLD4D<0b0000, {0,0,?,?}, "8">;
@@ -931,7 +921,7 @@ class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
           "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
           "$Rn.addr = $wb", []> {
   let Inst{5-4} = Rn{5-4};
-  let DecoderMethod = "DecodeVLDInstruction";
+  let DecoderMethod = "DecodeVLDST4Instruction";
 }

 def VLD4d8_UPD  : VLD4DWB<0b0000, {0,0,?,?}, "8">;
@@ -1348,7 +1338,6 @@ multiclass VLD1DUPWB<bits<4> op7_4, string Dt> {
     let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
     let Inst{4} = Rn{4};
     let DecoderMethod = "DecodeVLD1DupInstruction";
-    let AsmMatchConverter = "cvtVLDwbFixed";
   }
   def _register : NLdSt<1, 0b10, 0b1100, op7_4,
                         (outs VecListOneDAllLanes:$Vd, GPR:$wb),
@@ -1357,7 +1346,6 @@ multiclass VLD1DUPWB<bits<4> op7_4, string Dt> {
                         "$Rn.addr = $wb", []> {
     let Inst{4} = Rn{4};
     let DecoderMethod = "DecodeVLD1DupInstruction";
-    let AsmMatchConverter = "cvtVLDwbRegister";
   }
 }
 multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> {
@@ -1369,7 +1357,6 @@ multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> {
     let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
     let Inst{4} = Rn{4};
     let DecoderMethod = "DecodeVLD1DupInstruction";
-    let AsmMatchConverter = "cvtVLDwbFixed";
   }
   def _register : NLdSt<1, 0b10, 0b1100, op7_4,
                         (outs VecListDPairAllLanes:$Vd, GPR:$wb),
@@ -1378,7 +1365,6 @@ multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> {
                         "$Rn.addr = $wb", []> {
     let Inst{4} = Rn{4};
     let DecoderMethod = "DecodeVLD1DupInstruction";
-    let AsmMatchConverter = "cvtVLDwbRegister";
   }
 }
@@ -1419,7 +1405,6 @@ multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> {
     let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
     let Inst{4} = Rn{4};
     let DecoderMethod = "DecodeVLD2DupInstruction";
-    let AsmMatchConverter = "cvtVLDwbFixed";
   }
   def _register : NLdSt<1, 0b10, 0b1101, op7_4,
                         (outs VdTy:$Vd, GPR:$wb),
@@ -1428,7 +1413,6 @@ multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> {
                         "$Rn.addr = $wb", []> {
     let Inst{4} = Rn{4};
     let DecoderMethod = "DecodeVLD2DupInstruction";
-    let AsmMatchConverter = "cvtVLDwbRegister";
   }
 }
@@ -1580,14 +1564,14 @@ class VST1D<bits<4> op7_4, string Dt>
            IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> {
   let Rm = 0b1111;
   let Inst{4} = Rn{4};
-  let DecoderMethod = "DecodeVSTInstruction";
+  let DecoderMethod = "DecodeVLDST1Instruction";
 }
 class VST1Q<bits<4> op7_4, string Dt>
   : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListDPair:$Vd),
           IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> {
   let Rm = 0b1111;
   let Inst{5-4} = Rn{5-4};
-  let DecoderMethod = "DecodeVSTInstruction";
+  let DecoderMethod = "DecodeVLDST1Instruction";
 }

 def VST1d8  : VST1D<{0,0,0,?}, "8">;
@@ -1608,8 +1592,7 @@ multiclass VST1DWB<bits<4> op7_4, string Dt> {
                  "$Rn.addr = $wb", []> {
     let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
     let Inst{4} = Rn{4};
-    let DecoderMethod = "DecodeVSTInstruction";
-    let AsmMatchConverter = "cvtVSTwbFixed";
+    let DecoderMethod = "DecodeVLDST1Instruction";
   }
   def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb),
                         (ins addrmode6:$Rn, rGPR:$Rm, VecListOneD:$Vd),
@@ -1617,8 +1600,7 @@ multiclass VST1DWB<bits<4> op7_4, string Dt> {
                         "vst1", Dt, "$Vd, $Rn, $Rm",
                         "$Rn.addr = $wb", []> {
     let Inst{4} = Rn{4};
-    let DecoderMethod = "DecodeVSTInstruction";
-    let AsmMatchConverter = "cvtVSTwbRegister";
+    let DecoderMethod = "DecodeVLDST1Instruction";
   }
 }
 multiclass VST1QWB<bits<4> op7_4, string Dt> {
@@ -1628,8 +1610,7 @@ multiclass VST1QWB<bits<4> op7_4, string Dt> {
                  "$Rn.addr = $wb", []> {
     let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
     let Inst{5-4} = Rn{5-4};
-    let DecoderMethod = "DecodeVSTInstruction";
-    let AsmMatchConverter = "cvtVSTwbFixed";
+    let DecoderMethod = "DecodeVLDST1Instruction";
   }
   def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
                         (ins addrmode6:$Rn, rGPR:$Rm, VecListDPair:$Vd),
@@ -1637,8 +1618,7 @@ multiclass VST1QWB<bits<4> op7_4, string Dt> {
                         "vst1", Dt, "$Vd, $Rn, $Rm",
                         "$Rn.addr = $wb", []> {
     let Inst{5-4} = Rn{5-4};
-    let DecoderMethod = "DecodeVSTInstruction";
-    let AsmMatchConverter = "cvtVSTwbRegister";
+    let DecoderMethod = "DecodeVLDST1Instruction";
   }
 }
@@ -1659,7 +1639,7 @@ class VST1D3<bits<4> op7_4, string Dt>
            IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> {
   let Rm = 0b1111;
   let Inst{4} = Rn{4};
-  let DecoderMethod = "DecodeVSTInstruction";
+  let DecoderMethod = "DecodeVLDST1Instruction";
 }
 multiclass VST1D3WB<bits<4> op7_4, string Dt> {
   def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
@@ -1668,8 +1648,7 @@ multiclass VST1D3WB<bits<4> op7_4, string Dt> {
                  "$Rn.addr = $wb", []> {
     let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
     let Inst{5-4} = Rn{5-4};
-    let DecoderMethod = "DecodeVSTInstruction";
-    let AsmMatchConverter = "cvtVSTwbFixed";
+    let DecoderMethod = "DecodeVLDST1Instruction";
   }
   def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
                         (ins addrmode6:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
@@ -1677,8 +1656,7 @@
                         "vst1", Dt, "$Vd, $Rn, $Rm",
                         "$Rn.addr = $wb", []> {
     let Inst{5-4} = Rn{5-4};
-    let DecoderMethod = "DecodeVSTInstruction";
-    let AsmMatchConverter = "cvtVSTwbRegister";
+    let DecoderMethod = "DecodeVLDST1Instruction";
   }
 }
@@ -1704,7 +1682,7 @@ class VST1D4<bits<4> op7_4, string Dt>
            []> {
   let Rm = 0b1111;
   let Inst{5-4} = Rn{5-4};
-  let DecoderMethod = "DecodeVSTInstruction";
+  let DecoderMethod = "DecodeVLDST1Instruction";
 }
 multiclass VST1D4WB<bits<4> op7_4, string Dt> {
   def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
@@ -1713,8 +1691,7 @@ multiclass VST1D4WB<bits<4> op7_4, string Dt> {
                  "$Rn.addr = $wb", []> {
     let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
     let Inst{5-4} = Rn{5-4};
-    let DecoderMethod = "DecodeVSTInstruction";
-    let AsmMatchConverter = "cvtVSTwbFixed";
+    let DecoderMethod = "DecodeVLDST1Instruction";
   }
   def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
                         (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
@@ -1722,8 +1699,7 @@
                         "vst1", Dt, "$Vd, $Rn, $Rm",
                         "$Rn.addr = $wb", []> {
     let Inst{5-4} = Rn{5-4};
-    let DecoderMethod = "DecodeVSTInstruction";
-    let AsmMatchConverter = "cvtVSTwbRegister";
+    let DecoderMethod = "DecodeVLDST1Instruction";
   }
 }
@@ -1748,7 +1724,7 @@ class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
           itin, "vst2", Dt, "$Vd, $Rn", "", []> {
   let Rm = 0b1111;
   let Inst{5-4} = Rn{5-4};
-  let DecoderMethod = "DecodeVSTInstruction";
+  let DecoderMethod = "DecodeVLDST2Instruction";
 }

 def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2>;
@@ -1772,16 +1748,14 @@ multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
                  "$Rn.addr = $wb", []> {
     let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
     let Inst{5-4} = Rn{5-4};
-    let DecoderMethod = "DecodeVSTInstruction";
-    let AsmMatchConverter = "cvtVSTwbFixed";
+    let DecoderMethod = "DecodeVLDST2Instruction";
   }
   def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
                         (ins addrmode6:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
                         "vst2", Dt, "$Vd, $Rn, $Rm",
                         "$Rn.addr = $wb", []> {
     let Inst{5-4} = Rn{5-4};
-    let DecoderMethod = "DecodeVSTInstruction";
-    let AsmMatchConverter = "cvtVSTwbRegister";
+    let DecoderMethod = "DecodeVLDST2Instruction";
   }
 }
 multiclass VST2QWB<bits<4> op7_4, string Dt> {
@@ -1791,8 +1765,7 @@ multiclass VST2QWB<bits<4> op7_4, string Dt> {
                  "$Rn.addr = $wb", []> {
     let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
     let Inst{5-4} = Rn{5-4};
-    let DecoderMethod = "DecodeVSTInstruction";
-    let AsmMatchConverter = "cvtVSTwbFixed";
+    let DecoderMethod = "DecodeVLDST2Instruction";
   }
   def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
                         (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
@@ -1800,8 +1773,7 @@ multiclass VST2QWB<bits<4> op7_4, string Dt> {
                         "vst2", Dt, "$Vd, $Rn, $Rm",
                         "$Rn.addr = $wb", []> {
     let Inst{5-4} = Rn{5-4};
-    let DecoderMethod = "DecodeVSTInstruction";
-    let AsmMatchConverter = "cvtVSTwbRegister";
+    let DecoderMethod = "DecodeVLDST2Instruction";
   }
 }
@@ -1835,7 +1807,7 @@ class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
           "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
   let Rm = 0b1111;
   let Inst{4} = Rn{4};
-  let DecoderMethod = "DecodeVSTInstruction";
+  let DecoderMethod = "DecodeVLDST3Instruction";
 }

 def VST3d8  : VST3D<0b0100, {0,0,0,?}, "8">;
@@ -1854,7 +1826,7 @@ class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
           "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
           "$Rn.addr = $wb", []> {
   let Inst{4} = Rn{4};
-  let DecoderMethod = "DecodeVSTInstruction";
+  let DecoderMethod = "DecodeVLDST3Instruction";
 }

 def VST3d8_UPD  : VST3DWB<0b0100, {0,0,0,?}, "8">;
@@ -1894,7 +1866,7 @@ class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
           "", []> {
   let Rm = 0b1111;
   let Inst{5-4} = Rn{5-4};
-  let DecoderMethod = "DecodeVSTInstruction";
+  let DecoderMethod = "DecodeVLDST4Instruction";
 }

 def VST4d8  : VST4D<0b0000, {0,0,?,?}, "8">;
@@ -1913,7 +1885,7 @@ class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
           "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
           "$Rn.addr = $wb", []> {
   let Inst{5-4} = Rn{5-4};
-  let DecoderMethod = "DecodeVSTInstruction";
+  let DecoderMethod = "DecodeVLDST4Instruction";
 }

 def VST4d8_UPD  : VST4DWB<0b0000, {0,0,?,?}, "8">;
@@ -2379,6 +2351,40 @@ class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
         (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
         [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

+// Same as above, but not predicated.
+class N2VDIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
+                InstrItinClass itin, string OpcodeStr, string Dt,
+                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+  : N2Vnp<0b10, op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm),
+          itin, OpcodeStr, Dt, ResTy, OpTy,
+          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
+
+class N2VQIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
+                InstrItinClass itin, string OpcodeStr, string Dt,
+                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+  : N2Vnp<0b10, op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm),
+          itin, OpcodeStr, Dt, ResTy, OpTy,
+          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
+
+// Similar to N2VQIntnp with some more encoding bits exposed (crypto).
+class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
+                 bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
+                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+  : N2Vnp<op19_18, op17_16, op10_8, op7, op6, (outs QPR:$Vd), (ins QPR:$Vm),
+          itin, OpcodeStr, Dt, ResTy, OpTy,
+          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
+
+// Same as N2VQIntXnp but with Vd as a src register.
+class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6, + bit op7, InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> + : N2Vnp<op19_18, op17_16, op10_8, op7, op6, + (outs QPR:$Vd), (ins QPR:$src, QPR:$Vm), + itin, OpcodeStr, Dt, ResTy, OpTy, + [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> { + let Constraints = "$src = $Vd"; +} + // Narrow 2-register operations. class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, @@ -2541,6 +2547,16 @@ class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = Commutable; } + +class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, + bit op4, Format f, InstrItinClass itin, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, + SDPatternOperator IntOp, bit Commutable> + : N3Vnp<op27_23, op21_20, op11_8, op6, op4, + (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt, + ResTy, OpTy, IntOp, Commutable, + [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>; + class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp> : N3VLane32<0, 1, op21_20, op11_8, 1, 0, @@ -2552,6 +2568,7 @@ class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, imm:$lane)))))]> { let isCommutable = 0; } + class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp> : N3VLane16<0, 1, op21_20, op11_8, 1, 0, @@ -2584,6 +2601,29 @@ class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = Commutable; } + +class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, + bit op4, Format f, InstrItinClass itin, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, + SDPatternOperator IntOp, bit Commutable> + : N3Vnp<op27_23, op21_20, op11_8, op6, op4, + (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, Dt, + ResTy, OpTy, IntOp, Commutable, + [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>; + +// Same as N3VQIntnp but with Vd as a src register. 
+class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, + bit op4, Format f, InstrItinClass itin, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, + SDPatternOperator IntOp, bit Commutable> + : N3Vnp<op27_23, op21_20, op11_8, op6, op4, + (outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, + Dt, ResTy, OpTy, IntOp, Commutable, + [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vn), + (OpTy QPR:$Vm))))]> { + let Constraints = "$src = $Vd"; +} + class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> @@ -2834,6 +2874,7 @@ class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> { let isCommutable = Commutable; } + class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, SDNode OpNode> @@ -2889,6 +2930,17 @@ class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> { let isCommutable = Commutable; } + +// Same as above, but not predicated. +class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, + bit op4, InstrItinClass itin, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, + SDPatternOperator IntOp, bit Commutable> + : N3Vnp<op27_23, op21_20, op11_8, op6, op4, + (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt, + ResTy, OpTy, IntOp, Commutable, + [(set QPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>; + class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> @@ -3965,12 +4017,18 @@ defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vqadd", "u", int_arm_neon_vqaddu, 1>; // VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q) -defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", - int_arm_neon_vaddhn, 1>; +defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>; // VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q) defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i", int_arm_neon_vraddhn, 1>; +def : Pat<(v8i8 (trunc (NEONvshru (add (v8i16 QPR:$Vn), QPR:$Vm), 8))), + (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>; +def : Pat<(v4i16 (trunc (NEONvshru (add (v4i32 QPR:$Vn), QPR:$Vm), 16))), + (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>; +def : Pat<(v2i32 (trunc (NEONvshru (add (v2i64 QPR:$Vn), QPR:$Vm), 32))), + (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>; + // Vector Multiply Operations. 
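The three patterns above spell out what vaddhn computes now that selection no longer goes through int_arm_neon_vaddhn: each result element is the high half of a wrapping sum of double-width elements, i.e. trunc((Vn + Vm) >> esize). A one-function C++ sketch of the v8i16 -> v8i8 case (ad hoc names, not ARM intrinsics):

#include <cstdint>
#include <cstdio>

// Per-lane vaddhn.i16: the sum wraps at 16 bits, then the high byte is kept.
void vaddhn_v8i8(uint8_t d[8], const uint16_t n[8], const uint16_t m[8]) {
  for (int i = 0; i < 8; ++i)
    d[i] = (uint8_t)((uint16_t)(n[i] + m[i]) >> 8); // high half of wrapped sum
}

int main() {
  uint16_t n[8] = {0x0100, 0xFF00, 0x8000, 0, 1, 2, 3, 4};
  uint16_t m[8] = {0x0100, 0x0200, 0x8000, 0, 1, 2, 3, 4};
  uint8_t d[8];
  vaddhn_v8i8(d, n, m);
  for (int i = 0; i < 8; ++i) printf("%02x ", d[i]); // 02 01 00 00 00 00 00 00
  printf("\n");
}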
// VMUL : Vector Multiply (integer, polynomial and floating-point) @@ -4008,6 +4066,17 @@ def : Pat<(v4f32 (fmul (v4f32 QPR:$src1), (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; + +def : Pat<(v2f32 (fmul DPR:$Rn, (NEONvdup (f32 SPR:$Rm)))), + (VMULslfd DPR:$Rn, + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0), + (i32 0))>; +def : Pat<(v4f32 (fmul QPR:$Rn, (NEONvdup (f32 SPR:$Rm)))), + (VMULslfq QPR:$Rn, + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0), + (i32 0))>; + + // VQDMULH : Vector Saturating Doubling Multiply Returning High Half defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D, IIC_VMULi16Q, IIC_VMULi32Q, @@ -4053,12 +4122,18 @@ def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D) -defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, - "vmull", "s", NEONvmulls, 1>; -defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, - "vmull", "u", NEONvmullu, 1>; -def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8", - v8i16, v8i8, int_arm_neon_vmullp, 1>; +let PostEncoderMethod = "NEONThumb2DataIPostEncoder", + DecoderNamespace = "NEONData" in { + defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, + "vmull", "s", NEONvmulls, 1>; + defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, + "vmull", "u", NEONvmullu, 1>; + def VMULLp8 : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8", + v8i16, v8i8, int_arm_neon_vmullp, 1>; + def VMULLp64 : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary, + "vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>, + Requires<[HasV8, HasCrypto]>; +} defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>; defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>; @@ -4125,8 +4200,27 @@ defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>; // VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D) defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, - "vqdmlal", "s", int_arm_neon_vqdmlal>; -defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>; + "vqdmlal", "s", null_frag>; +defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>; + +def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), + (v4i16 DPR:$Vm))))), + (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>; +def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1), + (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), + (v2i32 DPR:$Vm))))), + (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>; +def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), + (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm), + imm:$lane)))))), + (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>; +def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1), + (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), + (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm), + imm:$lane)))))), + (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>; // VMLS : Vector Multiply Subtract (integer and floating-point) defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, @@ -4182,25 +4276,44 @@ defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>; // VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D) defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, 
IIC_VMACi16D, IIC_VMACi32D, - "vqdmlsl", "s", int_arm_neon_vqdmlsl>; -defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>; + "vqdmlsl", "s", null_frag>; +defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", null_frag>; + +def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), + (v4i16 DPR:$Vm))))), + (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>; +def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1), + (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), + (v2i32 DPR:$Vm))))), + (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>; +def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), + (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm), + imm:$lane)))))), + (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>; +def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1), + (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), + (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm), + imm:$lane)))))), + (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>; // Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations. def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32", v2f32, fmul_su, fadd_mlx>, - Requires<[HasVFP4,UseFusedMAC]>; + Requires<[HasNEON,HasVFP4,UseFusedMAC]>; def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32", v4f32, fmul_su, fadd_mlx>, - Requires<[HasVFP4,UseFusedMAC]>; + Requires<[HasNEON,HasVFP4,UseFusedMAC]>; // Fused Vector Multiply Subtract (floating-point) def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32", v2f32, fmul_su, fsub_mlx>, - Requires<[HasVFP4,UseFusedMAC]>; + Requires<[HasNEON,HasVFP4,UseFusedMAC]>; def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32", v4f32, fmul_su, fsub_mlx>, - Requires<[HasVFP4,UseFusedMAC]>; + Requires<[HasNEON,HasVFP4,UseFusedMAC]>; // Match @llvm.fma.* intrinsics def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)), @@ -4248,12 +4361,18 @@ defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vqsub", "u", int_arm_neon_vqsubu, 0>; // VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q) -defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", - int_arm_neon_vsubhn, 0>; +defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>; // VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q) defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i", int_arm_neon_vrsubhn, 0>; +def : Pat<(v8i8 (trunc (NEONvshru (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))), + (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>; +def : Pat<(v4i16 (trunc (NEONvshru (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))), + (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>; +def : Pat<(v2i32 (trunc (NEONvshru (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))), + (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>; + // Vector Comparisons. 
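The same null_frag-plus-patterns rewrite is applied to vqdmlsl here as to vqdmlal above: both are now selected from the generic saturating add/sub intrinsics wrapped around vqdmull rather than from dedicated intrinsics, exactly as the patterns encode. A one-lane C++ sketch of that decomposition for the s16 -> s32 case (helper names are ad hoc, not ARM intrinsics):

#include <cstdint>
#include <cstdio>

static int32_t sat32(int64_t v) {
  if (v > INT32_MAX) return INT32_MAX;
  if (v < INT32_MIN) return INT32_MIN;
  return (int32_t)v;
}

// Saturating doubling multiply-long: only -32768 * -32768 can overflow.
static int32_t vqdmull_lane(int16_t a, int16_t b) {
  return sat32(2 * (int64_t)a * (int64_t)b);
}

// vqdmlal = vqadds(acc, vqdmull(a, b)); vqdmlsl = vqsubs(acc, vqdmull(a, b)).
int32_t vqdmlal_lane(int32_t acc, int16_t a, int16_t b) {
  return sat32((int64_t)acc + vqdmull_lane(a, b));
}
int32_t vqdmlsl_lane(int32_t acc, int16_t a, int16_t b) {
  return sat32((int64_t)acc - vqdmull_lane(a, b));
}

int main() {
  printf("%d\n", vqdmlal_lane(10, 3, 4));          // 10 + 2*3*4 = 34
  printf("%d\n", vqdmlsl_lane(0, -32768, -32768)); // 0 - INT32_MAX = INT32_MIN+1
}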
// VCEQ : Vector Compare Equal @@ -4659,6 +4778,18 @@ def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ, "vmax", "f32", v4f32, v4f32, int_arm_neon_vmaxs, 1>; +// VMAXNM +let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { + def VMAXNMND : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1, + N3RegFrm, NoItinerary, "vmaxnm", "f32", + v2f32, v2f32, int_arm_neon_vmaxnm, 1>, + Requires<[HasV8, HasNEON]>; + def VMAXNMNQ : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1, + N3RegFrm, NoItinerary, "vmaxnm", "f32", + v4f32, v4f32, int_arm_neon_vmaxnm, 1>, + Requires<[HasV8, HasNEON]>; +} + // VMIN : Vector Minimum defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, @@ -4673,6 +4804,18 @@ def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ, "vmin", "f32", v4f32, v4f32, int_arm_neon_vmins, 1>; +// VMINNM +let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { + def VMINNMND : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1, + N3RegFrm, NoItinerary, "vminnm", "f32", + v2f32, v2f32, int_arm_neon_vminnm, 1>, + Requires<[HasV8, HasNEON]>; + def VMINNMNQ : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1, + N3RegFrm, NoItinerary, "vminnm", "f32", + v4f32, v4f32, int_arm_neon_vminnm, 1>, + Requires<[HasV8, HasNEON]>; +} + // Vector Pairwise Operations. // VPADD : Vector Pairwise Add @@ -5015,10 +5158,10 @@ def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0, // Vector Move Operations. // VMOV : Vector Move (Register) -def : InstAlias<"vmov${p} $Vd, $Vm", - (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; -def : InstAlias<"vmov${p} $Vd, $Vm", - (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmov${p} $Vd, $Vm", + (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmov${p} $Vd, $Vm", + (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; // VMOV : Vector Move (Immediate) @@ -5386,6 +5529,26 @@ def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32", def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32", v4f32, v4i32, uint_to_fp>; +// VCVT{A, N, P, M} +multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS, + SDPatternOperator IntU> { + let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { + def SD : N2VDIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op), + "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>; + def SQ : N2VQIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op), + "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>; + def UD : N2VDIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op), + "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>; + def UQ : N2VQIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op), + "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>; + } +} + +defm VCVTAN : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>; +defm VCVTNN : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>; +defm VCVTPN : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>; +defm VCVTMN : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>; + // VCVT : Vector Convert Between Floating-Point and Fixed-Point. 
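The VCVT_FPI multiclass above instantiates the four ARMv8 float-to-integer conversions that encode a rounding mode directly instead of consulting the FPSCR mode: a = to nearest, ties away from zero; n = to nearest, ties to even; p = towards +infinity; m = towards -infinity. A C++ sketch of the four behaviours for one f32 -> s32 lane (saturation and FP-environment handling omitted):

#include <cmath>
#include <cstdio>

int cvta(float x) { return (int)std::round(x);     } // nearest, ties away
int cvtn(float x) { return (int)std::nearbyint(x); } // nearest, ties to even
                                                     // (default FP environment)
int cvtp(float x) { return (int)std::ceil(x);      } // towards +infinity
int cvtm(float x) { return (int)std::floor(x);     } // towards -infinity

int main() {
  float x = 2.5f;
  printf("a=%d n=%d p=%d m=%d\n", cvta(x), cvtn(x), cvtp(x), cvtm(x));
  // a=3 n=2 p=3 m=2
}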
let DecoderMethod = "DecodeVCVTD" in { def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32", @@ -5409,6 +5572,25 @@ def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32", v4f32, v4i32, int_arm_neon_vcvtfxu2fp>; } +def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0", + (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>; +def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0", + (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>; +def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0", + (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>; +def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0", + (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>; + +def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0", + (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>; +def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0", + (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>; +def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0", + (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>; +def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0", + (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>; + + // VCVT : Vector Convert Between Half-Precision and Single-Precision. def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0, IIC_VUNAQ, "vcvt", "f16.f32", @@ -5509,8 +5691,9 @@ class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy> IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "", [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn), (Ty DPR:$Vm), imm:$index)))]> { - bits<4> index; - let Inst{11-8} = index{3-0}; + bits<3> index; + let Inst{11} = 0b0; + let Inst{10-8} = index{2-0}; } class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy> @@ -5525,14 +5708,14 @@ class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy> } def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> { - let Inst{11-8} = index{3-0}; + let Inst{10-8} = index{2-0}; } def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> { - let Inst{11-9} = index{2-0}; + let Inst{10-9} = index{1-0}; let Inst{8} = 0b0; } def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> { - let Inst{11-10} = index{1-0}; + let Inst{10} = index{0}; let Inst{9-8} = 0b00; } def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn), @@ -5657,6 +5840,77 @@ def VTBX4Pseudo IIC_VTBX4, "$orig = $dst", []>; } // DecoderMethod = "DecodeTBLInstruction" +// VRINT : Vector Rounding +multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> { + let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { + def D : N2VDIntnp<0b10, 0b100, 0, NoItinerary, + !strconcat("vrint", op), "f32", + v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> { + let Inst{9-7} = op9_7; + } + def Q : N2VQIntnp<0b10, 0b100, 0, NoItinerary, + !strconcat("vrint", op), "f32", + v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> { + let Inst{9-7} = op9_7; + } + } + + def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"), + (!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm)>; + def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"), + (!cast<Instruction>(NAME#"Q") QPR:$Qd, QPR:$Qm)>; +} + +defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>; +defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>; +defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>; +defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>; +defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>; +defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>; + +// Cryptography instructions +let PostEncoderMethod = "NEONThumb2DataIPostEncoder", + DecoderNamespace = "v8Crypto" in { + class AES<string op, bit op7, bit op6, SDPatternOperator Int> + : N2VQIntXnp<0b00, 0b00, 0b011, 
op6, op7, NoItinerary, + !strconcat("aes", op), "8", v16i8, v16i8, Int>, + Requires<[HasV8, HasCrypto]>; + class AES2Op<string op, bit op7, bit op6, SDPatternOperator Int> + : N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary, + !strconcat("aes", op), "8", v16i8, v16i8, Int>, + Requires<[HasV8, HasCrypto]>; + class N2SHA<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6, + SDPatternOperator Int> + : N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary, + !strconcat("sha", op), "32", v4i32, v4i32, Int>, + Requires<[HasV8, HasCrypto]>; + class N2SHA2Op<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6, + SDPatternOperator Int> + : N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary, + !strconcat("sha", op), "32", v4i32, v4i32, Int>, + Requires<[HasV8, HasCrypto]>; + class N3SHA3Op<string op, bits<5> op27_23, bits<2> op21_20, SDPatternOperator Int> + : N3VQInt3np<op27_23, op21_20, 0b1100, 1, 0, N3RegFrm, NoItinerary, + !strconcat("sha", op), "32", v4i32, v4i32, Int, 0>, + Requires<[HasV8, HasCrypto]>; +} + +def AESD : AES2Op<"d", 0, 1, int_arm_neon_aesd>; +def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>; +def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>; +def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>; + +def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, int_arm_neon_sha1h>; +def SHA1SU1 : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>; +def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>; +def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, int_arm_neon_sha1c>; +def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, int_arm_neon_sha1m>; +def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, int_arm_neon_sha1p>; +def SHA1SU0 : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>; +def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>; +def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>; +def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>; + //===----------------------------------------------------------------------===// // NEON instructions for single-precision FP math //===----------------------------------------------------------------------===// @@ -6697,12 +6951,17 @@ def VST4qWB_register_Asm_32 : (ins VecListFourQ:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -// VMOV takes an optional datatype suffix +// VMOV/VMVN takes an optional datatype suffix defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm", + (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm", + (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>; + // VCLT (register) is an assembler alias for VCGT w/ the operands reversed. // D-register versions. def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm", diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td index ae7a5c00bd74..af5ef537b536 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td @@ -69,11 +69,6 @@ def thumb_immshifted_shamt : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(V, MVT::i32); }]>; -// ADR instruction labels. -def t_adrlabel : Operand<i32> { - let EncoderMethod = "getThumbAdrLabelOpValue"; -} - // Scaled 4 immediate. 
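All of the AES/SHA definitions above use the tied $src = $Vd forms, since the crypto instructions update their destination in place. The polynomial ("p8"/"p64") multiplies gated by the same HasCrypto predicate, including the vmull.p64 added earlier in this file, are carry-less: partial products are combined with XOR rather than addition. A C++ sketch of one 8x8 -> 16-bit lane (illustrative only):

#include <cstdint>
#include <cstdio>

// Carry-less (polynomial) multiply: XOR the shifted partial products.
// vmull.p8/p64 apply the same rule per lane at their respective widths.
uint16_t pmull8(uint8_t a, uint8_t b) {
  uint16_t r = 0;
  for (int i = 0; i < 8; ++i)
    if (b & (1u << i))
      r ^= (uint16_t)a << i;
  return r;
}

int main() {
  // In GF(2)[x], multiplication distributes over XOR:
  // pmull8(a, b ^ c) == pmull8(a, b) ^ pmull8(a, c).
  uint8_t a = 0x53, b = 0xCA, c = 0x2F;
  printf("%d\n", pmull8(a, (uint8_t)(b ^ c)) ==
                 (uint16_t)(pmull8(a, b) ^ pmull8(a, c))); // 1
}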
def t_imm0_1020s4_asmoperand: AsmOperandClass { let Name = "Imm0_1020s4"; } def t_imm0_1020s4 : Operand<i32> { @@ -97,12 +92,34 @@ def t_imm0_508s4_neg : Operand<i32> { // Define Thumb specific addressing modes. +// unsigned 8-bit, 2-scaled memory offset +class OperandUnsignedOffset_b8s2 : AsmOperandClass { + let Name = "UnsignedOffset_b8s2"; + let PredicateMethod = "isUnsignedOffset<8, 2>"; +} + +def UnsignedOffset_b8s2 : OperandUnsignedOffset_b8s2; + +// thumb style PC relative operand. signed, 8 bits magnitude, +// two bits shift. can be represented as either [pc, #imm], #imm, +// or relocatable expression... +def ThumbMemPC : AsmOperandClass { + let Name = "ThumbMemPC"; +} + let OperandType = "OPERAND_PCREL" in { def t_brtarget : Operand<OtherVT> { let EncoderMethod = "getThumbBRTargetOpValue"; let DecoderMethod = "DecodeThumbBROperand"; } +// ADR instruction labels. +def t_adrlabel : Operand<i32> { + let EncoderMethod = "getThumbAdrLabelOpValue"; + let PrintMethod = "printAdrLabelOperand<2>"; + let ParserMatchClass = UnsignedOffset_b8s2; +} + def t_bcctarget : Operand<i32> { let EncoderMethod = "getThumbBCCTargetOpValue"; let DecoderMethod = "DecodeThumbBCCTargetOperand"; @@ -122,6 +139,15 @@ def t_blxtarget : Operand<i32> { let EncoderMethod = "getThumbBLXTargetOpValue"; let DecoderMethod = "DecodeThumbBLXOffset"; } + +// t_addrmode_pc := <label> => pc + imm8 * 4 +// +def t_addrmode_pc : Operand<i32> { + let EncoderMethod = "getAddrModePCOpValue"; + let DecoderMethod = "DecodeThumbAddrModePC"; + let PrintMethod = "printThumbLdrLabelOperand"; + let ParserMatchClass = ThumbMemPC; +} } // t_addrmode_rr := reg + reg @@ -218,14 +244,6 @@ def t_addrmode_sp : Operand<i32>, let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); } -// t_addrmode_pc := <label> => pc + imm8 * 4 -// -def t_addrmode_pc : Operand<i32> { - let EncoderMethod = "getAddrModePCOpValue"; - let DecoderMethod = "DecodeThumbAddrModePC"; - let PrintMethod = "printThumbLdrLabelOperand"; -} - //===----------------------------------------------------------------------===// // Miscellaneous Instructions. 
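The relocated t_adrlabel and new t_addrmode_pc definitions above both describe the same Thumb1 PC-relative form: an unsigned 8-bit offset, scaled by 4, applied to the word-aligned PC (which reads as the instruction address plus 4). A C++ sketch of that address computation, matching the "pc + imm8 * 4" comment above (illustrative helper, not an LLVM API):

#include <cstdint>
#include <cstdio>

// Thumb LDR(literal)/ADR target: Align(PC, 4) + imm8 * 4.
uint32_t thumbPcRelAddr(uint32_t instAddr, uint8_t imm8) {
  uint32_t pc = instAddr + 4;        // PC value visible to the instruction
  uint32_t base = pc & ~3u;          // Align(PC, 4)
  return base + (uint32_t)imm8 * 4;  // unsigned offset, two bits shift
}

int main() {
  printf("0x%08x\n", thumbPcRelAddr(0x8002, 3)); // 0x8004 + 12 = 0x00008010
}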
// @@ -251,25 +269,26 @@ class T1SystemEncoding<bits<8> opc> let Inst{7-0} = opc; } -def tNOP : T1pI<(outs), (ins), NoItinerary, "nop", "", []>, - T1SystemEncoding<0x00>, // A8.6.110 - Requires<[IsThumb2]>; - -def tYIELD : T1pI<(outs), (ins), NoItinerary, "yield", "", []>, - T1SystemEncoding<0x10>, // A8.6.410 - Requires<[IsThumb2]>; - -def tWFE : T1pI<(outs), (ins), NoItinerary, "wfe", "", []>, - T1SystemEncoding<0x20>, // A8.6.408 - Requires<[IsThumb2]>; +def tHINT : T1pI<(outs), (ins imm0_15:$imm), NoItinerary, "hint", "\t$imm", []>, + T1SystemEncoding<0x00>, + Requires<[IsThumb, HasV6M]> { + bits<4> imm; + let Inst{7-4} = imm; +} -def tWFI : T1pI<(outs), (ins), NoItinerary, "wfi", "", []>, - T1SystemEncoding<0x30>, // A8.6.409 - Requires<[IsThumb2]>; +class tHintAlias<string Asm, dag Result> : tInstAlias<Asm, Result> { + let Predicates = [IsThumb, HasV6M]; +} -def tSEV : T1pI<(outs), (ins), NoItinerary, "sev", "", []>, - T1SystemEncoding<0x40>, // A8.6.157 - Requires<[IsThumb2]>; +def : tHintAlias<"nop$p", (tHINT 0, pred:$p)>; // A8.6.110 +def : tHintAlias<"yield$p", (tHINT 1, pred:$p)>; // A8.6.410 +def : tHintAlias<"wfe$p", (tHINT 2, pred:$p)>; // A8.6.408 +def : tHintAlias<"wfi$p", (tHINT 3, pred:$p)>; // A8.6.409 +def : tHintAlias<"sev$p", (tHINT 4, pred:$p)>; // A8.6.157 +def : tInstAlias<"sevl$p", (tHINT 5, pred:$p)> { + let Predicates = [IsThumb2, HasV8]; +} +def : T2Pat<(int_arm_sevl), (tHINT 5)>; // The imm operand $val can be used by a debugger to store more information // about the breakpoint. @@ -282,8 +301,15 @@ def tBKPT : T1I<(outs), (ins imm0_255:$val), NoItinerary, "bkpt\t$val", let Inst{7-0} = val; } +def tHLT : T1I<(outs), (ins imm0_63:$val), NoItinerary, "hlt\t$val", + []>, T1Encoding<0b101110>, Requires<[IsThumb, HasV8]> { + let Inst{9-6} = 0b1010; + bits<6> val; + let Inst{5-0} = val; +} + def tSETEND : T1I<(outs), (ins setend_op:$end), NoItinerary, "setend\t$end", - []>, T1Encoding<0b101101> { + []>, T1Encoding<0b101101>, Deprecated<HasV8Ops> { bits<1> end; // A8.6.156 let Inst{9-5} = 0b10010; @@ -310,7 +336,7 @@ def tCPS : T1I<(outs), (ins imod_op:$imod, iflags_op:$iflags), let isNotDuplicable = 1, isCodeGenOnly = 1 in def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr, "", [(set GPR:$dst, (ARMpic_add GPR:$lhs, imm:$cp))]>, - T1Special<{0,0,?,?}> { + T1Special<{0,0,?,?}>, Sched<[WriteALU]> { // A8.6.6 bits<3> dst; let Inst{6-3} = 0b1111; // Rm = pc @@ -323,7 +349,7 @@ def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr, "", // probably because the instruction can be moved around. def tADDrSPi : T1pI<(outs tGPR:$dst), (ins GPRsp:$sp, t_imm0_1020s4:$imm), IIC_iALUi, "add", "\t$dst, $sp, $imm", []>, - T1Encoding<{1,0,1,0,1,?}> { + T1Encoding<{1,0,1,0,1,?}>, Sched<[WriteALU]> { // A6.2 & A8.6.8 bits<3> dst; bits<8> imm; @@ -335,7 +361,7 @@ def tADDrSPi : T1pI<(outs tGPR:$dst), (ins GPRsp:$sp, t_imm0_1020s4:$imm), // ADD sp, sp, #<imm7> def tADDspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm), IIC_iALUi, "add", "\t$Rdn, $imm", []>, - T1Misc<{0,0,0,0,0,?,?}> { + T1Misc<{0,0,0,0,0,?,?}>, Sched<[WriteALU]> { // A6.2.5 & A8.6.8 bits<7> imm; let Inst{6-0} = imm; @@ -346,7 +372,7 @@ def tADDspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm), // FIXME: The encoding and the ASM string don't match up. 
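The tHINT change above folds the separate tNOP/tYIELD/tWFE/tWFI/tSEV definitions into a single instruction carrying the hint number in bits 7-4 of the 16-bit 0xBF00 encoding, with the old mnemonics kept as aliases for hint numbers 0-4 (and sevl as 5 on v8). A C++ sketch of the resulting encodings (illustrative helper):

#include <cstdint>
#include <cstdio>

// T1 hint encoding: 0xBF00 with the hint number in Inst{7-4}.
uint16_t encodeThumbHint(unsigned imm /* 0..15 */) {
  return (uint16_t)(0xBF00 | ((imm & 0xF) << 4));
}

int main() {
  printf("nop=%04x yield=%04x wfe=%04x wfi=%04x sev=%04x sevl=%04x\n",
         encodeThumbHint(0), encodeThumbHint(1), encodeThumbHint(2),
         encodeThumbHint(3), encodeThumbHint(4), encodeThumbHint(5));
  // nop=bf00 yield=bf10 wfe=bf20 wfi=bf30 sev=bf40 sevl=bf50
}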
def tSUBspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm), IIC_iALUi, "sub", "\t$Rdn, $imm", []>, - T1Misc<{0,0,0,0,1,?,?}> { + T1Misc<{0,0,0,0,1,?,?}>, Sched<[WriteALU]> { // A6.2.5 & A8.6.214 bits<7> imm; let Inst{6-0} = imm; @@ -367,7 +393,7 @@ def : tInstAlias<"sub${p} sp, sp, $imm", // ADD <Rm>, sp def tADDrSP : T1pI<(outs GPR:$Rdn), (ins GPRsp:$sp, GPR:$Rn), IIC_iALUr, "add", "\t$Rdn, $sp, $Rn", []>, - T1Special<{0,0,?,?}> { + T1Special<{0,0,?,?}>, Sched<[WriteALU]> { // A8.6.9 Encoding T1 bits<4> Rdn; let Inst{7} = Rdn{3}; @@ -379,7 +405,7 @@ def tADDrSP : T1pI<(outs GPR:$Rdn), (ins GPRsp:$sp, GPR:$Rn), IIC_iALUr, // ADD sp, <Rm> def tADDspr : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, GPR:$Rm), IIC_iALUr, "add", "\t$Rdn, $Rm", []>, - T1Special<{0,0,?,?}> { + T1Special<{0,0,?,?}>, Sched<[WriteALU]> { // A8.6.9 Encoding T2 bits<4> Rm; let Inst{7} = 1; @@ -395,7 +421,7 @@ def tADDspr : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, GPR:$Rm), IIC_iALUr, // Indirect branches let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def tBX : TI<(outs), (ins GPR:$Rm, pred:$p), IIC_Br, "bx${p}\t$Rm", []>, - T1Special<{1,1,0,?}> { + T1Special<{1,1,0,?}>, Sched<[WriteBr]> { // A6.2.3 & A8.6.25 bits<4> Rm; let Inst{6-3} = Rm; @@ -406,12 +432,12 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { let isReturn = 1, isTerminator = 1, isBarrier = 1 in { def tBX_RET : tPseudoExpand<(outs), (ins pred:$p), 2, IIC_Br, - [(ARMretflag)], (tBX LR, pred:$p)>; + [(ARMretflag)], (tBX LR, pred:$p)>, Sched<[WriteBr]>; // Alternative return instruction used by vararg functions. def tBX_RET_vararg : tPseudoExpand<(outs), (ins tGPR:$Rm, pred:$p), 2, IIC_Br, [], - (tBX GPR:$Rm, pred:$p)>; + (tBX GPR:$Rm, pred:$p)>, Sched<[WriteBr]>; } // All calls clobber the non-callee saved registers. SP is marked as a use to @@ -424,7 +450,7 @@ let isCall = 1, (outs), (ins pred:$p, t_bltarget:$func), IIC_Br, "bl${p}\t$func", [(ARMtcall tglobaladdr:$func)]>, - Requires<[IsThumb]> { + Requires<[IsThumb]>, Sched<[WriteBrL]> { bits<24> func; let Inst{26} = func{23}; let Inst{25-16} = func{20-11}; @@ -438,7 +464,7 @@ let isCall = 1, (outs), (ins pred:$p, t_blxtarget:$func), IIC_Br, "blx${p}\t$func", [(ARMcall tglobaladdr:$func)]>, - Requires<[IsThumb, HasV5T]> { + Requires<[IsThumb, HasV5T]>, Sched<[WriteBrL]> { bits<24> func; let Inst{26} = func{23}; let Inst{25-16} = func{20-11}; @@ -453,7 +479,7 @@ let isCall = 1, "blx${p}\t$func", [(ARMtcall GPR:$func)]>, Requires<[IsThumb, HasV5T]>, - T1Special<{1,1,1,?}> { // A6.2.3 & A8.6.24; + T1Special<{1,1,1,?}>, Sched<[WriteBrL]> { // A6.2.3 & A8.6.24; bits<4> func; let Inst{6-3} = func; let Inst{2-0} = 0b000; @@ -463,29 +489,32 @@ let isCall = 1, def tBX_CALL : tPseudoInst<(outs), (ins tGPR:$func), 4, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsThumb, IsThumb1Only]>; + Requires<[IsThumb, IsThumb1Only]>, Sched<[WriteBr]>; } let isBranch = 1, isTerminator = 1, isBarrier = 1 in { let isPredicable = 1 in def tB : T1pI<(outs), (ins t_brtarget:$target), IIC_Br, "b", "\t$target", [(br bb:$target)]>, - T1Encoding<{1,1,1,0,0,?}> { + T1Encoding<{1,1,1,0,0,?}>, Sched<[WriteBr]> { bits<11> target; let Inst{10-0} = target; - } + let AsmMatchConverter = "cvtThumbBranches"; + } // Far jump // Just a pseudo for a tBL instruction. Needed to let regalloc know about // the clobber of LR. 
let Defs = [LR] in def tBfar : tPseudoExpand<(outs), (ins t_bltarget:$target, pred:$p), - 4, IIC_Br, [], (tBL pred:$p, t_bltarget:$target)>; + 4, IIC_Br, [], (tBL pred:$p, t_bltarget:$target)>, + Sched<[WriteBrTbl]>; def tBR_JTr : tPseudoInst<(outs), (ins tGPR:$target, i32imm:$jt, i32imm:$id), 0, IIC_Br, - [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]> { + [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]>, + Sched<[WriteBrTbl]> { list<Predicate> Predicates = [IsThumb, IsThumb1Only]; } } @@ -496,13 +525,15 @@ let isBranch = 1, isTerminator = 1 in def tBcc : T1I<(outs), (ins t_bcctarget:$target, pred:$p), IIC_Br, "b${p}\t$target", [/*(ARMbrcond bb:$target, imm:$cc)*/]>, - T1BranchCond<{1,1,0,1}> { + T1BranchCond<{1,1,0,1}>, Sched<[WriteBr]> { bits<4> p; bits<8> target; let Inst{11-8} = p; let Inst{7-0} = target; + let AsmMatchConverter = "cvtThumbBranches"; } + // Tail calls let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { // IOS versions. @@ -510,7 +541,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { def tTAILJMPr : tPseudoExpand<(outs), (ins tcGPR:$dst), 4, IIC_Br, [], (tBX GPR:$dst, (ops 14, zero_reg))>, - Requires<[IsThumb]>; + Requires<[IsThumb]>, Sched<[WriteBr]>; } // tTAILJMPd: IOS version uses a Thumb2 branch (no Thumb1 tail calls // on IOS), so it's in ARMInstrThumb2.td. @@ -520,7 +551,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { (ins t_brtarget:$dst, pred:$p), 4, IIC_Br, [], (tB t_brtarget:$dst, pred:$p)>, - Requires<[IsThumb, IsNotIOS]>; + Requires<[IsThumb, IsNotIOS]>, Sched<[WriteBr]>; } } @@ -530,7 +561,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { // If Inst{11-8} == 0b1111 then SEE SVC let isCall = 1, Uses = [SP] in def tSVC : T1pI<(outs), (ins imm0_255:$imm), IIC_Br, - "svc", "\t$imm", []>, Encoding16 { + "svc", "\t$imm", []>, Encoding16, Sched<[WriteBr]> { bits<8> imm; let Inst{15-12} = 0b1101; let Inst{11-8} = 0b1111; @@ -540,7 +571,7 @@ def tSVC : T1pI<(outs), (ins imm0_255:$imm), IIC_Br, // The assembler uses 0xDEFE for a trap instruction. let isBarrier = 1, isTerminator = 1 in def tTRAP : TI<(outs), (ins), IIC_Br, - "trap", [(trap)]>, Encoding16 { + "trap", [(trap)]>, Encoding16, Sched<[WriteBr]> { let Inst = 0xdefe; } @@ -627,11 +658,9 @@ def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i, let Inst{7-0} = addr; } -// Load tconstpool -// FIXME: Use ldr.n to work around a darwin assembler bug. -let canFoldAsLoad = 1, isReMaterializable = 1, isCodeGenOnly = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i, - "ldr", ".n\t$Rt, $addr", + "ldr", "\t$Rt, $addr", [(set tGPR:$Rt, (load (ARMWrapper tconstpool:$addr)))]>, T1Encoding<{0,1,0,0,1,?}> { // A6.2 & A8.6.59 @@ -641,18 +670,6 @@ def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i, let Inst{7-0} = addr; } -// FIXME: Remove this entry when the above ldr.n workaround is fixed. -// For assembly/disassembly use only. 
-def tLDRpciASM : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i, - "ldr", "\t$Rt, $addr", []>, - T1Encoding<{0,1,0,0,1,?}> { - // A6.2 & A8.6.59 - bits<3> Rt; - bits<8> addr; - let Inst{10-8} = Rt; - let Inst{7-0} = addr; -} - // A8.6.194 & A8.6.192 defm tSTR : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rrs4, t_addrmode_is4, AddrModeT1_4, @@ -833,14 +850,15 @@ let isCommutable = 1, Uses = [CPSR] in def tADC : // A8.6.2 T1sItDPEncode<0b0101, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iALUr, "adc", "\t$Rdn, $Rm", - [(set tGPR:$Rdn, (adde tGPR:$Rn, tGPR:$Rm))]>; + [(set tGPR:$Rdn, (adde tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>; // Add immediate def tADDi3 : // A8.6.4 T1 T1sIGenEncodeImm<0b01110, (outs tGPR:$Rd), (ins tGPR:$Rm, imm0_7:$imm3), IIC_iALUi, "add", "\t$Rd, $Rm, $imm3", - [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7:$imm3))]> { + [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7:$imm3))]>, + Sched<[WriteALU]> { bits<3> imm3; let Inst{8-6} = imm3; } @@ -849,7 +867,8 @@ def tADDi8 : // A8.6.4 T2 T1sItGenEncodeImm<{1,1,0,?,?}, (outs tGPR:$Rdn), (ins tGPR:$Rn, imm0_255:$imm8), IIC_iALUi, "add", "\t$Rdn, $imm8", - [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255:$imm8))]>; + [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255:$imm8))]>, + Sched<[WriteALU]>; // Add register let isCommutable = 1 in @@ -857,12 +876,12 @@ def tADDrr : // A8.6.6 T1 T1sIGenEncode<0b01100, (outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm), IIC_iALUr, "add", "\t$Rd, $Rn, $Rm", - [(set tGPR:$Rd, (add tGPR:$Rn, tGPR:$Rm))]>; + [(set tGPR:$Rd, (add tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>; let neverHasSideEffects = 1 in def tADDhirr : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPR:$Rm), IIC_iALUr, "add", "\t$Rdn, $Rm", []>, - T1Special<{0,0,?,?}> { + T1Special<{0,0,?,?}>, Sched<[WriteALU]> { // A8.6.6 T2 bits<4> Rdn; bits<4> Rm; @@ -877,14 +896,15 @@ def tAND : // A8.6.12 T1sItDPEncode<0b0000, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iBITr, "and", "\t$Rdn, $Rm", - [(set tGPR:$Rdn, (and tGPR:$Rn, tGPR:$Rm))]>; + [(set tGPR:$Rdn, (and tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>; // ASR immediate def tASRri : // A8.6.14 T1sIGenEncodeImm<{0,1,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, imm_sr:$imm5), IIC_iMOVsi, "asr", "\t$Rd, $Rm, $imm5", - [(set tGPR:$Rd, (sra tGPR:$Rm, (i32 imm_sr:$imm5)))]> { + [(set tGPR:$Rd, (sra tGPR:$Rm, (i32 imm_sr:$imm5)))]>, + Sched<[WriteALU]> { bits<5> imm5; let Inst{10-6} = imm5; } @@ -894,14 +914,15 @@ def tASRrr : // A8.6.15 T1sItDPEncode<0b0100, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iMOVsr, "asr", "\t$Rdn, $Rm", - [(set tGPR:$Rdn, (sra tGPR:$Rn, tGPR:$Rm))]>; + [(set tGPR:$Rdn, (sra tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>; // BIC register def tBIC : // A8.6.20 T1sItDPEncode<0b1110, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iBITr, "bic", "\t$Rdn, $Rm", - [(set tGPR:$Rdn, (and tGPR:$Rn, (not tGPR:$Rm)))]>; + [(set tGPR:$Rdn, (and tGPR:$Rn, (not tGPR:$Rm)))]>, + Sched<[WriteALU]>; // CMN register let isCompare = 1, Defs = [CPSR] in { @@ -917,7 +938,7 @@ def tCMNz : // A8.6.33 T1pIDPEncode<0b1011, (outs), (ins tGPR:$Rn, tGPR:$Rm), IIC_iCMPr, "cmn", "\t$Rn, $Rm", - [(ARMcmpZ tGPR:$Rn, (ineg tGPR:$Rm))]>; + [(ARMcmpZ tGPR:$Rn, (ineg tGPR:$Rm))]>, Sched<[WriteCMP]>; } // isCompare = 1, Defs = [CPSR] @@ -926,7 +947,7 @@ let isCompare = 1, Defs = [CPSR] in { def tCMPi8 : T1pI<(outs), (ins tGPR:$Rn, imm0_255:$imm8), IIC_iCMPi, "cmp", "\t$Rn, $imm8", [(ARMcmp tGPR:$Rn, imm0_255:$imm8)]>, - T1General<{1,0,1,?,?}> { + T1General<{1,0,1,?,?}>, Sched<[WriteCMP]> { // A8.6.35 bits<3> Rn; bits<8> imm8; @@ 
-939,11 +960,11 @@ def tCMPr : // A8.6.36 T1 T1pIDPEncode<0b1010, (outs), (ins tGPR:$Rn, tGPR:$Rm), IIC_iCMPr, "cmp", "\t$Rn, $Rm", - [(ARMcmp tGPR:$Rn, tGPR:$Rm)]>; + [(ARMcmp tGPR:$Rn, tGPR:$Rm)]>, Sched<[WriteCMP]>; def tCMPhir : T1pI<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_iCMPr, "cmp", "\t$Rn, $Rm", []>, - T1Special<{0,1,?,?}> { + T1Special<{0,1,?,?}>, Sched<[WriteCMP]> { // A8.6.36 T2 bits<4> Rm; bits<4> Rn; @@ -960,14 +981,15 @@ def tEOR : // A8.6.45 T1sItDPEncode<0b0001, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iBITr, "eor", "\t$Rdn, $Rm", - [(set tGPR:$Rdn, (xor tGPR:$Rn, tGPR:$Rm))]>; + [(set tGPR:$Rdn, (xor tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>; // LSL immediate def tLSLri : // A8.6.88 T1sIGenEncodeImm<{0,0,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, imm0_31:$imm5), IIC_iMOVsi, "lsl", "\t$Rd, $Rm, $imm5", - [(set tGPR:$Rd, (shl tGPR:$Rm, (i32 imm:$imm5)))]> { + [(set tGPR:$Rd, (shl tGPR:$Rm, (i32 imm:$imm5)))]>, + Sched<[WriteALU]> { bits<5> imm5; let Inst{10-6} = imm5; } @@ -977,14 +999,15 @@ def tLSLrr : // A8.6.89 T1sItDPEncode<0b0010, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iMOVsr, "lsl", "\t$Rdn, $Rm", - [(set tGPR:$Rdn, (shl tGPR:$Rn, tGPR:$Rm))]>; + [(set tGPR:$Rdn, (shl tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>; // LSR immediate def tLSRri : // A8.6.90 T1sIGenEncodeImm<{0,0,1,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, imm_sr:$imm5), IIC_iMOVsi, "lsr", "\t$Rd, $Rm, $imm5", - [(set tGPR:$Rd, (srl tGPR:$Rm, (i32 imm_sr:$imm5)))]> { + [(set tGPR:$Rd, (srl tGPR:$Rm, (i32 imm_sr:$imm5)))]>, + Sched<[WriteALU]> { bits<5> imm5; let Inst{10-6} = imm5; } @@ -994,14 +1017,14 @@ def tLSRrr : // A8.6.91 T1sItDPEncode<0b0011, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iMOVsr, "lsr", "\t$Rdn, $Rm", - [(set tGPR:$Rdn, (srl tGPR:$Rn, tGPR:$Rm))]>; + [(set tGPR:$Rdn, (srl tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>; // Move register let isMoveImm = 1 in def tMOVi8 : T1sI<(outs tGPR:$Rd), (ins imm0_255:$imm8), IIC_iMOVi, "mov", "\t$Rd, $imm8", [(set tGPR:$Rd, imm0_255:$imm8)]>, - T1General<{1,0,0,?,?}> { + T1General<{1,0,0,?,?}>, Sched<[WriteALU]> { // A8.6.96 bits<3> Rd; bits<8> imm8; @@ -1019,7 +1042,7 @@ let neverHasSideEffects = 1 in { def tMOVr : Thumb1pI<(outs GPR:$Rd), (ins GPR:$Rm), AddrModeNone, 2, IIC_iMOVr, "mov", "\t$Rd, $Rm", "", []>, - T1Special<{1,0,?,?}> { + T1Special<{1,0,?,?}>, Sched<[WriteALU]> { // A8.6.97 bits<4> Rd; bits<4> Rm; @@ -1029,7 +1052,7 @@ def tMOVr : Thumb1pI<(outs GPR:$Rd), (ins GPR:$Rm), AddrModeNone, } let Defs = [CPSR] in def tMOVSr : T1I<(outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iMOVr, - "movs\t$Rd, $Rm", []>, Encoding16 { + "movs\t$Rd, $Rm", []>, Encoding16, Sched<[WriteALU]> { // A8.6.97 bits<3> Rd; bits<3> Rm; @@ -1060,7 +1083,7 @@ def :tInstAlias<"mul${s}${p} $Rdm, $Rn", (tMUL tGPR:$Rdm, s_cc_out:$s, tGPR:$Rn, def tMVN : // A8.6.107 T1sIDPEncode<0b1111, (outs tGPR:$Rd), (ins tGPR:$Rn), IIC_iMVNr, "mvn", "\t$Rd, $Rn", - [(set tGPR:$Rd, (not tGPR:$Rn))]>; + [(set tGPR:$Rd, (not tGPR:$Rn))]>, Sched<[WriteALU]>; // Bitwise or register let isCommutable = 1 in @@ -1068,7 +1091,7 @@ def tORR : // A8.6.114 T1sItDPEncode<0b1100, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iBITr, "orr", "\t$Rdn, $Rm", - [(set tGPR:$Rdn, (or tGPR:$Rn, tGPR:$Rm))]>; + [(set tGPR:$Rdn, (or tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>; // Swaps def tREV : // A8.6.134 @@ -1076,35 +1099,36 @@ def tREV : // A8.6.134 IIC_iUNAr, "rev", "\t$Rd, $Rm", [(set tGPR:$Rd, (bswap tGPR:$Rm))]>, - Requires<[IsThumb, IsThumb1Only, HasV6]>; + Requires<[IsThumb, IsThumb1Only, HasV6]>, 
Sched<[WriteALU]>; def tREV16 : // A8.6.135 T1pIMiscEncode<{1,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iUNAr, "rev16", "\t$Rd, $Rm", [(set tGPR:$Rd, (rotr (bswap tGPR:$Rm), (i32 16)))]>, - Requires<[IsThumb, IsThumb1Only, HasV6]>; + Requires<[IsThumb, IsThumb1Only, HasV6]>, Sched<[WriteALU]>; def tREVSH : // A8.6.136 T1pIMiscEncode<{1,0,1,0,1,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iUNAr, "revsh", "\t$Rd, $Rm", [(set tGPR:$Rd, (sra (bswap tGPR:$Rm), (i32 16)))]>, - Requires<[IsThumb, IsThumb1Only, HasV6]>; + Requires<[IsThumb, IsThumb1Only, HasV6]>, Sched<[WriteALU]>; // Rotate right register def tROR : // A8.6.139 T1sItDPEncode<0b0111, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iMOVsr, "ror", "\t$Rdn, $Rm", - [(set tGPR:$Rdn, (rotr tGPR:$Rn, tGPR:$Rm))]>; + [(set tGPR:$Rdn, (rotr tGPR:$Rn, tGPR:$Rm))]>, + Sched<[WriteALU]>; // Negate register def tRSB : // A8.6.141 T1sIDPEncode<0b1001, (outs tGPR:$Rd), (ins tGPR:$Rn), IIC_iALUi, "rsb", "\t$Rd, $Rn, #0", - [(set tGPR:$Rd, (ineg tGPR:$Rn))]>; + [(set tGPR:$Rd, (ineg tGPR:$Rn))]>, Sched<[WriteALU]>; // Subtract with carry register let Uses = [CPSR] in @@ -1112,14 +1136,16 @@ def tSBC : // A8.6.151 T1sItDPEncode<0b0110, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iALUr, "sbc", "\t$Rdn, $Rm", - [(set tGPR:$Rdn, (sube tGPR:$Rn, tGPR:$Rm))]>; + [(set tGPR:$Rdn, (sube tGPR:$Rn, tGPR:$Rm))]>, + Sched<[WriteALU]>; // Subtract immediate def tSUBi3 : // A8.6.210 T1 T1sIGenEncodeImm<0b01111, (outs tGPR:$Rd), (ins tGPR:$Rm, imm0_7:$imm3), IIC_iALUi, "sub", "\t$Rd, $Rm, $imm3", - [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7_neg:$imm3))]> { + [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7_neg:$imm3))]>, + Sched<[WriteALU]> { bits<3> imm3; let Inst{8-6} = imm3; } @@ -1128,14 +1154,16 @@ def tSUBi8 : // A8.6.210 T2 T1sItGenEncodeImm<{1,1,1,?,?}, (outs tGPR:$Rdn), (ins tGPR:$Rn, imm0_255:$imm8), IIC_iALUi, "sub", "\t$Rdn, $imm8", - [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255_neg:$imm8))]>; + [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255_neg:$imm8))]>, + Sched<[WriteALU]>; // Subtract register def tSUBrr : // A8.6.212 T1sIGenEncode<0b01101, (outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm), IIC_iALUr, "sub", "\t$Rd, $Rn, $Rm", - [(set tGPR:$Rd, (sub tGPR:$Rn, tGPR:$Rm))]>; + [(set tGPR:$Rd, (sub tGPR:$Rn, tGPR:$Rm))]>, + Sched<[WriteALU]>; // Sign-extend byte def tSXTB : // A8.6.222 @@ -1143,7 +1171,8 @@ def tSXTB : // A8.6.222 IIC_iUNAr, "sxtb", "\t$Rd, $Rm", [(set tGPR:$Rd, (sext_inreg tGPR:$Rm, i8))]>, - Requires<[IsThumb, IsThumb1Only, HasV6]>; + Requires<[IsThumb, IsThumb1Only, HasV6]>, + Sched<[WriteALU]>; // Sign-extend short def tSXTH : // A8.6.224 @@ -1151,14 +1180,16 @@ def tSXTH : // A8.6.224 IIC_iUNAr, "sxth", "\t$Rd, $Rm", [(set tGPR:$Rd, (sext_inreg tGPR:$Rm, i16))]>, - Requires<[IsThumb, IsThumb1Only, HasV6]>; + Requires<[IsThumb, IsThumb1Only, HasV6]>, + Sched<[WriteALU]>; // Test let isCompare = 1, isCommutable = 1, Defs = [CPSR] in def tTST : // A8.6.230 T1pIDPEncode<0b1000, (outs), (ins tGPR:$Rn, tGPR:$Rm), IIC_iTSTr, "tst", "\t$Rn, $Rm", - [(ARMcmpZ (and_su tGPR:$Rn, tGPR:$Rm), 0)]>; + [(ARMcmpZ (and_su tGPR:$Rn, tGPR:$Rm), 0)]>, + Sched<[WriteALU]>; // Zero-extend byte def tUXTB : // A8.6.262 @@ -1166,7 +1197,8 @@ def tUXTB : // A8.6.262 IIC_iUNAr, "uxtb", "\t$Rd, $Rm", [(set tGPR:$Rd, (and tGPR:$Rm, 0xFF))]>, - Requires<[IsThumb, IsThumb1Only, HasV6]>; + Requires<[IsThumb, IsThumb1Only, HasV6]>, + Sched<[WriteALU]>; // Zero-extend short def tUXTH : // A8.6.264 @@ -1174,22 +1206,22 @@ def tUXTH : // A8.6.264 IIC_iUNAr, "uxth", "\t$Rd, $Rm", [(set 
tGPR:$Rd, (and tGPR:$Rm, 0xFFFF))]>, - Requires<[IsThumb, IsThumb1Only, HasV6]>; + Requires<[IsThumb, IsThumb1Only, HasV6]>, Sched<[WriteALU]>; // Conditional move tMOVCCr - Used to implement the Thumb SELECT_CC operation. // Expanded after instruction selection into a branch sequence. let usesCustomInserter = 1 in // Expanded after instruction selection. def tMOVCCr_pseudo : - PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, pred:$cc), - NoItinerary, - [/*(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, imm:$cc))*/]>; + PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, cmovpred:$p), + NoItinerary, + [(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, cmovpred:$p))]>; // tLEApcrel - Load a pc-relative address into a register without offending the // assembler. def tADR : T1I<(outs tGPR:$Rd), (ins t_adrlabel:$addr, pred:$p), IIC_iALUi, "adr{$p}\t$Rd, $addr", []>, - T1Encoding<{1,0,1,0,0,?}> { + T1Encoding<{1,0,1,0,0,?}>, Sched<[WriteALU]> { bits<3> Rd; bits<8> addr; let Inst{10-8} = Rd; @@ -1199,12 +1231,12 @@ def tADR : T1I<(outs tGPR:$Rd), (ins t_adrlabel:$addr, pred:$p), let neverHasSideEffects = 1, isReMaterializable = 1 in def tLEApcrel : tPseudoInst<(outs tGPR:$Rd), (ins i32imm:$label, pred:$p), - 2, IIC_iALUi, []>; + 2, IIC_iALUi, []>, Sched<[WriteALU]>; let hasSideEffects = 1 in def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd), (ins i32imm:$label, nohash_imm:$id, pred:$p), - 2, IIC_iALUi, []>; + 2, IIC_iALUi, []>, Sched<[WriteALU]>; //===----------------------------------------------------------------------===// // TLS Instructions @@ -1215,7 +1247,8 @@ def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd), // complete with fixup for the aeabi_read_tp function. let isCall = 1, Defs = [R0, R12, LR, CPSR], Uses = [SP] in def tTPsoft : tPseudoInst<(outs), (ins), 4, IIC_Br, - [(set R0, ARMthread_pointer)]>; + [(set R0, ARMthread_pointer)]>, + Sched<[WriteBr]>; //===----------------------------------------------------------------------===// // SJLJ Exception handling intrinsics @@ -1381,13 +1414,13 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1, hasExtraDefRegAllocReq = 1 in def tPOP_RET : tPseudoExpand<(outs), (ins pred:$p, reglist:$regs, variable_ops), 2, IIC_iPop_Br, [], - (tPOP pred:$p, reglist:$regs)>; + (tPOP pred:$p, reglist:$regs)>, Sched<[WriteBrL]>; // Indirect branch using "mov pc, $Rm" let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def tBRIND : tPseudoExpand<(outs), (ins GPR:$Rm, pred:$p), 2, IIC_Br, [(brind GPR:$Rm)], - (tMOVr PC, GPR:$Rm, pred:$p)>; + (tMOVr PC, GPR:$Rm, pred:$p)>, Sched<[WriteBr]>; } diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td index 4dacb86df4ce..48acffd3a64e 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -173,14 +173,13 @@ def t2ldr_pcrel_imm12 : Operand<i32> { // ADR instruction labels. 
def t2adrlabel : Operand<i32> { let EncoderMethod = "getT2AdrLabelOpValue"; - let PrintMethod = "printAdrLabelOperand"; + let PrintMethod = "printAdrLabelOperand<0>"; } - // t2addrmode_posimm8 := reg + imm8 def MemPosImm8OffsetAsmOperand : AsmOperandClass {let Name="MemPosImm8Offset";} def t2addrmode_posimm8 : Operand<i32> { - let PrintMethod = "printT2AddrModeImm8Operand"; + let PrintMethod = "printT2AddrModeImm8Operand<false>"; let EncoderMethod = "getT2AddrModeImm8OpValue"; let DecoderMethod = "DecodeT2AddrModeImm8"; let ParserMatchClass = MemPosImm8OffsetAsmOperand; @@ -191,7 +190,7 @@ def t2addrmode_posimm8 : Operand<i32> { def MemNegImm8OffsetAsmOperand : AsmOperandClass {let Name="MemNegImm8Offset";} def t2addrmode_negimm8 : Operand<i32>, ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> { - let PrintMethod = "printT2AddrModeImm8Operand"; + let PrintMethod = "printT2AddrModeImm8Operand<false>"; let EncoderMethod = "getT2AddrModeImm8OpValue"; let DecoderMethod = "DecodeT2AddrModeImm8"; let ParserMatchClass = MemNegImm8OffsetAsmOperand; @@ -200,15 +199,22 @@ def t2addrmode_negimm8 : Operand<i32>, // t2addrmode_imm8 := reg +/- imm8 def MemImm8OffsetAsmOperand : AsmOperandClass { let Name = "MemImm8Offset"; } -def t2addrmode_imm8 : Operand<i32>, - ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> { - let PrintMethod = "printT2AddrModeImm8Operand"; +class T2AddrMode_Imm8 : Operand<i32>, + ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> { let EncoderMethod = "getT2AddrModeImm8OpValue"; let DecoderMethod = "DecodeT2AddrModeImm8"; let ParserMatchClass = MemImm8OffsetAsmOperand; let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); } +def t2addrmode_imm8 : T2AddrMode_Imm8 { + let PrintMethod = "printT2AddrModeImm8Operand<false>"; +} + +def t2addrmode_imm8_pre : T2AddrMode_Imm8 { + let PrintMethod = "printT2AddrModeImm8Operand<true>"; +} + def t2am_imm8_offset : Operand<i32>, ComplexPattern<i32, 1, "SelectT2AddrModeImm8Offset", [], [SDNPWantRoot]> { @@ -219,14 +225,21 @@ def t2am_imm8_offset : Operand<i32>, // t2addrmode_imm8s4 := reg +/- (imm8 << 2) def MemImm8s4OffsetAsmOperand : AsmOperandClass {let Name = "MemImm8s4Offset";} -def t2addrmode_imm8s4 : Operand<i32> { - let PrintMethod = "printT2AddrModeImm8s4Operand"; +class T2AddrMode_Imm8s4 : Operand<i32> { let EncoderMethod = "getT2AddrModeImm8s4OpValue"; let DecoderMethod = "DecodeT2AddrModeImm8s4"; let ParserMatchClass = MemImm8s4OffsetAsmOperand; let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); } +def t2addrmode_imm8s4 : T2AddrMode_Imm8s4 { + let PrintMethod = "printT2AddrModeImm8s4Operand<false>"; +} + +def t2addrmode_imm8s4_pre : T2AddrMode_Imm8s4 { + let PrintMethod = "printT2AddrModeImm8s4Operand<true>"; +} + def t2am_imm8s4_offset_asmoperand : AsmOperandClass { let Name = "Imm8s4"; } def t2am_imm8s4_offset : Operand<i32> { let PrintMethod = "printT2AddrModeImm8s4OffsetOperand"; @@ -238,7 +251,8 @@ def t2am_imm8s4_offset : Operand<i32> { def MemImm0_1020s4OffsetAsmOperand : AsmOperandClass { let Name = "MemImm0_1020s4Offset"; } -def t2addrmode_imm0_1020s4 : Operand<i32> { +def t2addrmode_imm0_1020s4 : Operand<i32>, + ComplexPattern<i32, 2, "SelectT2AddrModeExclusive"> { let PrintMethod = "printT2AddrModeImm0_1020s4Operand"; let EncoderMethod = "getT2AddrModeImm0_1020s4OpValue"; let DecoderMethod = "DecodeT2AddrModeImm0_1020s4"; @@ -451,6 +465,18 @@ class T2ThreeReg<dag oops, dag iops, InstrItinClass itin, let Inst{3-0} = Rm; } +class T2ThreeRegNoP<dag oops, dag iops, InstrItinClass itin, + string asm, list<dag> pattern> + 
: T2XI<oops, iops, itin, asm, pattern> { + bits<4> Rd; + bits<4> Rn; + bits<4> Rm; + + let Inst{11-8} = Rd; + let Inst{19-16} = Rn; + let Inst{3-0} = Rm; +} + class T2sThreeReg<dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> : T2sI<oops, iops, itin, opc, asm, pattern> { @@ -554,7 +580,8 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc, def ri : T2sTwoRegImm< (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), iii, opc, "\t$Rd, $Rn, $imm", - [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]> { + [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>, + Sched<[WriteALU, ReadALU]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = opcod; @@ -563,7 +590,8 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc, // register def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), iir, opc, !strconcat(wide, "\t$Rd, $Rn, $Rm"), - [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> { + [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>, + Sched<[WriteALU, ReadALU, ReadALU]> { let isCommutable = Commutable; let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -576,7 +604,8 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc, def rs : T2sTwoRegShiftedReg< (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm), iis, opc, !strconcat(wide, "\t$Rd, $Rn, $ShiftedRm"), - [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]> { + [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>, + Sched<[WriteALUsi, ReadALU]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -635,7 +664,8 @@ multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> { def ri : T2sTwoRegImm< (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi, opc, ".w\t$Rd, $Rn, $imm", - [(set rGPR:$Rd, (opnode t2_so_imm:$imm, rGPR:$Rn))]> { + [(set rGPR:$Rd, (opnode t2_so_imm:$imm, rGPR:$Rn))]>, + Sched<[WriteALU, ReadALU]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = opcod; @@ -645,7 +675,8 @@ multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> { def rr : T2sThreeReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr, opc, "\t$Rd, $Rn, $Rm", - [/* For disassembly only; pattern left blank */]> { + [/* For disassembly only; pattern left blank */]>, + Sched<[WriteALU, ReadALU, ReadALU]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -657,7 +688,8 @@ multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> { def rs : T2sTwoRegShiftedReg< (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm), IIC_iALUsir, opc, "\t$Rd, $Rn, $ShiftedRm", - [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm, rGPR:$Rn))]> { + [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm, rGPR:$Rn))]>, + Sched<[WriteALUsi, ReadALU]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -678,12 +710,14 @@ multiclass T2I_bin_s_irs<InstrItinClass iii, InstrItinClass iir, (ins GPRnopc:$Rn, t2_so_imm:$imm, pred:$p), 4, iii, [(set rGPR:$Rd, CPSR, (opnode GPRnopc:$Rn, - t2_so_imm:$imm))]>; + t2_so_imm:$imm))]>, + Sched<[WriteALU, ReadALU]>; // register def rr : t2PseudoInst<(outs rGPR:$Rd), (ins GPRnopc:$Rn, rGPR:$Rm, pred:$p), 4, iir, [(set rGPR:$Rd, CPSR, (opnode GPRnopc:$Rn, - rGPR:$Rm))]> { + rGPR:$Rm))]>, + Sched<[WriteALU, ReadALU, ReadALU]> { let isCommutable = Commutable; } // shifted register @@ -691,7 +725,8 @@ multiclass T2I_bin_s_irs<InstrItinClass iii, InstrItinClass iir, (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm, pred:$p), 4, iis, [(set rGPR:$Rd, CPSR, (opnode GPRnopc:$Rn, - 
t2_so_reg:$ShiftedRm))]>; + t2_so_reg:$ShiftedRm))]>, + Sched<[WriteALUsi, ReadALUsr]>; } } @@ -704,13 +739,15 @@ multiclass T2I_rbin_s_is<PatFrag opnode> { (ins rGPR:$Rn, t2_so_imm:$imm, pred:$p), 4, IIC_iALUi, [(set rGPR:$Rd, CPSR, (opnode t2_so_imm:$imm, - rGPR:$Rn))]>; + rGPR:$Rn))]>, + Sched<[WriteALU, ReadALU]>; // shifted register def rs : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p), 4, IIC_iALUsi, [(set rGPR:$Rd, CPSR, (opnode t2_so_reg:$ShiftedRm, - rGPR:$Rn))]>; + rGPR:$Rn))]>, + Sched<[WriteALUsi, ReadALU]>; } } @@ -725,7 +762,8 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, def ri : T2sTwoRegImm< (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, t2_so_imm:$imm), IIC_iALUi, opc, ".w\t$Rd, $Rn, $imm", - [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, t2_so_imm:$imm))]> { + [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, t2_so_imm:$imm))]>, + Sched<[WriteALU, ReadALU]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24} = 1; @@ -737,7 +775,8 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, def ri12 : T2I< (outs GPRnopc:$Rd), (ins GPR:$Rn, imm0_4095:$imm), IIC_iALUi, !strconcat(opc, "w"), "\t$Rd, $Rn, $imm", - [(set GPRnopc:$Rd, (opnode GPR:$Rn, imm0_4095:$imm))]> { + [(set GPRnopc:$Rd, (opnode GPR:$Rn, imm0_4095:$imm))]>, + Sched<[WriteALU, ReadALU]> { bits<4> Rd; bits<4> Rn; bits<12> imm; @@ -755,7 +794,8 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, // register def rr : T2sThreeReg<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, rGPR:$Rm), IIC_iALUr, opc, ".w\t$Rd, $Rn, $Rm", - [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, rGPR:$Rm))]> { + [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, rGPR:$Rm))]>, + Sched<[WriteALU, ReadALU, ReadALU]> { let isCommutable = Commutable; let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -769,7 +809,8 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, def rs : T2sTwoRegShiftedReg< (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm), IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm", - [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, t2_so_reg:$ShiftedRm))]> { + [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, t2_so_reg:$ShiftedRm))]>, + Sched<[WriteALUsi, ReadALU]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24} = 1; @@ -787,7 +828,7 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, def ri : T2sTwoRegImm<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi, opc, "\t$Rd, $Rn, $imm", [(set rGPR:$Rd, CPSR, (opnode rGPR:$Rn, t2_so_imm:$imm, CPSR))]>, - Requires<[IsThumb2]> { + Requires<[IsThumb2]>, Sched<[WriteALU, ReadALU]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = opcod; @@ -797,7 +838,7 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr, opc, ".w\t$Rd, $Rn, $Rm", [(set rGPR:$Rd, CPSR, (opnode rGPR:$Rn, rGPR:$Rm, CPSR))]>, - Requires<[IsThumb2]> { + Requires<[IsThumb2]>, Sched<[WriteALU, ReadALU, ReadALU]> { let isCommutable = Commutable; let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -811,7 +852,7 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm), IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm", [(set rGPR:$Rd, CPSR, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm, CPSR))]>, - Requires<[IsThumb2]> { + Requires<[IsThumb2]>, Sched<[WriteALUsi, ReadALU]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ 
-826,7 +867,8 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode> { def ri : T2sTwoRegShiftImm< (outs rGPR:$Rd), (ins rGPR:$Rm, ty:$imm), IIC_iMOVsi, opc, ".w\t$Rd, $Rm, $imm", - [(set rGPR:$Rd, (opnode rGPR:$Rm, (i32 ty:$imm)))]> { + [(set rGPR:$Rd, (opnode rGPR:$Rm, (i32 ty:$imm)))]>, + Sched<[WriteALU]> { let Inst{31-27} = 0b11101; let Inst{26-21} = 0b010010; let Inst{19-16} = 0b1111; // Rn @@ -836,7 +878,8 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode> { def rr : T2sThreeReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMOVsr, opc, ".w\t$Rd, $Rn, $Rm", - [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> { + [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>, + Sched<[WriteALU]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; let Inst{22-21} = opcod; @@ -880,7 +923,7 @@ let isCompare = 1, Defs = [CPSR] in { def ri : T2OneRegCmpImm< (outs), (ins GPRnopc:$Rn, t2_so_imm:$imm), iii, opc, ".w\t$Rn, $imm", - [(opnode GPRnopc:$Rn, t2_so_imm:$imm)]> { + [(opnode GPRnopc:$Rn, t2_so_imm:$imm)]>, Sched<[WriteCMP]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = opcod; @@ -892,7 +935,7 @@ let isCompare = 1, Defs = [CPSR] in { def rr : T2TwoRegCmp< (outs), (ins GPRnopc:$Rn, rGPR:$Rm), iir, opc, ".w\t$Rn, $Rm", - [(opnode GPRnopc:$Rn, rGPR:$Rm)]> { + [(opnode GPRnopc:$Rn, rGPR:$Rm)]>, Sched<[WriteCMP]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -906,7 +949,8 @@ let isCompare = 1, Defs = [CPSR] in { def rs : T2OneRegCmpShiftedReg< (outs), (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm), iis, opc, ".w\t$Rn, $ShiftedRm", - [(opnode GPRnopc:$Rn, t2_so_reg:$ShiftedRm)]> { + [(opnode GPRnopc:$Rn, t2_so_reg:$ShiftedRm)]>, + Sched<[WriteCMPsi]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -941,6 +985,8 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc, let Inst{19-16} = addr{16-13}; // Rn let Inst{15-12} = Rt; let Inst{11-0} = addr{11-0}; // imm + + let DecoderMethod = "DecodeT2LoadImm12"; } def i8 : T2Ii8 <(outs target:$Rt), (ins t2addrmode_negimm8:$addr), iii, opc, "\t$Rt, $addr", @@ -961,6 +1007,8 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc, let Inst{9} = addr{8}; // U let Inst{8} = 0; // The W bit. let Inst{7-0} = addr{7-0}; // imm + + let DecoderMethod = "DecodeT2LoadImm8"; } def s : T2Iso <(outs target:$Rt), (ins t2addrmode_so_reg:$addr), iis, opc, ".w\t$Rt, $addr", @@ -993,14 +1041,18 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc, let Inst{31-27} = 0b11111; let Inst{26-25} = 0b00; let Inst{24} = signed; - let Inst{23} = ?; // add = (U == '1') let Inst{22-21} = opcod; let Inst{20} = 1; // load let Inst{19-16} = 0b1111; // Rn + bits<4> Rt; - bits<12> addr; let Inst{15-12} = Rt{3-0}; + + bits<13> addr; + let Inst{23} = addr{12}; // add = (U == '1') let Inst{11-0} = addr{11-0}; + + let DecoderMethod = "DecodeT2LoadLabel"; } } @@ -1167,7 +1219,8 @@ class T2PCOneRegImm<dag oops, dag iops, InstrItinClass itin, // assembler. 
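// [Editorial note on the T2I_ld literal hunk above; illustrative, not part
// of the patch.] The label operand grows from bits<12> to bits<13> so the
// offset's sign travels with the operand: addr{12} drives Inst{23}, the U
// (add/subtract) bit, replacing the old wildcard "let Inst{23} = ?".
// Assuming that encoding, a worked example:
//   ldr r0, [pc, #8]   ->  addr{12} = 1 (U = 1), addr{11-0} = 8
//   ldr r0, [pc, #-8]  ->  addr{12} = 0 (U = 0), addr{11-0} = 8
// which is what the new DecodeT2LoadLabel decoder reverses on disassembly.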
def t2ADR : T2PCOneRegImm<(outs rGPR:$Rd), (ins t2adrlabel:$addr, pred:$p), - IIC_iALUi, "adr{$p}.w\t$Rd, $addr", []> { + IIC_iALUi, "adr{$p}.w\t$Rd, $addr", []>, + Sched<[WriteALU, ReadALU]> { let Inst{31-27} = 0b11110; let Inst{25-24} = 0b10; // Inst{23:21} = '11' (add = FALSE) or '00' (add = TRUE) @@ -1190,12 +1243,12 @@ def t2ADR : T2PCOneRegImm<(outs rGPR:$Rd), let neverHasSideEffects = 1, isReMaterializable = 1 in def t2LEApcrel : t2PseudoInst<(outs rGPR:$Rd), (ins i32imm:$label, pred:$p), - 4, IIC_iALUi, []>; + 4, IIC_iALUi, []>, Sched<[WriteALU, ReadALU]>; let hasSideEffects = 1 in def t2LEApcrelJT : t2PseudoInst<(outs rGPR:$Rd), (ins i32imm:$label, nohash_imm:$id, pred:$p), 4, IIC_iALUi, - []>; + []>, Sched<[WriteALU, ReadALU]>; //===----------------------------------------------------------------------===// @@ -1209,15 +1262,15 @@ defm t2LDR : T2I_ld<0, 0b10, "ldr", IIC_iLoad_i, IIC_iLoad_si, GPR, // Loads with zero extension defm t2LDRH : T2I_ld<0, 0b01, "ldrh", IIC_iLoad_bh_i, IIC_iLoad_bh_si, - rGPR, UnOpFrag<(zextloadi16 node:$Src)>>; + GPR, UnOpFrag<(zextloadi16 node:$Src)>>; defm t2LDRB : T2I_ld<0, 0b00, "ldrb", IIC_iLoad_bh_i, IIC_iLoad_bh_si, - rGPR, UnOpFrag<(zextloadi8 node:$Src)>>; + GPR, UnOpFrag<(zextloadi8 node:$Src)>>; // Loads with sign extension defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", IIC_iLoad_bh_i, IIC_iLoad_bh_si, - rGPR, UnOpFrag<(sextloadi16 node:$Src)>>; + GPR, UnOpFrag<(sextloadi16 node:$Src)>>; defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", IIC_iLoad_bh_i, IIC_iLoad_bh_si, - rGPR, UnOpFrag<(sextloadi8 node:$Src)>>; + GPR, UnOpFrag<(sextloadi8 node:$Src)>>; let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // Load doubleword @@ -1275,12 +1328,9 @@ def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)), let mayLoad = 1, neverHasSideEffects = 1 in { def t2LDR_PRE : T2Ipreldst<0, 0b10, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), - (ins t2addrmode_imm8:$addr), + (ins t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_iu, - "ldr", "\t$Rt, $addr!", "$addr.base = $Rn_wb", - []> { - let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8"; -} + "ldr", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>; def t2LDR_POST : T2Ipostldst<0, 0b10, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset), @@ -1288,48 +1338,42 @@ def t2LDR_POST : T2Ipostldst<0, 0b10, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), "ldr", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>; def t2LDRB_PRE : T2Ipreldst<0, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), - (ins t2addrmode_imm8:$addr), + (ins t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu, - "ldrb", "\t$Rt, $addr!", "$addr.base = $Rn_wb", - []> { - let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8"; -} + "ldrb", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>; + def t2LDRB_POST : T2Ipostldst<0, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, "ldrb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>; def t2LDRH_PRE : T2Ipreldst<0, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), - (ins t2addrmode_imm8:$addr), + (ins t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu, - "ldrh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", - []> { - let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8"; -} + "ldrh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>; + def t2LDRH_POST : T2Ipostldst<0, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset), AddrModeT2_i8, 
IndexModePost, IIC_iLoad_bh_iu, "ldrh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>; def t2LDRSB_PRE : T2Ipreldst<1, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), - (ins t2addrmode_imm8:$addr), + (ins t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu, "ldrsb", "\t$Rt, $addr!", "$addr.base = $Rn_wb", - []> { - let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8"; -} + []>; + def t2LDRSB_POST : T2Ipostldst<1, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, "ldrsb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>; def t2LDRSH_PRE : T2Ipreldst<1, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), - (ins t2addrmode_imm8:$addr), + (ins t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu, "ldrsh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", - []> { - let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8"; -} + []>; + def t2LDRSH_POST : T2Ipostldst<1, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, @@ -1354,6 +1398,8 @@ class T2IldT<bit signed, bits<2> type, string opc, InstrItinClass ii> let Inst{11} = 1; let Inst{10-8} = 0b110; // PUW. let Inst{7-0} = addr{7-0}; + + let DecoderMethod = "DecodeT2LoadT"; } def t2LDRT : T2IldT<0, 0b10, "ldrt", IIC_iLoad_i>; @@ -1362,6 +1408,32 @@ def t2LDRHT : T2IldT<0, 0b01, "ldrht", IIC_iLoad_bh_i>; def t2LDRSBT : T2IldT<1, 0b00, "ldrsbt", IIC_iLoad_bh_i>; def t2LDRSHT : T2IldT<1, 0b01, "ldrsht", IIC_iLoad_bh_i>; +class T2Ildacq<bits<4> bits23_20, bits<2> bit54, dag oops, dag iops, + string opc, string asm, list<dag> pattern> + : Thumb2I<oops, iops, AddrModeNone, 4, NoItinerary, + opc, asm, "", pattern>, Requires<[IsThumb, HasV8]> { + bits<4> Rt; + bits<4> addr; + + let Inst{31-27} = 0b11101; + let Inst{26-24} = 0b000; + let Inst{23-20} = bits23_20; + let Inst{11-6} = 0b111110; + let Inst{5-4} = bit54; + let Inst{3-0} = 0b1111; + + // Encode instruction operands + let Inst{19-16} = addr; + let Inst{15-12} = Rt; +} + +def t2LDA : T2Ildacq<0b1101, 0b10, (outs rGPR:$Rt), + (ins addr_offset_none:$addr), "lda", "\t$Rt, $addr", []>; +def t2LDAB : T2Ildacq<0b1101, 0b00, (outs rGPR:$Rt), + (ins addr_offset_none:$addr), "ldab", "\t$Rt, $addr", []>; +def t2LDAH : T2Ildacq<0b1101, 0b01, (outs rGPR:$Rt), + (ins addr_offset_none:$addr), "ldah", "\t$Rt, $addr", []>; + // Store defm t2STR :T2I_st<0b10,"str", IIC_iStore_i, IIC_iStore_si, GPR, BinOpFrag<(store node:$LHS, node:$RHS)>>; @@ -1380,27 +1452,22 @@ def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs), let mayStore = 1, neverHasSideEffects = 1 in { def t2STR_PRE : T2Ipreldst<0, 0b10, 0, 1, (outs GPRnopc:$Rn_wb), - (ins GPRnopc:$Rt, t2addrmode_imm8:$addr), + (ins GPRnopc:$Rt, t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iStore_iu, "str", "\t$Rt, $addr!", - "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { - let AsmMatchConverter = "cvtStWriteBackRegT2AddrModeImm8"; -} + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>; + def t2STRH_PRE : T2Ipreldst<0, 0b01, 0, 1, (outs GPRnopc:$Rn_wb), - (ins rGPR:$Rt, t2addrmode_imm8:$addr), + (ins rGPR:$Rt, t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iStore_iu, "strh", "\t$Rt, $addr!", - "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { - let AsmMatchConverter = "cvtStWriteBackRegT2AddrModeImm8"; -} + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>; def t2STRB_PRE : T2Ipreldst<0, 0b00, 0, 1, (outs GPRnopc:$Rn_wb), - (ins rGPR:$Rt, t2addrmode_imm8:$addr), + (ins 
rGPR:$Rt, t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iStore_bh_iu, "strb", "\t$Rt, $addr!", - "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { - let AsmMatchConverter = "cvtStWriteBackRegT2AddrModeImm8"; -} + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>; } // mayStore = 1, neverHasSideEffects = 1 def t2STR_POST : T2Ipostldst<0, 0b10, 0, 0, (outs GPRnopc:$Rn_wb), @@ -1487,9 +1554,8 @@ def t2STRHT : T2IstT<0b01, "strht", IIC_iStore_bh_i>; // For disassembly only. def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb), - (ins t2addrmode_imm8s4:$addr), IIC_iLoad_d_ru, + (ins t2addrmode_imm8s4_pre:$addr), IIC_iLoad_d_ru, "ldrd", "\t$Rt, $Rt2, $addr!", "$addr.base = $wb", []> { - let AsmMatchConverter = "cvtT2LdrdPre"; let DecoderMethod = "DecodeT2LDRDPreInstruction"; } @@ -1499,10 +1565,9 @@ def t2LDRD_POST : T2Ii8s4post<0, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb), "$addr.base = $wb", []>; def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs GPR:$wb), - (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr), + (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4_pre:$addr), IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr!", "$addr.base = $wb", []> { - let AsmMatchConverter = "cvtT2StrdPre"; let DecoderMethod = "DecodeT2STRDPreInstruction"; } @@ -1512,6 +1577,31 @@ def t2STRD_POST : T2Ii8s4post<0, 1, 0, (outs GPR:$wb), IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr$imm", "$addr.base = $wb", []>; +class T2Istrrel<bits<2> bit54, dag oops, dag iops, + string opc, string asm, list<dag> pattern> + : Thumb2I<oops, iops, AddrModeNone, 4, NoItinerary, opc, + asm, "", pattern>, Requires<[IsThumb, HasV8]> { + bits<4> Rt; + bits<4> addr; + + let Inst{31-27} = 0b11101; + let Inst{26-20} = 0b0001100; + let Inst{11-6} = 0b111110; + let Inst{5-4} = bit54; + let Inst{3-0} = 0b1111; + + // Encode instruction operands + let Inst{19-16} = addr; + let Inst{15-12} = Rt; +} + +def t2STL : T2Istrrel<0b10, (outs), (ins rGPR:$Rt, addr_offset_none:$addr), + "stl", "\t$Rt, $addr", []>; +def t2STLB : T2Istrrel<0b00, (outs), (ins rGPR:$Rt, addr_offset_none:$addr), + "stlb", "\t$Rt, $addr", []>; +def t2STLH : T2Istrrel<0b01, (outs), (ins rGPR:$Rt, addr_offset_none:$addr), + "stlh", "\t$Rt, $addr", []>; + // T2Ipl (Preload Data/Instruction) signals the memory system of possible future // data/instruction access. 
// instr_write is inverted for Thumb mode: (prefetch 3) -> (preload 0), @@ -1520,24 +1610,27 @@ multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> { def i12 : T2Ii12<(outs), (ins t2addrmode_imm12:$addr), IIC_Preload, opc, "\t$addr", - [(ARMPreload t2addrmode_imm12:$addr, (i32 write), (i32 instr))]> { + [(ARMPreload t2addrmode_imm12:$addr, (i32 write), (i32 instr))]>, + Sched<[WritePreLd]> { let Inst{31-25} = 0b1111100; let Inst{24} = instr; + let Inst{23} = 1; let Inst{22} = 0; let Inst{21} = write; let Inst{20} = 1; let Inst{15-12} = 0b1111; bits<17> addr; - let addr{12} = 1; // add = TRUE let Inst{19-16} = addr{16-13}; // Rn - let Inst{23} = addr{12}; // U let Inst{11-0} = addr{11-0}; // imm12 + + let DecoderMethod = "DecodeT2LoadImm12"; } def i8 : T2Ii8<(outs), (ins t2addrmode_negimm8:$addr), IIC_Preload, opc, "\t$addr", - [(ARMPreload t2addrmode_negimm8:$addr, (i32 write), (i32 instr))]> { + [(ARMPreload t2addrmode_negimm8:$addr, (i32 write), (i32 instr))]>, + Sched<[WritePreLd]> { let Inst{31-25} = 0b1111100; let Inst{24} = instr; let Inst{23} = 0; // U = 0 @@ -1550,11 +1643,14 @@ multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> { bits<13> addr; let Inst{19-16} = addr{12-9}; // Rn let Inst{7-0} = addr{7-0}; // imm8 + + let DecoderMethod = "DecodeT2LoadImm8"; } def s : T2Iso<(outs), (ins t2addrmode_so_reg:$addr), IIC_Preload, opc, "\t$addr", - [(ARMPreload t2addrmode_so_reg:$addr, (i32 write), (i32 instr))]> { + [(ARMPreload t2addrmode_so_reg:$addr, (i32 write), (i32 instr))]>, + Sched<[WritePreLd]> { let Inst{31-25} = 0b1111100; let Inst{24} = instr; let Inst{23} = 0; // add = TRUE for T1 @@ -1562,7 +1658,7 @@ multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> { let Inst{21} = write; let Inst{20} = 1; let Inst{15-12} = 0b1111; - let Inst{11-6} = 0000000; + let Inst{11-6} = 0b000000; bits<10> addr; let Inst{19-16} = addr{9-6}; // Rn @@ -1571,15 +1667,33 @@ multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> { let DecoderMethod = "DecodeT2LoadShift"; } - // FIXME: We should have a separate 'pci' variant here. As-is we represent - // it via the i12 variant, which it's related to, but that means we can - // represent negative immediates, which aren't legal for anything except - // the 'pci' case (Rn == 15). } -defm t2PLD : T2Ipl<0, 0, "pld">, Requires<[IsThumb2]>; -defm t2PLDW : T2Ipl<1, 0, "pldw">, Requires<[IsThumb2,HasV7,HasMP]>; -defm t2PLI : T2Ipl<0, 1, "pli">, Requires<[IsThumb2,HasV7]>; +defm t2PLD : T2Ipl<0, 0, "pld">, Requires<[IsThumb2]>; +defm t2PLDW : T2Ipl<1, 0, "pldw">, Requires<[IsThumb2,HasV7,HasMP]>; +defm t2PLI : T2Ipl<0, 1, "pli">, Requires<[IsThumb2,HasV7]>; + +// pci variant is very similar to i12, but supports negative offsets +// from the PC. Only PLD and PLI have pci variants (not PLDW) +class T2Iplpci<bits<1> inst, string opc> : T2Iso<(outs), (ins t2ldrlabel:$addr), + IIC_Preload, opc, "\t$addr", + [(ARMPreload (ARMWrapper tconstpool:$addr), + (i32 0), (i32 inst))]>, Sched<[WritePreLd]> { + let Inst{31-25} = 0b1111100; + let Inst{24} = inst; + let Inst{22-20} = 0b001; + let Inst{19-16} = 0b1111; + let Inst{15-12} = 0b1111; + + bits<13> addr; + let Inst{23} = addr{12}; // add = (U == '1') + let Inst{11-0} = addr{11-0}; // imm12 + + let DecoderMethod = "DecodeT2LoadLabel"; +} + +def t2PLDpci : T2Iplpci<0, "pld">, Requires<[IsThumb2]>; +def t2PLIpci : T2Iplpci<1, "pli">, Requires<[IsThumb2,HasV7]>; //===----------------------------------------------------------------------===// // Load / store multiple Instructions. 
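// [Editorial note on the T2Ipl/T2Iplpci changes above; illustrative, not
// part of the patch.] The register-based i12 preload now hard-wires
// Inst{23} = 1 (its offset is always added), and negative PC-relative
// offsets move into the new T2Iplpci literal class, which carries the same
// bits<13> operand as the load-literal case. Assuming that split:
//   pld [r1, #32]   ->  t2PLDi12, U fixed to 1
//   pld [pc, #-16]  ->  t2PLDpci, addr{12} = 0, addr{11-0} = 16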
@@ -1743,7 +1857,7 @@ defm t2STM : thumb2_st_mult<"stm", IIC_iStore_m, IIC_iStore_mu, 0>; let neverHasSideEffects = 1 in def t2MOVr : T2sTwoReg<(outs GPRnopc:$Rd), (ins GPR:$Rm), IIC_iMOVr, - "mov", ".w\t$Rd, $Rm", []> { + "mov", ".w\t$Rd, $Rm", []>, Sched<[WriteALU]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = 0b0010; @@ -1763,7 +1877,7 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1, AddedComplexity = 1 in def t2MOVi : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), IIC_iMOVi, "mov", ".w\t$Rd, $imm", - [(set rGPR:$Rd, t2_so_imm:$imm)]> { + [(set rGPR:$Rd, t2_so_imm:$imm)]>, Sched<[WriteALU]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = 0b0010; @@ -1786,7 +1900,7 @@ def : t2InstAlias<"mov${p} $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm, let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins imm0_65535_expr:$imm), IIC_iMOVi, "movw", "\t$Rd, $imm", - [(set rGPR:$Rd, imm0_65535:$imm)]> { + [(set rGPR:$Rd, imm0_65535:$imm)]>, Sched<[WriteALU]> { let Inst{31-27} = 0b11110; let Inst{25} = 1; let Inst{24-21} = 0b0010; @@ -1804,6 +1918,9 @@ def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins imm0_65535_expr:$imm), IIC_iMOVi, let DecoderMethod = "DecodeT2MOVTWInstruction"; } +def : t2InstAlias<"mov${p} $Rd, $imm", + (t2MOVi16 rGPR:$Rd, imm256_65535_expr:$imm, pred:$p)>; + def t2MOVi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd), (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>; @@ -1812,7 +1929,8 @@ def t2MOVTi16 : T2I<(outs rGPR:$Rd), (ins rGPR:$src, imm0_65535_expr:$imm), IIC_iMOVi, "movt", "\t$Rd, $imm", [(set rGPR:$Rd, - (or (and rGPR:$src, 0xffff), lo16AllZero:$imm))]> { + (or (and rGPR:$src, 0xffff), lo16AllZero:$imm))]>, + Sched<[WriteALU]> { let Inst{31-27} = 0b11110; let Inst{25} = 1; let Inst{24-21} = 0b0110; @@ -1831,7 +1949,8 @@ def t2MOVTi16 : T2I<(outs rGPR:$Rd), } def t2MOVTi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd), - (ins rGPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>; + (ins rGPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>, + Sched<[WriteALU]>; } // Constraints def : T2Pat<(or rGPR:$src, 0xffff0000), (t2MOVTi16 rGPR:$src, 0xffff)>; @@ -2171,7 +2290,7 @@ def : T2Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)), let Uses = [CPSR] in { def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi, "rrx", "\t$Rd, $Rm", - [(set rGPR:$Rd, (ARMrrx rGPR:$Rm))]> { + [(set rGPR:$Rd, (ARMrrx rGPR:$Rm))]>, Sched<[WriteALU]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = 0b0010; @@ -2185,7 +2304,8 @@ let isCodeGenOnly = 1, Defs = [CPSR] in { def t2MOVsrl_flag : T2TwoRegShiftImm< (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi, "lsrs", ".w\t$Rd, $Rm, #1", - [(set rGPR:$Rd, (ARMsrl_flag rGPR:$Rm))]> { + [(set rGPR:$Rd, (ARMsrl_flag rGPR:$Rm))]>, + Sched<[WriteALU]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = 0b0010; @@ -2199,7 +2319,8 @@ def t2MOVsrl_flag : T2TwoRegShiftImm< def t2MOVsra_flag : T2TwoRegShiftImm< (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi, "asrs", ".w\t$Rd, $Rm, #1", - [(set rGPR:$Rd, (ARMsra_flag rGPR:$Rm))]> { + [(set rGPR:$Rd, (ARMsra_flag rGPR:$Rm))]>, + Sched<[WriteALU]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = 0b0010; @@ -2320,7 +2441,7 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, // shifted imm def i : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), iii, opc, "\t$Rd, $imm", - [(set rGPR:$Rd, (opnode t2_so_imm:$imm))]> { + [(set rGPR:$Rd, (opnode t2_so_imm:$imm))]>, 
Sched<[WriteALU]> { let isAsCheapAsAMove = Cheap; let isReMaterializable = ReMat; let isMoveImm = MoveImm; @@ -2333,7 +2454,7 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, // register def r : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), iir, opc, ".w\t$Rd, $Rm", - [(set rGPR:$Rd, (opnode rGPR:$Rm))]> { + [(set rGPR:$Rd, (opnode rGPR:$Rm))]>, Sched<[WriteALU]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -2345,7 +2466,8 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, // shifted register def s : T2sOneRegShiftedReg<(outs rGPR:$Rd), (ins t2_so_reg:$ShiftedRm), iis, opc, ".w\t$Rd, $ShiftedRm", - [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm))]> { + [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm))]>, + Sched<[WriteALU]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -2804,22 +2926,27 @@ class T2I_misc<bits<2> op1, bits<2> op2, dag oops, dag iops, } def t2CLZ : T2I_misc<0b11, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr, - "clz", "\t$Rd, $Rm", [(set rGPR:$Rd, (ctlz rGPR:$Rm))]>; + "clz", "\t$Rd, $Rm", [(set rGPR:$Rd, (ctlz rGPR:$Rm))]>, + Sched<[WriteALU]>; def t2RBIT : T2I_misc<0b01, 0b10, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr, "rbit", "\t$Rd, $Rm", - [(set rGPR:$Rd, (ARMrbit rGPR:$Rm))]>; + [(set rGPR:$Rd, (ARMrbit rGPR:$Rm))]>, + Sched<[WriteALU]>; def t2REV : T2I_misc<0b01, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr, - "rev", ".w\t$Rd, $Rm", [(set rGPR:$Rd, (bswap rGPR:$Rm))]>; + "rev", ".w\t$Rd, $Rm", [(set rGPR:$Rd, (bswap rGPR:$Rm))]>, + Sched<[WriteALU]>; def t2REV16 : T2I_misc<0b01, 0b01, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr, "rev16", ".w\t$Rd, $Rm", - [(set rGPR:$Rd, (rotr (bswap rGPR:$Rm), (i32 16)))]>; + [(set rGPR:$Rd, (rotr (bswap rGPR:$Rm), (i32 16)))]>, + Sched<[WriteALU]>; def t2REVSH : T2I_misc<0b01, 0b11, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr, "revsh", ".w\t$Rd, $Rm", - [(set rGPR:$Rd, (sra (bswap rGPR:$Rm), (i32 16)))]>; + [(set rGPR:$Rd, (sra (bswap rGPR:$Rm), (i32 16)))]>, + Sched<[WriteALU]>; def : T2Pat<(or (sra (shl rGPR:$Rm, (i32 24)), (i32 16)), (and (srl rGPR:$Rm, (i32 8)), 0xFF)), @@ -2831,7 +2958,8 @@ def t2PKHBT : T2ThreeReg< [(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF), (and (shl rGPR:$Rm, pkh_lsl_amt:$sh), 0xFFFF0000)))]>, - Requires<[HasT2ExtractPack, IsThumb2]> { + Requires<[HasT2ExtractPack, IsThumb2]>, + Sched<[WriteALUsi, ReadALU]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-20} = 0b01100; @@ -2859,7 +2987,8 @@ def t2PKHTB : T2ThreeReg< [(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF0000), (and (sra rGPR:$Rm, pkh_asr_amt:$sh), 0xFFFF)))]>, - Requires<[HasT2ExtractPack, IsThumb2]> { + Requires<[HasT2ExtractPack, IsThumb2]>, + Sched<[WriteALUsi, ReadALU]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-20} = 0b01100; @@ -2873,7 +3002,12 @@ def t2PKHTB : T2ThreeReg< // Alternate cases for PKHTB where identities eliminate some nodes. Note that // a shift amount of 0 is *not legal* here, it is PKHBT instead. -def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (srl rGPR:$src2, imm16_31:$sh)), +// We also can not replace a srl (17..31) by an arithmetic shift we would use in +// pkhtb src1, src2, asr (17..31). 
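// [Editorial note: worked example for the comment above; not part of the
// patch.] lsr and asr agree in the low halfword only for a shift of exactly
// 16, where both place the source's top half in bits 15-0. For 17..31 the
// fill bits differ; with Rm = 0x80000000 and a shift of 17:
//   srl -> 0x00004000 (low half 0x4000)
//   sra -> 0xffffc000 (low half 0xc000)
// so the srl pattern below is restricted to imm16, while the sra pattern
// may use the full imm16_31 range.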
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (srl rGPR:$src2, imm16:$sh)), + (t2PKHTB rGPR:$src1, rGPR:$src2, imm16:$sh)>, + Requires<[HasT2ExtractPack, IsThumb2]>; +def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (sra rGPR:$src2, imm16_31:$sh)), (t2PKHTB rGPR:$src1, rGPR:$src2, imm16_31:$sh)>, Requires<[HasT2ExtractPack, IsThumb2]>; def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), @@ -2882,6 +3016,34 @@ def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), Requires<[HasT2ExtractPack, IsThumb2]>; //===----------------------------------------------------------------------===// +// CRC32 Instructions +// +// Polynomials: +// + CRC32{B,H,W} 0x04C11DB7 +// + CRC32C{B,H,W} 0x1EDC6F41 +// + +class T2I_crc32<bit C, bits<2> sz, string suffix, SDPatternOperator builtin> + : T2ThreeRegNoP<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), NoItinerary, + !strconcat("crc32", suffix, "\t$Rd, $Rn, $Rm"), + [(set rGPR:$Rd, (builtin rGPR:$Rn, rGPR:$Rm))]>, + Requires<[IsThumb2, HasV8, HasCRC]> { + let Inst{31-27} = 0b11111; + let Inst{26-21} = 0b010110; + let Inst{20} = C; + let Inst{15-12} = 0b1111; + let Inst{7-6} = 0b10; + let Inst{5-4} = sz; +} + +def t2CRC32B : T2I_crc32<0, 0b00, "b", int_arm_crc32b>; +def t2CRC32CB : T2I_crc32<1, 0b00, "cb", int_arm_crc32cb>; +def t2CRC32H : T2I_crc32<0, 0b01, "h", int_arm_crc32h>; +def t2CRC32CH : T2I_crc32<1, 0b01, "ch", int_arm_crc32ch>; +def t2CRC32W : T2I_crc32<0, 0b10, "w", int_arm_crc32w>; +def t2CRC32CW : T2I_crc32<1, 0b10, "cw", int_arm_crc32cw>; + +//===----------------------------------------------------------------------===// // Comparison Instructions... // defm t2CMP : T2I_cmp_irs<0b1101, "cmp", @@ -2900,7 +3062,8 @@ let isCompare = 1, Defs = [CPSR] in { def t2CMNri : T2OneRegCmpImm< (outs), (ins GPRnopc:$Rn, t2_so_imm:$imm), IIC_iCMPi, "cmn", ".w\t$Rn, $imm", - [(ARMcmn GPRnopc:$Rn, (ineg t2_so_imm:$imm))]> { + [(ARMcmn GPRnopc:$Rn, (ineg t2_so_imm:$imm))]>, + Sched<[WriteCMP, ReadALU]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = 0b1000; @@ -2913,7 +3076,7 @@ let isCompare = 1, Defs = [CPSR] in { (outs), (ins GPRnopc:$Rn, rGPR:$Rm), IIC_iCMPr, "cmn", ".w\t$Rn, $Rm", [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))> - GPRnopc:$Rn, rGPR:$Rm)]> { + GPRnopc:$Rn, rGPR:$Rm)]>, Sched<[WriteCMP, ReadALU, ReadALU]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = 0b1000; @@ -2928,7 +3091,8 @@ let isCompare = 1, Defs = [CPSR] in { (outs), (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm), IIC_iCMPsi, "cmn", ".w\t$Rn, $ShiftedRm", [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))> - GPRnopc:$Rn, t2_so_reg:$ShiftedRm)]> { + GPRnopc:$Rn, t2_so_reg:$ShiftedRm)]>, + Sched<[WriteCMPsi, ReadALU, ReadALU]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = 0b1000; @@ -2959,92 +3123,67 @@ defm t2TEQ : T2I_cmp_irs<0b0100, "teq", BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>>; // Conditional moves -// FIXME: should be able to write a pattern for ARMcmov, but can't use -// a two-value operand where a dag node expects two operands. 
:( let neverHasSideEffects = 1 in { let isCommutable = 1, isSelect = 1 in def t2MOVCCr : t2PseudoInst<(outs rGPR:$Rd), - (ins rGPR:$false, rGPR:$Rm, pred:$p), + (ins rGPR:$false, rGPR:$Rm, cmovpred:$p), 4, IIC_iCMOVr, - [/*(set rGPR:$Rd, (ARMcmov rGPR:$false, rGPR:$Rm, imm:$cc, CCR:$ccr))*/]>, - RegConstraint<"$false = $Rd">; + [(set rGPR:$Rd, (ARMcmov rGPR:$false, rGPR:$Rm, + cmovpred:$p))]>, + RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; let isMoveImm = 1 in -def t2MOVCCi : t2PseudoInst<(outs rGPR:$Rd), - (ins rGPR:$false, t2_so_imm:$imm, pred:$p), +def t2MOVCCi + : t2PseudoInst<(outs rGPR:$Rd), + (ins rGPR:$false, t2_so_imm:$imm, cmovpred:$p), 4, IIC_iCMOVi, -[/*(set rGPR:$Rd,(ARMcmov rGPR:$false,t2_so_imm:$imm, imm:$cc, CCR:$ccr))*/]>, - RegConstraint<"$false = $Rd">; + [(set rGPR:$Rd, (ARMcmov rGPR:$false,t2_so_imm:$imm, + cmovpred:$p))]>, + RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; -// FIXME: Pseudo-ize these. For now, just mark codegen only. let isCodeGenOnly = 1 in { let isMoveImm = 1 in -def t2MOVCCi16 : T2I<(outs rGPR:$Rd), (ins rGPR:$false, imm0_65535_expr:$imm), - IIC_iCMOVi, - "movw", "\t$Rd, $imm", []>, - RegConstraint<"$false = $Rd"> { - let Inst{31-27} = 0b11110; - let Inst{25} = 1; - let Inst{24-21} = 0b0010; - let Inst{20} = 0; // The S bit. - let Inst{15} = 0; - - bits<4> Rd; - bits<16> imm; - - let Inst{11-8} = Rd; - let Inst{19-16} = imm{15-12}; - let Inst{26} = imm{11}; - let Inst{14-12} = imm{10-8}; - let Inst{7-0} = imm{7-0}; -} +def t2MOVCCi16 + : t2PseudoInst<(outs rGPR:$Rd), + (ins rGPR:$false, imm0_65535_expr:$imm, cmovpred:$p), + 4, IIC_iCMOVi, + [(set rGPR:$Rd, (ARMcmov rGPR:$false, imm0_65535:$imm, + cmovpred:$p))]>, + RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; let isMoveImm = 1 in -def t2MOVCCi32imm : PseudoInst<(outs rGPR:$dst), - (ins rGPR:$false, i32imm:$src, pred:$p), - IIC_iCMOVix2, []>, RegConstraint<"$false = $dst">; +def t2MVNCCi + : t2PseudoInst<(outs rGPR:$Rd), + (ins rGPR:$false, t2_so_imm:$imm, cmovpred:$p), + 4, IIC_iCMOVi, + [(set rGPR:$Rd, + (ARMcmov rGPR:$false, t2_so_imm_not:$imm, + cmovpred:$p))]>, + RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; + +class MOVCCShPseudo<SDPatternOperator opnode, Operand ty> + : t2PseudoInst<(outs rGPR:$Rd), + (ins rGPR:$false, rGPR:$Rm, i32imm:$imm, cmovpred:$p), + 4, IIC_iCMOVsi, + [(set rGPR:$Rd, (ARMcmov rGPR:$false, + (opnode rGPR:$Rm, (i32 ty:$imm)), + cmovpred:$p))]>, + RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; + +def t2MOVCClsl : MOVCCShPseudo<shl, imm0_31>; +def t2MOVCClsr : MOVCCShPseudo<srl, imm_sr>; +def t2MOVCCasr : MOVCCShPseudo<sra, imm_sr>; +def t2MOVCCror : MOVCCShPseudo<rotr, imm0_31>; let isMoveImm = 1 in -def t2MVNCCi : T2OneRegImm<(outs rGPR:$Rd), (ins rGPR:$false, t2_so_imm:$imm), - IIC_iCMOVi, "mvn", "\t$Rd, $imm", -[/*(set rGPR:$Rd,(ARMcmov rGPR:$false,t2_so_imm_not:$imm, - imm:$cc, CCR:$ccr))*/]>, - RegConstraint<"$false = $Rd"> { - let Inst{31-27} = 0b11110; - let Inst{25} = 0; - let Inst{24-21} = 0b0011; - let Inst{20} = 0; // The S bit. - let Inst{19-16} = 0b1111; // Rn - let Inst{15} = 0; -} - -class T2I_movcc_sh<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list<dag> pattern> - : T2TwoRegShiftImm<oops, iops, itin, opc, asm, pattern> { - let Inst{31-27} = 0b11101; - let Inst{26-25} = 0b01; - let Inst{24-21} = 0b0010; - let Inst{20} = 0; // The S bit. - let Inst{19-16} = 0b1111; // Rn - let Inst{5-4} = opcod; // Shift type. 
-} -def t2MOVCClsl : T2I_movcc_sh<0b00, (outs rGPR:$Rd), - (ins rGPR:$false, rGPR:$Rm, i32imm:$imm), - IIC_iCMOVsi, "lsl", ".w\t$Rd, $Rm, $imm", []>, - RegConstraint<"$false = $Rd">; -def t2MOVCClsr : T2I_movcc_sh<0b01, (outs rGPR:$Rd), - (ins rGPR:$false, rGPR:$Rm, i32imm:$imm), - IIC_iCMOVsi, "lsr", ".w\t$Rd, $Rm, $imm", []>, - RegConstraint<"$false = $Rd">; -def t2MOVCCasr : T2I_movcc_sh<0b10, (outs rGPR:$Rd), - (ins rGPR:$false, rGPR:$Rm, i32imm:$imm), - IIC_iCMOVsi, "asr", ".w\t$Rd, $Rm, $imm", []>, - RegConstraint<"$false = $Rd">; -def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd), - (ins rGPR:$false, rGPR:$Rm, i32imm:$imm), - IIC_iCMOVsi, "ror", ".w\t$Rd, $Rm, $imm", []>, - RegConstraint<"$false = $Rd">; +def t2MOVCCi32imm + : t2PseudoInst<(outs rGPR:$dst), + (ins rGPR:$false, i32imm:$src, cmovpred:$p), + 8, IIC_iCMOVix2, + [(set rGPR:$dst, (ARMcmov rGPR:$false, imm:$src, + cmovpred:$p))]>, + RegConstraint<"$false = $dst">; } // isCodeGenOnly = 1 } // neverHasSideEffects @@ -3055,40 +3194,38 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd), // memory barriers protect the atomic sequences let hasSideEffects = 1 in { -def t2DMB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary, - "dmb", "\t$opt", [(ARMMemBarrier (i32 imm:$opt))]>, - Requires<[IsThumb, HasDB]> { +def t2DMB : T2I<(outs), (ins memb_opt:$opt), NoItinerary, + "dmb", "\t$opt", [(int_arm_dmb (i32 imm0_15:$opt))]>, + Requires<[HasDB]> { bits<4> opt; let Inst{31-4} = 0xf3bf8f5; let Inst{3-0} = opt; } } -def t2DSB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary, - "dsb", "\t$opt", []>, - Requires<[IsThumb, HasDB]> { +def t2DSB : T2I<(outs), (ins memb_opt:$opt), NoItinerary, + "dsb", "\t$opt", [(int_arm_dsb (i32 imm0_15:$opt))]>, + Requires<[HasDB]> { bits<4> opt; let Inst{31-4} = 0xf3bf8f4; let Inst{3-0} = opt; } -def t2ISB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary, - "isb", "\t$opt", - []>, Requires<[IsThumb, HasDB]> { +def t2ISB : T2I<(outs), (ins instsyncb_opt:$opt), NoItinerary, + "isb", "\t$opt", []>, Requires<[HasDB]> { bits<4> opt; let Inst{31-4} = 0xf3bf8f6; let Inst{3-0} = opt; } -class T2I_ldrex<bits<2> opcod, dag oops, dag iops, AddrMode am, int sz, +class T2I_ldrex<bits<4> opcod, dag oops, dag iops, AddrMode am, int sz, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern, bits<4> rt2 = 0b1111> : Thumb2I<oops, iops, am, sz, itin, opc, asm, cstr, pattern> { let Inst{31-27} = 0b11101; let Inst{26-20} = 0b0001101; let Inst{11-8} = rt2; - let Inst{7-6} = 0b01; - let Inst{5-4} = opcod; + let Inst{7-4} = opcod; let Inst{3-0} = 0b1111; bits<4> addr; @@ -3096,15 +3233,14 @@ class T2I_ldrex<bits<2> opcod, dag oops, dag iops, AddrMode am, int sz, let Inst{19-16} = addr; let Inst{15-12} = Rt; } -class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, int sz, +class T2I_strex<bits<4> opcod, dag oops, dag iops, AddrMode am, int sz, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern, bits<4> rt2 = 0b1111> : Thumb2I<oops, iops, am, sz, itin, opc, asm, cstr, pattern> { let Inst{31-27} = 0b11101; let Inst{26-20} = 0b0001100; let Inst{11-8} = rt2; - let Inst{7-6} = 0b01; - let Inst{5-4} = opcod; + let Inst{7-4} = opcod; bits<4> Rd; bits<4> addr; @@ -3115,15 +3251,18 @@ class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, int sz, } let mayLoad = 1 in { -def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$Rt), (ins addr_offset_none:$addr), +def t2LDREXB : T2I_ldrex<0b0100, (outs rGPR:$Rt), (ins addr_offset_none:$addr), AddrModeNone, 
4, NoItinerary, - "ldrexb", "\t$Rt, $addr", "", []>; -def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$Rt), (ins addr_offset_none:$addr), + "ldrexb", "\t$Rt, $addr", "", + [(set rGPR:$Rt, (ldrex_1 addr_offset_none:$addr))]>; +def t2LDREXH : T2I_ldrex<0b0101, (outs rGPR:$Rt), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, - "ldrexh", "\t$Rt, $addr", "", []>; + "ldrexh", "\t$Rt, $addr", "", + [(set rGPR:$Rt, (ldrex_2 addr_offset_none:$addr))]>; def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_imm0_1020s4:$addr), AddrModeNone, 4, NoItinerary, - "ldrex", "\t$Rt, $addr", "", []> { + "ldrex", "\t$Rt, $addr", "", + [(set rGPR:$Rt, (ldrex_4 t2addrmode_imm0_1020s4:$addr))]> { bits<4> Rt; bits<12> addr; let Inst{31-27} = 0b11101; @@ -3134,7 +3273,7 @@ def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_imm0_1020s4:$addr), let Inst{7-0} = addr{7-0}; } let hasExtraDefRegAllocReq = 1 in -def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2), +def t2LDREXD : T2I_ldrex<0b0111, (outs rGPR:$Rt, rGPR:$Rt2), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "ldrexd", "\t$Rt, $Rt2, $addr", "", @@ -3142,22 +3281,60 @@ def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2), bits<4> Rt2; let Inst{11-8} = Rt2; } +def t2LDAEXB : T2I_ldrex<0b1100, (outs rGPR:$Rt), (ins addr_offset_none:$addr), + AddrModeNone, 4, NoItinerary, + "ldaexb", "\t$Rt, $addr", "", + []>, Requires<[IsThumb, HasV8]>; +def t2LDAEXH : T2I_ldrex<0b1101, (outs rGPR:$Rt), (ins addr_offset_none:$addr), + AddrModeNone, 4, NoItinerary, + "ldaexh", "\t$Rt, $addr", "", + []>, Requires<[IsThumb, HasV8]>; +def t2LDAEX : Thumb2I<(outs rGPR:$Rt), (ins addr_offset_none:$addr), + AddrModeNone, 4, NoItinerary, + "ldaex", "\t$Rt, $addr", "", + []>, Requires<[IsThumb, HasV8]> { + bits<4> Rt; + bits<4> addr; + let Inst{31-27} = 0b11101; + let Inst{26-20} = 0b0001101; + let Inst{19-16} = addr; + let Inst{15-12} = Rt; + let Inst{11-8} = 0b1111; + let Inst{7-0} = 0b11101111; +} +let hasExtraDefRegAllocReq = 1 in +def t2LDAEXD : T2I_ldrex<0b1111, (outs rGPR:$Rt, rGPR:$Rt2), + (ins addr_offset_none:$addr), + AddrModeNone, 4, NoItinerary, + "ldaexd", "\t$Rt, $Rt2, $addr", "", + [], {?, ?, ?, ?}>, Requires<[IsThumb, HasV8]> { + bits<4> Rt2; + let Inst{11-8} = Rt2; + + let Inst{7} = 1; +} } let mayStore = 1, Constraints = "@earlyclobber $Rd" in { -def t2STREXB : T2I_strex<0b00, (outs rGPR:$Rd), +def t2STREXB : T2I_strex<0b0100, (outs rGPR:$Rd), (ins rGPR:$Rt, addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, - "strexb", "\t$Rd, $Rt, $addr", "", []>; -def t2STREXH : T2I_strex<0b01, (outs rGPR:$Rd), + "strexb", "\t$Rd, $Rt, $addr", "", + [(set rGPR:$Rd, (strex_1 rGPR:$Rt, + addr_offset_none:$addr))]>; +def t2STREXH : T2I_strex<0b0101, (outs rGPR:$Rd), (ins rGPR:$Rt, addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, - "strexh", "\t$Rd, $Rt, $addr", "", []>; + "strexh", "\t$Rd, $Rt, $addr", "", + [(set rGPR:$Rd, (strex_2 rGPR:$Rt, + addr_offset_none:$addr))]>; + def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_imm0_1020s4:$addr), AddrModeNone, 4, NoItinerary, "strex", "\t$Rd, $Rt, $addr", "", - []> { + [(set rGPR:$Rd, (strex_4 rGPR:$Rt, + t2addrmode_imm0_1020s4:$addr))]> { bits<4> Rd; bits<4> Rt; bits<12> addr; @@ -3169,7 +3346,7 @@ def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, let Inst{7-0} = addr{7-0}; } let hasExtraSrcRegAllocReq = 1 in -def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd), +def t2STREXD : T2I_strex<0b0111, (outs rGPR:$Rd), (ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr), AddrModeNone, 4, 
NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", "", [], @@ -3177,9 +3354,45 @@ def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd), bits<4> Rt2; let Inst{11-8} = Rt2; } +def t2STLEXB : T2I_strex<0b1100, (outs rGPR:$Rd), + (ins rGPR:$Rt, addr_offset_none:$addr), + AddrModeNone, 4, NoItinerary, + "stlexb", "\t$Rd, $Rt, $addr", "", + []>, Requires<[IsThumb, HasV8]>; + +def t2STLEXH : T2I_strex<0b1101, (outs rGPR:$Rd), + (ins rGPR:$Rt, addr_offset_none:$addr), + AddrModeNone, 4, NoItinerary, + "stlexh", "\t$Rd, $Rt, $addr", "", + []>, Requires<[IsThumb, HasV8]>; + +def t2STLEX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, + addr_offset_none:$addr), + AddrModeNone, 4, NoItinerary, + "stlex", "\t$Rd, $Rt, $addr", "", + []>, Requires<[IsThumb, HasV8]> { + bits<4> Rd; + bits<4> Rt; + bits<4> addr; + let Inst{31-27} = 0b11101; + let Inst{26-20} = 0b0001100; + let Inst{19-16} = addr; + let Inst{15-12} = Rt; + let Inst{11-4} = 0b11111110; + let Inst{3-0} = Rd; +} +let hasExtraSrcRegAllocReq = 1 in +def t2STLEXD : T2I_strex<0b1111, (outs rGPR:$Rd), + (ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr), + AddrModeNone, 4, NoItinerary, + "stlexd", "\t$Rd, $Rt, $Rt2, $addr", "", [], + {?, ?, ?, ?}>, Requires<[IsThumb, HasV8]> { + bits<4> Rt2; + let Inst{11-8} = Rt2; +} } -def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", []>, +def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", [(int_arm_clrex)]>, Requires<[IsThumb2, HasV7]> { let Inst{31-16} = 0xf3bf; let Inst{15-14} = 0b10; @@ -3190,6 +3403,15 @@ def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", []>, let Inst{3-0} = 0b1111; } +def : T2Pat<(and (ldrex_1 addr_offset_none:$addr), 0xff), + (t2LDREXB addr_offset_none:$addr)>; +def : T2Pat<(and (ldrex_2 addr_offset_none:$addr), 0xffff), + (t2LDREXH addr_offset_none:$addr)>; +def : T2Pat<(strex_1 (and GPR:$Rt, 0xff), addr_offset_none:$addr), + (t2STREXB GPR:$Rt, addr_offset_none:$addr)>; +def : T2Pat<(strex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr), + (t2STREXH GPR:$Rt, addr_offset_none:$addr)>; + //===----------------------------------------------------------------------===// // SJLJ Exception handling intrinsics // eh_sjlj_setjmp() is an instruction sequence to store the return @@ -3243,35 +3465,39 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in { let isPredicable = 1 in def t2B : T2I<(outs), (ins uncondbrtarget:$target), IIC_Br, "b", ".w\t$target", - [(br bb:$target)]> { + [(br bb:$target)]>, Sched<[WriteBr]> { let Inst{31-27} = 0b11110; let Inst{15-14} = 0b10; let Inst{12} = 1; bits<24> target; - let Inst{26} = target{19}; - let Inst{11} = target{18}; - let Inst{13} = target{17}; + let Inst{26} = target{23}; + let Inst{13} = target{22}; + let Inst{11} = target{21}; let Inst{25-16} = target{20-11}; let Inst{10-0} = target{10-0}; let DecoderMethod = "DecodeT2BInstruction"; -} + let AsmMatchConverter = "cvtThumbBranches"; +} let isNotDuplicable = 1, isIndirectBranch = 1 in { def t2BR_JT : t2PseudoInst<(outs), (ins GPR:$target, GPR:$index, i32imm:$jt, i32imm:$id), 0, IIC_Br, - [(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt, imm:$id)]>; + [(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt, imm:$id)]>, + Sched<[WriteBr]>; // FIXME: Add a non-pc based case that can be predicated. 
def t2TBB_JT : t2PseudoInst<(outs), - (ins GPR:$index, i32imm:$jt, i32imm:$id), 0, IIC_Br, []>; + (ins GPR:$index, i32imm:$jt, i32imm:$id), 0, IIC_Br, []>, + Sched<[WriteBr]>; def t2TBH_JT : t2PseudoInst<(outs), - (ins GPR:$index, i32imm:$jt, i32imm:$id), 0, IIC_Br, []>; + (ins GPR:$index, i32imm:$jt, i32imm:$id), 0, IIC_Br, []>, + Sched<[WriteBr]>; def t2TBB : T2I<(outs), (ins addrmode_tbb:$addr), IIC_Br, - "tbb", "\t$addr", []> { + "tbb", "\t$addr", []>, Sched<[WriteBrTbl]> { bits<4> Rn; bits<4> Rm; let Inst{31-20} = 0b111010001101; @@ -3284,7 +3510,7 @@ def t2TBB : T2I<(outs), (ins addrmode_tbb:$addr), IIC_Br, } def t2TBH : T2I<(outs), (ins addrmode_tbh:$addr), IIC_Br, - "tbh", "\t$addr", []> { + "tbh", "\t$addr", []>, Sched<[WriteBrTbl]> { bits<4> Rn; bits<4> Rm; let Inst{31-20} = 0b111010001101; @@ -3304,7 +3530,7 @@ def t2TBH : T2I<(outs), (ins addrmode_tbh:$addr), IIC_Br, let isBranch = 1, isTerminator = 1 in def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br, "b", ".w\t$target", - [/*(ARMbrcond bb:$target, imm:$cc)*/]> { + [/*(ARMbrcond bb:$target, imm:$cc)*/]>, Sched<[WriteBr]> { let Inst{31-27} = 0b11110; let Inst{15-14} = 0b10; let Inst{12} = 0; @@ -3320,6 +3546,7 @@ def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br, let Inst{10-0} = target{11-1}; let DecoderMethod = "DecodeThumb2BCCInstruction"; + let AsmMatchConverter = "cvtThumbBranches"; } // Tail calls. The IOS version of thumb tail calls uses a t2 branch, so @@ -3331,14 +3558,15 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { (ins uncondbrtarget:$dst, pred:$p), 4, IIC_Br, [], (t2B uncondbrtarget:$dst, pred:$p)>, - Requires<[IsThumb2, IsIOS]>; + Requires<[IsThumb2, IsIOS]>, Sched<[WriteBr]>; } // IT block let Defs = [ITSTATE] in def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask), AddrModeNone, 2, IIC_iALUx, - "it$mask\t$cc", "", []> { + "it$mask\t$cc", "", []>, + ComplexDeprecationPredicate<"IT"> { // 16-bit instruction. 
let Inst{31-16} = 0x0000; let Inst{15-8} = 0b10111111; @@ -3353,7 +3581,8 @@ def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask), // Branch and Exchange Jazelle -- for disassembly only // Rm = Inst{19-16} -def t2BXJ : T2I<(outs), (ins rGPR:$func), NoItinerary, "bxj", "\t$func", []> { +def t2BXJ : T2I<(outs), (ins rGPR:$func), NoItinerary, "bxj", "\t$func", []>, + Sched<[WriteBr]> { bits<4> func; let Inst{31-27} = 0b11110; let Inst{26} = 0; @@ -3367,7 +3596,7 @@ let isBranch = 1, isTerminator = 1 in { def tCBZ : T1I<(outs), (ins tGPR:$Rn, t_cbtarget:$target), IIC_Br, "cbz\t$Rn, $target", []>, T1Misc<{0,0,?,1,?,?,?}>, - Requires<[IsThumb2]> { + Requires<[IsThumb2]>, Sched<[WriteBr]> { // A8.6.27 bits<6> target; bits<3> Rn; @@ -3379,7 +3608,7 @@ let isBranch = 1, isTerminator = 1 in { def tCBNZ : T1I<(outs), (ins tGPR:$Rn, t_cbtarget:$target), IIC_Br, "cbnz\t$Rn, $target", []>, T1Misc<{1,0,?,1,?,?,?}>, - Requires<[IsThumb2]> { + Requires<[IsThumb2]>, Sched<[WriteBr]> { // A8.6.27 bits<6> target; bits<3> Rn; @@ -3411,27 +3640,34 @@ class t2CPS<dag iops, string asm_op> : T2XI<(outs), iops, NoItinerary, let M = 1 in def t2CPS3p : t2CPS<(ins imod_op:$imod, iflags_op:$iflags, i32imm:$mode), - "$imod.w\t$iflags, $mode">; + "$imod\t$iflags, $mode">; let mode = 0, M = 0 in def t2CPS2p : t2CPS<(ins imod_op:$imod, iflags_op:$iflags), "$imod.w\t$iflags">; let imod = 0, iflags = 0, M = 1 in def t2CPS1p : t2CPS<(ins imm0_31:$mode), "\t$mode">; +def : t2InstAlias<"cps$imod.w $iflags, $mode", + (t2CPS3p imod_op:$imod, iflags_op:$iflags, i32imm:$mode), 0>; +def : t2InstAlias<"cps.w $mode", (t2CPS1p imm0_31:$mode), 0>; + // A6.3.4 Branches and miscellaneous control // Table A6-14 Change Processor State, and hint instructions -def t2HINT : T2I<(outs), (ins imm0_4:$imm), NoItinerary, "hint", "\t$imm",[]> { - bits<3> imm; +def t2HINT : T2I<(outs), (ins imm0_239:$imm), NoItinerary, "hint", ".w\t$imm",[]> { + bits<8> imm; let Inst{31-3} = 0b11110011101011111000000000000; - let Inst{2-0} = imm; + let Inst{7-0} = imm; } -def : t2InstAlias<"hint$p.w $imm", (t2HINT imm0_4:$imm, pred:$p)>; +def : t2InstAlias<"hint$p $imm", (t2HINT imm0_239:$imm, pred:$p)>; def : t2InstAlias<"nop$p.w", (t2HINT 0, pred:$p)>; def : t2InstAlias<"yield$p.w", (t2HINT 1, pred:$p)>; def : t2InstAlias<"wfe$p.w", (t2HINT 2, pred:$p)>; def : t2InstAlias<"wfi$p.w", (t2HINT 3, pred:$p)>; def : t2InstAlias<"sev$p.w", (t2HINT 4, pred:$p)>; +def : t2InstAlias<"sevl$p.w", (t2HINT 5, pred:$p)> { + let Predicates = [IsThumb2, HasV8]; +} def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", []> { bits<4> opt; @@ -3454,6 +3690,20 @@ def t2SMC : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt", let Inst{19-16} = opt; } +class T2DCPS<bits<2> opt, string opc> + : T2I<(outs), (ins), NoItinerary, opc, "", []>, Requires<[IsThumb2, HasV8]> { + let Inst{31-27} = 0b11110; + let Inst{26-20} = 0b1111000; + let Inst{19-16} = 0b1111; + let Inst{15-12} = 0b1000; + let Inst{11-2} = 0b0000000000; + let Inst{1-0} = opt; +} + +def t2DCPS1 : T2DCPS<0b01, "dcps1">; +def t2DCPS2 : T2DCPS<0b10, "dcps2">; +def t2DCPS3 : T2DCPS<0b11, "dcps3">; + class T2SRS<bits<2> Op, bit W, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> : T2I<oops, iops, itin, opc, asm, pattern> { @@ -3508,6 +3758,19 @@ def t2RFEIA : T2RFE<0b111010011001, (outs), (ins GPR:$Rn), NoItinerary, "rfeia", "\t$Rn", [/* For disassembly only; pattern left blank */]>; +// B9.3.19 SUBS PC, LR, #imm (Thumb2) system instruction. 
+// Exception return instruction is "subs pc, lr, #imm". +let isReturn = 1, isBarrier = 1, isTerminator = 1, Defs = [PC] in +def t2SUBS_PC_LR : T2I <(outs), (ins imm0_255:$imm), NoItinerary, + "subs", "\tpc, lr, $imm", + [(ARMintretflag imm0_255:$imm)]>, + Requires<[IsThumb2]> { + let Inst{31-8} = 0b111100111101111010001111; + + bits<8> imm; + let Inst{7-0} = imm; +} + //===----------------------------------------------------------------------===// // Non-Instruction Patterns // @@ -3591,7 +3854,7 @@ multiclass t2LdStCop<bits<4> op31_28, bit load, bit Dbit, string asm> { let DecoderMethod = "DecodeCopMemInstruction"; } def _PRE : T2CI<op31_28, - (outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr), + (outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5_pre:$addr), asm, "\t$cop, $CRd, $addr!"> { bits<13> addr; bits<4> cop; @@ -3651,10 +3914,10 @@ defm t2LDC : t2LdStCop<0b1110, 1, 0, "ldc">; defm t2LDCL : t2LdStCop<0b1110, 1, 1, "ldcl">; defm t2STC : t2LdStCop<0b1110, 0, 0, "stc">; defm t2STCL : t2LdStCop<0b1110, 0, 1, "stcl">; -defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2">; -defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l">; -defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2">; -defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l">; +defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2">, Requires<[PreV8]>; +defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l">, Requires<[PreV8]>; +defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2">, Requires<[PreV8]>; +defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l">, Requires<[PreV8]>; //===----------------------------------------------------------------------===// @@ -3666,7 +3929,7 @@ defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l">; // // A/R class can only move from CPSR or SPSR. def t2MRS_AR : T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, apsr", - []>, Requires<[IsThumb2,IsARClass]> { + []>, Requires<[IsThumb2,IsNotMClass]> { bits<4> Rd; let Inst{31-12} = 0b11110011111011111000; let Inst{11-8} = Rd; @@ -3676,7 +3939,7 @@ def t2MRS_AR : T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, apsr", def : t2InstAlias<"mrs${p} $Rd, cpsr", (t2MRS_AR GPR:$Rd, pred:$p)>; def t2MRSsys_AR: T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, spsr", - []>, Requires<[IsThumb2,IsARClass]> { + []>, Requires<[IsThumb2,IsNotMClass]> { bits<4> Rd; let Inst{31-12} = 0b11110011111111111000; let Inst{11-8} = Rd; @@ -3709,7 +3972,7 @@ def t2MRS_M : T2I<(outs rGPR:$Rd), (ins msr_mask:$mask), NoItinerary, // the mask with the fields to be accessed in the special register. 
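// [Editorial note on t2MSR_AR below; a sketch of the operand convention,
// not part of the patch.] The 5-bit $mask bundles the R bit (mask{4}:
// 0 = CPSR, 1 = SPSR) with the PSR field mask (mask{3-0}: c = 1, x = 2,
// s = 4, f = 8), so, assuming that layout:
//   msr cpsr_fc, r0  ->  mask = 0b01001
// writes only the flags and control bytes of CPSR from r0.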
def t2MSR_AR : T2I<(outs), (ins msr_mask:$mask, rGPR:$Rn), NoItinerary, "msr", "\t$mask, $Rn", []>, - Requires<[IsThumb2,IsARClass]> { + Requires<[IsThumb2,IsNotMClass]> { bits<5> mask; bits<4> Rn; let Inst{31-21} = 0b11110011100; @@ -3742,8 +4005,7 @@ def t2MSR_M : T2I<(outs), (ins msr_mask:$SYSm, rGPR:$Rn), class t2MovRCopro<bits<4> Op, string opc, bit direction, dag oops, dag iops, list<dag> pattern> - : T2Cop<Op, oops, iops, - !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"), + : T2Cop<Op, oops, iops, opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2", pattern> { let Inst{27-24} = 0b1110; let Inst{20} = direction; @@ -3768,7 +4030,7 @@ class t2MovRRCopro<bits<4> Op, string opc, bit direction, list<dag> pattern = []> : T2Cop<Op, (outs), (ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm), - !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern> { + opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm", pattern> { let Inst{27-24} = 0b1100; let Inst{23-21} = 0b010; let Inst{20} = direction; @@ -3792,33 +4054,38 @@ def t2MCR : t2MovRCopro<0b1110, "mcr", 0, (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, - imm:$CRm, imm:$opc2)]>; -def : t2InstAlias<"mcr $cop, $opc1, $Rt, $CRn, $CRm", + imm:$CRm, imm:$opc2)]>, + ComplexDeprecationPredicate<"MCR">; +def : t2InstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm", (t2MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, - c_imm:$CRm, 0)>; + c_imm:$CRm, 0, pred:$p)>; def t2MCR2 : t2MovRCopro<0b1111, "mcr2", 0, (outs), (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, - imm:$CRm, imm:$opc2)]>; -def : t2InstAlias<"mcr2 $cop, $opc1, $Rt, $CRn, $CRm", + imm:$CRm, imm:$opc2)]> { + let Predicates = [IsThumb2, PreV8]; +} +def : t2InstAlias<"mcr2${p} $cop, $opc1, $Rt, $CRn, $CRm", (t2MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, - c_imm:$CRm, 0)>; + c_imm:$CRm, 0, pred:$p)>; /* from coprocessor to ARM core register */ def t2MRC : t2MovRCopro<0b1110, "mrc", 1, - (outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, + (outs GPRwithAPSR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), []>; -def : t2InstAlias<"mrc $cop, $opc1, $Rt, $CRn, $CRm", - (t2MRC GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, - c_imm:$CRm, 0)>; +def : t2InstAlias<"mrc${p} $cop, $opc1, $Rt, $CRn, $CRm", + (t2MRC GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, + c_imm:$CRm, 0, pred:$p)>; def t2MRC2 : t2MovRCopro<0b1111, "mrc2", 1, - (outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, - c_imm:$CRm, imm0_7:$opc2), []>; -def : t2InstAlias<"mrc2 $cop, $opc1, $Rt, $CRn, $CRm", - (t2MRC2 GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, - c_imm:$CRm, 0)>; + (outs GPRwithAPSR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, + c_imm:$CRm, imm0_7:$opc2), []> { + let Predicates = [IsThumb2, PreV8]; +} +def : t2InstAlias<"mrc2${p} $cop, $opc1, $Rt, $CRn, $CRm", + (t2MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, + c_imm:$CRm, 0, pred:$p)>; def : T2v6Pat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2), (t2MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>; @@ -3833,19 +4100,24 @@ def t2MCRR : t2MovRRCopro<0b1110, "mcrr", 0, imm:$CRm)]>; def t2MCRR2 : t2MovRRCopro<0b1111, "mcrr2", 0, [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPR:$Rt, - GPR:$Rt2, imm:$CRm)]>; + GPR:$Rt2, imm:$CRm)]> { + let Predicates = [IsThumb2, PreV8]; +} + /* from coprocessor to ARM core 
register */ def t2MRRC : t2MovRRCopro<0b1110, "mrrc", 1>; -def t2MRRC2 : t2MovRRCopro<0b1111, "mrrc2", 1>; +def t2MRRC2 : t2MovRRCopro<0b1111, "mrrc2", 1> { + let Predicates = [IsThumb2, PreV8]; +} //===----------------------------------------------------------------------===// // Other Coprocessor Instructions. // -def tCDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, +def t2CDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), - "cdp\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", + "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", [(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn, imm:$CRm, imm:$opc2)]> { let Inst{27-24} = 0b1110; @@ -3864,11 +4136,13 @@ def tCDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, let Inst{15-12} = CRd; let Inst{19-16} = CRn; let Inst{23-20} = opc1; + + let Predicates = [IsThumb2, PreV8]; } def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), - "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", + "cdp2", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", [(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn, imm:$CRm, imm:$opc2)]> { let Inst{27-24} = 0b1110; @@ -3887,6 +4161,8 @@ def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1, let Inst{15-12} = CRd; let Inst{19-16} = CRn; let Inst{23-20} = opc1; + + let Predicates = [IsThumb2, PreV8]; } @@ -3960,6 +4236,15 @@ def : T2Pat<(atomic_store_32 t2addrmode_negimm8:$addr, GPR:$val), def : T2Pat<(atomic_store_32 t2addrmode_so_reg:$addr, GPR:$val), (t2STRs GPR:$val, t2addrmode_so_reg:$addr)>; +let AddedComplexity = 8 in { + def : T2Pat<(atomic_load_acquire_8 addr_offset_none:$addr), (t2LDAB addr_offset_none:$addr)>; + def : T2Pat<(atomic_load_acquire_16 addr_offset_none:$addr), (t2LDAH addr_offset_none:$addr)>; + def : T2Pat<(atomic_load_acquire_32 addr_offset_none:$addr), (t2LDA addr_offset_none:$addr)>; + def : T2Pat<(atomic_store_release_8 addr_offset_none:$addr, GPR:$val), (t2STLB GPR:$val, addr_offset_none:$addr)>; + def : T2Pat<(atomic_store_release_16 addr_offset_none:$addr, GPR:$val), (t2STLH GPR:$val, addr_offset_none:$addr)>; + def : T2Pat<(atomic_store_release_32 addr_offset_none:$addr, GPR:$val), (t2STL GPR:$val, addr_offset_none:$addr)>; +} + //===----------------------------------------------------------------------===// // Assembler aliases @@ -3981,7 +4266,8 @@ def : t2InstAlias<"sbc${s}${p} $Rd, $Rn, $ShiftedRm", // Aliases for ADD without the ".w" optional width specifier. def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm", - (t2ADDri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; + (t2ADDri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, + cc_out:$s)>; def : t2InstAlias<"add${p} $Rd, $Rn, $imm", (t2ADDri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095:$imm, pred:$p)>; def : t2InstAlias<"add${s}${p} $Rd, $Rn, $Rm", @@ -4056,9 +4342,9 @@ def : t2InstAlias<"tst${p} $Rn, $Rm", (t2TSTrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>; // Memory barriers -def : InstAlias<"dmb", (t2DMB 0xf)>, Requires<[IsThumb, HasDB]>; -def : InstAlias<"dsb", (t2DSB 0xf)>, Requires<[IsThumb, HasDB]>; -def : InstAlias<"isb", (t2ISB 0xf)>, Requires<[IsThumb, HasDB]>; +def : InstAlias<"dmb${p}", (t2DMB 0xf, pred:$p)>, Requires<[HasDB]>; +def : InstAlias<"dsb${p}", (t2DSB 0xf, pred:$p)>, Requires<[HasDB]>; +def : InstAlias<"isb${p}", (t2ISB 0xf, pred:$p)>, Requires<[HasDB]>; // Alias for LDR, LDRB, LDRH, LDRSB, and LDRSH without the ".w" optional // width specifier. 
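// [Editorial note on the AddedComplexity = 8 patterns above; illustrative,
// not part of the patch.] They let ARMv8 Thumb-2 lower C++11 acquire loads
// and release stores straight to the new LDA*/STL* forms instead of a plain
// access plus a DMB barrier. Assuming the patterns fire, for
// std::atomic<int> a:
//   a.load(std::memory_order_acquire)      ->  lda r0, [r1]
//   a.store(v, std::memory_order_release)  ->  stl r0, [r1]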
@@ -4085,7 +4371,7 @@ def : t2InstAlias<"ldrsh${p} $Rt, $addr", (t2LDRSHs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>; def : t2InstAlias<"ldr${p} $Rt, $addr", - (t2LDRpci GPR:$Rt, t2ldrlabel:$addr, pred:$p)>; + (t2LDRpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>; def : t2InstAlias<"ldrb${p} $Rt, $addr", (t2LDRBpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>; def : t2InstAlias<"ldrh${p} $Rt, $addr", @@ -4247,16 +4533,16 @@ def : t2InstAlias<"mvn${p} $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>; // Same for AND <--> BIC def : t2InstAlias<"bic${s}${p} $Rd, $Rn, $imm", - (t2ANDri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm, + (t2ANDri rGPR:$Rd, rGPR:$Rn, t2_so_imm_not:$imm, pred:$p, cc_out:$s)>; def : t2InstAlias<"bic${s}${p} $Rdn, $imm", - (t2ANDri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm, + (t2ANDri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm_not:$imm, pred:$p, cc_out:$s)>; def : t2InstAlias<"and${s}${p} $Rd, $Rn, $imm", - (t2BICri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm, + (t2BICri rGPR:$Rd, rGPR:$Rn, t2_so_imm_not:$imm, pred:$p, cc_out:$s)>; def : t2InstAlias<"and${s}${p} $Rdn, $imm", - (t2BICri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm, + (t2BICri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm_not:$imm, pred:$p, cc_out:$s)>; // Likewise, "add Rd, t2_so_imm_neg" -> sub def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm", @@ -4298,7 +4584,7 @@ def : t2InstAlias<"adr${p} $Rd, $addr", // LDR(literal) w/ alternate [pc, #imm] syntax. def t2LDRpcrel : t2AsmPseudo<"ldr${p} $Rt, $addr", - (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; + (ins GPR:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; def t2LDRBpcrel : t2AsmPseudo<"ldrb${p} $Rt, $addr", (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; def t2LDRHpcrel : t2AsmPseudo<"ldrh${p} $Rt, $addr", @@ -4309,7 +4595,7 @@ def t2LDRSHpcrel : t2AsmPseudo<"ldrsh${p} $Rt, $addr", (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; // Version w/ the .w suffix. def : t2InstAlias<"ldr${p}.w $Rt, $addr", - (t2LDRpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; + (t2LDRpcrel GPR:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p), 0>; def : t2InstAlias<"ldrb${p}.w $Rt, $addr", (t2LDRBpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; def : t2InstAlias<"ldrh${p}.w $Rt, $addr", @@ -4321,3 +4607,10 @@ def : t2InstAlias<"ldrsh${p}.w $Rt, $addr", def : t2InstAlias<"add${p} $Rd, pc, $imm", (t2ADR rGPR:$Rd, imm0_4095:$imm, pred:$p)>; + +// PLD/PLDW/PLI with alternate literal form. +def : t2InstAlias<"pld${p} $addr", + (t2PLDpci t2ldr_pcrel_imm12:$addr, pred:$p)>; +def : InstAlias<"pli${p} $addr", + (t2PLIpci t2ldr_pcrel_imm12:$addr, pred:$p)>, + Requires<[IsThumb2,HasV7]>; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td index b5a896c69985..a8cdc5ca0637 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td @@ -224,7 +224,36 @@ defm : VFPDTAnyInstAlias<"vpop${p}", "$r", defm : VFPDTAnyInstAlias<"vpop${p}", "$r", (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r)>; -// FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores +// FLDMX, FSTMX - Load and store multiple unknown precision registers for +// pre-armv6 cores. +// These instruction are deprecated so we don't want them to get selected. 
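// [Editorial note on vfp_ldstx_mult below; illustrative, not part of the
// patch.] The multiclass gives the assembler the deprecated unknown-
// precision forms while keeping every pattern list empty, so legacy code
// such as
//   fldmiax r2!, {d0-d3}
//   fstmdbx sp!, {d8-d9}
// still assembles (via the XIA_UPD and XDB_UPD writeback variants) but is
// never produced by instruction selection.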
+multiclass vfp_ldstx_mult<string asm, bit L_bit> { + // Unknown precision + def XIA : + AXXI4<(outs), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops), + IndexModeNone, !strconcat(asm, "iax${p}\t$Rn, $regs"), "", []> { + let Inst{24-23} = 0b01; // Increment After + let Inst{21} = 0; // No writeback + let Inst{20} = L_bit; + } + def XIA_UPD : + AXXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops), + IndexModeUpd, !strconcat(asm, "iax${p}\t$Rn!, $regs"), "$Rn = $wb", []> { + let Inst{24-23} = 0b01; // Increment After + let Inst{21} = 1; // Writeback + let Inst{20} = L_bit; + } + def XDB_UPD : + AXXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops), + IndexModeUpd, !strconcat(asm, "dbx${p}\t$Rn!, $regs"), "$Rn = $wb", []> { + let Inst{24-23} = 0b10; // Decrement Before + let Inst{21} = 1; + let Inst{20} = L_bit; + } +} + +defm FLDM : vfp_ldstx_mult<"fldm", 1>; +defm FSTM : vfp_ldstx_mult<"fstm", 0>; //===----------------------------------------------------------------------===// // FP Binary Operations. @@ -304,9 +333,52 @@ def VNMULS : ASbI<0b11100, 0b10, 1, 0, let D = VFPNeonA8Domain; } +multiclass vsel_inst<string op, bits<2> opc, int CC> { + let DecoderNamespace = "VFPV8", PostEncoderMethod = "", + Uses = [CPSR], AddedComplexity = 4 in { + def S : ASbInp<0b11100, opc, 0, + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + NoItinerary, !strconcat("vsel", op, ".f32\t$Sd, $Sn, $Sm"), + [(set SPR:$Sd, (ARMcmov SPR:$Sm, SPR:$Sn, CC))]>, + Requires<[HasFPARMv8]>; + + def D : ADbInp<0b11100, opc, 0, + (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), + NoItinerary, !strconcat("vsel", op, ".f64\t$Dd, $Dn, $Dm"), + [(set DPR:$Dd, (ARMcmov (f64 DPR:$Dm), (f64 DPR:$Dn), CC))]>, + Requires<[HasFPARMv8, HasDPVFP]>; + } +} + +// The CC constants here match ARMCC::CondCodes. +defm VSELGT : vsel_inst<"gt", 0b11, 12>; +defm VSELGE : vsel_inst<"ge", 0b10, 10>; +defm VSELEQ : vsel_inst<"eq", 0b00, 0>; +defm VSELVS : vsel_inst<"vs", 0b01, 6>; + +multiclass vmaxmin_inst<string op, bit opc, SDNode SD> { + let DecoderNamespace = "VFPV8", PostEncoderMethod = "" in { + def S : ASbInp<0b11101, 0b00, opc, + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + NoItinerary, !strconcat(op, ".f32\t$Sd, $Sn, $Sm"), + [(set SPR:$Sd, (SD SPR:$Sn, SPR:$Sm))]>, + Requires<[HasFPARMv8]>; + + def D : ADbInp<0b11101, 0b00, opc, + (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), + NoItinerary, !strconcat(op, ".f64\t$Dd, $Dn, $Dm"), + [(set DPR:$Dd, (f64 (SD (f64 DPR:$Dn), (f64 DPR:$Dm))))]>, + Requires<[HasFPARMv8, HasDPVFP]>; + } +} + +defm VMAXNM : vmaxmin_inst<"vmaxnm", 0, ARMvmaxnm>; +defm VMINNM : vmaxmin_inst<"vminnm", 1, ARMvminnm>; + // Match reassociated forms only if not sign dependent rounding. def : Pat<(fmul (fneg DPR:$a), (f64 DPR:$b)), - (VNMULD DPR:$a, DPR:$b)>, Requires<[NoHonorSignDependentRounding]>; + (VNMULD DPR:$a, DPR:$b)>, + Requires<[NoHonorSignDependentRounding,HasDPVFP]>; def : Pat<(fmul (fneg SPR:$a), SPR:$b), (VNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>; @@ -437,9 +509,11 @@ def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm, let Inst{11-8} = 0b1011; let Inst{7-6} = 0b11; let Inst{4} = 0; + + let Predicates = [HasVFP2, HasDPVFP]; } -// Between half-precision and single-precision. For disassembly only. +// Between half, single and double-precision. For disassembly only. // FIXME: Verify encoding after integrated assembler is working. 
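The bare integers in the vsel_inst instantiations above only work because they line up with LLVM's ARMCC::CondCodes enumeration, as the "The CC constants here match ARMCC::CondCodes" comment notes. A minimal standalone sketch of that correspondence (the enum layout mirrors ARMBaseInfo.h; the asserts are illustrative only):

    #include <cassert>

    // Mirrors ARMCC::CondCodes: the integer CC operands used by the VSEL
    // definitions above (gt=12, ge=10, eq=0, vs=6) index into this enum.
    enum CondCodes {
      EQ = 0, NE, HS, LO, MI, PL, VS, VC,
      HI, LS, GE, LT, GT, LE, AL
    };

    int main() {
      assert(GT == 12); // defm VSELGT : vsel_inst<"gt", 0b11, 12>
      assert(GE == 10); // defm VSELGE : vsel_inst<"ge", 0b10, 10>
      assert(EQ == 0);  // defm VSELEQ : vsel_inst<"eq", 0b00, 0>
      assert(VS == 6);  // defm VSELVS : vsel_inst<"vs", 0b01, 6>
      return 0;
    }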
def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), @@ -464,6 +538,111 @@ def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>; +def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0, + (outs DPR:$Dd), (ins SPR:$Sm), + NoItinerary, "vcvtb", ".f64.f16\t$Dd, $Sm", + []>, Requires<[HasFPARMv8, HasDPVFP]> { + // Instruction operands. + bits<5> Sm; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; +} + +def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0, + (outs SPR:$Sd), (ins DPR:$Dm), + NoItinerary, "vcvtb", ".f16.f64\t$Sd, $Dm", + []>, Requires<[HasFPARMv8, HasDPVFP]> { + // Instruction operands. + bits<5> Sd; + bits<5> Dm; + + // Encode instruction operands. + let Inst{3-0} = Dm{3-0}; + let Inst{5} = Dm{4}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; +} + +def VCVTTHD : ADuI<0b11101, 0b11, 0b0010, 0b11, 0, + (outs DPR:$Dd), (ins SPR:$Sm), + NoItinerary, "vcvtt", ".f64.f16\t$Dd, $Sm", + []>, Requires<[HasFPARMv8, HasDPVFP]> { + // Instruction operands. + bits<5> Sm; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; +} + +def VCVTTDH : ADuI<0b11101, 0b11, 0b0011, 0b11, 0, + (outs SPR:$Sd), (ins DPR:$Dm), + NoItinerary, "vcvtt", ".f16.f64\t$Sd, $Dm", + []>, Requires<[HasFPARMv8, HasDPVFP]> { + // Instruction operands. + bits<5> Sd; + bits<5> Dm; + + // Encode instruction operands. + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + let Inst{3-0} = Dm{3-0}; + let Inst{5} = Dm{4}; +} + +multiclass vcvt_inst<string opc, bits<2> rm> { + let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in { + def SS : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + NoItinerary, !strconcat("vcvt", opc, ".s32.f32\t$Sd, $Sm"), + []>, Requires<[HasFPARMv8]> { + let Inst{17-16} = rm; + } + + def US : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + NoItinerary, !strconcat("vcvt", opc, ".u32.f32\t$Sd, $Sm"), + []>, Requires<[HasFPARMv8]> { + let Inst{17-16} = rm; + } + + def SD : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0, + (outs SPR:$Sd), (ins DPR:$Dm), + NoItinerary, !strconcat("vcvt", opc, ".s32.f64\t$Sd, $Dm"), + []>, Requires<[HasFPARMv8, HasDPVFP]> { + bits<5> Dm; + + let Inst{17-16} = rm; + + // Encode instruction operands + let Inst{3-0} = Dm{3-0}; + let Inst{5} = Dm{4}; + let Inst{8} = 1; + } + + def UD : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0, + (outs SPR:$Sd), (ins DPR:$Dm), + NoItinerary, !strconcat("vcvt", opc, ".u32.f64\t$Sd, $Dm"), + []>, Requires<[HasFPARMv8, HasDPVFP]> { + bits<5> Dm; + + let Inst{17-16} = rm; + + // Encode instruction operands + let Inst{3-0} = Dm{3-0}; + let Inst{5} = Dm{4}; + let Inst{8} = 1; + } + } +} + +defm VCVTA : vcvt_inst<"a", 0b00>; +defm VCVTN : vcvt_inst<"n", 0b01>; +defm VCVTP : vcvt_inst<"p", 0b10>; +defm VCVTM : vcvt_inst<"m", 0b11>; + def VNEGD : ADuI<0b11101, 0b11, 0b0001, 0b01, 0, (outs DPR:$Dd), (ins DPR:$Dm), IIC_fpUNA64, "vneg", ".f64\t$Dd, $Dm", @@ -478,6 +657,63 @@ def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0, let D = VFPNeonA8Domain; } +multiclass vrint_inst_zrx<string opc, bit op, bit op2> { + def S : ASuI<0b11101, 0b11, 0b0110, 0b11, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + NoItinerary, !strconcat("vrint", opc), ".f32\t$Sd, $Sm", + []>, Requires<[HasFPARMv8]> { + let Inst{7} = op2; + let Inst{16} = op; + } + def D : ADuI<0b11101, 0b11, 0b0110, 0b11, 0, + (outs DPR:$Dd), 
(ins DPR:$Dm), + NoItinerary, !strconcat("vrint", opc), ".f64\t$Dd, $Dm", + []>, Requires<[HasFPARMv8, HasDPVFP]> { + let Inst{7} = op2; + let Inst{16} = op; + } + + def : InstAlias<!strconcat("vrint", opc, "$p.f32.f32\t$Sd, $Sm"), + (!cast<Instruction>(NAME#"S") SPR:$Sd, SPR:$Sm, pred:$p)>, + Requires<[HasFPARMv8]>; + def : InstAlias<!strconcat("vrint", opc, "$p.f64.f64\t$Dd, $Dm"), + (!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm, pred:$p)>, + Requires<[HasFPARMv8,HasDPVFP]>; +} + +defm VRINTZ : vrint_inst_zrx<"z", 0, 1>; +defm VRINTR : vrint_inst_zrx<"r", 0, 0>; +defm VRINTX : vrint_inst_zrx<"x", 1, 0>; + +multiclass vrint_inst_anpm<string opc, bits<2> rm> { + let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in { + def S : ASuInp<0b11101, 0b11, 0b1000, 0b01, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + NoItinerary, !strconcat("vrint", opc, ".f32\t$Sd, $Sm"), + []>, Requires<[HasFPARMv8]> { + let Inst{17-16} = rm; + } + def D : ADuInp<0b11101, 0b11, 0b1000, 0b01, 0, + (outs DPR:$Dd), (ins DPR:$Dm), + NoItinerary, !strconcat("vrint", opc, ".f64\t$Dd, $Dm"), + []>, Requires<[HasFPARMv8, HasDPVFP]> { + let Inst{17-16} = rm; + } + } + + def : InstAlias<!strconcat("vrint", opc, ".f32.f32\t$Sd, $Sm"), + (!cast<Instruction>(NAME#"S") SPR:$Sd, SPR:$Sm)>, + Requires<[HasFPARMv8]>; + def : InstAlias<!strconcat("vrint", opc, ".f64.f64\t$Dd, $Dm"), + (!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm)>, + Requires<[HasFPARMv8,HasDPVFP]>; +} + +defm VRINTA : vrint_inst_anpm<"a", 0b00>; +defm VRINTN : vrint_inst_anpm<"n", 0b01>; +defm VRINTP : vrint_inst_anpm<"p", 0b10>; +defm VRINTM : vrint_inst_anpm<"m", 0b11>; + def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs DPR:$Dd), (ins DPR:$Dm), IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm", @@ -667,6 +903,8 @@ class AVConv1IDs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, let Inst{5} = Sm{0}; let Inst{15-12} = Dd{3-0}; let Inst{22} = Dd{4}; + + let Predicates = [HasVFP2, HasDPVFP]; } class AVConv1InSs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, @@ -738,6 +976,8 @@ class AVConv1IsD_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, let Inst{5} = Dm{4}; let Inst{15-12} = Sd{4-1}; let Inst{22} = Sd{0}; + + let Predicates = [HasVFP2, HasDPVFP]; } class AVConv1InsS_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, @@ -841,7 +1081,8 @@ let Constraints = "$a = $dst" in { class AVConv1XInsS_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> { + : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern>, + Sched<[WriteCvtFP]> { bits<5> dst; // if dp_operation then UInt(D:Vd) else UInt(Vd:D); let Inst{22} = dst{0}; @@ -852,11 +1093,14 @@ class AVConv1XInsS_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> { + : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern>, + Sched<[WriteCvtFP]> { bits<5> dst; // if dp_operation then UInt(D:Vd) else UInt(Vd:D); let Inst{22} = dst{4}; let Inst{15-12} = dst{3-0}; + + let Predicates = [HasVFP2, HasDPVFP]; } def VTOSHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 0, @@ -969,7 +1213,7 @@ def VMLAD : ADbI<0b11100, 0b00, 0, 0, [(set DPR:$Dd, 
(fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; def VMLAS : ASbIn<0b11100, 0b00, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -985,7 +1229,7 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0, def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VMLAD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), (VMLAS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx,DontUseFusedMAC]>; @@ -996,7 +1240,7 @@ def VMLSD : ADbI<0b11100, 0b00, 1, 0, [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; def VMLSS : ASbIn<0b11100, 0b00, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1012,7 +1256,7 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0, def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>; @@ -1023,7 +1267,7 @@ def VNMLAD : ADbI<0b11100, 0b01, 1, 0, [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; def VNMLAS : ASbI<0b11100, 0b01, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1039,7 +1283,7 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0, def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin), (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>; @@ -1050,7 +1294,7 @@ def VNMLSD : ADbI<0b11100, 0b01, 0, 0, [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; def VNMLSS : ASbI<0b11100, 0b01, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1065,7 +1309,7 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0, def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin), (VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin), (VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>; @@ -1079,7 +1323,7 @@ def VFMAD : ADbI<0b11101, 0b10, 0, 0, [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,UseFusedMAC]>; + Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; def VFMAS : ASbIn<0b11101, 0b10, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1094,7 +1338,7 @@ def VFMAS : ASbIn<0b11101, 0b10, 0, 0, def : Pat<(fadd_mlx DPR:$dstin, (fmul_su 
DPR:$a, (f64 DPR:$b))), (VFMAD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP4,UseFusedMAC]>; + Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), (VFMAS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; @@ -1103,7 +1347,7 @@ def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), // (fma x, y, z) -> (vfms z, x, y) def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, DPR:$Ddin)), (VFMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, - Requires<[HasVFP4]>; + Requires<[HasVFP4,HasDPVFP]>; def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, SPR:$Sdin)), (VFMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; @@ -1114,7 +1358,7 @@ def VFMSD : ADbI<0b11101, 0b10, 1, 0, [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,UseFusedMAC]>; + Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; def VFMSS : ASbIn<0b11101, 0b10, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1129,7 +1373,7 @@ def VFMSS : ASbIn<0b11101, 0b10, 1, 0, def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VFMSD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP4,UseFusedMAC]>; + Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), (VFMSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; @@ -1138,14 +1382,14 @@ def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), // (fma (fneg x), y, z) -> (vfms z, x, y) def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin)), (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, - Requires<[HasVFP4]>; + Requires<[HasVFP4,HasDPVFP]>; def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin)), (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; // (fma x, (fneg y), z) -> (vfms z, x, y) def : Pat<(f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin)), (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, - Requires<[HasVFP4]>; + Requires<[HasVFP4,HasDPVFP]>; def : Pat<(f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin)), (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; @@ -1156,7 +1400,7 @@ def VFNMAD : ADbI<0b11101, 0b01, 1, 0, [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,UseFusedMAC]>; + Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; def VFNMAS : ASbI<0b11101, 0b01, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1171,7 +1415,7 @@ def VFNMAS : ASbI<0b11101, 0b01, 1, 0, def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), (VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP4,UseFusedMAC]>; + Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin), (VFNMAS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; @@ -1180,14 +1424,14 @@ def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin), // (fneg (fma x, y, z)) -> (vfnma z, x, y) def : Pat<(fneg (fma (f64 DPR:$Dn), (f64 DPR:$Dm), (f64 DPR:$Ddin))), (VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, - Requires<[HasVFP4]>; + Requires<[HasVFP4,HasDPVFP]>; def : Pat<(fneg (fma (f32 SPR:$Sn), (f32 SPR:$Sm), (f32 SPR:$Sdin))), (VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; // (fma (fneg x), y, (fneg z)) -> (vfnma z, x, y) def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, (fneg DPR:$Ddin))), (VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, - Requires<[HasVFP4]>; + Requires<[HasVFP4,HasDPVFP]>; def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, (fneg SPR:$Sdin))), (VFNMAS SPR:$Sdin, 
SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; @@ -1198,7 +1442,7 @@ def VFNMSD : ADbI<0b11101, 0b01, 0, 0, [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,UseFusedMAC]>; + Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; def VFNMSS : ASbI<0b11101, 0b01, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1212,7 +1456,7 @@ def VFNMSS : ASbI<0b11101, 0b01, 0, 0, def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin), (VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP4,UseFusedMAC]>; + Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin), (VFNMSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; @@ -1222,21 +1466,21 @@ def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin), // (fma x, y, (fneg z)) -> (vfnms z, x, y)) def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, (fneg DPR:$Ddin))), (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, - Requires<[HasVFP4]>; + Requires<[HasVFP4,HasDPVFP]>; def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, (fneg SPR:$Sdin))), (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; // (fneg (fma (fneg x), y, z)) -> (vfnms z, x, y) def : Pat<(fneg (f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin))), (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, - Requires<[HasVFP4]>; + Requires<[HasVFP4,HasDPVFP]>; def : Pat<(fneg (f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin))), (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; // (fneg (fma x, (fneg y), z) -> (vfnms z, x, y) def : Pat<(fneg (f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin))), (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, - Requires<[HasVFP4]>; + Requires<[HasVFP4,HasDPVFP]>; def : Pat<(fneg (f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin))), (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; @@ -1246,15 +1490,17 @@ def : Pat<(fneg (f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin))), // let neverHasSideEffects = 1 in { -def VMOVDcc : ARMPseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, pred:$p), - 4, IIC_fpUNA64, - [/*(set DPR:$Dd, (ARMcmov DPR:$Dn, DPR:$Dm, imm:$cc))*/]>, - RegConstraint<"$Dn = $Dd">; - -def VMOVScc : ARMPseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, pred:$p), - 4, IIC_fpUNA32, - [/*(set SPR:$Sd, (ARMcmov SPR:$Sn, SPR:$Sm, imm:$cc))*/]>, - RegConstraint<"$Sn = $Sd">; +def VMOVDcc : PseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, cmovpred:$p), + IIC_fpUNA64, + [(set (f64 DPR:$Dd), + (ARMcmov DPR:$Dn, DPR:$Dm, cmovpred:$p))]>, + RegConstraint<"$Dn = $Dd">, Requires<[HasVFP2,HasDPVFP]>; + +def VMOVScc : PseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, cmovpred:$p), + IIC_fpUNA32, + [(set (f32 SPR:$Sd), + (ARMcmov SPR:$Sn, SPR:$Sm, cmovpred:$p))]>, + RegConstraint<"$Sn = $Sd">, Requires<[HasVFP2]>; } // neverHasSideEffects //===----------------------------------------------------------------------===// @@ -1300,6 +1546,12 @@ let Uses = [FPSCR] in { "vmrs", "\t$Rt, mvfr0", []>; def VMRS_MVFR1 : MovFromVFP<0b0110 /* mvfr1 */, (outs GPR:$Rt), (ins), "vmrs", "\t$Rt, mvfr1", []>; + def VMRS_MVFR2 : MovFromVFP<0b0101 /* mvfr2 */, (outs GPR:$Rt), (ins), + "vmrs", "\t$Rt, mvfr2", []>, Requires<[HasFPARMv8]>; + def VMRS_FPINST : MovFromVFP<0b1001 /* fpinst */, (outs GPR:$Rt), (ins), + "vmrs", "\t$Rt, fpinst", []>; + def VMRS_FPINST2 : MovFromVFP<0b1010 /* fpinst2 */, (outs GPR:$Rt), (ins), + "vmrs", "\t$Rt, fpinst2", []>; } //===----------------------------------------------------------------------===// @@ -1333,6 +1585,11 @@ let Defs = [FPSCR] in { // System level GPR -> FPSID 
def VMSR_FPSID : MovToVFP<0b0000 /* fpsid */, (outs), (ins GPR:$src), "vmsr", "\tfpsid, $src", []>; + + def VMSR_FPINST : MovToVFP<0b1001 /* fpinst */, (outs), (ins GPR:$src), + "vmsr", "\tfpinst, $src", []>; + def VMSR_FPINST2 : MovToVFP<0b1010 /* fpinst2 */, (outs), (ins GPR:$src), + "vmsr", "\tfpinst2, $src", []>; } //===----------------------------------------------------------------------===// @@ -1344,7 +1601,8 @@ let isReMaterializable = 1 in { def FCONSTD : VFPAI<(outs DPR:$Dd), (ins vfp_f64imm:$imm), VFPMiscFrm, IIC_fpUNA64, "vmov", ".f64\t$Dd, $imm", - [(set DPR:$Dd, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> { + [(set DPR:$Dd, vfp_f64imm:$imm)]>, + Requires<[HasVFP3,HasDPVFP]> { bits<5> Dd; bits<8> imm; @@ -1426,23 +1684,23 @@ def : VFP2MnemonicAlias<"fmrx", "vmrs">; def : VFP2MnemonicAlias<"fmxr", "vmsr">; // Be friendly and accept the old form of zero-compare -def : VFP2InstAlias<"fcmpzd${p} $val", (VCMPZD DPR:$val, pred:$p)>; +def : VFP2DPInstAlias<"fcmpzd${p} $val", (VCMPZD DPR:$val, pred:$p)>; def : VFP2InstAlias<"fcmpzs${p} $val", (VCMPZS SPR:$val, pred:$p)>; def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>; def : VFP2InstAlias<"fadds${p} $Sd, $Sn, $Sm", (VADDS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>; -def : VFP2InstAlias<"faddd${p} $Dd, $Dn, $Dm", - (VADDD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>; +def : VFP2DPInstAlias<"faddd${p} $Dd, $Dn, $Dm", + (VADDD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>; def : VFP2InstAlias<"fsubs${p} $Sd, $Sn, $Sm", (VSUBS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>; -def : VFP2InstAlias<"fsubd${p} $Dd, $Dn, $Dm", - (VSUBD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>; +def : VFP2DPInstAlias<"fsubd${p} $Dd, $Dn, $Dm", + (VSUBD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>; // No need for the size suffix on VSQRT. It's implied by the register classes. def : VFP2InstAlias<"vsqrt${p} $Sd, $Sm", (VSQRTS SPR:$Sd, SPR:$Sm, pred:$p)>; -def : VFP2InstAlias<"vsqrt${p} $Dd, $Dm", (VSQRTD DPR:$Dd, DPR:$Dm, pred:$p)>; +def : VFP2DPInstAlias<"vsqrt${p} $Dd, $Dm", (VSQRTD DPR:$Dd, DPR:$Dm, pred:$p)>; // VLDR/VSTR accept an optional type suffix. 
def : VFP2InstAlias<"vldr${p}.32 $Sd, $addr", diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index c8ed5760f935..61596d54b692 100644 --- a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -90,6 +90,10 @@ namespace { typedef SmallVector<MemOpQueueEntry,8> MemOpQueue; typedef MemOpQueue::iterator MemOpQueueIter; + void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, + const MemOpQueue &MemOps, unsigned DefReg, + unsigned RangeBegin, unsigned RangeEnd); + bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, int Offset, unsigned Base, bool BaseKill, int Opcode, ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, @@ -109,12 +113,12 @@ namespace { unsigned PredReg, unsigned Scratch, DebugLoc dl, - SmallVector<MachineBasicBlock::iterator, 4> &Merges); + SmallVectorImpl<MachineBasicBlock::iterator> &Merges); void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base, int Opcode, unsigned Size, ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, MemOpQueue &MemOps, - SmallVector<MachineBasicBlock::iterator, 4> &Merges); + SmallVectorImpl<MachineBasicBlock::iterator> &Merges); void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps); bool FixInvalidRegPairOp(MachineBasicBlock &MBB, @@ -360,6 +364,62 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, return true; } +/// \brief Find all instructions using a given imp-def within a range. +/// +/// We are trying to combine a range of instructions, one of which (located at +/// position RangeBegin) implicitly defines a register. The final LDM/STM will +/// be placed at RangeEnd, and so any uses of this definition between RangeStart +/// and RangeEnd must be modified to use an undefined value. +/// +/// The live range continues until we find a second definition or one of the +/// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so +/// we must consider all uses and decide which are relevant in a second pass. +void ARMLoadStoreOpt::findUsesOfImpDef( + SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps, + unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) { + std::map<unsigned, MachineOperand *> Uses; + unsigned LastLivePos = RangeEnd; + + // First we find all uses of this register with Position between RangeBegin + // and RangeEnd, any or all of these could be uses of a definition at + // RangeBegin. We also record the latest position a definition at RangeBegin + // would be considered live. + for (unsigned i = 0; i < MemOps.size(); ++i) { + MachineInstr &MI = *MemOps[i].MBBI; + unsigned MIPosition = MemOps[i].Position; + if (MIPosition <= RangeBegin || MIPosition > RangeEnd) + continue; + + // If this instruction defines the register, then any later use will be of + // that definition rather than ours. + if (MI.definesRegister(DefReg)) + LastLivePos = std::min(LastLivePos, MIPosition); + + MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg); + if (!UseOp) + continue; + + // If this instruction kills the register then (assuming liveness is + // correct when we start) we don't need to think about anything after here. + if (UseOp->isKill()) + LastLivePos = std::min(LastLivePos, MIPosition); + + Uses[MIPosition] = UseOp; + } + + // Now we traverse the list of all uses, and append the ones that actually use + // our definition to the requested list. 
+ for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(), + E = Uses.end(); + I != E; ++I) { + // List is sorted by position so once we've found one out of range there + // will be no more to consider. + if (I->first > LastLivePos) + break; + UsesOfImpDefs.push_back(I->second); + } +} + // MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on // success. void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, @@ -371,7 +431,7 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, DebugLoc dl, - SmallVector<MachineBasicBlock::iterator, 4> &Merges) { + SmallVectorImpl<MachineBasicBlock::iterator> &Merges) { // First calculate which of the registers should be killed by the merged // instruction. const unsigned insertPos = memOps[insertAfter].Position; @@ -392,6 +452,7 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, SmallVector<std::pair<unsigned, bool>, 8> Regs; SmallVector<unsigned, 8> ImpDefs; + SmallVector<MachineOperand *, 8> UsesOfImpDefs; for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { unsigned Reg = memOps[i].Reg; // If we are inserting the merged operation after an operation that @@ -406,6 +467,12 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, unsigned DefReg = MO->getReg(); if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end()) ImpDefs.push_back(DefReg); + + // There may be other uses of the definition between this instruction and + // the eventual LDM/STM position. These should be marked undef if the + // merge takes place. + findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position, + insertPos); } } @@ -418,6 +485,16 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, // Merge succeeded, update records. Merges.push_back(prior(Loc)); + + // In gathering loads together, we may have moved the imp-def of a register + // past one of its uses. This is OK, since we know better than the rest of + // LLVM what's OK with ARM loads and stores; but we still have to adjust the + // affected uses. + for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(), + E = UsesOfImpDefs.end(); + I != E; ++I) + (*I)->setIsUndef(); + for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { // Remove kill flags from any memops that come before insertPos. if (Regs[i-memOpsBegin].second) { @@ -444,10 +521,10 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, /// load / store multiple instructions. void ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, - unsigned Base, int Opcode, unsigned Size, - ARMCC::CondCodes Pred, unsigned PredReg, - unsigned Scratch, MemOpQueue &MemOps, - SmallVector<MachineBasicBlock::iterator, 4> &Merges) { + unsigned Base, int Opcode, unsigned Size, + ARMCC::CondCodes Pred, unsigned PredReg, + unsigned Scratch, MemOpQueue &MemOps, + SmallVectorImpl<MachineBasicBlock::iterator> &Merges) { bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); int Offset = MemOps[SIndex].Offset; int SOffset = Offset; @@ -489,7 +566,10 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, if (Reg != ARM::SP && NewOffset == Offset + (int)Size && ((isNotVFP && RegNum > PRegNum) || - ((Count < Limit) && RegNum == PRegNum+1))) { + ((Count < Limit) && RegNum == PRegNum+1)) && + // On Swift we don't want vldm/vstm to start with an odd register number + // because Q register unaligned vldm/vstm need more uops.
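The second pass of findUsesOfImpDef above leans on std::map keeping its keys (instruction positions) sorted, so the scan can stop at the first use past the last live position. A simplified, self-contained model of that pass, with strings standing in for MachineOperand uses:

    #include <iostream>
    #include <map>
    #include <vector>

    // Simplified model of findUsesOfImpDef's second pass: Uses is keyed by
    // instruction position, so std::map iteration is already sorted and we
    // can stop at the first use beyond the last position the def is live.
    int main() {
      std::map<unsigned, const char *> Uses = {
          {3, "use A"}, {5, "use B"}, {9, "use C"}};
      unsigned LastLivePos = 6; // e.g. a kill or redefinition was seen at 6

      std::vector<const char *> UsesOfImpDef;
      for (const auto &U : Uses) {
        if (U.first > LastLivePos)
          break; // sorted by position: nothing later can be relevant
        UsesOfImpDef.push_back(U.second);
      }
      for (const char *U : UsesOfImpDef)
        std::cout << U << "\n"; // prints "use A" and "use B" only
    }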
+ (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) { Offset += Size; PRegNum = RegNum; ++Count; @@ -1484,7 +1564,7 @@ namespace { unsigned &PredReg, ARMCC::CondCodes &Pred, bool &isT2); bool RescheduleOps(MachineBasicBlock *MBB, - SmallVector<MachineInstr*, 4> &Ops, + SmallVectorImpl<MachineInstr *> &Ops, unsigned Base, bool isLd, DenseMap<MachineInstr*, unsigned> &MI2LocMap); bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB); @@ -1602,8 +1682,9 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, return false; // Make sure the base address satisfies i64 ld / st alignment requirement. + // At the moment, we ignore the memory operand's value. + // If we want to use AliasAnalysis, we should check it accordingly. if (!Op0->hasOneMemOperand() || - !(*Op0->memoperands_begin())->getValue() || (*Op0->memoperands_begin())->isVolatile()) return false; @@ -1655,7 +1736,7 @@ namespace { } bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, - SmallVector<MachineInstr*, 4> &Ops, + SmallVectorImpl<MachineInstr *> &Ops, unsigned Base, bool isLd, DenseMap<MachineInstr*, unsigned> &MI2LocMap) { bool RetVal = false; @@ -1857,9 +1938,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { if (!StopHere) BI->second.push_back(MI); } else { - SmallVector<MachineInstr*, 4> MIs; - MIs.push_back(MI); - Base2LdsMap[Base] = MIs; + Base2LdsMap[Base].push_back(MI); LdBases.push_back(Base); } } else { @@ -1875,9 +1954,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { if (!StopHere) BI->second.push_back(MI); } else { - SmallVector<MachineInstr*, 4> MIs; - MIs.push_back(MI); - Base2StsMap[Base] = MIs; + Base2StsMap[Base].push_back(MI); StBases.push_back(Base); } } @@ -1893,7 +1970,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { // Re-schedule loads. for (unsigned i = 0, e = LdBases.size(); i != e; ++i) { unsigned Base = LdBases[i]; - SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base]; + SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base]; if (Lds.size() > 1) RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap); } @@ -1901,7 +1978,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { // Re-schedule stores.
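The Base2LdsMap/Base2StsMap simplification above relies on the map's operator[] default-constructing an empty vector on first access, which makes the old build-then-assign dance redundant. The same idiom, sketched with std::map standing in for llvm::DenseMap:

    #include <cassert>
    #include <map>
    #include <vector>

    int main() {
      std::map<unsigned, std::vector<int>> Base2Lds;

      // Old style: build a one-element vector, then copy it into the map.
      std::vector<int> MIs;
      MIs.push_back(42);
      Base2Lds[0] = MIs;

      // New style: operator[] default-constructs an empty vector on first
      // access, so we can append directly and skip the temporary copy.
      Base2Lds[1].push_back(42);

      assert(Base2Lds[0] == Base2Lds[1]);
    }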
for (unsigned i = 0, e = StBases.size(); i != e; ++i) { unsigned Base = StBases[i]; - SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base]; + SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base]; if (Sts.size() > 1) RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap); } diff --git a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp index b6414832003d..e12c9c61ab14 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp @@ -82,7 +82,7 @@ bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO, MO.getMBB()->getSymbol(), OutContext)); break; case MachineOperand::MO_GlobalAddress: - MCOp = GetSymbolRef(MO, Mang->getSymbol(MO.getGlobal())); + MCOp = GetSymbolRef(MO, getSymbol(MO.getGlobal())); break; case MachineOperand::MO_ExternalSymbol: MCOp = GetSymbolRef(MO, diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h index f4248fcfcc75..010edf33cea4 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -36,6 +36,13 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// 'isThumb'. bool hasThumb2; + /// StByValParamsPadding - For a parameter that is split between + /// GPRs and memory: when recovering the GPR part, if StackAlignment == 8 + /// and the size of the GPR part mod 8 != 0, we need to insert a gap + /// before the parameter's start address. This allows us to "attach" the + /// GPR part to the part that was passed via the stack. + unsigned StByValParamsPadding; + /// ArgRegsSaveSize - Size of the register save area for vararg functions. /// unsigned ArgRegsSaveSize; @@ -77,12 +84,6 @@ class ARMFunctionInfo : public MachineFunctionInfo { unsigned GPRCS2Size; unsigned DPRCSSize; - /// GPRCS1Frames, GPRCS2Frames, DPRCSFrames - Keeps track of frame indices - /// which belong to these spill areas. - BitVector GPRCS1Frames; - BitVector GPRCS2Frames; - BitVector DPRCSFrames; - /// NumAlignedDPRCS2Regs - The number of callee-saved DPRs that are saved in /// the aligned portion of the stack frame. This is always a contiguous /// sequence of D-registers starting from d8.
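The byval padding above exists so the GPR part of a split argument can be laid out flush against its in-memory part; the save-area size is rounded up with the usual power-of-two trick, the same expression getArgRegsSaveSize uses in the next hunk. A small sketch of that rounding:

    #include <cassert>

    // Round Size up to a multiple of Align (Align must be a power of two).
    // This is the expression getArgRegsSaveSize uses below.
    unsigned alignUp(unsigned Size, unsigned Align) {
      return (Size + Align - 1) & ~(Align - 1);
    }

    int main() {
      // Three GPRs (12 bytes) of a split byval argument with 8-byte stack
      // alignment: a 4-byte gap is inserted so the GPR part lines up with
      // the part already on the stack.
      assert(alignUp(12, 8) == 16);
      assert(alignUp(16, 8) == 16); // already aligned: no padding
    }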
@@ -121,7 +122,6 @@ public: LRSpilledForFarJump(false), FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), - GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0), NumAlignedDPRCS2Regs(0), JumpTableUId(0), PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {} @@ -129,11 +129,11 @@ public: explicit ARMFunctionInfo(MachineFunction &MF) : isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()), hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()), + StByValParamsPadding(0), ArgRegsSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false), LRSpilledForFarJump(false), FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), - GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32), JumpTableUId(0), PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {} @@ -141,7 +141,14 @@ public: bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; } bool isThumb2Function() const { return isThumb && hasThumb2; } - unsigned getArgRegsSaveSize() const { return ArgRegsSaveSize; } + unsigned getStoredByValParamsPadding() const { return StByValParamsPadding; } + void setStoredByValParamsPadding(unsigned p) { StByValParamsPadding = p; } + + unsigned getArgRegsSaveSize(unsigned Align = 0) const { + if (!Align) + return ArgRegsSaveSize; + return (ArgRegsSaveSize + Align - 1) & ~(Align - 1); + } void setArgRegsSaveSize(unsigned s) { ArgRegsSaveSize = s; } bool hasStackFrame() const { return HasStackFrame; } @@ -175,59 +182,6 @@ public: void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; } void setDPRCalleeSavedAreaSize(unsigned s) { DPRCSSize = s; } - bool isGPRCalleeSavedArea1Frame(int fi) const { - if (fi < 0 || fi >= (int)GPRCS1Frames.size()) - return false; - return GPRCS1Frames[fi]; - } - bool isGPRCalleeSavedArea2Frame(int fi) const { - if (fi < 0 || fi >= (int)GPRCS2Frames.size()) - return false; - return GPRCS2Frames[fi]; - } - bool isDPRCalleeSavedAreaFrame(int fi) const { - if (fi < 0 || fi >= (int)DPRCSFrames.size()) - return false; - return DPRCSFrames[fi]; - } - - void addGPRCalleeSavedArea1Frame(int fi) { - if (fi >= 0) { - int Size = GPRCS1Frames.size(); - if (fi >= Size) { - Size *= 2; - if (fi >= Size) - Size = fi+1; - GPRCS1Frames.resize(Size); - } - GPRCS1Frames[fi] = true; - } - } - void addGPRCalleeSavedArea2Frame(int fi) { - if (fi >= 0) { - int Size = GPRCS2Frames.size(); - if (fi >= Size) { - Size *= 2; - if (fi >= Size) - Size = fi+1; - GPRCS2Frames.resize(Size); - } - GPRCS2Frames[fi] = true; - } - } - void addDPRCalleeSavedAreaFrame(int fi) { - if (fi >= 0) { - int Size = DPRCSFrames.size(); - if (fi >= Size) { - Size *= 2; - if (fi >= Size) - Size = fi+1; - DPRCSFrames.resize(Size); - } - DPRCSFrames[fi] = true; - } - } - unsigned createJumpTableUId() { return JumpTableUId++; } diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp index 6f3819afd0a5..a7880364d89a 100644 --- a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp @@ -18,7 +18,6 @@ using namespace llvm; void ARMRegisterInfo::anchor() { } -ARMRegisterInfo::ARMRegisterInfo(const ARMBaseInstrInfo &tii, - const ARMSubtarget &sti) - : ARMBaseRegisterInfo(tii, sti) { +ARMRegisterInfo::ARMRegisterInfo(const ARMSubtarget &sti) + : ARMBaseRegisterInfo(sti) { } diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h 
b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h index 8a248425c33c..fb1537cf9425 100644 --- a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h @@ -19,13 +19,13 @@ #include "llvm/Target/TargetRegisterInfo.h" namespace llvm { - class ARMSubtarget; - class ARMBaseInstrInfo; + +class ARMSubtarget; struct ARMRegisterInfo : public ARMBaseRegisterInfo { virtual void anchor(); public: - ARMRegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI); + ARMRegisterInfo(const ARMSubtarget &STI); }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td index b0f576bc2b6f..d0457618ef6f 100644 --- a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td +++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td @@ -27,31 +27,31 @@ class ARMFReg<bits<16> Enc, string n> : Register<n> { // Subregister indices. let Namespace = "ARM" in { -def qqsub_0 : SubRegIndex; -def qqsub_1 : SubRegIndex; +def qqsub_0 : SubRegIndex<256>; +def qqsub_1 : SubRegIndex<256, 256>; // Note: Code depends on these having consecutive numbers. -def qsub_0 : SubRegIndex; -def qsub_1 : SubRegIndex; -def qsub_2 : SubRegIndex<[qqsub_1, qsub_0]>; -def qsub_3 : SubRegIndex<[qqsub_1, qsub_1]>; - -def dsub_0 : SubRegIndex; -def dsub_1 : SubRegIndex; -def dsub_2 : SubRegIndex<[qsub_1, dsub_0]>; -def dsub_3 : SubRegIndex<[qsub_1, dsub_1]>; -def dsub_4 : SubRegIndex<[qsub_2, dsub_0]>; -def dsub_5 : SubRegIndex<[qsub_2, dsub_1]>; -def dsub_6 : SubRegIndex<[qsub_3, dsub_0]>; -def dsub_7 : SubRegIndex<[qsub_3, dsub_1]>; - -def ssub_0 : SubRegIndex; -def ssub_1 : SubRegIndex; -def ssub_2 : SubRegIndex<[dsub_1, ssub_0]>; -def ssub_3 : SubRegIndex<[dsub_1, ssub_1]>; - -def gsub_0 : SubRegIndex; -def gsub_1 : SubRegIndex; +def qsub_0 : SubRegIndex<128>; +def qsub_1 : SubRegIndex<128, 128>; +def qsub_2 : ComposedSubRegIndex<qqsub_1, qsub_0>; +def qsub_3 : ComposedSubRegIndex<qqsub_1, qsub_1>; + +def dsub_0 : SubRegIndex<64>; +def dsub_1 : SubRegIndex<64, 64>; +def dsub_2 : ComposedSubRegIndex<qsub_1, dsub_0>; +def dsub_3 : ComposedSubRegIndex<qsub_1, dsub_1>; +def dsub_4 : ComposedSubRegIndex<qsub_2, dsub_0>; +def dsub_5 : ComposedSubRegIndex<qsub_2, dsub_1>; +def dsub_6 : ComposedSubRegIndex<qsub_3, dsub_0>; +def dsub_7 : ComposedSubRegIndex<qsub_3, dsub_1>; + +def ssub_0 : SubRegIndex<32>; +def ssub_1 : SubRegIndex<32, 32>; +def ssub_2 : ComposedSubRegIndex<dsub_1, ssub_0>; +def ssub_3 : ComposedSubRegIndex<dsub_1, ssub_1>; + +def gsub_0 : SubRegIndex<32>; +def gsub_1 : SubRegIndex<32, 32>; // Let TableGen synthesize the remaining 12 ssub_* indices. // We don't need to name them. } @@ -157,21 +157,27 @@ def Q15 : ARMReg<15, "q15", [D30, D31]>; // Current Program Status Register. // We model fpscr with two registers: FPSCR models the control bits and will be -// reserved. FPSCR_NZCV models the flag bits and will be unreserved. -def CPSR : ARMReg<0, "cpsr">; -def APSR : ARMReg<1, "apsr">; -def SPSR : ARMReg<2, "spsr">; -def FPSCR : ARMReg<3, "fpscr">; -def FPSCR_NZCV : ARMReg<3, "fpscr_nzcv"> { +// reserved. FPSCR_NZCV models the flag bits and will be unreserved. APSR_NZCV +// models the APSR when it's accessed by some special instructions. In such cases +// it has the same encoding as PC. 
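The rewritten sub-register indices above now carry explicit bit geometry: SubRegIndex<Size, Offset> names a bit range, and ComposedSubRegIndex chains two indices by adding their offsets. A simplified model of that composition (TableGen computes this internally; the struct here is illustrative only):

    #include <cassert>

    // Model of SubRegIndex<Size, Offset>: a sub-register index is a
    // (bit size, bit offset) pair, and ComposedSubRegIndex adds offsets
    // while keeping the inner index's size.
    struct SubRegIndex {
      unsigned Size;   // width of the sub-register in bits
      unsigned Offset; // bit offset within the enclosing register
    };

    SubRegIndex compose(SubRegIndex Outer, SubRegIndex Inner) {
      return {Inner.Size, Outer.Offset + Inner.Offset};
    }

    int main() {
      SubRegIndex qsub_1 = {128, 128}; // def qsub_1 : SubRegIndex<128, 128>
      SubRegIndex dsub_1 = {64, 64};   // def dsub_1 : SubRegIndex<64, 64>
      SubRegIndex ssub_1 = {32, 32};   // def ssub_1 : SubRegIndex<32, 32>

      // dsub_3 : ComposedSubRegIndex<qsub_1, dsub_1> -> bits [192, 256) of a QQ.
      SubRegIndex dsub_3 = compose(qsub_1, dsub_1);
      assert(dsub_3.Offset == 192 && dsub_3.Size == 64);

      // ssub_3 : ComposedSubRegIndex<dsub_1, ssub_1> -> bits [96, 128) of a Q.
      SubRegIndex ssub_3 = compose(dsub_1, ssub_1);
      assert(ssub_3.Offset == 96 && ssub_3.Size == 32);
    }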
+def CPSR : ARMReg<0, "cpsr">; +def APSR : ARMReg<1, "apsr">; +def APSR_NZCV : ARMReg<15, "apsr_nzcv">; +def SPSR : ARMReg<2, "spsr">; +def FPSCR : ARMReg<3, "fpscr">; +def FPSCR_NZCV : ARMReg<3, "fpscr_nzcv"> { let Aliases = [FPSCR]; } def ITSTATE : ARMReg<4, "itstate">; // Special Registers - only available in privileged mode. -def FPSID : ARMReg<0, "fpsid">; -def MVFR1 : ARMReg<6, "mvfr1">; -def MVFR0 : ARMReg<7, "mvfr0">; -def FPEXC : ARMReg<8, "fpexc">; +def FPSID : ARMReg<0, "fpsid">; +def MVFR2 : ARMReg<5, "mvfr2">; +def MVFR1 : ARMReg<6, "mvfr1">; +def MVFR0 : ARMReg<7, "mvfr0">; +def FPEXC : ARMReg<8, "fpexc">; +def FPINST : ARMReg<9, "fpinst">; +def FPINST2 : ARMReg<10, "fpinst2">; // Register classes. // @@ -207,6 +213,16 @@ def GPRnopc : RegisterClass<"ARM", [i32], 32, (sub GPR, PC)> { }]; } +// GPRs without the PC but with APSR. Some instructions allow accessing the +// APSR, while actually encoding PC in the register field. This is useful +// for assembly and disassembly only. +def GPRwithAPSR : RegisterClass<"ARM", [i32], 32, (add (sub GPR, PC), APSR_NZCV)> { + let AltOrders = [(add LR, GPRnopc), (trunc GPRnopc, 8)]; + let AltOrderSelect = [{ + return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only(); + }]; +} + // GPRsp - Only the SP is legal. Used by Thumb1 instructions that want the // implied SP argument list. // FIXME: It would be better to not use this at all and refactor the @@ -236,7 +252,7 @@ def hGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, tGPR)>; // to the saved value before the tail call, which would clobber a call address. // Note, getMinimalPhysRegClass(R0) returns tGPR because of the names of // this class and the preceding one(!) This is what we want. -def tcGPR : RegisterClass<"ARM", [i32], 32, (add R0, R1, R2, R3, R9, R12)> { +def tcGPR : RegisterClass<"ARM", [i32], 32, (add R0, R1, R2, R3, R12)> { let AltOrders = [(and tcGPR, tGPR)]; let AltOrderSelect = [{ return MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only(); diff --git a/contrib/llvm/lib/Target/ARM/ARMSchedule.td b/contrib/llvm/lib/Target/ARM/ARMSchedule.td index 2d088de96e27..528c4ec73781 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSchedule.td +++ b/contrib/llvm/lib/Target/ARM/ARMSchedule.td @@ -69,6 +69,24 @@ def WriteCMP : SchedWrite; def WriteCMPsi : SchedWrite; def WriteCMPsr : SchedWrite; +// Division. +def WriteDiv : SchedWrite; + +// Loads. +def WriteLd : SchedWrite; +def WritePreLd : SchedWrite; + +// Branches. +def WriteBr : SchedWrite; +def WriteBrL : SchedWrite; +def WriteBrTbl : SchedWrite; + +// Fixed-point conversions. +def WriteCvtFP : SchedWrite; + +// Noop. +def WriteNoop : SchedWrite; + // Define TII for use in SchedVariant Predicates. def : PredicateProlog<[{ const ARMBaseInstrInfo *TII = diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td b/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td index 9739ed20ce2e..603e775d351d 100644 --- a/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td +++ b/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td @@ -1879,17 +1879,18 @@ def CortexA9Itineraries : ProcessorItineraries< // The following definitions describe the simpler per-operand machine model. // This works with MachineScheduler and will eventually replace itineraries. +class A9WriteLMOpsListType<list<WriteSequence> writes> { + list <WriteSequence> Writes = writes; + SchedMachineModel SchedModel = ?; +} // Cortex-A9 machine model for scheduling and other instruction cost heuristics.
def CortexA9Model : SchedMachineModel { let IssueWidth = 2; // 2 micro-ops are dispatched per cycle. - let MinLatency = 0; // Data dependencies are allowed within dispatch groups. + let MicroOpBufferSize = 56; // Based on available renamed registers. let LoadLatency = 2; // Optimistic load latency assuming bypass. // This is overridden by OperandCycles if the // Itineraries are queried instead. - let ILPWindow = 10; // Don't reschedule small blocks to hide // latency. Minimum latency requirements are already // modeled strictly by reserving resources. let MispredictPenalty = 8; // Based on estimate of pipeline depth. let Itineraries = CortexA9Itineraries; @@ -1904,7 +1905,7 @@ def A9UnitALU : ProcResource<2>; def A9UnitMul : ProcResource<1> { let Super = A9UnitALU; } def A9UnitAGU : ProcResource<1>; def A9UnitLS : ProcResource<1>; -def A9UnitFP : ProcResource<1> { let Buffered = 0; } +def A9UnitFP : ProcResource<1> { let BufferSize = 0; } def A9UnitB : ProcResource<1>; //===----------------------------------------------------------------------===// @@ -2014,7 +2015,7 @@ def A9WriteAdr#NumAddr : WriteSequence<[A9WriteAdr], NumAddr>; // Define a predicate to select the LDM based on number of memory addresses. def A9LMAdr#NumAddr#Pred : - SchedPredicate<"TII->getNumLDMAddresses(MI) == "#NumAddr>; + SchedPredicate<"(TII->getNumLDMAddresses(MI)+1)/2 == "#NumAddr>; } // foreach NumAddr @@ -2057,48 +2058,30 @@ def A9WriteL#NumAddr#Hi : WriteSequence< //===----------------------------------------------------------------------===// // LDM: Load multiple into 32-bit integer registers. +def A9WriteLMOpsList : A9WriteLMOpsListType< + [A9WriteL1, A9WriteL1Hi, + A9WriteL2, A9WriteL2Hi, + A9WriteL3, A9WriteL3Hi, + A9WriteL4, A9WriteL4Hi, + A9WriteL5, A9WriteL5Hi, + A9WriteL6, A9WriteL6Hi, + A9WriteL7, A9WriteL7Hi, + A9WriteL8, A9WriteL8Hi]>; + // A9WriteLM variants expand into a pair of writes for each 64-bit // value loaded. When the number of registers is odd, the last // A9WriteLnHi is naturally ignored because the instruction has no // following def operands. These variants take no issue resource, so // they may need to be part of a WriteSequence that includes A9WriteIssue.
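The updated A9LMAdr predicate above no longer matches the raw address count; it buckets LDM/VLDM by 64-bit pairs, rounding an odd final register up into the last pair so the trailing Hi write is simply unused. A sketch of the bucketing:

    #include <cassert>

    // The A9LMAdr<N>Pred predicates above now test
    //   (TII->getNumLDMAddresses(MI) + 1) / 2 == N
    // i.e. writes are modeled per 64-bit chunk, with an odd final register
    // rounded up into the last pair.
    unsigned lmBucket(unsigned NumAddresses) {
      return (NumAddresses + 1) / 2;
    }

    int main() {
      assert(lmBucket(1) == 1); // one register -> one pair, Hi write unused
      assert(lmBucket(2) == 1);
      assert(lmBucket(5) == 3); // five registers -> three pairs
      assert(lmBucket(8) == 4);
    }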
def A9WriteLM : SchedWriteVariant<[ - SchedVar<A9LMAdr1Pred, [A9WriteL1, A9WriteL1Hi]>, - SchedVar<A9LMAdr2Pred, [A9WriteL1, A9WriteL1Hi, - A9WriteL2, A9WriteL2Hi]>, - SchedVar<A9LMAdr3Pred, [A9WriteL1, A9WriteL1Hi, - A9WriteL2, A9WriteL2Hi, - A9WriteL3, A9WriteL3Hi]>, - SchedVar<A9LMAdr4Pred, [A9WriteL1, A9WriteL1Hi, - A9WriteL2, A9WriteL2Hi, - A9WriteL3, A9WriteL3Hi, - A9WriteL4, A9WriteL4Hi]>, - SchedVar<A9LMAdr5Pred, [A9WriteL1, A9WriteL1Hi, - A9WriteL2, A9WriteL2Hi, - A9WriteL3, A9WriteL3Hi, - A9WriteL4, A9WriteL4Hi, - A9WriteL5, A9WriteL5Hi]>, - SchedVar<A9LMAdr6Pred, [A9WriteL1, A9WriteL1Hi, - A9WriteL2, A9WriteL2Hi, - A9WriteL3, A9WriteL3Hi, - A9WriteL4, A9WriteL4Hi, - A9WriteL5, A9WriteL5Hi, - A9WriteL6, A9WriteL6Hi]>, - SchedVar<A9LMAdr7Pred, [A9WriteL1, A9WriteL1Hi, - A9WriteL2, A9WriteL2Hi, - A9WriteL3, A9WriteL3Hi, - A9WriteL4, A9WriteL4Hi, - A9WriteL5, A9WriteL5Hi, - A9WriteL6, A9WriteL6Hi, - A9WriteL7, A9WriteL7Hi]>, - SchedVar<A9LMAdr8Pred, [A9WriteL1, A9WriteL1Hi, - A9WriteL2, A9WriteL2Hi, - A9WriteL3, A9WriteL3Hi, - A9WriteL4, A9WriteL4Hi, - A9WriteL5, A9WriteL5Hi, - A9WriteL6, A9WriteL6Hi, - A9WriteL7, A9WriteL7Hi, - A9WriteL8, A9WriteL8Hi]>, + SchedVar<A9LMAdr1Pred, A9WriteLMOpsList.Writes[0-1]>, + SchedVar<A9LMAdr2Pred, A9WriteLMOpsList.Writes[0-3]>, + SchedVar<A9LMAdr3Pred, A9WriteLMOpsList.Writes[0-5]>, + SchedVar<A9LMAdr4Pred, A9WriteLMOpsList.Writes[0-7]>, + SchedVar<A9LMAdr5Pred, A9WriteLMOpsList.Writes[0-9]>, + SchedVar<A9LMAdr6Pred, A9WriteLMOpsList.Writes[0-11]>, + SchedVar<A9LMAdr7Pred, A9WriteLMOpsList.Writes[0-13]>, + SchedVar<A9LMAdr8Pred, A9WriteLMOpsList.Writes[0-15]>, // For unknown LDMs, define the maximum number of writes, but only // make the first two consume resources. SchedVar<A9LMUnknownPred, [A9WriteL1, A9WriteL1Hi, @@ -2180,49 +2163,39 @@ def A9WriteLMfp#NumAddr#Hi : WriteSequence< // pair of writes for each 64-bit data loaded. When the number of // registers is odd, the last WriteLMfpnHi is naturally ignored because // the instruction has no following def operands. 
+ +def A9WriteLMfpPostRAOpsList : A9WriteLMOpsListType< + [A9WriteLMfp1, A9WriteLMfp2, // 0-1 + A9WriteLMfp3, A9WriteLMfp4, // 2-3 + A9WriteLMfp5, A9WriteLMfp6, // 4-5 + A9WriteLMfp7, A9WriteLMfp8, // 6-7 + A9WriteLMfp1Hi, // 8-8 + A9WriteLMfp2Hi, A9WriteLMfp2Hi, // 9-10 + A9WriteLMfp3Hi, A9WriteLMfp3Hi, // 11-12 + A9WriteLMfp4Hi, A9WriteLMfp4Hi, // 13-14 + A9WriteLMfp5Hi, A9WriteLMfp5Hi, // 15-16 + A9WriteLMfp6Hi, A9WriteLMfp6Hi, // 17-18 + A9WriteLMfp7Hi, A9WriteLMfp7Hi, // 19-20 + A9WriteLMfp8Hi, A9WriteLMfp8Hi]>; // 21-22 + def A9WriteLMfpPostRA : SchedWriteVariant<[ - SchedVar<A9LMAdr1Pred, [A9WriteLMfp1, A9WriteLMfp1Hi]>, - SchedVar<A9LMAdr2Pred, [A9WriteLMfp1, A9WriteLMfp1Hi, - A9WriteLMfp2, A9WriteLMfp2Hi]>, - SchedVar<A9LMAdr3Pred, [A9WriteLMfp1, A9WriteLMfp1Hi, - A9WriteLMfp2, A9WriteLMfp2Hi, - A9WriteLMfp3, A9WriteLMfp3Hi]>, - SchedVar<A9LMAdr4Pred, [A9WriteLMfp1, A9WriteLMfp1Hi, - A9WriteLMfp2, A9WriteLMfp2Hi, - A9WriteLMfp3, A9WriteLMfp3Hi, - A9WriteLMfp4, A9WriteLMfp4Hi]>, - SchedVar<A9LMAdr5Pred, [A9WriteLMfp1, A9WriteLMfp1Hi, - A9WriteLMfp2, A9WriteLMfp2Hi, - A9WriteLMfp3, A9WriteLMfp3Hi, - A9WriteLMfp4, A9WriteLMfp4Hi, - A9WriteLMfp5, A9WriteLMfp5Hi]>, - SchedVar<A9LMAdr6Pred, [A9WriteLMfp1, A9WriteLMfp1Hi, - A9WriteLMfp2, A9WriteLMfp2Hi, - A9WriteLMfp3, A9WriteLMfp3Hi, - A9WriteLMfp4, A9WriteLMfp4Hi, - A9WriteLMfp5, A9WriteLMfp5Hi, - A9WriteLMfp6, A9WriteLMfp6Hi]>, - SchedVar<A9LMAdr7Pred, [A9WriteLMfp1, A9WriteLMfp1Hi, - A9WriteLMfp2, A9WriteLMfp2Hi, - A9WriteLMfp3, A9WriteLMfp3Hi, - A9WriteLMfp4, A9WriteLMfp4Hi, - A9WriteLMfp5, A9WriteLMfp5Hi, - A9WriteLMfp6, A9WriteLMfp6Hi, - A9WriteLMfp7, A9WriteLMfp7Hi]>, - SchedVar<A9LMAdr8Pred, [A9WriteLMfp1, A9WriteLMfp1Hi, - A9WriteLMfp2, A9WriteLMfp2Hi, - A9WriteLMfp3, A9WriteLMfp3Hi, - A9WriteLMfp4, A9WriteLMfp4Hi, - A9WriteLMfp5, A9WriteLMfp5Hi, - A9WriteLMfp6, A9WriteLMfp6Hi, - A9WriteLMfp7, A9WriteLMfp7Hi, - A9WriteLMfp8, A9WriteLMfp8Hi]>, + SchedVar<A9LMAdr1Pred, A9WriteLMfpPostRAOpsList.Writes[0-0, 8-8]>, + SchedVar<A9LMAdr2Pred, A9WriteLMfpPostRAOpsList.Writes[0-1, 9-10]>, + SchedVar<A9LMAdr3Pred, A9WriteLMfpPostRAOpsList.Writes[0-2, 10-12]>, + SchedVar<A9LMAdr4Pred, A9WriteLMfpPostRAOpsList.Writes[0-3, 11-14]>, + SchedVar<A9LMAdr5Pred, A9WriteLMfpPostRAOpsList.Writes[0-4, 12-16]>, + SchedVar<A9LMAdr6Pred, A9WriteLMfpPostRAOpsList.Writes[0-5, 13-18]>, + SchedVar<A9LMAdr7Pred, A9WriteLMfpPostRAOpsList.Writes[0-6, 14-20]>, + SchedVar<A9LMAdr8Pred, A9WriteLMfpPostRAOpsList.Writes[0-7, 15-22]>, // For unknown LDMs, define the maximum number of writes, but only - // make the first two consume resources. - SchedVar<A9LMUnknownPred, [A9WriteLMfp1, A9WriteLMfp1Hi, - A9WriteLMfp2, A9WriteLMfp2Hi, - A9WriteLMfp3Hi, A9WriteLMfp3Hi, - A9WriteLMfp4Hi, A9WriteLMfp4Hi, + // make the first two consume resources. We are optimizing for the case + // where the operands are DPRs, and this determines the first eight + // types. The remaining eight types are filled to cover the case + // where the operands are SPRs. + SchedVar<A9LMUnknownPred, [A9WriteLMfp1, A9WriteLMfp2, + A9WriteLMfp3Hi, A9WriteLMfp4Hi, + A9WriteLMfp5Hi, A9WriteLMfp6Hi, + A9WriteLMfp7Hi, A9WriteLMfp8Hi, A9WriteLMfp5Hi, A9WriteLMfp5Hi, A9WriteLMfp6Hi, A9WriteLMfp6Hi, A9WriteLMfp7Hi, A9WriteLMfp7Hi, @@ -2275,10 +2248,10 @@ def A9Read4 : SchedReadAdvance<3>; // This table follows the ARM Cortex-A9 Technical Reference Manuals, // mostly in order. 
-def :ItinRW<[A9WriteI], [IIC_iMOVi,IIC_iMOVr,IIC_iMOVsi, +def :ItinRW<[WriteALU], [IIC_iMOVi,IIC_iMOVr,IIC_iMOVsi, IIC_iMVNi,IIC_iMVNsi, IIC_iCMOVi,IIC_iCMOVr,IIC_iCMOVsi]>; -def :ItinRW<[A9WriteI,A9ReadALU],[IIC_iMVNr]>; +def :ItinRW<[WriteALU, A9ReadALU],[IIC_iMVNr]>; def :ItinRW<[A9WriteIsr], [IIC_iMOVsr,IIC_iMVNsr,IIC_iCMOVsr]>; def :ItinRW<[A9WriteI2], [IIC_iMOVix2,IIC_iCMOVix2]>; @@ -2487,10 +2460,59 @@ def : SchedAlias<WriteALUsr, A9WriteALUsr>; def : SchedAlias<WriteALUSsr, A9WriteALUsr>; def : SchedAlias<ReadALU, A9ReadALU>; def : SchedAlias<ReadALUsr, A9ReadALU>; -// FIXME: need to special case AND, ORR, EOR, BIC because they don't read -// advance. But our instrinfo claims it does. +def : InstRW< [WriteALU], + (instregex "ANDri", "ORRri", "EORri", "BICri", "ANDrr", "ORRrr", "EORrr", + "BICrr")>; +def : InstRW< [WriteALUsi], (instregex "ANDrsi", "ORRrsi", "EORrsi", "BICrsi")>; +def : InstRW< [WriteALUsr], (instregex "ANDrsr", "ORRrsr", "EORrsr", "BICrsr")>; + def : SchedAlias<WriteCMP, A9WriteALU>; def : SchedAlias<WriteCMPsi, A9WriteALU>; def : SchedAlias<WriteCMPsr, A9WriteALU>; + +def : InstRW< [A9WriteIsr], (instregex "MOVsr", "MOVsi", "MVNsr", "MOVCCsi", + "MOVCCsr")>; +def : InstRW< [WriteALU, A9ReadALU], (instregex "MVNr")>; +def : InstRW< [A9WriteI2], (instregex "MOVCCi32imm", "MOVi32imm", + "MOV_ga_dyn")>; +def : InstRW< [A9WriteI2pc], (instregex "MOV_ga_pcrel")>; +def : InstRW< [A9WriteI2ld], (instregex "MOV_ga_pcrel_ldr")>; + +def : InstRW< [WriteALU], (instregex "SEL")>; + +def : InstRW< [WriteALUsi], (instregex "BFC", "BFI", "UBFX", "SBFX")>; + +def : InstRW< [A9WriteM], + (instregex "MUL", "MULv5", "SMMUL", "SMMULR", "MLA", "MLAv5", "MLS", + "SMMLA", "SMMLAR", "SMMLS", "SMMLSR")>; +def : InstRW< [A9WriteM, A9WriteMHi], + (instregex "SMULL", "SMULLv5", "UMULL", "UMULLv5", "SMLAL$", "UMLAL", + "UMAAL", "SMLALv5", "UMLALv5", "UMAALv5", "SMLALBB", "SMLALBT", "SMLALTB", + "SMLALTT")>; +// FIXME: These instructions used to have NoItinerary. Just copied the one from above. +def : InstRW< [A9WriteM, A9WriteMHi], + (instregex "SMLAD", "SMLADX", "SMLALD", "SMLALDX", "SMLSD", "SMLSDX", + "SMLSLD", "SMLLDX", "SMUAD", "SMUADX", "SMUSD", "SMUSDX")>; + +def : InstRW<[A9WriteM16, A9WriteM16Hi], + (instregex "SMULBB", "SMULBT", "SMULTB", "SMULTT", "SMULWB", "SMULWT")>; +def : InstRW<[A9WriteM16, A9WriteM16Hi], + (instregex "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLAWB", "SMLAWT")>; + +def : InstRW<[A9WriteL], (instregex "LDRi12", "PICLDR$")>; +def : InstRW<[A9WriteLsi], (instregex "LDRrs")>; +def : InstRW<[A9WriteLb], + (instregex "LDRBi12", "PICLDRH", "PICLDRB", "PICLDRSH", "PICLDRSB", + "LDRH", "LDRSH", "LDRSB")>; +def : InstRW<[A9WriteLbsi], (instregex "LDRrs")>; + +def : WriteRes<WriteDiv, []> { let Latency = 0; } + +def : WriteRes<WriteBr, [A9UnitB]>; +def : WriteRes<WriteBrL, [A9UnitB]>; +def : WriteRes<WriteBrTbl, [A9UnitB]>; +def : WriteRes<WritePreLd, []>; +def : SchedAlias<WriteCvtFP, A9WriteF>; +def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; } } // SchedModel = CortexA9Model diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td b/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td index 7c6df410706e..8d7dbc246095 100644 --- a/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td +++ b/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td @@ -1076,7 +1076,7 @@ def SwiftItineraries : ProcessorItineraries< // Swift machine model for scheduling and other instruction cost heuristics. 
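The InstRW definitions above depend on instregex patterns behaving like regexes anchored at the start of the instruction name but not at the end; that is why "SMULL$" carries an explicit '$' (to avoid also catching SMULLv5) while a bare pattern deliberately matches a whole family. A sketch of that convention using std::regex (an assumption about the matcher inferred from the anchors used here, not a copy of TableGen's engine):

    #include <cassert>
    #include <regex>
    #include <string>

    // Assumed instregex convention: implicit '^' at the front, no implicit
    // anchor at the back, so '$' must be written out for an exact match.
    bool instRegexMatches(const std::string &Pattern, const std::string &Name) {
      return std::regex_search(Name, std::regex("^" + Pattern));
    }

    int main() {
      assert(instRegexMatches("SMULL", "SMULLv5"));   // prefix match
      assert(instRegexMatches("SMULL$", "SMULL"));    // exact name only
      assert(!instRegexMatches("SMULL$", "SMULLv5"));
      assert(!instRegexMatches("MULL", "SMULL"));     // anchored at the start
    }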
def SwiftModel : SchedMachineModel { let IssueWidth = 3; // 3 micro-ops are dispatched per cycle. - let MinLatency = 0; // Data dependencies are allowed within dispatch groups. + let MicroOpBufferSize = 45; // Based on NEON renamed registers. let LoadLatency = 3; let MispredictPenalty = 14; // A branch direction mispredict. @@ -1096,9 +1096,27 @@ let SchedModel = SwiftModel in { def SwiftUnitDiv : ProcResource<1>; // Generic resource requirements. + def SwiftWriteP0OneCycle : SchedWriteRes<[SwiftUnitP0]>; + def SwiftWriteP0TwoCycle : SchedWriteRes<[SwiftUnitP0]> { let Latency = 2; } + def SwiftWriteP0FourCycle : SchedWriteRes<[SwiftUnitP0]> { let Latency = 4; } + def SwiftWriteP0SixCycle : SchedWriteRes<[SwiftUnitP0]> { let Latency = 6; } + def SwiftWriteP0P1FourCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP1]> { + let Latency = 4; + } + def SwiftWriteP0P1SixCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP1]> { + let Latency = 6; + } + def SwiftWriteP01OneCycle : SchedWriteRes<[SwiftUnitP01]>; + def SwiftWriteP1TwoCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 2; } + def SwiftWriteP1FourCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 4; } + def SwiftWriteP1SixCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 6; } + def SwiftWriteP1EightCycle : SchedWriteRes<[SwiftUnitP1]> { let Latency = 8; } + def SwiftWriteP1TwelveCyc : SchedWriteRes<[SwiftUnitP1]> { let Latency = 12; } + def SwiftWriteP01OneCycle2x : WriteSequence<[SwiftWriteP01OneCycle], 2>; + def SwiftWriteP01OneCycle3x : WriteSequence<[SwiftWriteP01OneCycle], 3>; def SwiftWriteP01TwoCycle : SchedWriteRes<[SwiftUnitP01]> { let Latency = 2; } - def SwiftWriteP01ThreeCycleTwoUops : - SchedWriteRes<[SwiftUnitP01, SwiftUnitP01]> { + def SwiftWriteP01ThreeCycleTwoUops : SchedWriteRes<[SwiftUnitP01, + SwiftUnitP01]> { let Latency = 3; let NumMicroOps = 2; } @@ -1107,7 +1125,23 @@ let SchedModel = SwiftModel in { let NumMicroOps = 3; let ResourceCycles = [3]; } - + // Plain load without writeback. + def SwiftWriteP2ThreeCycle : SchedWriteRes<[SwiftUnitP2]> { + let Latency = 3; + } + def SwiftWriteP2FourCycle : SchedWriteRes<[SwiftUnitP2]> { + let Latency = 4; + } + // A store does not write to a register. + def SwiftWriteP2 : SchedWriteRes<[SwiftUnitP2]> { + let Latency = 0; + } + foreach Num = 1-4 in { + def SwiftWrite#Num#xP2 : WriteSequence<[SwiftWriteP2], Num>; + } + def SwiftWriteP01OneCycle2x_load : WriteSequence<[SwiftWriteP01OneCycle, + SwiftWriteP01OneCycle, + SwiftWriteP2ThreeCycle]>; // 4.2.4 Arithmetic and Logical. // ALU operation register shifted by immediate variant. 
def SwiftWriteALUsi : SchedWriteVariant<[ @@ -1137,8 +1171,906 @@ let SchedModel = SwiftModel in { def : ReadAdvance<ReadALU, 0>; def : SchedAlias<ReadALUsr, SwiftReadAdvanceALUsr>; + + def SwiftChooseShiftKindP01OneOrTwoCycle : SchedWriteVariant<[ + SchedVar<IsFastImmShiftSwiftPred, [SwiftWriteP01OneCycle]>, + SchedVar<NoSchedPred, [SwiftWriteP01TwoCycle]> + ]>; + + // 4.2.5 Integer comparison def : WriteRes<WriteCMP, [SwiftUnitP01]>; - def : WriteRes<WriteCMPsi, [SwiftUnitP01]>; - def : WriteRes<WriteCMPsr, [SwiftUnitP01]>; + def : SchedAlias<WriteCMPsi, SwiftChooseShiftKindP01OneOrTwoCycle>; + def : SchedAlias<WriteCMPsr, SwiftWriteP01TwoCycle>; + + // 4.2.6 Shift, Move + // Shift + // ASR,LSL,ROR,RRX + // MOV(register-shifted register) MVN(register-shifted register) + // Move + // MOV,MVN + // MOVT + // Sign/Zero extension + def : InstRW<[SwiftWriteP01OneCycle], + (instregex "SXTB", "SXTH", "SXTB16", "UXTB", "UXTH", "UXTB16", + "t2SXTB", "t2SXTH", "t2SXTB16", "t2UXTB", "t2UXTH", + "t2UXTB16")>; + // Pseudo instructions. + def : InstRW<[SwiftWriteP01OneCycle2x], + (instregex "MOVCCi32imm", "MOVi32imm", "MOV_ga_dyn", "t2MOVCCi32imm", + "t2MOVi32imm", "t2MOV_ga_dyn")>; + def : InstRW<[SwiftWriteP01OneCycle3x], + (instregex "MOV_ga_pcrel", "t2MOV_ga_pcrel", "t2MOVi16_ga_pcrel")>; + def : InstRW<[SwiftWriteP01OneCycle2x_load], + (instregex "MOV_ga_pcrel_ldr", "t2MOV_ga_pcrel_ldr")>; + + def SwiftWriteP0TwoCyleTwoUops : WriteSequence<[SwiftWriteP0OneCycle], 2>; + + def SwiftPredP0OneOrTwoCycle : SchedWriteVariant<[ + SchedVar<IsPredicatedPred, [ SwiftWriteP0TwoCyleTwoUops ]>, + SchedVar<NoSchedPred, [ SwiftWriteP0OneCycle ]> + ]>; + + // 4.2.7 Select + // SEL + def : InstRW<[SwiftPredP0OneOrTwoCycle], (instregex "SEL", "t2SEL")>; + + // 4.2.8 Bitfield + // BFI,BFC, SBFX,UBFX + def : InstRW< [SwiftWriteP01TwoCycle], + (instregex "BFC", "BFI", "UBFX", "SBFX", "(t|t2)BFC", "(t|t2)BFI", + "(t|t2)UBFX", "(t|t2)SBFX")>; + + // 4.2.9 Saturating arithmetic + def : InstRW< [SwiftWriteP01TwoCycle], + (instregex "QADD", "QSUB", "QDADD", "QDSUB", "SSAT", "SSAT16", "USAT", + "USAT16", "QADD8", "QADD16", "QSUB8", "QSUB16", "QASX", "QSAX", + "UQADD8", "UQADD16","UQSUB8","UQSUB16","UQASX","UQSAX", "t2QADD", + "t2QSUB", "t2QDADD", "t2QDSUB", "t2SSAT", "t2SSAT16", "t2USAT", + "t2QADD8", "t2QADD16", "t2QSUB8", "t2QSUB16", "t2QASX", "t2QSAX", + "t2UQADD8", "t2UQADD16","t2UQSUB8","t2UQSUB16","t2UQASX","t2UQSAX")>; + + // 4.2.10 Parallel Arithmetic + // Not flag setting. + def : InstRW< [SwiftWriteALUsr], + (instregex "SADD8", "SADD16", "SSUB8", "SSUB16", "SASX", "SSAX", + "UADD8", "UADD16", "USUB8", "USUB16", "UASX", "USAX", "t2SADD8", + "t2SADD16", "t2SSUB8", "t2SSUB16", "t2SASX", "t2SSAX", "t2UADD8", + "t2UADD16", "t2USUB8", "t2USUB16", "t2UASX", "t2USAX")>; + // Flag setting. + def : InstRW< [SwiftWriteP01TwoCycle], + (instregex "SHADD8", "SHADD16", "SHSUB8", "SHSUB16", "SHASX", "SHSAX", + "SXTAB", "SXTAB16", "SXTAH", "UHADD8", "UHADD16", "UHSUB8", "UHSUB16", + "UHASX", "UHSAX", "UXTAB", "UXTAB16", "UXTAH", "t2SHADD8", "t2SHADD16", + "t2SHSUB8", "t2SHSUB16", "t2SHASX", "t2SHSAX", "t2SXTAB", "t2SXTAB16", + "t2SXTAH", "t2UHADD8", "t2UHADD16", "t2UHSUB8", "t2UHSUB16", "t2UHASX", + "t2UHSAX", "t2UXTAB", "t2UXTAB16", "t2UXTAH")>; + + // 4.2.11 Sum of Absolute Difference + def : InstRW< [SwiftWriteP0P1FourCycle], (instregex "USAD8") >; + def : InstRW<[SwiftWriteP0P1FourCycle, ReadALU, ReadALU, SchedReadAdvance<2>], + (instregex "USADA8")>; + + // 4.2.12 Integer Multiply (32-bit result) + // Two sources.
+ def : InstRW< [SwiftWriteP0FourCycle], + (instregex "MULS", "MUL", "SMMUL", "SMMULR", "SMULBB", "SMULBT", + "SMULTB", "SMULTT", "SMULWB", "SMULWT", "SMUSD", "SMUSDXi", "t2MUL", + "t2SMMUL", "t2SMMULR", "t2SMULBB", "t2SMULBT", "t2SMULTB", "t2SMULTT", + "t2SMULWB", "t2SMULWT", "t2SMUSD")>; + + def SwiftWriteP0P01FiveCycleTwoUops : + SchedWriteRes<[SwiftUnitP0, SwiftUnitP01]> { + let Latency = 5; + } + + def SwiftPredP0P01FourFiveCycle : SchedWriteVariant<[ + SchedVar<IsPredicatedPred, [ SwiftWriteP0P01FiveCycleTwoUops ]>, + SchedVar<NoSchedPred, [ SwiftWriteP0FourCycle ]> + ]>; + + def SwiftReadAdvanceFourCyclesPred : SchedReadVariant<[ + SchedVar<IsPredicatedPred, [SchedReadAdvance<4>]>, + SchedVar<NoSchedPred, [ReadALU]> + ]>; + + // Multiply accumulate, three sources + def : InstRW< [SwiftPredP0P01FourFiveCycle, ReadALU, ReadALU, + SwiftReadAdvanceFourCyclesPred], + (instregex "MLAS", "MLA", "MLS", "SMMLA", "SMMLAR", "SMMLS", "SMMLSR", + "t2MLA", "t2MLS", "t2MLAS", "t2SMMLA", "t2SMMLAR", "t2SMMLS", + "t2SMMLSR")>; + + // 4.2.13 Integer Multiply (32-bit result, Q flag) + def : InstRW< [SwiftWriteP0FourCycle], + (instregex "SMUAD", "SMUADX", "t2SMUAD", "t2SMUADX")>; + def : InstRW< [SwiftPredP0P01FourFiveCycle, ReadALU, ReadALU, + SwiftReadAdvanceFourCyclesPred], + (instregex "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLSD", "SMLSDX", + "SMLAWB", "SMLAWT", "t2SMLABB", "t2SMLABT", "t2SMLATB", "t2SMLATT", + "t2SMLSD", "t2SMLSDX", "t2SMLAWB", "t2SMLAWT")>; + def : InstRW< [SwiftPredP0P01FourFiveCycle], + (instregex "SMLAD", "SMLADX", "t2SMLAD", "t2SMLADX")>; + + def SwiftP0P0P01FiveCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP01]> { + let Latency = 5; + let NumMicroOps = 3; + let ResourceCycles = [2, 1]; + } + def SwiftWrite1Cycle : SchedWriteRes<[]> { + let Latency = 1; + let NumMicroOps = 0; + } + def SwiftWrite5Cycle : SchedWriteRes<[]> { + let Latency = 5; + let NumMicroOps = 0; + } + def SwiftWrite6Cycle : SchedWriteRes<[]> { + let Latency = 6; + let NumMicroOps = 0; + } + + // 4.2.14 Integer Multiply, Long + def : InstRW< [SwiftP0P0P01FiveCycle, SwiftWrite5Cycle], + (instregex "SMULL$", "UMULL$", "t2SMULL$", "t2UMULL$")>; + + def Swift2P03P01FiveCycle : SchedWriteRes<[SwiftUnitP0, SwiftUnitP01]> { + let Latency = 7; + let NumMicroOps = 5; + let ResourceCycles = [2, 3]; + } + + // 4.2.15 Integer Multiply Accumulate, Long + // 4.2.16 Integer Multiply Accumulate, Dual + // 4.2.17 Integer Multiply Accumulate Accumulate, Long + // We are being a bit inaccurate here. + def : InstRW< [SwiftWrite5Cycle, Swift2P03P01FiveCycle, ReadALU, ReadALU, + SchedReadAdvance<4>, SchedReadAdvance<3>], + (instregex "SMLALS", "UMLALS", "SMLAL", "UMLAL", "MLALBB", "SMLALBT", + "SMLALTB", "SMLALTT", "SMLALD", "SMLALDX", "SMLSLD", "SMLSLDX", + "UMAAL", "t2SMLALS", "t2UMLALS", "t2SMLAL", "t2UMLAL", "t2MLALBB", "t2SMLALBT", + "t2SMLALTB", "t2SMLALTT", "t2SMLALD", "t2SMLALDX", "t2SMLSLD", "t2SMLSLDX", + "t2UMAAL")>; + + def SwiftDiv : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> { + let NumMicroOps = 1; + let Latency = 14; + let ResourceCycles = [1, 14]; + } + // 4.2.18 Integer Divide + def : WriteRes<WriteDiv, [SwiftUnitDiv]>; // Workaround. 
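The SwiftDiv write defined just above models an unpipelined divider: NumMicroOps = 1 issues a single micro-op to P0, but ResourceCycles = [1, 14] keeps SwiftUnitDiv occupied for the full 14-cycle latency, so back-to-back divides serialize on the unit. For contrast, a fully pipelined unit would keep the latency but give up the occupancy; a minimal sketch under that assumption (hypothetical, not part of this patch):

    // Hypothetical: the result still takes 14 cycles, but a new divide
    // could start every cycle because nothing reserves the unit.
    def SketchPipelinedDiv : SchedWriteRes<[SwiftUnitP0]> {
      let Latency = 14;
      let NumMicroOps = 1;
    }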
+ def : InstRW <[SwiftDiv], + (instregex "SDIV", "UDIV", "t2SDIV", "t2UDIV")>; + + // 4.2.19 Integer Load Single Element + // 4.2.20 Integer Load Sign-extended + def SwiftWriteP2P01ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> { + let Latency = 3; + let NumMicroOps = 2; + } + def SwiftWriteP2P01FourCyle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> { + let Latency = 4; + let NumMicroOps = 2; + } + def SwiftWriteP2P01P01FourCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01, + SwiftUnitP01]> { + let Latency = 4; + let NumMicroOps = 3; + } + def SwiftWriteP2P2ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP2]> { + let Latency = 3; + let NumMicroOps = 2; + } + def SwiftWriteP2P2P01ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP2, + SwiftUnitP01]> { + let Latency = 3; + let NumMicroOps = 3; + } + def SwiftWrBackOne : SchedWriteRes<[]> { + let Latency = 1; + let NumMicroOps = 0; + } + def SwiftWriteLdFour : SchedWriteRes<[]> { + let Latency = 4; + let NumMicroOps = 0; + } + // Not accurate. + def : InstRW<[SwiftWriteP2ThreeCycle], + (instregex "LDR(i12|rs)$", "LDRB(i12|rs)$", "t2LDR(i8|i12|s|pci)", + "t2LDR(H|B)(i8|i12|s|pci)", "LDREX", "tLDR[BH](r|i|spi|pci|pciASM)", + "tLDR(r|i|spi|pci|pciASM)")>; + def : InstRW<[SwiftWriteP2ThreeCycle], + (instregex "LDRH$", "PICLDR$", "PICLDR(H|B)$", "LDRcp$")>; + def : InstRW<[SwiftWriteP2P01FourCyle], + (instregex "PICLDRS(H|B)$", "t2LDRS(H|B)(i|r|p|s)", "LDRS(H|B)$", + "t2LDRpci_pic", "tLDRS(B|H)")>; + def : InstRW<[SwiftWriteP2P01ThreeCycle, SwiftWrBackOne], + (instregex "LD(RB|R)(_|T_)(POST|PRE)_(IMM|REG)", "LDRH(_PRE|_POST)", + "LDR(T|BT)_POST_(REG|IMM)", "LDRHT(i|r)", + "t2LD(R|RB|RH)_(PRE|POST)", "t2LD(R|RB|RH)T")>; + def : InstRW<[SwiftWriteP2P01P01FourCycle, SwiftWrBackOne], + (instregex "LDR(SH|SB)(_POST|_PRE)", "t2LDR(SH|SB)(_POST|_PRE)", + "LDRS(B|H)T(i|r)", "t2LDRS(B|H)T(i|r)", "t2LDRS(B|H)T")>; + + // 4.2.21 Integer Dual Load + // Not accurate. + def : InstRW<[SwiftWriteP2P2ThreeCycle, SwiftWriteLdFour], + (instregex "t2LDRDi8", "LDRD$")>; + def : InstRW<[SwiftWriteP2P2P01ThreeCycle, SwiftWriteLdFour, SwiftWrBackOne], + (instregex "LDRD_(POST|PRE)", "t2LDRD_(POST|PRE)")>; + + // 4.2.22 Integer Load, Multiple + // NumReg = 1 .. 16 + foreach Lat = 3-25 in { + def SwiftWriteLM#Lat#Cy : SchedWriteRes<[SwiftUnitP2]> { + let Latency = Lat; + } + def SwiftWriteLM#Lat#CyNo : SchedWriteRes<[]> { + let Latency = Lat; + let NumMicroOps = 0; + } + } + // Predicate.
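The foreach that follows stamps out one SchedPredicate per possible LDM register count. Each predicate is a C++ fragment that the TableGen backend pastes into the generated subtarget code and evaluates against the MachineInstr when a scheduling decision is made; for NumAddr = 2 the loop effectively produces:

    // Expansion of one iteration of the foreach below (NumAddr = 2).
    def SwiftLMAddr2Pred : SchedPredicate<"TII->getNumLDMAddresses(MI) == 2">;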
+ foreach NumAddr = 1-16 in { + def SwiftLMAddr#NumAddr#Pred : SchedPredicate<"TII->getNumLDMAddresses(MI) == "#NumAddr>; + } + def SwiftWriteLDMAddrNoWB : SchedWriteRes<[SwiftUnitP01]> { let Latency = 0; } + def SwiftWriteLDMAddrWB : SchedWriteRes<[SwiftUnitP01, SwiftUnitP01]>; + def SwiftWriteLM : SchedWriteVariant<[ + SchedVar<SwiftLMAddr2Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy]>, + SchedVar<SwiftLMAddr3Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy, + SwiftWriteLM5Cy]>, + SchedVar<SwiftLMAddr4Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy, + SwiftWriteLM5Cy, SwiftWriteLM6Cy]>, + SchedVar<SwiftLMAddr5Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy, + SwiftWriteLM5Cy, SwiftWriteLM6Cy, + SwiftWriteLM7Cy]>, + SchedVar<SwiftLMAddr6Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy, + SwiftWriteLM5Cy, SwiftWriteLM6Cy, + SwiftWriteLM7Cy, SwiftWriteLM8Cy]>, + SchedVar<SwiftLMAddr7Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy, + SwiftWriteLM5Cy, SwiftWriteLM6Cy, + SwiftWriteLM7Cy, SwiftWriteLM8Cy, + SwiftWriteLM9Cy]>, + SchedVar<SwiftLMAddr8Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy, + SwiftWriteLM5Cy, SwiftWriteLM6Cy, + SwiftWriteLM7Cy, SwiftWriteLM8Cy, + SwiftWriteLM9Cy, SwiftWriteLM10Cy]>, + SchedVar<SwiftLMAddr9Pred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy, + SwiftWriteLM5Cy, SwiftWriteLM6Cy, + SwiftWriteLM7Cy, SwiftWriteLM8Cy, + SwiftWriteLM9Cy, SwiftWriteLM10Cy, + SwiftWriteLM11Cy]>, + SchedVar<SwiftLMAddr10Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy, + SwiftWriteLM5Cy, SwiftWriteLM6Cy, + SwiftWriteLM7Cy, SwiftWriteLM8Cy, + SwiftWriteLM9Cy, SwiftWriteLM10Cy, + SwiftWriteLM11Cy, SwiftWriteLM12Cy]>, + SchedVar<SwiftLMAddr11Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy, + SwiftWriteLM5Cy, SwiftWriteLM6Cy, + SwiftWriteLM7Cy, SwiftWriteLM8Cy, + SwiftWriteLM9Cy, SwiftWriteLM10Cy, + SwiftWriteLM11Cy, SwiftWriteLM12Cy, + SwiftWriteLM13Cy]>, + SchedVar<SwiftLMAddr12Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy, + SwiftWriteLM5Cy, SwiftWriteLM6Cy, + SwiftWriteLM7Cy, SwiftWriteLM8Cy, + SwiftWriteLM9Cy, SwiftWriteLM10Cy, + SwiftWriteLM11Cy, SwiftWriteLM12Cy, + SwiftWriteLM13Cy, SwiftWriteLM14Cy]>, + SchedVar<SwiftLMAddr13Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy, + SwiftWriteLM5Cy, SwiftWriteLM6Cy, + SwiftWriteLM7Cy, SwiftWriteLM8Cy, + SwiftWriteLM9Cy, SwiftWriteLM10Cy, + SwiftWriteLM11Cy, SwiftWriteLM12Cy, + SwiftWriteLM13Cy, SwiftWriteLM14Cy, + SwiftWriteLM15Cy]>, + SchedVar<SwiftLMAddr14Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy, + SwiftWriteLM5Cy, SwiftWriteLM6Cy, + SwiftWriteLM7Cy, SwiftWriteLM8Cy, + SwiftWriteLM9Cy, SwiftWriteLM10Cy, + SwiftWriteLM11Cy, SwiftWriteLM12Cy, + SwiftWriteLM13Cy, SwiftWriteLM14Cy, + SwiftWriteLM15Cy, SwiftWriteLM16Cy]>, + SchedVar<SwiftLMAddr15Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy, + SwiftWriteLM5Cy, SwiftWriteLM6Cy, + SwiftWriteLM7Cy, SwiftWriteLM8Cy, + SwiftWriteLM9Cy, SwiftWriteLM10Cy, + SwiftWriteLM11Cy, SwiftWriteLM12Cy, + SwiftWriteLM13Cy, SwiftWriteLM14Cy, + SwiftWriteLM15Cy, SwiftWriteLM16Cy, + SwiftWriteLM17Cy]>, + SchedVar<SwiftLMAddr16Pred,[SwiftWriteLM3Cy, SwiftWriteLM4Cy, + SwiftWriteLM5Cy, SwiftWriteLM6Cy, + SwiftWriteLM7Cy, SwiftWriteLM8Cy, + SwiftWriteLM9Cy, SwiftWriteLM10Cy, + SwiftWriteLM11Cy, SwiftWriteLM12Cy, + SwiftWriteLM13Cy, SwiftWriteLM14Cy, + SwiftWriteLM15Cy, SwiftWriteLM16Cy, + SwiftWriteLM17Cy, SwiftWriteLM18Cy]>, + // Unknown number of registers, just use resources for two registers.
+ SchedVar<NoSchedPred, [SwiftWriteLM3Cy, SwiftWriteLM4Cy, + SwiftWriteLM5CyNo, SwiftWriteLM6CyNo, + SwiftWriteLM7CyNo, SwiftWriteLM8CyNo, + SwiftWriteLM9CyNo, SwiftWriteLM10CyNo, + SwiftWriteLM11CyNo, SwiftWriteLM12CyNo, + SwiftWriteLM13CyNo, SwiftWriteLM14CyNo, + SwiftWriteLM15CyNo, SwiftWriteLM16CyNo, + SwiftWriteLM17CyNo, SwiftWriteLM18CyNo]> + + ]> { let Variadic=1; } + + def : InstRW<[SwiftWriteLM, SwiftWriteLDMAddrNoWB], + (instregex "LDM(IA|DA|DB|IB)$", "t2LDM(IA|DA|DB|IB)$", + "(t|sys)LDM(IA|DA|DB|IB)$")>; + def : InstRW<[SwiftWriteLDMAddrWB, SwiftWriteLM], + (instregex /*"t2LDMIA_RET", "tLDMIA_RET", "LDMIA_RET",*/ + "LDM(IA|DA|DB|IB)_UPD", "(t2|sys|t)LDM(IA|DA|DB|IB)_UPD")>; + def : InstRW<[SwiftWriteLDMAddrWB, SwiftWriteLM, SwiftWriteP1TwoCycle], + (instregex "LDMIA_RET", "(t|t2)LDMIA_RET", "POP", "tPOP")>; + // 4.2.23 Integer Store, Single Element + def : InstRW<[SwiftWriteP2], + (instregex "PICSTR", "STR(i12|rs)", "STRB(i12|rs)", "STRH$", "STREX", + "t2STR(i12|i8|s)$", "t2STR[BH](i12|i8|s)$", "tSTR[BH](i|r)", "tSTR(i|r)", "tSTRspi")>; + + def : InstRW<[SwiftWriteP01OneCycle, SwiftWriteP2], + (instregex "STR(B_|_|BT_|T_)(PRE_IMM|PRE_REG|POST_REG|POST_IMM)", + "STR(i|r)_preidx", "STRB(i|r)_preidx", "STRH_preidx", "STR(H_|HT_)(PRE|POST)", + "STR(BT|HT|T)", "t2STR_(PRE|POST)", "t2STR[BH]_(PRE|POST)", + "t2STR_preidx", "t2STR[BH]_preidx", "t2ST(RB|RH|R)T")>; + + // 4.2.24 Integer Store, Dual + def : InstRW<[SwiftWriteP2, SwiftWriteP2, SwiftWriteP01OneCycle], + (instregex "STRD$", "t2STRDi8")>; + def : InstRW<[SwiftWriteP01OneCycle, SwiftWriteP2, SwiftWriteP2, + SwiftWriteP01OneCycle], + (instregex "(t2|t)STRD_(POST|PRE)", "STRD_(POST|PRE)")>; + + // 4.2.25 Integer Store, Multiple + def SwiftWriteStIncAddr : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> { + let Latency = 0; + let NumMicroOps = 2; + } + foreach NumAddr = 1-16 in { + def SwiftWriteSTM#NumAddr : WriteSequence<[SwiftWriteStIncAddr], NumAddr>; + } + def SwiftWriteSTM : SchedWriteVariant<[ + SchedVar<SwiftLMAddr2Pred, [SwiftWriteSTM2]>, + SchedVar<SwiftLMAddr3Pred, [SwiftWriteSTM3]>, + SchedVar<SwiftLMAddr4Pred, [SwiftWriteSTM4]>, + SchedVar<SwiftLMAddr5Pred, [SwiftWriteSTM5]>, + SchedVar<SwiftLMAddr6Pred, [SwiftWriteSTM6]>, + SchedVar<SwiftLMAddr7Pred, [SwiftWriteSTM7]>, + SchedVar<SwiftLMAddr8Pred, [SwiftWriteSTM8]>, + SchedVar<SwiftLMAddr9Pred, [SwiftWriteSTM9]>, + SchedVar<SwiftLMAddr10Pred,[SwiftWriteSTM10]>, + SchedVar<SwiftLMAddr11Pred,[SwiftWriteSTM11]>, + SchedVar<SwiftLMAddr12Pred,[SwiftWriteSTM12]>, + SchedVar<SwiftLMAddr13Pred,[SwiftWriteSTM13]>, + SchedVar<SwiftLMAddr14Pred,[SwiftWriteSTM14]>, + SchedVar<SwiftLMAddr15Pred,[SwiftWriteSTM15]>, + SchedVar<SwiftLMAddr16Pred,[SwiftWriteSTM16]>, + // Unknown number of registers, just use resources for two registers.
+ SchedVar<NoSchedPred, [SwiftWriteSTM2]> + ]>; + def : InstRW<[SwiftWriteSTM], + (instregex "STM(IB|IA|DB|DA)$", "(t2|sys|t)STM(IB|IA|DB|DA)$")>; + def : InstRW<[SwiftWriteP01OneCycle, SwiftWriteSTM], + (instregex "STM(IB|IA|DB|DA)_UPD", "(t2|sys|t)STM(IB|IA|DB|DA)_UPD", + "PUSH", "tPUSH")>; + + // 4.2.26 Branch + def : WriteRes<WriteBr, [SwiftUnitP1]> { let Latency = 0; } + def : WriteRes<WriteBrL, [SwiftUnitP1]> { let Latency = 2; } + def : WriteRes<WriteBrTbl, [SwiftUnitP1, SwiftUnitP2]> { let Latency = 0; } + + // 4.2.27 Not issued + def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; } + def : InstRW<[WriteNoop], (instregex "t2IT", "IT", "NOP")>; + + // 4.2.28 Advanced SIMD, Integer, 2 cycle + def : InstRW<[SwiftWriteP0TwoCycle], + (instregex "VADDv", "VSUBv", "VNEG(s|f|v)", "VADDL", "VSUBL", + "VADDW", "VSUBW", "VHADD", "VHSUB", "VRHADD", "VPADDi", + "VPADDL", "VAND", "VBIC", "VEOR", "VORN", "VORR", "VTST", + "VSHL", "VSHR(s|u)", "VSHLL", "VQSHL", "VQSHLU", "VBIF", + "VBIT", "VBSL", "VSLI", "VSRI", "VCLS", "VCLZ", "VCNT")>; + + def : InstRW<[SwiftWriteP1TwoCycle], + (instregex "VEXT", "VREV16", "VREV32", "VREV64")>; + + // 4.2.29 Advanced SIMD, Integer, 4 cycle + // 4.2.30 Advanced SIMD, Integer with Accumulate + def : InstRW<[SwiftWriteP0FourCycle], + (instregex "VABA", "VABAL", "VPADAL", "VRSRA", "VSRA", "VACGE", "VACGT", + "VACLE", "VACLT", "VCEQ", "VCGE", "VCGT", "VCLE", "VCLT", "VRSHL", + "VQRSHL", "VRSHR(u|s)", "VABS(f|v)", "VQABS", "VQNEG", "VQADD", + "VQSUB")>; + def : InstRW<[SwiftWriteP1FourCycle], + (instregex "VRECPE", "VRSQRTE")>; + + // 4.2.31 Advanced SIMD, Add and Shift with Narrow + def : InstRW<[SwiftWriteP0P1FourCycle], + (instregex "VADDHN", "VSUBHN", "VSHRN")>; + def : InstRW<[SwiftWriteP0P1SixCycle], + (instregex "VRADDHN", "VRSUBHN", "VRSHRN", "VQSHRN", "VQSHRUN", + "VQRSHRN", "VQRSHRUN")>; + + // 4.2.32 Advanced SIMD, Vector Table Lookup + foreach Num = 1-4 in { + def SwiftWrite#Num#xP1TwoCycle : WriteSequence<[SwiftWriteP1TwoCycle], Num>; + } + def : InstRW<[SwiftWrite1xP1TwoCycle], + (instregex "VTB(L|X)1")>; + def : InstRW<[SwiftWrite2xP1TwoCycle], + (instregex "VTB(L|X)2")>; + def : InstRW<[SwiftWrite3xP1TwoCycle], + (instregex "VTB(L|X)3")>; + def : InstRW<[SwiftWrite4xP1TwoCycle], + (instregex "VTB(L|X)4")>; + + // 4.2.33 Advanced SIMD, Transpose + def : InstRW<[SwiftWriteP1FourCycle, SwiftWriteP1FourCycle, + SwiftWriteP1TwoCycle/*RsrcOnly*/, SchedReadAdvance<2>], + (instregex "VSWP", "VTRN", "VUZP", "VZIP")>; + + // 4.2.34 Advanced SIMD and VFP, Floating Point + def : InstRW<[SwiftWriteP0TwoCycle], (instregex "VABS(S|D)$", "VNEG(S|D)$")>; + def : InstRW<[SwiftWriteP0FourCycle], + (instregex "VCMP(D|S|ZD|ZS)$", "VCMPE(D|S|ZD|ZS)")>; + def : InstRW<[SwiftWriteP0FourCycle], + (instregex "VADD(S|f)", "VSUB(S|f)", "VABD", "VPADDf", "VMAX", "VMIN", "VPMAX", + "VPMIN")>; + def : InstRW<[SwiftWriteP0SixCycle], (instregex "VADDD$", "VSUBD$")>; + def : InstRW<[SwiftWriteP1EightCycle], (instregex "VRECPS", "VRSQRTS")>; + + // 4.2.35 Advanced SIMD and VFP, Multiply + def : InstRW<[SwiftWriteP1FourCycle], + (instregex "VMUL(S|v|p|f|s)", "VNMULS", "VQDMULH", "VQRDMULH", + "VMULL", "VQDMULL")>; + def : InstRW<[SwiftWriteP1SixCycle], + (instregex "VMULD", "VNMULD")>; + def : InstRW<[SwiftWriteP1FourCycle], + (instregex "VMLA", "VMLS", "VNMLA", "VNMLS", "VFMA(S|D)", "VFMS(S|D)", + "VFNMA", "VFNMS", "VMLAL", "VMLSL","VQDMLAL", "VQDMLSL")>; + def : InstRW<[SwiftWriteP1EightCycle], (instregex "VFMAfd", "VFMSfd")>; + def : InstRW<[SwiftWriteP1TwelveCyc], 
(instregex "VFMAfq", "VFMSfq")>; + + // 4.2.36 Advanced SIMD and VFP, Convert + def : InstRW<[SwiftWriteP1FourCycle], (instregex "VCVT", "V(S|U)IT", "VTO(S|U)")>; + // Fixpoint conversions. + def : WriteRes<WriteCvtFP, [SwiftUnitP1]> { let Latency = 4; } + + // 4.2.37 Advanced SIMD and VFP, Move + def : InstRW<[SwiftWriteP0TwoCycle], + (instregex "VMOVv", "VMOV(S|D)$", "VMOV(S|D)cc", + "VMVNv", "VMVN(d|q)", "VMVN(S|D)cc", + "FCONST(D|S)")>; + def : InstRW<[SwiftWriteP1TwoCycle], (instregex "VMOVN", "VMOVL")>; + def : InstRW<[WriteSequence<[SwiftWriteP0FourCycle, SwiftWriteP1TwoCycle]>], + (instregex "VQMOVN")>; + def : InstRW<[SwiftWriteP1TwoCycle], (instregex "VDUPLN", "VDUPf")>; + def : InstRW<[WriteSequence<[SwiftWriteP2FourCycle, SwiftWriteP1TwoCycle]>], + (instregex "VDUP(8|16|32)")>; + def : InstRW<[SwiftWriteP2ThreeCycle], (instregex "VMOVRS$")>; + def : InstRW<[WriteSequence<[SwiftWriteP2FourCycle, SwiftWriteP0TwoCycle]>], + (instregex "VMOVSR$", "VSETLN")>; + def : InstRW<[SwiftWriteP2ThreeCycle, SwiftWriteP2FourCycle], + (instregex "VMOVRR(D|S)$")>; + def : InstRW<[SwiftWriteP2FourCycle], (instregex "VMOVDRR$")>; + def : InstRW<[WriteSequence<[SwiftWriteP2FourCycle, SwiftWriteP1TwoCycle]>, + WriteSequence<[SwiftWrite1Cycle, SwiftWriteP2FourCycle, + SwiftWriteP1TwoCycle]>], + (instregex "VMOVSRR$")>; + def : InstRW<[WriteSequence<[SwiftWriteP1TwoCycle, SwiftWriteP2ThreeCycle]>], + (instregex "VGETLN(u|i)")>; + def : InstRW<[WriteSequence<[SwiftWriteP1TwoCycle, SwiftWriteP2ThreeCycle, + SwiftWriteP01OneCycle]>], + (instregex "VGETLNs")>; + + // 4.2.38 Advanced SIMD and VFP, Move FPSCR + // Serializing instructions. + def SwiftWaitP0For15Cy : SchedWriteRes<[SwiftUnitP0]> { + let Latency = 15; + let ResourceCycles = [15]; + } + def SwiftWaitP1For15Cy : SchedWriteRes<[SwiftUnitP1]> { + let Latency = 15; + let ResourceCycles = [15]; + } + def SwiftWaitP2For15Cy : SchedWriteRes<[SwiftUnitP2]> { + let Latency = 15; + let ResourceCycles = [15]; + } + def : InstRW<[SwiftWaitP0For15Cy, SwiftWaitP1For15Cy, SwiftWaitP2For15Cy], + (instregex "VMRS")>; + def : InstRW<[SwiftWaitP0For15Cy, SwiftWaitP1For15Cy, SwiftWaitP2For15Cy], + (instregex "VMSR")>; + // Not serializing. + def : InstRW<[SwiftWriteP0TwoCycle], (instregex "FMSTAT")>; + + // 4.2.39 Advanced SIMD and VFP, Load Single Element + def : InstRW<[SwiftWriteLM4Cy], (instregex "VLDRD$", "VLDRS$")>; + + // 4.2.40 Advanced SIMD and VFP, Store Single Element + def : InstRW<[SwiftWriteLM4Cy], (instregex "VSTRD$", "VSTRS$")>; + + // 4.2.41 Advanced SIMD and VFP, Load Multiple + // 4.2.42 Advanced SIMD and VFP, Store Multiple + + // Resource requirement for permuting, just reserves the resources. + foreach Num = 1-28 in { + def SwiftVLDMPerm#Num : SchedWriteRes<[SwiftUnitP1]> { + let Latency = 0; + let NumMicroOps = Num; + let ResourceCycles = [Num]; + } + } + + // Pre RA pseudos - load/store to a Q register as a D register pair. + def : InstRW<[SwiftWriteLM4Cy], (instregex "VLDMQIA$", "VSTMQIA$")>; + + // Post RA not modelled accurately. We assume that register use of width 64 + // bit maps to a D register, 128 maps to a Q register. Not all different kinds + // are accurately represented. + def SwiftWriteVLDM : SchedWriteVariant<[ + // Load of one S register. + SchedVar<SwiftLMAddr1Pred, [SwiftWriteLM4Cy]>, + // Load of one D register. + SchedVar<SwiftLMAddr2Pred, [SwiftWriteLM4Cy, SwiftWriteLM4CyNo]>, + // Load of 3 S register. 
+ SchedVar<SwiftLMAddr3Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy, + SwiftWriteLM13CyNo, SwiftWriteP01OneCycle, + SwiftVLDMPerm3]>, + // Load of a Q register (not necessarily true). We should not be mapping to + // 4 S registers, either. + SchedVar<SwiftLMAddr4Pred, [SwiftWriteLM4Cy, SwiftWriteLM4CyNo, + SwiftWriteLM4CyNo, SwiftWriteLM4CyNo]>, + // Load of 5 S registers. + SchedVar<SwiftLMAddr5Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy, + SwiftWriteLM13CyNo, SwiftWriteLM14CyNo, + SwiftWriteLM17CyNo, SwiftWriteP01OneCycle, + SwiftVLDMPerm5]>, + // Load of 3 D registers. (Must also be able to handle an S register list - + // though, not accurate) + SchedVar<SwiftLMAddr6Pred, [SwiftWriteLM7Cy, SwiftWriteLM8Cy, + SwiftWriteLM10Cy, SwiftWriteLM14CyNo, + SwiftWriteLM14CyNo, SwiftWriteLM14CyNo, + SwiftWriteP01OneCycle, SwiftVLDMPerm5]>, + // Load of 7 S registers. + SchedVar<SwiftLMAddr7Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy, + SwiftWriteLM13Cy, SwiftWriteLM14CyNo, + SwiftWriteLM17CyNo, SwiftWriteLM18CyNo, + SwiftWriteLM21CyNo, SwiftWriteP01OneCycle, + SwiftVLDMPerm7]>, + // Load of two Q registers. + SchedVar<SwiftLMAddr8Pred, [SwiftWriteLM7Cy, SwiftWriteLM8Cy, + SwiftWriteLM13Cy, SwiftWriteLM13CyNo, + SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, + SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, + SwiftWriteP01OneCycle, SwiftVLDMPerm2]>, + // Load of 9 S registers. + SchedVar<SwiftLMAddr9Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy, + SwiftWriteLM13Cy, SwiftWriteLM14CyNo, + SwiftWriteLM17CyNo, SwiftWriteLM18CyNo, + SwiftWriteLM21CyNo, SwiftWriteLM22CyNo, + SwiftWriteLM25CyNo, SwiftWriteP01OneCycle, + SwiftVLDMPerm9]>, + // Load of 5 D registers. + SchedVar<SwiftLMAddr10Pred,[SwiftWriteLM7Cy, SwiftWriteLM8Cy, + SwiftWriteLM10Cy, SwiftWriteLM14Cy, + SwiftWriteLM14CyNo, SwiftWriteLM14CyNo, + SwiftWriteLM14CyNo, SwiftWriteLM14CyNo, + SwiftWriteLM14CyNo, SwiftWriteLM14CyNo, + SwiftWriteP01OneCycle, SwiftVLDMPerm5]>, + // Inaccurate: reuse description from 9 S registers. + SchedVar<SwiftLMAddr11Pred,[SwiftWriteLM9Cy, SwiftWriteLM10Cy, + SwiftWriteLM13Cy, SwiftWriteLM14CyNo, + SwiftWriteLM17CyNo, SwiftWriteLM18CyNo, + SwiftWriteLM21CyNo, SwiftWriteLM22CyNo, + SwiftWriteLM21CyNo, SwiftWriteLM22CyNo, + SwiftWriteLM25CyNo, SwiftWriteP01OneCycle, + SwiftVLDMPerm9]>, + // Load of three Q registers. + SchedVar<SwiftLMAddr12Pred,[SwiftWriteLM7Cy, SwiftWriteLM8Cy, + SwiftWriteLM11Cy, SwiftWriteLM11Cy, + SwiftWriteLM11CyNo, SwiftWriteLM11CyNo, + SwiftWriteLM11CyNo, SwiftWriteLM11CyNo, + SwiftWriteLM11CyNo, SwiftWriteLM11CyNo, + SwiftWriteLM11CyNo, SwiftWriteLM11CyNo, + SwiftWriteP01OneCycle, SwiftVLDMPerm3]>, + // Inaccurate: reuse description from 9 S registers. + SchedVar<SwiftLMAddr13Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy, + SwiftWriteLM13Cy, SwiftWriteLM14CyNo, + SwiftWriteLM17CyNo, SwiftWriteLM18CyNo, + SwiftWriteLM21CyNo, SwiftWriteLM22CyNo, + SwiftWriteLM21CyNo, SwiftWriteLM22CyNo, + SwiftWriteLM21CyNo, SwiftWriteLM22CyNo, + SwiftWriteLM25CyNo, SwiftWriteP01OneCycle, + SwiftVLDMPerm9]>, + // Load of 7 D registers inaccurate.
+ SchedVar<SwiftLMAddr14Pred,[SwiftWriteLM7Cy, SwiftWriteLM8Cy, + SwiftWriteLM10Cy, SwiftWriteLM14Cy, + SwiftWriteLM14Cy, SwiftWriteLM14CyNo, + SwiftWriteLM14CyNo, SwiftWriteLM14CyNo, + SwiftWriteLM14CyNo, SwiftWriteLM14CyNo, + SwiftWriteLM14CyNo, SwiftWriteLM14CyNo, + SwiftWriteP01OneCycle, SwiftVLDMPerm7]>, + SchedVar<SwiftLMAddr15Pred,[SwiftWriteLM9Cy, SwiftWriteLM10Cy, + SwiftWriteLM13Cy, SwiftWriteLM14Cy, + SwiftWriteLM17Cy, SwiftWriteLM18CyNo, + SwiftWriteLM21CyNo, SwiftWriteLM22CyNo, + SwiftWriteLM21CyNo, SwiftWriteLM22CyNo, + SwiftWriteLM21CyNo, SwiftWriteLM22CyNo, + SwiftWriteLM21CyNo, SwiftWriteLM22CyNo, + SwiftWriteLM25CyNo, SwiftWriteP01OneCycle, + SwiftVLDMPerm9]>, + // Load of 4 Q registers. + SchedVar<SwiftLMAddr16Pred,[SwiftWriteLM7Cy, SwiftWriteLM10Cy, + SwiftWriteLM11Cy, SwiftWriteLM14Cy, + SwiftWriteLM15Cy, SwiftWriteLM18CyNo, + SwiftWriteLM19CyNo, SwiftWriteLM22CyNo, + SwiftWriteLM19CyNo, SwiftWriteLM22CyNo, + SwiftWriteLM19CyNo, SwiftWriteLM22CyNo, + SwiftWriteLM19CyNo, SwiftWriteLM22CyNo, + SwiftWriteLM19CyNo, SwiftWriteLM22CyNo, + SwiftWriteP01OneCycle, SwiftVLDMPerm4]>, + // Unknown number of registers, just use resources for two registers. + SchedVar<NoSchedPred, [SwiftWriteLM7Cy, SwiftWriteLM8Cy, + SwiftWriteLM13Cy, SwiftWriteLM13CyNo, + SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, + SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, + SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, + SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, + SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, + SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, + SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, + SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, + SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, + SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, + SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, + SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, + SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, + SwiftWriteLM13CyNo, SwiftWriteLM13CyNo, + SwiftWriteP01OneCycle, SwiftVLDMPerm2]> + ]> { let Variadic = 1; } + + def : InstRW<[SwiftWriteVLDM], (instregex "VLDM[SD](IA|DB)$")>; + + def : InstRW<[SwiftWriteP01OneCycle2x, SwiftWriteVLDM], + (instregex "VLDM[SD](IA|DB)_UPD$")>; + + def SwiftWriteVSTM : SchedWriteVariant<[ + // One S register. + SchedVar<SwiftLMAddr1Pred, [SwiftWriteSTM1]>, + // One D register. + SchedVar<SwiftLMAddr2Pred, [SwiftWriteSTM1]>, + // Three S registers. + SchedVar<SwiftLMAddr3Pred, [SwiftWriteSTM4]>, + // Assume one Q register. + SchedVar<SwiftLMAddr4Pred, [SwiftWriteSTM1]>, + SchedVar<SwiftLMAddr5Pred, [SwiftWriteSTM6]>, + // Assume three D registers. + SchedVar<SwiftLMAddr6Pred, [SwiftWriteSTM4]>, + SchedVar<SwiftLMAddr7Pred, [SwiftWriteSTM8]>, + // Assume two Q registers. + SchedVar<SwiftLMAddr8Pred, [SwiftWriteSTM3]>, + SchedVar<SwiftLMAddr9Pred, [SwiftWriteSTM10]>, + // Assume 5 D registers. + SchedVar<SwiftLMAddr10Pred, [SwiftWriteSTM6]>, + SchedVar<SwiftLMAddr11Pred, [SwiftWriteSTM12]>, + // Assume three Q registers. + SchedVar<SwiftLMAddr12Pred, [SwiftWriteSTM4]>, + SchedVar<SwiftLMAddr13Pred, [SwiftWriteSTM14]>, + // Assume 7 D registers. + SchedVar<SwiftLMAddr14Pred, [SwiftWriteSTM8]>, + SchedVar<SwiftLMAddr15Pred, [SwiftWriteSTM16]>, + // Assume four Q registers. + SchedVar<SwiftLMAddr16Pred, [SwiftWriteSTM5]>, + // Assume two Q registers.
+ SchedVar<NoSchedPred, [SwiftWriteSTM3]> + ]> { let Variadic = 1; } + + def : InstRW<[SwiftWriteVSTM], (instregex "VSTM[SD](IA|DB)$")>; + + def : InstRW<[SwiftWriteP01OneCycle2x, SwiftWriteVSTM], + (instregex "VSTM[SD](IA|DB)_UPD")>; + + // 4.2.43 Advanced SIMD, Element or Structure Load and Store + def SwiftWrite2xP2FourCy : SchedWriteRes<[SwiftUnitP2]> { + let Latency = 4; + let ResourceCycles = [2]; + } + def SwiftWrite3xP2FourCy : SchedWriteRes<[SwiftUnitP2]> { + let Latency = 4; + let ResourceCycles = [3]; + } + foreach Num = 1-2 in { + def SwiftExt#Num#xP0 : SchedWriteRes<[SwiftUnitP0]> { + let Latency = 0; + let NumMicroOps = Num; + let ResourceCycles = [Num]; + } + } + // VLDx + // Multiple structures. + // Single element structure loads. + // We assume aligned. + // Single/two register. + def : InstRW<[SwiftWriteLM4Cy], (instregex "VLD1(d|q)(8|16|32|64)$")>; + def : InstRW<[SwiftWriteLM4Cy, SwiftWriteP01OneCycle], + (instregex "VLD1(d|q)(8|16|32|64)wb")>; + // Three register. + def : InstRW<[SwiftWrite3xP2FourCy], + (instregex "VLD1(d|q)(8|16|32|64)T$", "VLD1d64TPseudo")>; + def : InstRW<[SwiftWrite3xP2FourCy, SwiftWriteP01OneCycle], + (instregex "VLD1(d|q)(8|16|32|64)Twb")>; + /// Four Register. + def : InstRW<[SwiftWrite2xP2FourCy], + (instregex "VLD1(d|q)(8|16|32|64)Q$", "VLD1d64QPseudo")>; + def : InstRW<[SwiftWrite2xP2FourCy, SwiftWriteP01OneCycle], + (instregex "VLD1(d|q)(8|16|32|64)Qwb")>; + // Two element structure loads. + // Two/four register. + def : InstRW<[SwiftWriteLM9Cy, SwiftExt2xP0, SwiftVLDMPerm2], + (instregex "VLD2(d|q|b)(8|16|32)$", "VLD2q(8|16|32)Pseudo$")>; + def : InstRW<[SwiftWriteLM9Cy, SwiftWriteP01OneCycle, SwiftExt2xP0, + SwiftVLDMPerm2], + (instregex "VLD2(d|q|b)(8|16|32)wb", "VLD2q(8|16|32)PseudoWB")>; + // Three element structure. + def : InstRW<[SwiftWriteLM9Cy, SwiftWriteLM9CyNo, SwiftWriteLM9CyNo, + SwiftVLDMPerm3, SwiftWrite3xP2FourCy], + (instregex "VLD3(d|q)(8|16|32)$")>; + def : InstRW<[SwiftWriteLM9Cy, SwiftVLDMPerm3, SwiftWrite3xP2FourCy], + (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo$")>; + + def : InstRW<[SwiftWriteLM9Cy, SwiftWriteLM9CyNo, SwiftWriteLM9CyNo, + SwiftWriteP01OneCycle, SwiftVLDMPerm3, SwiftWrite3xP2FourCy], + (instregex "VLD3(d|q)(8|16|32)_UPD$")>; + def : InstRW<[SwiftWriteLM9Cy, SwiftWriteP01OneCycle, SwiftVLDMPerm3, + SwiftWrite3xP2FourCy], + (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")>; + // Four element structure loads. + def : InstRW<[SwiftWriteLM11Cy, SwiftWriteLM11Cy, SwiftWriteLM11Cy, + SwiftWriteLM11Cy, SwiftExt2xP0, SwiftVLDMPerm4, + SwiftWrite3xP2FourCy], + (instregex "VLD4(d|q)(8|16|32)$")>; + def : InstRW<[SwiftWriteLM11Cy, SwiftExt2xP0, SwiftVLDMPerm4, + SwiftWrite3xP2FourCy], + (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo$")>; + def : InstRW<[SwiftWriteLM11Cy, SwiftWriteLM11Cy, SwiftWriteLM11Cy, + SwiftWriteLM11Cy, SwiftWriteP01OneCycle, SwiftExt2xP0, + SwiftVLDMPerm4, SwiftWrite3xP2FourCy], + (instregex "VLD4(d|q)(8|16|32)_UPD")>; + def : InstRW<[SwiftWriteLM11Cy, SwiftWriteP01OneCycle, SwiftExt2xP0, + SwiftVLDMPerm4, SwiftWrite3xP2FourCy], + (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")>; + + // Single all/lane loads. + // One element structure. + def : InstRW<[SwiftWriteLM6Cy, SwiftVLDMPerm2], + (instregex "VLD1(LN|DUP)(d|q)(8|16|32)$", "VLD1(LN|DUP)(d|q)(8|16|32)Pseudo$")>; + def : InstRW<[SwiftWriteLM6Cy, SwiftWriteP01OneCycle, SwiftVLDMPerm2], + (instregex "VLD1(LN|DUP)(d|q)(8|16|32)(wb|_UPD)", + "VLD1LNq(8|16|32)Pseudo_UPD")>; + // Two element structure. 
+ def : InstRW<[SwiftWriteLM6Cy, SwiftWriteLM6Cy, SwiftExt1xP0, SwiftVLDMPerm2], + (instregex "VLD2(DUP|LN)(d|q)(8|16|32|8x2|16x2|32x2)$", + "VLD2LN(d|q)(8|16|32)Pseudo$")>; + def : InstRW<[SwiftWriteLM6Cy, SwiftWriteLM6Cy, SwiftWriteP01OneCycle, + SwiftExt1xP0, SwiftVLDMPerm2], + (instregex "VLD2LN(d|q)(8|16|32)_UPD$")>; + def : InstRW<[SwiftWriteLM6Cy, SwiftWriteP01OneCycle, SwiftWriteLM6Cy, + SwiftExt1xP0, SwiftVLDMPerm2], + (instregex "VLD2DUPd(8|16|32|8x2|16x2|32x2)wb")>; + def : InstRW<[SwiftWriteLM6Cy, SwiftWriteP01OneCycle, SwiftWriteLM6Cy, + SwiftExt1xP0, SwiftVLDMPerm2], + (instregex "VLD2LN(d|q)(8|16|32)Pseudo_UPD")>; + // Three element structure. + def : InstRW<[SwiftWriteLM7Cy, SwiftWriteLM8Cy, SwiftWriteLM8Cy, SwiftExt1xP0, + SwiftVLDMPerm3], + (instregex "VLD3(DUP|LN)(d|q)(8|16|32)$", + "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo$")>; + def : InstRW<[SwiftWriteLM7Cy, SwiftWriteLM8Cy, SwiftWriteLM8Cy, + SwiftWriteP01OneCycle, SwiftExt1xP0, SwiftVLDMPerm3], + (instregex "VLD3(LN|DUP)(d|q)(8|16|32)_UPD")>; + def : InstRW<[SwiftWriteLM7Cy, SwiftWriteP01OneCycle, SwiftWriteLM8Cy, + SwiftWriteLM8Cy, SwiftExt1xP0, SwiftVLDMPerm3], + (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>; + // Four element structure. + def : InstRW<[SwiftWriteLM8Cy, SwiftWriteLM9Cy, SwiftWriteLM10CyNo, + SwiftWriteLM10CyNo, SwiftExt1xP0, SwiftVLDMPerm5], + (instregex "VLD4(LN|DUP)(d|q)(8|16|32)$", + "VLD4(LN|DUP)(d|q)(8|16|32)Pseudo$")>; + def : InstRW<[SwiftWriteLM8Cy, SwiftWriteLM9Cy, SwiftWriteLM10CyNo, + SwiftWriteLM10CyNo, SwiftWriteP01OneCycle, SwiftExt1xP0, + SwiftVLDMPerm5], + (instregex "VLD4(DUP|LN)(d|q)(8|16|32)_UPD")>; + def : InstRW<[SwiftWriteLM8Cy, SwiftWriteP01OneCycle, SwiftWriteLM9Cy, + SwiftWriteLM10CyNo, SwiftWriteLM10CyNo, SwiftExt1xP0, + SwiftVLDMPerm5], + (instregex "VLD4(DUP|LN)(d|q)(8|16|32)Pseudo_UPD")>; + // VSTx + // Multiple structures. + // Single element structure store. + def : InstRW<[SwiftWrite1xP2], (instregex "VST1d(8|16|32|64)$")>; + def : InstRW<[SwiftWrite2xP2], (instregex "VST1q(8|16|32|64)$")>; + def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2], + (instregex "VST1d(8|16|32|64)wb")>; + def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite2xP2], + (instregex "VST1q(8|16|32|64)wb")>; + def : InstRW<[SwiftWrite3xP2], + (instregex "VST1d(8|16|32|64)T$", "VST1d64TPseudo$")>; + def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite3xP2], + (instregex "VST1d(8|16|32|64)Twb", "VST1d64TPseudoWB")>; + def : InstRW<[SwiftWrite4xP2], + (instregex "VST1d(8|16|32|64)(Q|QPseudo)$")>; + def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2], + (instregex "VST1d(8|16|32|64)(Qwb|QPseudoWB)")>; + // Two element structure store. + def : InstRW<[SwiftWrite1xP2, SwiftVLDMPerm1], + (instregex "VST2(d|b)(8|16|32)$")>; + def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2, SwiftVLDMPerm1], + (instregex "VST2(b|d)(8|16|32)wb")>; + def : InstRW<[SwiftWrite2xP2, SwiftVLDMPerm2], + (instregex "VST2q(8|16|32)$", "VST2q(8|16|32)Pseudo$")>; + def : InstRW<[SwiftWrite2xP2, SwiftVLDMPerm2], + (instregex "VST2q(8|16|32)wb", "VST2q(8|16|32)PseudoWB")>; + // Three element structure store. + def : InstRW<[SwiftWrite4xP2, SwiftVLDMPerm2], + (instregex "VST3(d|q)(8|16|32)$", "VST3(d|q)(8|16|32)(oddP|P)seudo$")>; + def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2, SwiftVLDMPerm2], + (instregex "VST3(d|q)(8|16|32)_UPD", + "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
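Throughout these element and lane mappings, SwiftVLDMPerm<N> never delays the loaded result; it only bills SwiftUnitP1 for the permute micro-ops. From the foreach defined earlier (Num = 1-28), the N = 2 instance used above expands to:

    // Expansion of SwiftVLDMPerm2 from the earlier foreach: zero latency,
    // two micro-ops, and P1 reserved for two cycles of shuffle work.
    def SwiftVLDMPerm2 : SchedWriteRes<[SwiftUnitP1]> {
      let Latency = 0;
      let NumMicroOps = 2;
      let ResourceCycles = [2];
    }

+ // Four element structure store.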
+ def : InstRW<[SwiftWrite4xP2, SwiftVLDMPerm2], + (instregex "VST4(d|q)(8|16|32)$", "VST4(d|q)(8|16|32)(oddP|P)seudo$")>; + def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2, SwiftVLDMPerm4], + (instregex "VST4(d|q)(8|16|32)_UPD", + "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD$")>; + // Single/all lane store. + // One element structure. + def : InstRW<[SwiftWrite1xP2, SwiftVLDMPerm1], + (instregex "VST1LNd(8|16|32)$", "VST1LNq(8|16|32)Pseudo$")>; + def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2, SwiftVLDMPerm1], + (instregex "VST1LNd(8|16|32)_UPD", "VST1LNq(8|16|32)Pseudo_UPD")>; + // Two element structure. + def : InstRW<[SwiftWrite1xP2, SwiftVLDMPerm2], + (instregex "VST2LN(d|q)(8|16|32)$", "VST2LN(d|q)(8|16|32)Pseudo$")>; + def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite1xP2, SwiftVLDMPerm2], + (instregex "VST2LN(d|q)(8|16|32)_UPD", + "VST2LN(d|q)(8|16|32)Pseudo_UPD")>; + // Three element structure. + def : InstRW<[SwiftWrite4xP2, SwiftVLDMPerm2], + (instregex "VST3LN(d|q)(8|16|32)$", "VST3LN(d|q)(8|16|32)Pseudo$")>; + def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite4xP2, SwiftVLDMPerm2], + (instregex "VST3LN(d|q)(8|16|32)_UPD", + "VST3LN(d|q)(8|16|32)Pseudo_UPD")>; + // Four element structure. + def : InstRW<[SwiftWrite2xP2, SwiftVLDMPerm2], + (instregex "VST4LN(d|q)(8|16|32)$", "VST4LN(d|q)(8|16|32)Pseudo$")>; + def : InstRW<[SwiftWriteP01OneCycle, SwiftWrite2xP2, SwiftVLDMPerm2], + (instregex "VST4LN(d|q)(8|16|32)_UPD", + "VST4LN(d|q)(8|16|32)Pseudo_UPD")>; + + // 4.2.44 VFP, Divide and Square Root + def SwiftDiv17 : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> { + let NumMicroOps = 1; + let Latency = 17; + let ResourceCycles = [1, 15]; + } + def SwiftDiv32 : SchedWriteRes<[SwiftUnitP0, SwiftUnitDiv]> { + let NumMicroOps = 1; + let Latency = 32; + let ResourceCycles = [1, 30]; + } + def : InstRW<[SwiftDiv17], (instregex "VDIVS", "VSQRTS")>; + def : InstRW<[SwiftDiv32], (instregex "VDIVD", "VSQRTD")>; + + // Not specified. + def : InstRW<[SwiftWriteP01OneCycle2x], (instregex "ABS")>; + // Preload. 
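Preload hints write no register, so only their issue cost matters. Swift's entry below charges the load unit P2 for zero cycles at zero latency; the Cortex-A9 model earlier in this patch makes the same point even more bluntly by attaching no unit at all:

    // From the Cortex-A9 section of this same patch.
    def : WriteRes<WritePreLd, []>;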
+ def : WriteRes<WritePreLd, [SwiftUnitP2]> { let Latency = 0; + let ResourceCycles = [0]; + } + } diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp index 41a7e0c2c8a5..93add6ee33cf 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -26,7 +26,7 @@ ARMSelectionDAGInfo::~ARMSelectionDAGInfo() { } SDValue -ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, +ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, @@ -140,7 +140,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, // GNU library uses (ptr, value, size) // See RTABI section 4.3.4 SDValue ARMSelectionDAGInfo:: -EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, +EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h index 6419a737295a..56c9375855dd 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h @@ -45,7 +45,7 @@ public: ~ARMSelectionDAGInfo(); virtual - SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, @@ -55,7 +55,7 @@ public: // Adjust parameters for memset, see RTABI section 4.3.4 virtual - SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, + SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Op1, SDValue Op2, SDValue Op3, unsigned Align, diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp index 8653c462f06b..a11629852ad7 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -32,20 +32,53 @@ ReserveR9("arm-reserve-r9", cl::Hidden, cl::desc("Reserve R9, making it unavailable as GPR")); static cl::opt<bool> -DarwinUseMOVT("arm-darwin-use-movt", cl::init(true), cl::Hidden); +ArmUseMOVT("arm-use-movt", cl::init(true), cl::Hidden); static cl::opt<bool> UseFusedMulOps("arm-use-mulops", cl::init(true), cl::Hidden); -static cl::opt<bool> -StrictAlign("arm-strict-align", cl::Hidden, - cl::desc("Disallow all unaligned memory accesses")); +enum AlignMode { + DefaultAlign, + StrictAlign, + NoStrictAlign +}; + +static cl::opt<AlignMode> +Align(cl::desc("Load/store alignment support"), + cl::Hidden, cl::init(DefaultAlign), + cl::values( + clEnumValN(DefaultAlign, "arm-default-align", + "Generate unaligned accesses only on hardware/OS " + "combinations that are known to support them"), + clEnumValN(StrictAlign, "arm-strict-align", + "Disallow all unaligned memory accesses"), + clEnumValN(NoStrictAlign, "arm-no-strict-align", + "Allow unaligned memory accesses"), + clEnumValEnd)); + +enum ITMode { + DefaultIT, + RestrictedIT, + NoRestrictedIT +}; + +static cl::opt<ITMode> +IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), + cl::ZeroOrMore, + cl::values(clEnumValN(DefaultIT, "arm-default-it", + "Generate IT block based on arch"), + clEnumValN(RestrictedIT, "arm-restrict-it", + "Disallow deprecated IT based on ARMv8"), + clEnumValN(NoRestrictedIT, "arm-no-restrict-it", + 
"Allow IT blocks based on ARMv7"), + clEnumValEnd)); ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS, const TargetOptions &Options) : ARMGenSubtargetInfo(TT, CPU, FS) , ARMProcFamily(Others) + , ARMProcClass(None) , stackAlignment(4) , CPUString(CPU) , TargetTriple(TT) @@ -60,11 +93,14 @@ void ARMSubtarget::initializeEnvironment() { HasV5TOps = false; HasV5TEOps = false; HasV6Ops = false; + HasV6MOps = false; HasV6T2Ops = false; HasV7Ops = false; + HasV8Ops = false; HasVFPv2 = false; HasVFPv3 = false; HasVFPv4 = false; + HasFPARMv8 = false; HasNEON = false; UseNEONForSinglePrecisionFP = false; UseMulOps = UseFusedMulOps; @@ -73,7 +109,6 @@ void ARMSubtarget::initializeEnvironment() { SlowFPBrcc = false; InThumbMode = false; HasThumb2 = false; - IsMClass = false; NoARM = false; PostRAScheduler = false; IsR9Reserved = ReserveR9; @@ -90,8 +125,12 @@ void ARMSubtarget::initializeEnvironment() { AvoidMOVsShifterOperand = false; HasRAS = false; HasMPExtension = false; + HasVirtualization = false; FPOnlySP = false; + HasPerfMon = false; HasTrustZone = false; + HasCrypto = false; + HasCRC = false; AllowsUnalignedMem = false; Thumb2DSP = false; UseNaClTrap = false; @@ -115,8 +154,13 @@ void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) { } void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { - if (CPUString.empty()) - CPUString = "generic"; + if (CPUString.empty()) { + if (isTargetIOS() && TargetTriple.getArchName().endswith("v7s")) + // Default to the Swift CPU when targeting armv7s/thumbv7s. + CPUString = "swift"; + else + CPUString = "generic"; + } // Insert the architecture feature derived from the target triple into the // feature string. This is important for setting features that are implied @@ -134,7 +178,7 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { // Thumb2 implies at least V6T2. FIXME: Fix tests to explicitly specify a // ARM version or CPU and then remove this. if (!HasV6T2Ops && hasThumb2()) - HasV4TOps = HasV5TOps = HasV5TEOps = HasV6Ops = HasV6T2Ops = true; + HasV4TOps = HasV5TOps = HasV5TEOps = HasV6Ops = HasV6MOps = HasV6T2Ops = true; // Keep a pointer to static instruction cost data for the specified CPU. SchedModel = getSchedModelForCPU(CPUString); @@ -151,21 +195,56 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { if (isAAPCS_ABI()) stackAlignment = 8; - if (!isTargetIOS()) - UseMovt = hasV6T2Ops(); - else { + UseMovt = hasV6T2Ops() && ArmUseMOVT; + + if (!isTargetIOS()) { + IsR9Reserved = ReserveR9; + } else { IsR9Reserved = ReserveR9 | !HasV6Ops; - UseMovt = DarwinUseMOVT && hasV6T2Ops(); SupportsTailCall = !getTargetTriple().isOSVersionLT(5, 0); } if (!isThumb() || hasThumb2()) PostRAScheduler = true; - // v6+ may or may not support unaligned mem access depending on the system - // configuration. - if (!StrictAlign && hasV6Ops() && isTargetDarwin()) - AllowsUnalignedMem = true; + switch (Align) { + case DefaultAlign: + // Assume pre-ARMv6 doesn't support unaligned accesses. + // + // ARMv6 may or may not support unaligned accesses depending on the + // SCTLR.U bit, which is architecture-specific. We assume ARMv6 + // Darwin targets support unaligned accesses, and others don't. + // + // ARMv7 always has SCTLR.U set to 1, but it has a new SCTLR.A bit + // which raises an alignment fault on unaligned accesses. Linux + // defaults this bit to 0 and handles it as a system-wide (not + // per-process) setting. 
It is therefore safe to assume that ARMv7+ + // Linux targets support unaligned accesses. The same goes for NaCl. + // + // The above behavior is consistent with GCC. + AllowsUnalignedMem = ( + (hasV7Ops() && (isTargetLinux() || isTargetNaCl())) || + (hasV6Ops() && isTargetDarwin())); + break; + case StrictAlign: + AllowsUnalignedMem = false; + break; + case NoStrictAlign: + AllowsUnalignedMem = true; + break; + } + + switch (IT) { + case DefaultIT: + RestrictIT = hasV8Ops() ? true : false; + break; + case RestrictedIT: + RestrictIT = true; + break; + case NoRestrictedIT: + RestrictIT = false; + break; + } // NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default. uint64_t Bits = getFeatureBits(); @@ -231,12 +310,15 @@ unsigned ARMSubtarget::getMispredictionPenalty() const { return SchedModel->MispredictPenalty; } +bool ARMSubtarget::hasSinCos() const { + return getTargetTriple().getOS() == Triple::IOS && + !getTargetTriple().isOSVersionLT(7, 0); +} + bool ARMSubtarget::enablePostRAScheduler( CodeGenOpt::Level OptLevel, TargetSubtargetInfo::AntiDepBreakMode& Mode, RegClassVector& CriticalPathRCs) const { - Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL; - CriticalPathRCs.clear(); - CriticalPathRCs.push_back(&ARM::GPRRegClass); + Mode = TargetSubtargetInfo::ANTIDEP_NONE; return PostRAScheduler && OptLevel >= CodeGenOpt::Default; } diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h index 038eb76ae1d2..5276901bbb92 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h @@ -31,26 +31,36 @@ class TargetOptions; class ARMSubtarget : public ARMGenSubtargetInfo { protected: enum ARMProcFamilyEnum { - Others, CortexA5, CortexA8, CortexA9, CortexA15, CortexR5, Swift + Others, CortexA5, CortexA8, CortexA9, CortexA15, CortexR5, Swift, CortexA53, CortexA57 + }; + enum ARMProcClassEnum { + None, AClass, RClass, MClass }; /// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others. ARMProcFamilyEnum ARMProcFamily; - /// HasV4TOps, HasV5TOps, HasV5TEOps, HasV6Ops, HasV6T2Ops, HasV7Ops - + /// ARMProcClass - ARM processor class: None, AClass, RClass or MClass. + ARMProcClassEnum ARMProcClass; + + /// HasV4TOps, HasV5TOps, HasV5TEOps, + /// HasV6Ops, HasV6MOps, HasV6T2Ops, HasV7Ops, HasV8Ops - /// Specify whether target support specific ARM ISA variants. bool HasV4TOps; bool HasV5TOps; bool HasV5TEOps; bool HasV6Ops; + bool HasV6MOps; bool HasV6T2Ops; bool HasV7Ops; + bool HasV8Ops; - /// HasVFPv2, HasVFPv3, HasVFPv4, HasNEON - Specify what + /// HasVFPv2, HasVFPv3, HasVFPv4, HasFPARMv8, HasNEON - Specify what /// floating point ISAs are supported. bool HasVFPv2; bool HasVFPv3; bool HasVFPv4; + bool HasFPARMv8; bool HasNEON; /// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been @@ -79,10 +89,6 @@ protected: /// HasThumb2 - True if Thumb2 instructions are supported. bool HasThumb2; - /// IsMClass - True if the subtarget belongs to the 'M' profile of CPUs - - /// v6m, v7m for example. - bool IsMClass; - /// NoARM - True if subtarget does not support ARM mode execution. bool NoARM; @@ -144,18 +150,37 @@ protected: /// extension (ARMv7 only). bool HasMPExtension; + /// HasVirtualization - True if the subtarget supports the Virtualization + /// extension. + bool HasVirtualization; + /// FPOnlySP - If true, the floating point unit only supports single /// precision. bool FPOnlySP; + /// If true, the processor supports the Performance Monitor Extensions. 
These + /// include a generic cycle-counter as well as more fine-grained (often + /// implementation-specific) events. + bool HasPerfMon; + /// HasTrustZone - if true, processor supports TrustZone security extensions bool HasTrustZone; + /// HasCrypto - if true, processor supports Cryptography extensions + bool HasCrypto; + + /// HasCRC - if true, processor supports CRC instructions + bool HasCRC; + /// AllowsUnalignedMem - If true, the subtarget allows unaligned memory /// accesses for some types. For details, see /// ARMTargetLowering::allowsUnalignedMemoryAccesses(). bool AllowsUnalignedMem; + /// RestrictIT - If true, the subtarget disallows generation of deprecated IT + /// blocks to conform to ARMv8 rule. + bool RestrictIT; + /// Thumb2DSP - If true, the subtarget supports the v7 DSP (saturating arith /// and such) instructions in Thumb2 code. bool Thumb2DSP; @@ -187,10 +212,6 @@ protected: public: enum { - isELF, isDarwin - } TargetType; - - enum { ARM_ABI_APCS, ARM_ABI_AAPCS // ARM EABI } TargetABI; @@ -224,8 +245,10 @@ public: bool hasV5TOps() const { return HasV5TOps; } bool hasV5TEOps() const { return HasV5TEOps; } bool hasV6Ops() const { return HasV6Ops; } + bool hasV6MOps() const { return HasV6MOps; } bool hasV6T2Ops() const { return HasV6T2Ops; } bool hasV7Ops() const { return HasV7Ops; } + bool hasV8Ops() const { return HasV8Ops; } bool isCortexA5() const { return ARMProcFamily == CortexA5; } bool isCortexA8() const { return ARMProcFamily == CortexA8; } @@ -241,7 +264,11 @@ public: bool hasVFP2() const { return HasVFPv2; } bool hasVFP3() const { return HasVFPv3; } bool hasVFP4() const { return HasVFPv4; } + bool hasFPARMv8() const { return HasFPARMv8; } bool hasNEON() const { return HasNEON; } + bool hasCrypto() const { return HasCrypto; } + bool hasCRC() const { return HasCRC; } + bool hasVirtualization() const { return HasVirtualization; } bool useNEONForSinglePrecisionFP() const { return hasNEON() && UseNEONForSinglePrecisionFP; } @@ -249,11 +276,15 @@ public: bool hasDivideInARMMode() const { return HasHardwareDivideInARM; } bool hasT2ExtractPack() const { return HasT2ExtractPack; } bool hasDataBarrier() const { return HasDataBarrier; } + bool hasAnyDataBarrier() const { + return HasDataBarrier || (hasV6Ops() && !isThumb()); + } bool useMulOps() const { return UseMulOps; } bool useFPVMLx() const { return !SlowFPVMLx; } bool hasVMLxForwarding() const { return HasVMLxForwarding; } bool isFPBrccSlow() const { return SlowFPBrcc; } bool isFPOnlySP() const { return FPOnlySP; } + bool hasPerfMon() const { return HasPerfMon; } bool hasTrustZone() const { return HasTrustZone; } bool prefers32BitThumb() const { return Pref32BitThumb; } bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; } @@ -268,12 +299,19 @@ public: const Triple &getTargetTriple() const { return TargetTriple; } - bool isTargetIOS() const { return TargetTriple.getOS() == Triple::IOS; } + bool isTargetIOS() const { return TargetTriple.isiOS(); } bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } - bool isTargetNaCl() const { - return TargetTriple.getOS() == Triple::NaCl; - } + bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); } + bool isTargetLinux() const { return TargetTriple.isOSLinux(); } bool isTargetELF() const { return !isTargetDarwin(); } + // ARM EABI is the bare-metal EABI described in ARM ABI documents and + // can be accessed via -target arm-none-eabi. This is NOT GNUEABI. 
+ // FIXME: Add a flag for bare-metal for that target and set Triple::EABI + // even for GNUEABI, so we can make a distinction here and still conform to + // the EABI on GNU (and Android) mode. This requires a change in Clang, too. + bool isTargetAEABI() const { + return TargetTriple.getEnvironment() == Triple::EABI; + } bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; } bool isAAPCS_ABI() const { return TargetABI == ARM_ABI_AAPCS; } @@ -282,8 +320,9 @@ bool isThumb1Only() const { return InThumbMode && !HasThumb2; } bool isThumb2() const { return InThumbMode && HasThumb2; } bool hasThumb2() const { return HasThumb2; } - bool isMClass() const { return IsMClass; } - bool isARClass() const { return !IsMClass; } + bool isMClass() const { return ARMProcClass == MClass; } + bool isRClass() const { return ARMProcClass == RClass; } + bool isAClass() const { return ARMProcClass == AClass; } bool isR9Reserved() const { return IsR9Reserved; } @@ -292,9 +331,15 @@ bool allowsUnalignedMem() const { return AllowsUnalignedMem; } + bool restrictIT() const { return RestrictIT; } + const std::string & getCPUString() const { return CPUString; } unsigned getMispredictionPenalty() const; + + /// This function returns true if the target has a sincos() routine in its + /// compiler runtime or math libraries. + bool hasSinCos() const; /// enablePostRAScheduler - True at 'More' optimization. bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp index 42c7d2c437e0..c2bf78877875 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -60,7 +60,7 @@ void ARMBaseTargetMachine::addAnalysisPasses(PassManagerBase &PM) { // Add first the target-independent BasicTTI pass, then our ARM pass. This // allows the ARM pass to delegate to the target independent layer when // appropriate. - PM.add(createBasicTargetTransformInfoPass(getTargetLowering())); + PM.add(createBasicTargetTransformInfoPass(this)); PM.add(createARMTargetTransformInfoPass(this)); } @@ -85,6 +85,7 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, TLInfo(*this), TSInfo(*this), FrameLowering(Subtarget) { + initAsmInfo(); if (!Subtarget.hasARMOps()) report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not " "support ARM mode execution!"); @@ -117,6 +118,7 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT, FrameLowering(Subtarget.hasThumb2() ? new ARMFrameLowering(Subtarget) : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) { + initAsmInfo(); } namespace { @@ -148,7 +150,7 @@ TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) { bool ARMPassConfig::addPreISel() { if (TM->getOptLevel() != CodeGenOpt::None && EnableGlobalMerge) - addPass(createGlobalMergePass(TM->getTargetLowering())); + addPass(createGlobalMergePass(TM)); return false; } @@ -167,7 +169,7 @@ bool ARMPassConfig::addPreRegAlloc() { // FIXME: temporarily disabling load / store optimization pass for Thumb1.
if (getOptLevel() != CodeGenOpt::None && !getARMSubtarget().isThumb1Only()) addPass(createARMLoadStoreOptimizationPass(true)); - if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isLikeA9()) + if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA9()) addPass(createMLxExpansionPass()); // Since the A15SDOptimizer pass can insert VDUP instructions, it can only be // enabled when NEON is available. @@ -194,8 +196,13 @@ bool ARMPassConfig::addPreSched2() { addPass(createARMExpandPseudoPass()); if (getOptLevel() != CodeGenOpt::None) { - if (!getARMSubtarget().isThumb1Only()) + if (!getARMSubtarget().isThumb1Only()) { + // in v8, IfConversion depends on Thumb instruction widths + if (getARMSubtarget().restrictIT() && + !getARMSubtarget().prefers32BitThumb()) + addPass(createThumb2SizeReductionPass()); addPass(&IfConverterID); + } } if (getARMSubtarget().isThumb2()) addPass(createThumb2ITBlockPass()); diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp index dfdf6ab356a3..7ec71b20c64d 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp @@ -47,7 +47,7 @@ getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang, MCStreamer &Streamer) const { assert(Encoding == DW_EH_PE_absptr && "Can handle absptr encoding only"); - return MCSymbolRefExpr::Create(Mang->getSymbol(GV), + return MCSymbolRefExpr::Create(getSymbol(*Mang, GV), MCSymbolRefExpr::VK_ARM_TARGET2, getContext()); } diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 53ece668c3f5..6bbb38facc24 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -124,11 +124,14 @@ public: unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const; - unsigned getAddressComputationCost(Type *Val) const; + unsigned getAddressComputationCost(Type *Val, bool IsComplex) const; unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Op1Info = OK_AnyValue, OperandValueKind Op2Info = OK_AnyValue) const; + + unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace) const; /// @} }; @@ -182,7 +185,7 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, assert(ISD && "Invalid opcode"); // Single to/from double precision conversions. - static const CostTblEntry<MVT> NEONFltDblTbl[] = { + static const CostTblEntry<MVT::SimpleValueType> NEONFltDblTbl[] = { // Vector fptrunc/fpext conversions. { ISD::FP_ROUND, MVT::v2f64, 2 }, { ISD::FP_EXTEND, MVT::v2f32, 2 }, @@ -192,8 +195,7 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND || ISD == ISD::FP_EXTEND)) { std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); - int Idx = CostTableLookup<MVT>(NEONFltDblTbl, array_lengthof(NEONFltDblTbl), - ISD, LT.second); + int Idx = CostTableLookup(NEONFltDblTbl, ISD, LT.second); if (Idx != -1) return LT.first * NEONFltDblTbl[Idx].Cost; } @@ -207,7 +209,8 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, // Some arithmetic, load and store operations have specific instructions // to cast up/down their types automatically at no extra cost. // TODO: Get these tables to know at least what the related operations are. 
- static const TypeConversionCostTblEntry<MVT> NEONVectorConversionTbl[] = {
+ static const TypeConversionCostTblEntry<MVT::SimpleValueType>
+ NEONVectorConversionTbl[] = {
 { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
 { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
 { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
@@ -283,15 +286,15 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
 };
 if (SrcTy.isVector() && ST->hasNEON()) {
- int Idx = ConvertCostTableLookup<MVT>(NEONVectorConversionTbl,
- array_lengthof(NEONVectorConversionTbl),
- ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT());
+ int Idx = ConvertCostTableLookup(NEONVectorConversionTbl, ISD,
+ DstTy.getSimpleVT(), SrcTy.getSimpleVT());
 if (Idx != -1)
 return NEONVectorConversionTbl[Idx].Cost;
 }
 // Scalar float to integer conversions.
- static const TypeConversionCostTblEntry<MVT> NEONFloatConversionTbl[] = {
+ static const TypeConversionCostTblEntry<MVT::SimpleValueType>
+ NEONFloatConversionTbl[] = {
 { ISD::FP_TO_SINT, MVT::i1, MVT::f32, 2 },
 { ISD::FP_TO_UINT, MVT::i1, MVT::f32, 2 },
 { ISD::FP_TO_SINT, MVT::i1, MVT::f64, 2 },
@@ -314,16 +317,15 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
 { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 10 }
 };
 if (SrcTy.isFloatingPoint() && ST->hasNEON()) {
- int Idx = ConvertCostTableLookup<MVT>(NEONFloatConversionTbl,
- array_lengthof(NEONFloatConversionTbl),
- ISD, DstTy.getSimpleVT(),
- SrcTy.getSimpleVT());
+ int Idx = ConvertCostTableLookup(NEONFloatConversionTbl, ISD,
+ DstTy.getSimpleVT(), SrcTy.getSimpleVT());
 if (Idx != -1)
 return NEONFloatConversionTbl[Idx].Cost;
 }
 // Scalar integer to float conversions.
- static const TypeConversionCostTblEntry<MVT> NEONIntegerConversionTbl[] = {
+ static const TypeConversionCostTblEntry<MVT::SimpleValueType>
+ NEONIntegerConversionTbl[] = {
 { ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 },
 { ISD::UINT_TO_FP, MVT::f32, MVT::i1, 2 },
 { ISD::SINT_TO_FP, MVT::f64, MVT::i1, 2 },
@@ -347,16 +349,15 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
 };
 if (SrcTy.isInteger() && ST->hasNEON()) {
- int Idx = ConvertCostTableLookup<MVT>(NEONIntegerConversionTbl,
- array_lengthof(NEONIntegerConversionTbl),
- ISD, DstTy.getSimpleVT(),
- SrcTy.getSimpleVT());
+ int Idx = ConvertCostTableLookup(NEONIntegerConversionTbl, ISD,
+ DstTy.getSimpleVT(), SrcTy.getSimpleVT());
 if (Idx != -1)
 return NEONIntegerConversionTbl[Idx].Cost;
 }
 // Scalar integer conversion costs.
- static const TypeConversionCostTblEntry<MVT> ARMIntegerConversionTbl[] = {
+ static const TypeConversionCostTblEntry<MVT::SimpleValueType>
+ ARMIntegerConversionTbl[] = {
 // i16 -> i64 requires two dependent operations.
 { ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 2 },
@@ -368,11 +369,8 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
 };
 if (SrcTy.isInteger()) {
- int Idx =
- ConvertCostTableLookup<MVT>(ARMIntegerConversionTbl,
- array_lengthof(ARMIntegerConversionTbl),
- ISD, DstTy.getSimpleVT(),
- SrcTy.getSimpleVT());
+ int Idx = ConvertCostTableLookup(ARMIntegerConversionTbl, ISD,
+ DstTy.getSimpleVT(), SrcTy.getSimpleVT());
 if (Idx != -1)
 return ARMIntegerConversionTbl[Idx].Cost;
 }
@@ -400,7 +398,8 @@ unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
 // On NEON a vector select gets lowered to vbsl.
 if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) {
 // Lowering of some vector selects is currently far from perfect.
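To put numbers on that: in the table below, the { ISD::SELECT, MVT::v8i1, MVT::v8i32, 4*8 + 1*3 + 1*4 + 1*2 } entry prices a select of eight i32 lanes at 41 units, and the v16i32 row comes to 82. The factored sums presumably itemize the scalarized per-lane work plus the repacking shuffles, but as the comment says, these lowerings (and hence the figures) are rough.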
- static const TypeConversionCostTblEntry<MVT> NEONVectorSelectTbl[] = {
+ static const TypeConversionCostTblEntry<MVT::SimpleValueType>
+ NEONVectorSelectTbl[] = {
 { ISD::SELECT, MVT::v16i1, MVT::v16i16, 2*16 + 1 + 3*1 + 4*1 },
 { ISD::SELECT, MVT::v8i1, MVT::v8i32, 4*8 + 1*3 + 1*4 + 1*2 },
 { ISD::SELECT, MVT::v16i1, MVT::v16i32, 4*16 + 1*6 + 1*8 + 1*4 },
@@ -411,12 +410,13 @@ unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
 EVT SelCondTy = TLI->getValueType(CondTy);
 EVT SelValTy = TLI->getValueType(ValTy);
- int Idx = ConvertCostTableLookup<MVT>(NEONVectorSelectTbl,
- array_lengthof(NEONVectorSelectTbl),
- ISD, SelCondTy.getSimpleVT(),
- SelValTy.getSimpleVT());
- if (Idx != -1)
- return NEONVectorSelectTbl[Idx].Cost;
+ if (SelCondTy.isSimple() && SelValTy.isSimple()) {
+ int Idx = ConvertCostTableLookup(NEONVectorSelectTbl, ISD,
+ SelCondTy.getSimpleVT(),
+ SelValTy.getSimpleVT());
+ if (Idx != -1)
+ return NEONVectorSelectTbl[Idx].Cost;
+ }
 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
 return LT.first;
@@ -425,7 +425,16 @@ unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
 return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
 }
-unsigned ARMTTI::getAddressComputationCost(Type *Ty) const {
+unsigned ARMTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
+ // Address computations in vectorized code with non-consecutive addresses will
+ // likely result in more instructions compared to scalar code where the
+ // computation can more often be merged into the index mode. The resulting
+ // extra micro-ops can significantly decrease throughput.
+ unsigned NumVectorInstToHideOverhead = 10;
+
+ if (Ty->isVectorTy() && IsComplex)
+ return NumVectorInstToHideOverhead;
+
 // In many cases the address computation is not merged into the instruction
 // addressing mode.
 return 1;
@@ -437,7 +446,7 @@ unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
 if (Kind != SK_Reverse)
 return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
- static const CostTblEntry<MVT> NEONShuffleTbl[] = {
+ static const CostTblEntry<MVT::SimpleValueType> NEONShuffleTbl[] = {
 // Reverse shuffle costs one instruction if we are shuffling within a double
 // word (vrev) or two if we shuffle a quad word (vrev, vext).
 { ISD::VECTOR_SHUFFLE, MVT::v2i32, 1 },
@@ -453,8 +462,7 @@ unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
- int Idx = CostTableLookup<MVT>(NEONShuffleTbl, array_lengthof(NEONShuffleTbl),
- ISD::VECTOR_SHUFFLE, LT.second);
+ int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
 if (Idx == -1)
 return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
@@ -469,7 +477,7 @@ unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueK
 const unsigned FunctionCallDivCost = 20;
 const unsigned ReciprocalDivCost = 10;
- static const CostTblEntry<MVT> CostTbl[] = {
+ static const CostTblEntry<MVT::SimpleValueType> CostTbl[] = {
 // Division.
 // These costs are somewhat random. Choose a cost of 20 to indicate that
 // vectorizing division (added function call) is going to be very expensive.
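The IsComplex heuristic introduced above can be restated as a tiny standalone model; the function name is mine, but the constant and the branch structure come straight from the hunk:

    // Standalone model of getAddressComputationCost's heuristic: complex
    // (non-consecutive) vector addresses are priced at 10 micro-ops, enough
    // to steer the vectorizer away from such accesses unless the rest of
    // the loop body is large enough to hide that overhead.
    unsigned addressComputationCost(bool IsVectorTy, bool IsComplex) {
      const unsigned NumVectorInstToHideOverhead = 10;
      if (IsVectorTy && IsComplex)
        return NumVectorInstToHideOverhead;
      // Otherwise assume the computation folds into the addressing mode.
      return 1;
    }

Under this model a gather-like vector access is charged ten units of address arithmetic where a unit-stride access is charged one.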
@@ -513,14 +521,37 @@ unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueK
 int Idx = -1;
 if (ST->hasNEON())
- Idx = CostTableLookup<MVT>(CostTbl, array_lengthof(CostTbl), ISDOpcode,
- LT.second);
+ Idx = CostTableLookup(CostTbl, ISDOpcode, LT.second);
 if (Idx != -1)
 return LT.first * CostTbl[Idx].Cost;
-
- return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info,
- Op2Info);
+ unsigned Cost =
+ TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info);
+
+ // This is somewhat of a hack. The problem that we are facing is that SROA
+ // creates a sequence of shift, and, or instructions to construct values.
+ // These sequences are recognized by the ISel and have zero cost. Not so for
+ // the vectorized code. Because we have support for v2i64 but not i64, those
+ // sequences look particularly beneficial to vectorize.
+ // To work around this, we increase the cost of v2i64 operations to make them
+ // seem less beneficial.
+ if (LT.second == MVT::v2i64 &&
+ Op2Info == TargetTransformInfo::OK_UniformConstantValue)
+ Cost += 4;
+
+ return Cost;
 }
+unsigned ARMTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace) const {
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+
+ if (Src->isVectorTy() && Alignment != 16 &&
+ Src->getVectorElementType()->isDoubleTy()) {
+ // Unaligned loads/stores are extremely inefficient.
+ // We need 4 uops for vst.1/vld.1 vs. 1 uop for vldr/vstr.
+ return LT.first * 4;
+ }
+ return LT.first;
+}
diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 1dd2953b29b9..e3f9e0dc609a 100644 --- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -7,6 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
+#include "ARMBuildAttrs.h"
+#include "ARMFPUName.h"
+#include "ARMFeatures.h"
 #include "llvm/MC/MCTargetAsmParser.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
 #include "MCTargetDesc/ARMBaseInfo.h"
@@ -24,6 +27,7 @@
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCAsmParser.h"
 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
@@ -47,11 +51,33 @@ enum VectorLaneTy { NoLanes, AllLanes, IndexedLane };
 class ARMAsmParser : public MCTargetAsmParser {
 MCSubtargetInfo &STI;
 MCAsmParser &Parser;
+ const MCInstrInfo &MII;
 const MCRegisterInfo *MRI;
+ ARMTargetStreamer &getTargetStreamer() {
+ MCTargetStreamer &TS = getParser().getStreamer().getTargetStreamer();
+ return static_cast<ARMTargetStreamer &>(TS);
+ }
+
+ // Unwind directives state
+ SMLoc FnStartLoc;
+ SMLoc CantUnwindLoc;
+ SMLoc PersonalityLoc;
+ SMLoc HandlerDataLoc;
+ int FPReg;
+ void resetUnwindDirectiveParserState() {
+ FnStartLoc = SMLoc();
+ CantUnwindLoc = SMLoc();
+ PersonalityLoc = SMLoc();
+ HandlerDataLoc = SMLoc();
+ FPReg = -1;
+ }
+
 // Map of register aliases registered via the .req directive.
 StringMap<unsigned> RegisterReqs;
+ bool NextSymbolIsThumb;
+
 struct {
 ARMCC::CondCodes Cond; // Condition for IT block.
 unsigned Mask:4; // Condition mask for instructions.
@@ -76,7 +102,7 @@ class ARMAsmParser : public MCTargetAsmParser {
 if (!inITBlock()) return;
 // Move to the next instruction in the IT block, if there is one. If not,
 // mark the block as done.
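The IT-block bookkeeping here leans on the 4-bit mask's trailing terminator bit: a block with n conditional slots keeps its lowest 4 - n bits zero, so countTrailingZeros recovers the block length, and the position check just below retires the block once CurPosition reaches 5 - TZ. A small sketch of that relationship (my helper, mirroring the convention rather than the parser code itself):

    #include <cassert>

    // Number of instruction slots encoded by a 4-bit IT mask, assuming the
    // usual convention of a single terminating 1-bit below the condition bits.
    unsigned itBlockSlots(unsigned Mask) {
      assert(Mask != 0 && (Mask & ~0xFu) == 0 && "expected a 4-bit IT mask");
      unsigned TZ = 0;
      while (((Mask >> TZ) & 1) == 0)
        ++TZ;        // countTrailingZeros(Mask)
      return 4 - TZ; // plain IT -> 0b1000 -> 1 slot
    }

    int main() {
      assert(itBlockSlots(0x8) == 1); // "it"      : mask 1000
      assert(itBlockSlots(0x4) == 2); // "it t"    : mask x100, x = 0 here
      assert(itBlockSlots(0x1) == 4); // "it ttt"  : mask xyz1, bits 0 here
    }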
- unsigned TZ = CountTrailingZeros_32(ITState.Mask); + unsigned TZ = countTrailingZeros(ITState.Mask); if (++ITState.CurPosition == 5 - TZ) ITState.CurPosition = ~0U; // Done with the IT block after this. } @@ -113,11 +139,22 @@ class ARMAsmParser : public MCTargetAsmParser { bool parseDirectiveUnreq(SMLoc L); bool parseDirectiveArch(SMLoc L); bool parseDirectiveEabiAttr(SMLoc L); + bool parseDirectiveCPU(SMLoc L); + bool parseDirectiveFPU(SMLoc L); + bool parseDirectiveFnStart(SMLoc L); + bool parseDirectiveFnEnd(SMLoc L); + bool parseDirectiveCantUnwind(SMLoc L); + bool parseDirectivePersonality(SMLoc L); + bool parseDirectiveHandlerData(SMLoc L); + bool parseDirectiveSetFP(SMLoc L); + bool parseDirectivePad(SMLoc L); + bool parseDirectiveRegSave(SMLoc L, bool IsVector); StringRef splitMnemonic(StringRef Mnemonic, unsigned &PredicationCode, bool &CarrySetting, unsigned &ProcessorIMod, StringRef &ITMask); - void getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet, + void getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst, + bool &CanAcceptCarrySet, bool &CanAcceptPredicationCode); bool isThumb() const { @@ -130,12 +167,25 @@ class ARMAsmParser : public MCTargetAsmParser { bool isThumbTwo() const { return isThumb() && (STI.getFeatureBits() & ARM::FeatureThumb2); } + bool hasThumb() const { + return STI.getFeatureBits() & ARM::HasV4TOps; + } bool hasV6Ops() const { return STI.getFeatureBits() & ARM::HasV6Ops; } + bool hasV6MOps() const { + return STI.getFeatureBits() & ARM::HasV6MOps; + } bool hasV7Ops() const { return STI.getFeatureBits() & ARM::HasV7Ops; } + bool hasV8Ops() const { + return STI.getFeatureBits() & ARM::HasV8Ops; + } + bool hasARM() const { + return !(STI.getFeatureBits() & ARM::FeatureNoARM); + } + void SwitchMode() { unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb)); setAvailableFeatures(FB); @@ -161,6 +211,8 @@ class ARMAsmParser : public MCTargetAsmParser { SmallVectorImpl<MCParsedAsmOperand*>&); OperandMatchResultTy parseMemBarrierOptOperand( SmallVectorImpl<MCParsedAsmOperand*>&); + OperandMatchResultTy parseInstSyncBarrierOptOperand( + SmallVectorImpl<MCParsedAsmOperand*>&); OperandMatchResultTy parseProcIFlagsOperand( SmallVectorImpl<MCParsedAsmOperand*>&); OperandMatchResultTy parseMSRMaskOperand( @@ -185,51 +237,19 @@ class ARMAsmParser : public MCTargetAsmParser { SMLoc &EndLoc); // Asm Match Converter Methods - void cvtT2LdrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - void cvtT2StrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - void cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &); - void cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &); - void cvtLdWriteBackRegAddrMode2(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &); - void cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &); - void cvtStWriteBackRegAddrModeImm12(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &); - void cvtStWriteBackRegAddrMode2(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &); - void cvtStWriteBackRegAddrMode3(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &); - void cvtLdExtTWriteBackImm(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &); - void cvtLdExtTWriteBackReg(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &); - void cvtStExtTWriteBackImm(MCInst &Inst, - const 
SmallVectorImpl<MCParsedAsmOperand*> &);
- void cvtStExtTWriteBackReg(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &);
- void cvtLdrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &);
- void cvtStrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &);
- void cvtLdWriteBackRegAddrMode3(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &);
 void cvtThumbMultiply(MCInst &Inst,
 const SmallVectorImpl<MCParsedAsmOperand*> &);
- void cvtVLDwbFixed(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &);
- void cvtVLDwbRegister(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &);
- void cvtVSTwbFixed(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &);
- void cvtVSTwbRegister(MCInst &Inst,
+ void cvtThumbBranches(MCInst &Inst,
 const SmallVectorImpl<MCParsedAsmOperand*> &);
+
 bool validateInstruction(MCInst &Inst,
 const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
 bool processInstruction(MCInst &Inst,
 const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
 bool shouldOmitCCOutOperand(StringRef Mnemonic,
 SmallVectorImpl<MCParsedAsmOperand*> &Operands);
-
+ bool shouldOmitPredicateOperand(StringRef Mnemonic,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
 public:
 enum ARMMatchResultTy {
 Match_RequiresITBlock = FIRST_TARGET_MATCH_RESULT_TY,
@@ -241,12 +261,13 @@ public:
 };
- ARMAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
- : MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
+ ARMAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
+ const MCInstrInfo &MII)
+ : MCTargetAsmParser(), STI(_STI), Parser(_Parser), MII(MII), FPReg(-1) {
 MCAsmParserExtension::Initialize(_Parser);
 // Cache the MCRegisterInfo.
- MRI = &getContext().getRegisterInfo();
+ MRI = getContext().getRegisterInfo();
 // Initialize the set of available features.
 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
@@ -254,12 +275,7 @@ public:
 // Not in an ITBlock to start with.
 ITState.CurPosition = ~0U;
- // Set ELF header flags.
- // FIXME: This should eventually end up somewhere else where more
- // intelligent flag decisions can be made. For now we are just maintaining
- // the status quo for ARM and setting EF_ARM_EABI_VER5 as the default.
- if (MCELFStreamer *MES = dyn_cast<MCELFStreamer>(&Parser.getStreamer()))
- MES->getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5);
+ NextSymbolIsThumb = false;
 }
 // Implementation of the MCTargetAsmParser interface:
@@ -276,6 +292,8 @@ public:
 SmallVectorImpl<MCParsedAsmOperand*> &Operands,
 MCStreamer &Out, unsigned &ErrorInfo,
 bool MatchingInlineAsm);
+ void onLabelParsed(MCSymbol *Symbol);
+
 };
 } // end anonymous namespace
@@ -293,6 +311,7 @@ class ARMOperand : public MCParsedAsmOperand {
 k_CoprocOption,
 k_Immediate,
 k_MemBarrierOpt,
+ k_InstSyncBarrierOpt,
 k_Memory,
 k_PostIndexRegister,
 k_MSRMask,
@@ -336,6 +355,10 @@ class ARMOperand : public MCParsedAsmOperand {
 ARM_MB::MemBOpt Val;
 };
+ struct ISBOptOp {
+ ARM_ISB::InstSyncBOpt Val;
+ };
+
 struct IFlagsOp {
 ARM_PROC::IFlags Val;
 };
@@ -422,6 +445,7 @@ class ARMOperand : public MCParsedAsmOperand {
 struct CopOp Cop;
 struct CoprocOptionOp CoprocOption;
 struct MBOptOp MBOpt;
+ struct ISBOptOp ISBOpt;
 struct ITMaskOp ITMask;
 struct IFlagsOp IFlags;
 struct MMaskOp MMask;
@@ -482,6 +506,8 @@ public:
 case k_MemBarrierOpt:
 MBOpt = o.MBOpt;
 break;
+ case k_InstSyncBarrierOpt:
+ ISBOpt = o.ISBOpt;
 case k_Memory:
 Memory = o.Memory;
 break;
@@ -564,6 +590,11 @@ public:
 return MBOpt.Val;
 }
+ ARM_ISB::InstSyncBOpt getInstSyncBarrierOpt() const {
+ assert(Kind == k_InstSyncBarrierOpt && "Invalid access!");
+ return ISBOpt.Val;
+ }
+
 ARM_PROC::IFlags getProcIFlags() const {
 assert(Kind == k_ProcIFlags && "Invalid access!");
 return IFlags.Val;
@@ -582,6 +613,56 @@ public:
 bool isITMask() const { return Kind == k_ITCondMask; }
 bool isITCondCode() const { return Kind == k_CondCode; }
 bool isImm() const { return Kind == k_Immediate; }
+ // checks whether this operand is an unsigned offset which fits in a field
+ // of specified width and scaled by a specific number of bits
+ template<unsigned width, unsigned scale>
+ bool isUnsignedOffset() const {
+ if (!isImm()) return false;
+ if (isa<MCSymbolRefExpr>(Imm.Val)) return true;
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val)) {
+ int64_t Val = CE->getValue();
+ int64_t Align = 1LL << scale;
+ int64_t Max = Align * ((1LL << width) - 1);
+ return ((Val % Align) == 0) && (Val >= 0) && (Val <= Max);
+ }
+ return false;
+ }
+ // checks whether this operand is a signed offset which fits in a field
+ // of specified width and scaled by a specific number of bits
+ template<unsigned width, unsigned scale>
+ bool isSignedOffset() const {
+ if (!isImm()) return false;
+ if (isa<MCSymbolRefExpr>(Imm.Val)) return true;
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val)) {
+ int64_t Val = CE->getValue();
+ int64_t Align = 1LL << scale;
+ int64_t Max = Align * ((1LL << (width-1)) - 1);
+ int64_t Min = -Align * (1LL << (width-1));
+ return ((Val % Align) == 0) && (Val >= Min) && (Val <= Max);
+ }
+ return false;
+ }
+
+ // checks whether this operand is a memory operand computed as an offset
+ // applied to PC. The offset may have 8 bits of magnitude and is represented
+ // with two bits of shift. Textually it may be either [pc, #imm], #imm or
+ // a relocatable expression...
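The two templates above boil down to one alignment-and-range check. A compile-time restatement (hypothetical constexpr helper, same arithmetic) makes the accepted windows concrete; the 8-bit, scale-4 instance is exactly the 0-1020 window the PC-relative memory check that follows enforces:

    #include <cstdint>

    // Same arithmetic as isUnsignedOffset<width, scale> above, restated so
    // it can be exercised in static_asserts (an assumption of this sketch,
    // not the parser's own code).
    template <unsigned width, unsigned scale>
    constexpr bool fitsUnsignedOffset(int64_t Val) {
      const int64_t Align = 1LL << scale;
      const int64_t Max = Align * ((1LL << width) - 1);
      return (Val % Align) == 0 && Val >= 0 && Val <= Max;
    }

    // An 8-bit field scaled by 4 accepts exactly the multiples of 4
    // in [0, 1020]:
    static_assert(fitsUnsignedOffset<8, 2>(1020), "top of range");
    static_assert(!fitsUnsignedOffset<8, 2>(1022), "not a multiple of 4");
    static_assert(!fitsUnsignedOffset<8, 2>(1024), "out of range");

    int main() {}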
+ bool isThumbMemPC() const { + int64_t Val = 0; + if (isImm()) { + if (isa<MCSymbolRefExpr>(Imm.Val)) return true; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val); + if (!CE) return false; + Val = CE->getValue(); + } + else if (isMem()) { + if(!Memory.OffsetImm || Memory.OffsetRegNum) return false; + if(Memory.BaseRegNum != ARM::PC) return false; + Val = Memory.OffsetImm->getValue(); + } + else return false; + return ((Val % 4) == 0) && (Val >= 0) && (Val <= 1020); + } bool isFPImm() const { if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); @@ -610,13 +691,6 @@ public: int64_t Value = CE->getValue(); return ((Value & 3) == 0) && Value >= -1020 && Value <= 1020; } - bool isImm0_4() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return Value >= 0 && Value < 5; - } bool isImm0_1020s4() const { if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); @@ -639,6 +713,13 @@ public: // explicitly exclude zero. we want that to use the normal 0_508 version. return ((Value & 3) == 0) && Value > 0 && Value <= 508; } + bool isImm0_239() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value >= 0 && Value < 240; + } bool isImm0_255() const { if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); @@ -800,6 +881,15 @@ public: int64_t Value = CE->getValue(); return Value >= 0 && Value < 65536; } + bool isImm256_65535Expr() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + // If it's not a constant expression, it'll generate a fixup and be + // handled later. 
+ if (!CE) return true; + int64_t Value = CE->getValue(); + return Value >= 256 && Value < 65536; + } bool isImm0_65535Expr() const { if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); @@ -879,7 +969,8 @@ public: const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); - return ARM_AM::getT2SOImmVal(~Value) != -1; + return ARM_AM::getT2SOImmVal(Value) == -1 && + ARM_AM::getT2SOImmVal(~Value) != -1; } bool isT2SOImmNeg() const { if (!isImm()) return false; @@ -903,6 +994,7 @@ public: bool isSPRRegList() const { return Kind == k_SPRRegisterList; } bool isToken() const { return Kind == k_Token; } bool isMemBarrierOpt() const { return Kind == k_MemBarrierOpt; } + bool isInstSyncBarrierOpt() const { return Kind == k_InstSyncBarrierOpt; } bool isMem() const { return Kind == k_Memory; } bool isShifterImm() const { return Kind == k_ShifterImmediate; } bool isRegShiftedReg() const { return Kind == k_ShiftedRegister; } @@ -949,7 +1041,7 @@ public: const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Val = CE->getValue(); - return Val > -4096 && Val < 4096; + return (Val == INT32_MIN) || (Val > -4096 && Val < 4096); } bool isAddrMode3() const { // If we have an immediate that's not a constant, treat it as a label @@ -1659,6 +1751,37 @@ public: Inst.addOperand(MCOperand::CreateImm(-CE->getValue())); } + void addUnsignedOffset_b8s2Operands(MCInst &Inst, unsigned N) const { + if(const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm())) { + Inst.addOperand(MCOperand::CreateImm(CE->getValue() >> 2)); + return; + } + + const MCSymbolRefExpr *SR = dyn_cast<MCSymbolRefExpr>(Imm.Val); + assert(SR && "Unknown value type!"); + Inst.addOperand(MCOperand::CreateExpr(SR)); + } + + void addThumbMemPCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + if (isImm()) { + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (CE) { + Inst.addOperand(MCOperand::CreateImm(CE->getValue())); + return; + } + + const MCSymbolRefExpr *SR = dyn_cast<MCSymbolRefExpr>(Imm.Val); + assert(SR && "Unknown value type!"); + Inst.addOperand(MCOperand::CreateExpr(SR)); + return; + } + + assert(isMem() && "Unknown value type!"); + assert(isa<MCConstantExpr>(Memory.OffsetImm) && "Unknown value type!"); + Inst.addOperand(MCOperand::CreateImm(Memory.OffsetImm->getValue())); + } + void addARMSOImmNotOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The operand is actually a so_imm, but we have its bitwise @@ -1680,6 +1803,11 @@ public: Inst.addOperand(MCOperand::CreateImm(unsigned(getMemBarrierOpt()))); } + void addInstSyncBarrierOptOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(unsigned(getInstSyncBarrierOpt()))); + } + void addMemNoOffsetOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); @@ -1688,8 +1816,6 @@ public: void addMemPCRelImm12Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); int32_t Imm = Memory.OffsetImm->getValue(); - // FIXME: Handle #-0 - if (Imm == INT32_MIN) Imm = 0; Inst.addOperand(MCOperand::CreateImm(Imm)); } @@ -2228,21 +2354,24 @@ public: } static ARMOperand * - CreateRegList(const SmallVectorImpl<std::pair<unsigned, SMLoc> > &Regs, + 
CreateRegList(SmallVectorImpl<std::pair<unsigned, unsigned> > &Regs, SMLoc StartLoc, SMLoc EndLoc) { + assert (Regs.size() > 0 && "RegList contains no registers?"); KindTy Kind = k_RegisterList; - if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Regs.front().first)) + if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Regs.front().second)) Kind = k_DPRRegisterList; else if (ARMMCRegisterClasses[ARM::SPRRegClassID]. - contains(Regs.front().first)) + contains(Regs.front().second)) Kind = k_SPRRegisterList; + // Sort based on the register encoding values. + array_pod_sort(Regs.begin(), Regs.end()); + ARMOperand *Op = new ARMOperand(Kind); - for (SmallVectorImpl<std::pair<unsigned, SMLoc> >::const_iterator + for (SmallVectorImpl<std::pair<unsigned, unsigned> >::const_iterator I = Regs.begin(), E = Regs.end(); I != E; ++I) - Op->Registers.push_back(I->first); - array_pod_sort(Op->Registers.begin(), Op->Registers.end()); + Op->Registers.push_back(I->second); Op->StartLoc = StartLoc; Op->EndLoc = EndLoc; return Op; @@ -2345,6 +2474,15 @@ public: return Op; } + static ARMOperand *CreateInstSyncBarrierOpt(ARM_ISB::InstSyncBOpt Opt, + SMLoc S) { + ARMOperand *Op = new ARMOperand(k_InstSyncBarrierOpt); + Op->ISBOpt.Val = Opt; + Op->StartLoc = S; + Op->EndLoc = S; + return Op; + } + static ARMOperand *CreateProcIFlags(ARM_PROC::IFlags IFlags, SMLoc S) { ARMOperand *Op = new ARMOperand(k_ProcIFlags); Op->IFlags.Val = IFlags; @@ -2397,7 +2535,10 @@ void ARMOperand::print(raw_ostream &OS) const { getImm()->print(OS); break; case k_MemBarrierOpt: - OS << "<ARM_MB::" << MemBOptToString(getMemBarrierOpt()) << ">"; + OS << "<ARM_MB::" << MemBOptToString(getMemBarrierOpt(), false) << ">"; + break; + case k_InstSyncBarrierOpt: + OS << "<ARM_ISB::" << InstSyncBOptToString(getInstSyncBarrierOpt()) << ">"; break; case k_Memory: OS << "<memory " @@ -2731,8 +2872,9 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) { return -1; switch (Name[2]) { default: return -1; - case '0': return 10; - case '1': return 11; + // p10 and p11 are invalid for coproc instructions (reserved for FP/NEON) + case '0': return CoprocOp == 'p'? -1: 10; + case '1': return CoprocOp == 'p'? -1: 11; case '2': return 12; case '3': return 13; case '4': return 14; @@ -2910,12 +3052,14 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // The reglist instructions have at most 16 registers, so reserve // space for that many. - SmallVector<std::pair<unsigned, SMLoc>, 16> Registers; + int EReg = 0; + SmallVector<std::pair<unsigned, unsigned>, 16> Registers; // Allow Q regs and just interpret them as the two D sub-registers. if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) { Reg = getDRegFromQReg(Reg); - Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc)); + EReg = MRI->getEncodingValue(Reg); + Registers.push_back(std::pair<unsigned, unsigned>(EReg, Reg)); ++Reg; } const MCRegisterClass *RC; @@ -2929,7 +3073,8 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return Error(RegLoc, "invalid register in register list"); // Store the register. 
- Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc));
+ EReg = MRI->getEncodingValue(Reg);
+ Registers.push_back(std::pair<unsigned, unsigned>(EReg, Reg));
 // This starts immediately after the first register token in the list,
 // so we can see either a comma or a minus (range separator) as a legal
@@ -2959,7 +3104,8 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
 // Add all the registers in the range to the register list.
 while (Reg != EndReg) {
 Reg = getNextRegister(Reg);
- Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc));
+ EReg = MRI->getEncodingValue(Reg);
+ Registers.push_back(std::pair<unsigned, unsigned>(EReg, Reg));
 }
 continue;
 }
@@ -2992,14 +3138,15 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
 continue;
 }
 // VFP register lists must also be contiguous.
- // It's OK to use the enumeration values directly here rather, as the
- // VFP register classes have the enum sorted properly.
 if (RC != &ARMMCRegisterClasses[ARM::GPRRegClassID] && Reg != OldReg + 1)
 return Error(RegLoc, "non-contiguous register range");
- Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc));
- if (isQReg)
- Registers.push_back(std::pair<unsigned, SMLoc>(++Reg, RegLoc));
+ EReg = MRI->getEncodingValue(Reg);
+ Registers.push_back(std::pair<unsigned, unsigned>(EReg, Reg));
+ if (isQReg) {
+ EReg = MRI->getEncodingValue(++Reg);
+ Registers.push_back(std::pair<unsigned, unsigned>(EReg, Reg));
+ }
 }
 if (Parser.getTok().isNot(AsmToken::RCurly))
@@ -3036,7 +3183,7 @@ parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc) {
 // There's an optional '#' token here. Normally there wouldn't be, but
 // inline assembly puts one in, and it's friendly to accept that.
 if (Parser.getTok().is(AsmToken::Hash))
- Parser.Lex(); // Eat the '#'
+ Parser.Lex(); // Eat '#' or '$'.
 const MCExpr *LaneIndex;
 SMLoc Loc = Parser.getTok().getLoc();
@@ -3334,18 +3481,27 @@ parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
 Opt = StringSwitch<unsigned>(OptStr.slice(0, OptStr.size()).lower())
 .Case("sy", ARM_MB::SY)
 .Case("st", ARM_MB::ST)
+ .Case("ld", ARM_MB::LD)
 .Case("sh", ARM_MB::ISH)
 .Case("ish", ARM_MB::ISH)
 .Case("shst", ARM_MB::ISHST)
 .Case("ishst", ARM_MB::ISHST)
+ .Case("ishld", ARM_MB::ISHLD)
 .Case("nsh", ARM_MB::NSH)
 .Case("un", ARM_MB::NSH)
 .Case("nshst", ARM_MB::NSHST)
+ .Case("nshld", ARM_MB::NSHLD)
 .Case("unst", ARM_MB::NSHST)
 .Case("osh", ARM_MB::OSH)
 .Case("oshst", ARM_MB::OSHST)
+ .Case("oshld", ARM_MB::OSHLD)
 .Default(~0U);
+ // ishld, oshld, nshld and ld are only available from ARMv8.
+ if (!hasV8Ops() && (Opt == ARM_MB::ISHLD || Opt == ARM_MB::OSHLD ||
+ Opt == ARM_MB::NSHLD || Opt == ARM_MB::LD))
+ Opt = ~0U;
+
 if (Opt == ~0U)
 return MatchOperand_NoMatch;
@@ -3354,7 +3510,7 @@ parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
 Tok.is(AsmToken::Dollar) ||
 Tok.is(AsmToken::Integer)) {
 if (Parser.getTok().isNot(AsmToken::Integer))
- Parser.Lex(); // Eat the '#'.
+ Parser.Lex(); // Eat '#' or '$'.
 SMLoc Loc = Parser.getTok().getLoc();
 const MCExpr *MemBarrierID;
@@ -3383,6 +3539,57 @@ parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
 return MatchOperand_Success;
 }
+/// parseInstSyncBarrierOptOperand - Try to parse ISB inst sync barrier options.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser:: +parseInstSyncBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + SMLoc S = Parser.getTok().getLoc(); + const AsmToken &Tok = Parser.getTok(); + unsigned Opt; + + if (Tok.is(AsmToken::Identifier)) { + StringRef OptStr = Tok.getString(); + + if (OptStr.equals_lower("sy")) + Opt = ARM_ISB::SY; + else + return MatchOperand_NoMatch; + + Parser.Lex(); // Eat identifier token. + } else if (Tok.is(AsmToken::Hash) || + Tok.is(AsmToken::Dollar) || + Tok.is(AsmToken::Integer)) { + if (Parser.getTok().isNot(AsmToken::Integer)) + Parser.Lex(); // Eat '#' or '$'. + SMLoc Loc = Parser.getTok().getLoc(); + + const MCExpr *ISBarrierID; + if (getParser().parseExpression(ISBarrierID)) { + Error(Loc, "illegal expression"); + return MatchOperand_ParseFail; + } + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ISBarrierID); + if (!CE) { + Error(Loc, "constant expression expected"); + return MatchOperand_ParseFail; + } + + int Val = CE->getValue(); + if (Val & ~0xf) { + Error(Loc, "immediate value out of range"); + return MatchOperand_ParseFail; + } + + Opt = ARM_ISB::RESERVED_0 + Val; + } else + return MatchOperand_ParseFail; + + Operands.push_back(ARMOperand::CreateInstSyncBarrierOpt( + (ARM_ISB::InstSyncBOpt)Opt, S)); + return MatchOperand_Success; +} + + /// parseProcIFlagsOperand - Try to parse iflags from CPS instruction. ARMAsmParser::OperandMatchResultTy ARMAsmParser:: parseProcIFlagsOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { @@ -3602,7 +3809,7 @@ parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Error(S, "'be' or 'le' operand expected"); return MatchOperand_ParseFail; } - int Val = StringSwitch<int>(Tok.getString()) + int Val = StringSwitch<int>(Tok.getString().lower()) .Case("be", 1) .Case("le", 0) .Default(-1); @@ -3875,7 +4082,7 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Do immediates first, as we always parse those if we have a '#'. if (Parser.getTok().is(AsmToken::Hash) || Parser.getTok().is(AsmToken::Dollar)) { - Parser.Lex(); // Eat the '#'. + Parser.Lex(); // Eat '#' or '$'. // Explicitly look for a '-', as we need to encode negative zero // differently. bool isNegative = Parser.getTok().is(AsmToken::Minus); @@ -3926,260 +4133,9 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_Success; } -/// cvtT2LdrdPre - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtT2LdrdPre(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - // Rt, Rt2 - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1); - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateReg(0)); - // addr - ((ARMOperand*)Operands[4])->addMemImm8s4OffsetOperands(Inst, 2); - // pred - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtT2StrdPre - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtT2StrdPre(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - // Create a writeback register dummy placeholder. 
- Inst.addOperand(MCOperand::CreateReg(0)); - // Rt, Rt2 - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1); - // addr - ((ARMOperand*)Operands[4])->addMemImm8s4OffsetOperands(Inst, 2); - // pred - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtLdWriteBackRegT2AddrModeImm8 - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - - ((ARMOperand*)Operands[3])->addMemImm8OffsetOperands(Inst, 2); - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtStWriteBackRegT2AddrModeImm8 - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - ((ARMOperand*)Operands[3])->addMemImm8OffsetOperands(Inst, 2); - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtLdWriteBackRegAddrMode2 - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtLdWriteBackRegAddrMode2(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - - ((ARMOperand*)Operands[3])->addAddrMode2Operands(Inst, 3); - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtLdWriteBackRegAddrModeImm12 - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - - ((ARMOperand*)Operands[3])->addMemImm12OffsetOperands(Inst, 2); - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - - -/// cvtStWriteBackRegAddrModeImm12 - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtStWriteBackRegAddrModeImm12(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - ((ARMOperand*)Operands[3])->addMemImm12OffsetOperands(Inst, 2); - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtStWriteBackRegAddrMode2 - Convert parsed operands to MCInst. 
-/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtStWriteBackRegAddrMode2(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - ((ARMOperand*)Operands[3])->addAddrMode2Operands(Inst, 3); - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtStWriteBackRegAddrMode3 - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtStWriteBackRegAddrMode3(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - ((ARMOperand*)Operands[3])->addAddrMode3Operands(Inst, 3); - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtLdExtTWriteBackImm - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtLdExtTWriteBackImm(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - // Rt - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - // addr - ((ARMOperand*)Operands[3])->addMemNoOffsetOperands(Inst, 1); - // offset - ((ARMOperand*)Operands[4])->addPostIdxImm8Operands(Inst, 1); - // pred - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtLdExtTWriteBackReg - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtLdExtTWriteBackReg(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - // Rt - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - // addr - ((ARMOperand*)Operands[3])->addMemNoOffsetOperands(Inst, 1); - // offset - ((ARMOperand*)Operands[4])->addPostIdxRegOperands(Inst, 2); - // pred - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtStExtTWriteBackImm - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtStExtTWriteBackImm(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - // Rt - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - // addr - ((ARMOperand*)Operands[3])->addMemNoOffsetOperands(Inst, 1); - // offset - ((ARMOperand*)Operands[4])->addPostIdxImm8Operands(Inst, 1); - // pred - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtStExtTWriteBackReg - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. 
-void ARMAsmParser:: -cvtStExtTWriteBackReg(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - // Rt - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - // addr - ((ARMOperand*)Operands[3])->addMemNoOffsetOperands(Inst, 1); - // offset - ((ARMOperand*)Operands[4])->addPostIdxRegOperands(Inst, 2); - // pred - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtLdrdPre - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtLdrdPre(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - // Rt, Rt2 - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1); - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - // addr - ((ARMOperand*)Operands[4])->addAddrMode3Operands(Inst, 3); - // pred - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtStrdPre - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtStrdPre(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - // Rt, Rt2 - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1); - // addr - ((ARMOperand*)Operands[4])->addAddrMode3Operands(Inst, 3); - // pred - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtLdWriteBackRegAddrMode3 - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtLdWriteBackRegAddrMode3(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - ((ARMOperand*)Operands[3])->addAddrMode3Operands(Inst, 3); - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtThumbMultiply - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. +/// Convert parsed operands to MCInst. Needed here because this instruction +/// only has two register operands, but multiplication is commutative so +/// assemblers should accept both "mul rD, rN, rD" and "mul rD, rD, rN". void ARMAsmParser:: cvtThumbMultiply(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { @@ -4198,59 +4154,62 @@ cvtThumbMultiply(MCInst &Inst, } void ARMAsmParser:: -cvtVLDwbFixed(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - // Vd - ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1); - // Create a writeback register dummy placeholder. 
- Inst.addOperand(MCOperand::CreateImm(0));
- // Vn
- ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2);
- // pred
- ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
-}
+cvtThumbBranches(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ int CondOp = -1, ImmOp = -1;
+ switch(Inst.getOpcode()) {
+ case ARM::tB:
+ case ARM::tBcc: CondOp = 1; ImmOp = 2; break;
-void ARMAsmParser::
-cvtVLDwbRegister(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- // Vd
- ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
- // Create a writeback register dummy placeholder.
- Inst.addOperand(MCOperand::CreateImm(0));
- // Vn
- ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2);
- // Vm
- ((ARMOperand*)Operands[5])->addRegOperands(Inst, 1);
- // pred
- ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
-}
+ case ARM::t2B:
+ case ARM::t2Bcc: CondOp = 1; ImmOp = 3; break;
-void ARMAsmParser::
-cvtVSTwbFixed(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- // Create a writeback register dummy placeholder.
- Inst.addOperand(MCOperand::CreateImm(0));
- // Vn
- ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2);
- // Vt
- ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
- // pred
- ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
-}
-
-void ARMAsmParser::
-cvtVSTwbRegister(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- // Create a writeback register dummy placeholder.
- Inst.addOperand(MCOperand::CreateImm(0));
- // Vn
- ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2);
- // Vm
- ((ARMOperand*)Operands[5])->addRegOperands(Inst, 1);
- // Vt
- ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
- // pred
- ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ default: llvm_unreachable("Unexpected instruction in cvtThumbBranches");
+ }
+ // first decide whether or not the branch should be conditional
+ // by looking at its location relative to an IT block
+ if(inITBlock()) {
+ // inside an IT block we cannot have any conditional branches. any
+ // such instruction needs to be converted to unconditional form
+ switch(Inst.getOpcode()) {
+ case ARM::tBcc: Inst.setOpcode(ARM::tB); break;
+ case ARM::t2Bcc: Inst.setOpcode(ARM::t2B); break;
+ }
+ } else {
+ // outside IT blocks we can only have unconditional branches with AL
+ // condition code or conditional branches with non-AL condition code
+ unsigned Cond = static_cast<ARMOperand*>(Operands[CondOp])->getCondCode();
+ switch(Inst.getOpcode()) {
+ case ARM::tB:
+ case ARM::tBcc:
+ Inst.setOpcode(Cond == ARMCC::AL ? ARM::tB : ARM::tBcc);
+ break;
+ case ARM::t2B:
+ case ARM::t2Bcc:
+ Inst.setOpcode(Cond == ARMCC::AL ?
ARM::t2B : ARM::t2Bcc); + break; + } + } + + // now decide on encoding size based on branch target range + switch(Inst.getOpcode()) { + // classify tB as either t2B or t1B based on range of immediate operand + case ARM::tB: { + ARMOperand* op = static_cast<ARMOperand*>(Operands[ImmOp]); + if(!op->isSignedOffset<11, 1>() && isThumbTwo()) + Inst.setOpcode(ARM::t2B); + break; + } + // classify tBcc as either t2Bcc or t1Bcc based on range of immediate operand + case ARM::tBcc: { + ARMOperand* op = static_cast<ARMOperand*>(Operands[ImmOp]); + if(!op->isSignedOffset<8, 1>() && isThumbTwo()) + Inst.setOpcode(ARM::t2Bcc); + break; + } + } + ((ARMOperand*)Operands[ImmOp])->addImmOperands(Inst, 1); + ((ARMOperand*)Operands[CondOp])->addCondCodeOperands(Inst, 2); } /// Parse an ARM memory expression, return false if successful else return true @@ -4354,7 +4313,7 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Parser.getTok().is(AsmToken::Dollar) || Parser.getTok().is(AsmToken::Integer)) { if (Parser.getTok().isNot(AsmToken::Integer)) - Parser.Lex(); // Eat the '#'. + Parser.Lex(); // Eat '#' or '$'. E = Parser.getTok().getLoc(); bool isNegative = getParser().getTok().is(AsmToken::Minus); @@ -4536,7 +4495,7 @@ parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { TyOp->getToken() != ".f64")) return MatchOperand_NoMatch; - Parser.Lex(); // Eat the '#'. + Parser.Lex(); // Eat '#' or '$'. // Handle negation, as that still comes through as a separate token. bool isNegative = false; @@ -4752,11 +4711,14 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, Mnemonic == "mls" || Mnemonic == "smmls" || Mnemonic == "vcls" || Mnemonic == "vmls" || Mnemonic == "vnmls" || Mnemonic == "vacge" || Mnemonic == "vcge" || Mnemonic == "vclt" || Mnemonic == "vacgt" || - Mnemonic == "vaclt" || Mnemonic == "vacle" || + Mnemonic == "vaclt" || Mnemonic == "vacle" || Mnemonic == "hlt" || Mnemonic == "vcgt" || Mnemonic == "vcle" || Mnemonic == "smlal" || Mnemonic == "umaal" || Mnemonic == "umlal" || Mnemonic == "vabal" || Mnemonic == "vmlal" || Mnemonic == "vpadal" || Mnemonic == "vqdmlal" || - Mnemonic == "fmuls") + Mnemonic == "fmuls" || Mnemonic == "vmaxnm" || Mnemonic == "vminnm" || + Mnemonic == "vcvta" || Mnemonic == "vcvtn" || Mnemonic == "vcvtp" || + Mnemonic == "vcvtm" || Mnemonic == "vrinta" || Mnemonic == "vrintn" || + Mnemonic == "vrintp" || Mnemonic == "vrintm" || Mnemonic.startswith("vsel")) return Mnemonic; // First, split out any predication code. Ignore mnemonics we know aren't @@ -4836,8 +4798,8 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, // // FIXME: It would be nice to autogen this. 
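The isSignedOffset<11, 1> and isSignedOffset<8, 1> tests used by cvtThumbBranches above pin down when the narrow encodings survive: a 16-bit tB reaches even offsets in [-2048, 2046], a 16-bit tBcc only [-256, 254], and anything wider is promoted to t2B/t2Bcc when Thumb2 is available. A sketch of that decision under those assumptions (helper names are mine):

    #include <cstdint>

    // Same check as isSignedOffset<width, scale> in the operand class.
    template <unsigned width, unsigned scale>
    constexpr bool fitsSignedOffset(int64_t Val) {
      const int64_t Align = 1LL << scale;
      const int64_t Max = Align * ((1LL << (width - 1)) - 1);
      const int64_t Min = -Align * (1LL << (width - 1));
      return (Val % Align) == 0 && Val >= Min && Val <= Max;
    }

    // Mirrors the final switch in cvtThumbBranches: keep the 16-bit opcode
    // only while the target offset fits its immediate field.
    enum class Opc { tB, t2B, tBcc, t2Bcc };

    constexpr Opc classifyUncond(int64_t Off, bool IsThumbTwo) {
      return (!fitsSignedOffset<11, 1>(Off) && IsThumbTwo) ? Opc::t2B
                                                           : Opc::tB;
    }
    constexpr Opc classifyCond(int64_t Off, bool IsThumbTwo) {
      return (!fitsSignedOffset<8, 1>(Off) && IsThumbTwo) ? Opc::t2Bcc
                                                          : Opc::tBcc;
    }

    static_assert(classifyUncond(2046, true) == Opc::tB, "in narrow range");
    static_assert(classifyUncond(2048, true) == Opc::t2B, "needs wide form");
    static_assert(classifyCond(-256, true) == Opc::tBcc, "bottom of range");

    int main() {}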
void ARMAsmParser:: -getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet, - bool &CanAcceptPredicationCode) { +getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst, + bool &CanAcceptCarrySet, bool &CanAcceptPredicationCode) { if (Mnemonic == "and" || Mnemonic == "lsl" || Mnemonic == "lsr" || Mnemonic == "rrx" || Mnemonic == "ror" || Mnemonic == "sub" || Mnemonic == "add" || Mnemonic == "adc" || @@ -4853,28 +4815,35 @@ getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet, } else CanAcceptCarrySet = false; - if (Mnemonic == "cbnz" || Mnemonic == "setend" || Mnemonic == "dmb" || - Mnemonic == "cps" || Mnemonic == "mcr2" || Mnemonic == "it" || - Mnemonic == "mcrr2" || Mnemonic == "cbz" || Mnemonic == "cdp2" || - Mnemonic == "trap" || Mnemonic == "mrc2" || Mnemonic == "mrrc2" || - Mnemonic == "dsb" || Mnemonic == "isb" || Mnemonic == "setend" || - (Mnemonic == "clrex" && !isThumb()) || - (Mnemonic == "nop" && isThumbOne()) || - ((Mnemonic == "pld" || Mnemonic == "pli" || Mnemonic == "pldw" || - Mnemonic == "ldc2" || Mnemonic == "ldc2l" || - Mnemonic == "stc2" || Mnemonic == "stc2l") && !isThumb()) || - ((Mnemonic.startswith("rfe") || Mnemonic.startswith("srs")) && - !isThumb()) || - Mnemonic.startswith("cps") || (Mnemonic == "movs" && isThumbOne())) { + if (Mnemonic == "bkpt" || Mnemonic == "cbnz" || Mnemonic == "setend" || + Mnemonic == "cps" || Mnemonic == "it" || Mnemonic == "cbz" || + Mnemonic == "trap" || Mnemonic == "hlt" || Mnemonic.startswith("crc32") || + Mnemonic.startswith("cps") || Mnemonic.startswith("vsel") || + Mnemonic == "vmaxnm" || Mnemonic == "vminnm" || Mnemonic == "vcvta" || + Mnemonic == "vcvtn" || Mnemonic == "vcvtp" || Mnemonic == "vcvtm" || + Mnemonic == "vrinta" || Mnemonic == "vrintn" || Mnemonic == "vrintp" || + Mnemonic == "vrintm" || Mnemonic.startswith("aes") || + Mnemonic.startswith("sha1") || Mnemonic.startswith("sha256") || + (FullInst.startswith("vmull") && FullInst.endswith(".p64"))) { + // These mnemonics are never predicable CanAcceptPredicationCode = false; + } else if (!isThumb()) { + // Some instructions are only predicable in Thumb mode + CanAcceptPredicationCode + = Mnemonic != "cdp2" && Mnemonic != "clrex" && Mnemonic != "mcr2" && + Mnemonic != "mcrr2" && Mnemonic != "mrc2" && Mnemonic != "mrrc2" && + Mnemonic != "dmb" && Mnemonic != "dsb" && Mnemonic != "isb" && + Mnemonic != "pld" && Mnemonic != "pli" && Mnemonic != "pldw" && + Mnemonic != "ldc2" && Mnemonic != "ldc2l" && + Mnemonic != "stc2" && Mnemonic != "stc2l" && + !Mnemonic.startswith("rfe") && !Mnemonic.startswith("srs"); + } else if (isThumbOne()) { + if (hasV6MOps()) + CanAcceptPredicationCode = Mnemonic != "movs"; + else + CanAcceptPredicationCode = Mnemonic != "nop" && Mnemonic != "movs"; } else CanAcceptPredicationCode = true; - - if (isThumb()) { - if (Mnemonic == "bkpt" || Mnemonic == "mcr" || Mnemonic == "mcrr" || - Mnemonic == "mrc" || Mnemonic == "mrrc" || Mnemonic == "cdp") - CanAcceptPredicationCode = false; - } } bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, @@ -4929,15 +4898,6 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, static_cast<ARMOperand*>(Operands[5])->isImm()) { // Nest conditions rather than one big 'if' statement for readability. // - // If either register is a high reg, it's either one of the SP - // variants (handled above) or a 32-bit encoding, so we just - // check against T3. If the second register is the PC, this is an - // alternate form of ADR, which uses encoding T4, so check for that too. 
- if ((!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) ||
- !isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg())) &&
- static_cast<ARMOperand*>(Operands[4])->getReg() != ARM::PC &&
- static_cast<ARMOperand*>(Operands[5])->isT2SOImm())
- return false;
 // If both registers are low, we're in an IT block, and the immediate is
 // in range, we should use encoding T1 instead, which has a cc_out.
 if (inITBlock() &&
 isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) &&
 isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) &&
 static_cast<ARMOperand*>(Operands[5])->isImm0_7())
 return false;
+ // Check against T3. If the second register is the PC, this is an
+ // alternate form of ADR, which uses encoding T4, so check for that too.
+ if (static_cast<ARMOperand*>(Operands[4])->getReg() != ARM::PC &&
+ static_cast<ARMOperand*>(Operands[5])->isT2SOImm())
+ return false;
 // Otherwise, we use encoding T4, which does not have a cc_out
 // operand.
@@ -5007,6 +4972,26 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
 return false;
 }
+bool ARMAsmParser::shouldOmitPredicateOperand(
+ StringRef Mnemonic, SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+ // VRINT{Z, R, X} have a predicate operand in VFP, but not in NEON
+ unsigned RegIdx = 3;
+ if ((Mnemonic == "vrintz" || Mnemonic == "vrintx" || Mnemonic == "vrintr") &&
+ static_cast<ARMOperand *>(Operands[2])->getToken() == ".f32") {
+ if (static_cast<ARMOperand *>(Operands[3])->isToken() &&
+ static_cast<ARMOperand *>(Operands[3])->getToken() == ".f32")
+ RegIdx = 4;
+
+ if (static_cast<ARMOperand *>(Operands[RegIdx])->isReg() &&
+ (ARMMCRegisterClasses[ARM::DPRRegClassID]
+ .contains(static_cast<ARMOperand *>(Operands[RegIdx])->getReg()) ||
+ ARMMCRegisterClasses[ARM::QPRRegClassID]
+ .contains(static_cast<ARMOperand *>(Operands[RegIdx])->getReg())))
+ return true;
+ }
+ return false;
+}
+
 static bool isDataTypeToken(StringRef Tok) {
 return Tok == ".8" || Tok == ".16" || Tok == ".32" || Tok == ".64" ||
 Tok == ".i8" || Tok == ".i16" || Tok == ".i32" || Tok == ".i64" ||
@@ -5102,7 +5087,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
 // the matcher deal with finding the right instruction or generating an
 // appropriate error.
 bool CanAcceptCarrySet, CanAcceptPredicationCode;
- getMnemonicAcceptInfo(Mnemonic, CanAcceptCarrySet, CanAcceptPredicationCode);
+ getMnemonicAcceptInfo(Mnemonic, Name, CanAcceptCarrySet, CanAcceptPredicationCode);
 // If we had a carry-set on an instruction that can't do that, issue an
 // error.
@@ -5153,7 +5138,17 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
 doesIgnoreDataTypeSuffix(Mnemonic, ExtraToken))
 continue;
- if (ExtraToken != ".n") {
+ // For ARM mode, generate an error if the .n qualifier is used.
+ if (ExtraToken == ".n" && !isThumb()) {
+ SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Start);
+ return Error(Loc, "instruction with .n (narrow) qualifier not allowed in "
+ "arm mode");
+ }
+
+ // The .n qualifier is always discarded as that is what the tables
+ // and matcher expect. In ARM mode the .w qualifier has no effect,
+ // so discard it to avoid errors that can be caused by the matcher.
+ if (ExtraToken != ".n" && (isThumb() || ExtraToken != ".w")) { SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Start); Operands.push_back(ARMOperand::CreateToken(ExtraToken, Loc)); } @@ -5199,6 +5194,15 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, delete Op; } + // Some instructions have the same mnemonic, but don't always + // have a predicate. Distinguish them here and delete the + // predicate if needed. + if (shouldOmitPredicateOperand(Mnemonic, Operands)) { + ARMOperand *Op = static_cast<ARMOperand*>(Operands[1]); + Operands.erase(Operands.begin() + 1); + delete Op; + } + // ARM mode 'blx' need special handling, as the register operand version // is predicable, but the label operand version is not. So, we can't rely // on the Mnemonic based checking to correctly figure out when to put @@ -5218,8 +5222,9 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // expressed as a GPRPair, so we have to manually merge them. // FIXME: We would really like to be able to tablegen'erate this. if (!isThumb() && Operands.size() > 4 && - (Mnemonic == "ldrexd" || Mnemonic == "strexd")) { - bool isLoad = (Mnemonic == "ldrexd"); + (Mnemonic == "ldrexd" || Mnemonic == "strexd" || Mnemonic == "ldaexd" || + Mnemonic == "stlexd")) { + bool isLoad = (Mnemonic == "ldrexd" || Mnemonic == "ldaexd"); unsigned Idx = isLoad ? 2 : 3; ARMOperand* Op1 = static_cast<ARMOperand*>(Operands[Idx]); ARMOperand* Op2 = static_cast<ARMOperand*>(Operands[Idx+1]); @@ -5250,6 +5255,26 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, } } + // FIXME: As said above, this is all a pretty gross hack. This instruction + // does not fit with other "subs" and tblgen. + // Adjust operands of B9.3.19 SUBS PC, LR, #imm (Thumb2) system instruction + // so the Mnemonic is the original name "subs" and delete the predicate + // operand so it will match the table entry. + if (isThumbTwo() && Mnemonic == "sub" && Operands.size() == 6 && + static_cast<ARMOperand*>(Operands[3])->isReg() && + static_cast<ARMOperand*>(Operands[3])->getReg() == ARM::PC && + static_cast<ARMOperand*>(Operands[4])->isReg() && + static_cast<ARMOperand*>(Operands[4])->getReg() == ARM::LR && + static_cast<ARMOperand*>(Operands[5])->isImm()) { + ARMOperand *Op0 = static_cast<ARMOperand*>(Operands[0]); + Operands.erase(Operands.begin()); + delete Op0; + Operands.insert(Operands.begin(), ARMOperand::CreateToken(Name, NameLoc)); + + ARMOperand *Op1 = static_cast<ARMOperand*>(Operands[1]); + Operands.erase(Operands.begin() + 1); + delete Op1; + } return false; } @@ -5283,45 +5308,44 @@ static bool listContainsReg(MCInst &Inst, unsigned OpNo, unsigned Reg) { return false; } -// FIXME: We would really prefer to have MCInstrInfo (the wrapper around -// the ARMInsts array) instead. Getting that here requires awkward -// API changes, though. Better way? -namespace llvm { -extern const MCInstrDesc ARMInsts[]; -} -static const MCInstrDesc &getInstDesc(unsigned Opcode) { - return ARMInsts[Opcode]; +// Return true if instruction has the interesting property of being +// allowed in IT blocks, but not being predicable. +static bool instIsBreakpoint(const MCInst &Inst) { + return Inst.getOpcode() == ARM::tBKPT || + Inst.getOpcode() == ARM::BKPT || + Inst.getOpcode() == ARM::tHLT || + Inst.getOpcode() == ARM::HLT; + } // FIXME: We would really like to be able to tablegen'erate this. 
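// Before validateInstruction, it is worth making the IT-block arithmetic
// explicit. The two helpers below are an illustrative sketch only (they are
// not part of this change); they assume the parser's normalized mask
// convention, where a set bit means "then", a 1-based slot position, and
// llvm::countTrailingZeros from llvm/Support/MathExtras.h (which this patch
// already uses elsewhere).

static unsigned itBlockLength(unsigned Mask) {
  assert((Mask & 0xF) != 0 && "IT mask must have a terminating set bit");
  // The lowest set bit of the 4-bit mask terminates the block, so
  // (3 - countTrailingZeros) then/else slots follow the initial slot.
  return 4 - llvm::countTrailingZeros(Mask & 0xFu);
}

static bool itSlotIsThen(unsigned Mask, unsigned CurPosition) {
  assert(CurPosition >= 1 && CurPosition <= itBlockLength(Mask));
  if (CurPosition == 1)
    return true; // the first slot always takes the IT condition itself
  // This is the same bit selection validateInstruction performs below.
  return (Mask >> (5 - CurPosition)) & 1;
}

// e.g. a normalized mask of 0b1010 gives length 3 and slots T, T, E
// (an ITTE block).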
bool ARMAsmParser:: validateInstruction(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode()); + const MCInstrDesc &MCID = MII.get(Inst.getOpcode()); SMLoc Loc = Operands[0]->getStartLoc(); + // Check the IT block state first. - // NOTE: BKPT instruction has the interesting property of being - // allowed in IT blocks, but not being predicable. It just always - // executes. - if (inITBlock() && Inst.getOpcode() != ARM::tBKPT && - Inst.getOpcode() != ARM::BKPT) { - unsigned bit = 1; + // NOTE: BKPT and HLT instructions have the interesting property of being + // allowed in IT blocks, but not being predicable. They just always execute. + if (inITBlock() && !instIsBreakpoint(Inst)) { + unsigned Bit = 1; if (ITState.FirstCond) ITState.FirstCond = false; else - bit = (ITState.Mask >> (5 - ITState.CurPosition)) & 1; + Bit = (ITState.Mask >> (5 - ITState.CurPosition)) & 1; // The instruction must be predicable. if (!MCID.isPredicable()) return Error(Loc, "instructions in IT block must be predicable"); unsigned Cond = Inst.getOperand(MCID.findFirstPredOperandIdx()).getImm(); - unsigned ITCond = bit ? ITState.Cond : + unsigned ITCond = Bit ? ITState.Cond : ARMCC::getOppositeCondition(ITState.Cond); if (Cond != ITCond) { // Find the condition code Operand to get its SMLoc information. SMLoc CondLoc; - for (unsigned i = 1; i < Operands.size(); ++i) - if (static_cast<ARMOperand*>(Operands[i])->isCondCode()) - CondLoc = Operands[i]->getStartLoc(); + for (unsigned I = 1; I < Operands.size(); ++I) + if (static_cast<ARMOperand*>(Operands[I])->isCondCode()) + CondLoc = Operands[I]->getStartLoc(); return Error(CondLoc, "incorrect condition in IT block; got '" + StringRef(ARMCondCodeToString(ARMCC::CondCodes(Cond))) + "', but expected '" + @@ -5330,20 +5354,55 @@ validateInstruction(MCInst &Inst, // Check for non-'al' condition codes outside of the IT block. } else if (isThumbTwo() && MCID.isPredicable() && Inst.getOperand(MCID.findFirstPredOperandIdx()).getImm() != - ARMCC::AL && Inst.getOpcode() != ARM::tB && - Inst.getOpcode() != ARM::t2B) + ARMCC::AL && Inst.getOpcode() != ARM::tBcc && + Inst.getOpcode() != ARM::t2Bcc) return Error(Loc, "predicated instructions must be in IT block"); - switch (Inst.getOpcode()) { + const unsigned Opcode = Inst.getOpcode(); + switch (Opcode) { case ARM::LDRD: case ARM::LDRD_PRE: case ARM::LDRD_POST: { + const unsigned RtReg = Inst.getOperand(0).getReg(); + + // Rt can't be R14. + if (RtReg == ARM::LR) + return Error(Operands[3]->getStartLoc(), + "Rt can't be R14"); + + const unsigned Rt = MRI->getEncodingValue(RtReg); + // Rt must be even-numbered. + if ((Rt & 1) == 1) + return Error(Operands[3]->getStartLoc(), + "Rt must be even-numbered"); + // Rt2 must be Rt + 1. - unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg()); - unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(1).getReg()); + const unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(1).getReg()); if (Rt2 != Rt + 1) return Error(Operands[3]->getStartLoc(), "destination operands must be sequential"); + + if (Opcode == ARM::LDRD_PRE || Opcode == ARM::LDRD_POST) { + const unsigned Rn = MRI->getEncodingValue(Inst.getOperand(3).getReg()); + // For addressing modes with writeback, the base register needs to be + // different from the destination registers. 
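      // (Per the ARM ARM, writeback forms are UNPREDICTABLE when Rn equals
      // Rt or Rt2 -- e.g. "ldrd r0, r1, [r0], #8" would write back into a
      // destination register -- so they are diagnosed here.)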
+ if (Rn == Rt || Rn == Rt2) + return Error(Operands[3]->getStartLoc(), + "base register needs to be different from destination " + "registers"); + } + + return false; + } + case ARM::t2LDRDi8: + case ARM::t2LDRD_PRE: + case ARM::t2LDRD_POST: { + // Rt2 must be different from Rt. + unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg()); + unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(1).getReg()); + if (Rt2 == Rt) + return Error(Operands[3]->getStartLoc(), + "destination operands can't be identical"); return false; } case ARM::STRD: { @@ -5367,50 +5426,77 @@ validateInstruction(MCInst &Inst, } case ARM::SBFX: case ARM::UBFX: { - // width must be in range [1, 32-lsb] - unsigned lsb = Inst.getOperand(2).getImm(); - unsigned widthm1 = Inst.getOperand(3).getImm(); - if (widthm1 >= 32 - lsb) + // Width must be in range [1, 32-lsb]. + unsigned LSB = Inst.getOperand(2).getImm(); + unsigned Widthm1 = Inst.getOperand(3).getImm(); + if (Widthm1 >= 32 - LSB) return Error(Operands[5]->getStartLoc(), "bitfield width must be in range [1,32-lsb]"); return false; } + // Notionally handles ARM::tLDMIA_UPD too. case ARM::tLDMIA: { // If we're parsing Thumb2, the .w variant is available and handles - // most cases that are normally illegal for a Thumb1 LDM - // instruction. We'll make the transformation in processInstruction() - // if necessary. + // most cases that are normally illegal for a Thumb1 LDM instruction. + // We'll make the transformation in processInstruction() if necessary. // // Thumb LDM instructions are writeback iff the base register is not // in the register list. unsigned Rn = Inst.getOperand(0).getReg(); - bool hasWritebackToken = + bool HasWritebackToken = (static_cast<ARMOperand*>(Operands[3])->isToken() && static_cast<ARMOperand*>(Operands[3])->getToken() == "!"); - bool listContainsBase; - if (checkLowRegisterList(Inst, 3, Rn, 0, listContainsBase) && !isThumbTwo()) - return Error(Operands[3 + hasWritebackToken]->getStartLoc(), + bool ListContainsBase; + if (checkLowRegisterList(Inst, 3, Rn, 0, ListContainsBase) && !isThumbTwo()) + return Error(Operands[3 + HasWritebackToken]->getStartLoc(), "registers must be in range r0-r7"); // If we should have writeback, then there should be a '!' token. - if (!listContainsBase && !hasWritebackToken && !isThumbTwo()) + if (!ListContainsBase && !HasWritebackToken && !isThumbTwo()) return Error(Operands[2]->getStartLoc(), "writeback operator '!' expected"); // If we should not have writeback, there must not be a '!'. This is // true even for the 32-bit wide encodings. - if (listContainsBase && hasWritebackToken) + if (ListContainsBase && HasWritebackToken) return Error(Operands[3]->getStartLoc(), "writeback operator '!' not allowed when base register " "in register list"); break; } - case ARM::t2LDMIA_UPD: { + case ARM::LDMIA_UPD: + case ARM::LDMDB_UPD: + case ARM::LDMIB_UPD: + case ARM::LDMDA_UPD: + // ARM variants loading and updating the same register are only officially + // UNPREDICTABLE on v7 upwards. Goodness knows what they did before. + if (!hasV7Ops()) + break; + // Fallthrough + case ARM::t2LDMIA_UPD: + case ARM::t2LDMDB_UPD: + case ARM::t2STMIA_UPD: + case ARM::t2STMDB_UPD: { if (listContainsReg(Inst, 3, Inst.getOperand(0).getReg())) - return Error(Operands[4]->getStartLoc(), - "writeback operator '!' 
not allowed when base register " - "in register list"); + return Error(Operands.back()->getStartLoc(), + "writeback register not allowed in register list"); break; } + case ARM::sysLDMIA_UPD: + case ARM::sysLDMDA_UPD: + case ARM::sysLDMDB_UPD: + case ARM::sysLDMIB_UPD: + if (!listContainsReg(Inst, 3, ARM::PC)) + return Error(Operands[4]->getStartLoc(), + "writeback register only allowed on system LDM " + "if PC in register-list"); + break; + case ARM::sysSTMIA_UPD: + case ARM::sysSTMDA_UPD: + case ARM::sysSTMDB_UPD: + case ARM::sysSTMIB_UPD: + return Error(Operands[2]->getStartLoc(), + "system STM cannot have writeback register"); + break; case ARM::tMUL: { // The second source operand must be the same register as the destination // operand. @@ -5434,26 +5520,35 @@ validateInstruction(MCInst &Inst, // so only issue a diagnostic for thumb1. The instructions will be // switched to the t2 encodings in processInstruction() if necessary. case ARM::tPOP: { - bool listContainsBase; - if (checkLowRegisterList(Inst, 2, 0, ARM::PC, listContainsBase) && + bool ListContainsBase; + if (checkLowRegisterList(Inst, 2, 0, ARM::PC, ListContainsBase) && !isThumbTwo()) return Error(Operands[2]->getStartLoc(), "registers must be in range r0-r7 or pc"); break; } case ARM::tPUSH: { - bool listContainsBase; - if (checkLowRegisterList(Inst, 2, 0, ARM::LR, listContainsBase) && + bool ListContainsBase; + if (checkLowRegisterList(Inst, 2, 0, ARM::LR, ListContainsBase) && !isThumbTwo()) return Error(Operands[2]->getStartLoc(), "registers must be in range r0-r7 or lr"); break; } case ARM::tSTMIA_UPD: { - bool listContainsBase; - if (checkLowRegisterList(Inst, 4, 0, 0, listContainsBase) && !isThumbTwo()) + bool ListContainsBase, InvalidLowList; + InvalidLowList = checkLowRegisterList(Inst, 4, Inst.getOperand(0).getReg(), + 0, ListContainsBase); + if (InvalidLowList && !isThumbTwo()) return Error(Operands[4]->getStartLoc(), "registers must be in range r0-r7"); + + // This would be converted to a 32-bit stm, but that's not valid if the + // writeback register is in the list. + if (InvalidLowList && ListContainsBase) + return Error(Operands[4]->getStartLoc(), + "writeback operator '!' not allowed when base register " + "in register list"); break; } case ARM::tADDrSP: { @@ -5466,6 +5561,28 @@ validateInstruction(MCInst &Inst, } break; } + // Final range checking for Thumb unconditional branch instructions. + case ARM::tB: + if (!(static_cast<ARMOperand*>(Operands[2]))->isSignedOffset<11, 1>()) + return Error(Operands[2]->getStartLoc(), "branch target out of range"); + break; + case ARM::t2B: { + int op = (Operands[2]->isImm()) ? 2 : 3; + if (!(static_cast<ARMOperand*>(Operands[op]))->isSignedOffset<24, 1>()) + return Error(Operands[op]->getStartLoc(), "branch target out of range"); + break; + } + // Final range checking for Thumb conditional branch instructions. + case ARM::tBcc: + if (!(static_cast<ARMOperand*>(Operands[2]))->isSignedOffset<8, 1>()) + return Error(Operands[2]->getStartLoc(), "branch target out of range"); + break; + case ARM::t2Bcc: { + int Op = (Operands[2]->isImm()) ? 2 : 3; + if (!(static_cast<ARMOperand*>(Operands[Op]))->isSignedOffset<20, 1>()) + return Error(Operands[Op]->getStartLoc(), "branch target out of range"); + break; + } } return false; @@ -5749,7 +5866,9 @@ processInstruction(MCInst &Inst, case ARM::t2LDRpcrel: // Select the narrow version if the immediate will fit. 
if (Inst.getOperand(1).getImm() > 0 && - Inst.getOperand(1).getImm() <= 0xff) + Inst.getOperand(1).getImm() <= 0xff && + !(static_cast<ARMOperand*>(Operands[2])->isToken() && + static_cast<ARMOperand*>(Operands[2])->getToken() == ".w")) Inst.setOpcode(ARM::tLDRpci); else Inst.setOpcode(ARM::t2LDRpci); @@ -7398,11 +7517,10 @@ processInstruction(MCInst &Inst, MCOperand &MO = Inst.getOperand(1); unsigned Mask = MO.getImm(); unsigned OrigMask = Mask; - unsigned TZ = CountTrailingZeros_32(Mask); + unsigned TZ = countTrailingZeros(Mask); if ((Inst.getOperand(0).getImm() & 1) == 0) { assert(Mask && TZ <= 3 && "illegal IT mask value!"); - for (unsigned i = 3; i != TZ; --i) - Mask ^= 1 << i; + Mask ^= (0xE << TZ) & 0xF; } MO.setImm(Mask); @@ -7503,7 +7621,7 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) { // 16-bit thumb arithmetic instructions either require or preclude the 'S' // suffix depending on whether they're in an IT block or not. unsigned Opc = Inst.getOpcode(); - const MCInstrDesc &MCID = getInstDesc(Opc); + const MCInstrDesc &MCID = MII.get(Opc); if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) { assert(MCID.hasOptionalDef() && "optionally flag setting instruction missing optional def operand"); @@ -7564,12 +7682,22 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return true; } - // Some instructions need post-processing to, for example, tweak which - // encoding is selected. Loop on it while changes happen so the - // individual transformations can chain off each other. E.g., - // tPOP(r8)->t2LDMIA_UPD(sp,r8)->t2STR_POST(sp,r8) - while (processInstruction(Inst, Operands)) - ; + { // processInstruction() updates inITBlock state, we need to save it away + bool wasInITBlock = inITBlock(); + + // Some instructions need post-processing to, for example, tweak which + // encoding is selected. Loop on it while changes happen so the + // individual transformations can chain off each other. 
E.g., + // tPOP(r8)->t2LDMIA_UPD(sp,r8)->t2STR_POST(sp,r8) + while (processInstruction(Inst, Operands)) + ; + + // Only after the instruction is fully processed, we can validate it + if (wasInITBlock && hasV8Ops() && isThumb() && + !isV8EligibleForIT(&Inst, 2)) { + Warning(IDLoc, "deprecated instruction in IT block"); + } + } // Only move forward at the very end so that everything in validate // and process gets a consistent answer about whether we're in an IT @@ -7622,15 +7750,15 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return Error(IDLoc, "instruction variant requires ARMv6 or later"); case Match_RequiresThumb2: return Error(IDLoc, "instruction variant requires Thumb2"); - case Match_ImmRange0_4: { + case Match_ImmRange0_15: { SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc(); if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; - return Error(ErrorLoc, "immediate operand must be in the range [0,4]"); + return Error(ErrorLoc, "immediate operand must be in the range [0,15]"); } - case Match_ImmRange0_15: { + case Match_ImmRange0_239: { SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc(); if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; - return Error(ErrorLoc, "immediate operand must be in the range [0,15]"); + return Error(ErrorLoc, "immediate operand must be in the range [0,239]"); } } @@ -7658,6 +7786,28 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { return parseDirectiveArch(DirectiveID.getLoc()); else if (IDVal == ".eabi_attribute") return parseDirectiveEabiAttr(DirectiveID.getLoc()); + else if (IDVal == ".cpu") + return parseDirectiveCPU(DirectiveID.getLoc()); + else if (IDVal == ".fpu") + return parseDirectiveFPU(DirectiveID.getLoc()); + else if (IDVal == ".fnstart") + return parseDirectiveFnStart(DirectiveID.getLoc()); + else if (IDVal == ".fnend") + return parseDirectiveFnEnd(DirectiveID.getLoc()); + else if (IDVal == ".cantunwind") + return parseDirectiveCantUnwind(DirectiveID.getLoc()); + else if (IDVal == ".personality") + return parseDirectivePersonality(DirectiveID.getLoc()); + else if (IDVal == ".handlerdata") + return parseDirectiveHandlerData(DirectiveID.getLoc()); + else if (IDVal == ".setfp") + return parseDirectiveSetFP(DirectiveID.getLoc()); + else if (IDVal == ".pad") + return parseDirectivePad(DirectiveID.getLoc()); + else if (IDVal == ".save") + return parseDirectiveRegSave(DirectiveID.getLoc(), false); + else if (IDVal == ".vsave") + return parseDirectiveRegSave(DirectiveID.getLoc(), true); return true; } @@ -7693,6 +7843,9 @@ bool ARMAsmParser::parseDirectiveThumb(SMLoc L) { return Error(L, "unexpected token in directive"); Parser.Lex(); + if (!hasThumb()) + return Error(L, "target does not support Thumb mode"); + if (!isThumb()) SwitchMode(); getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16); @@ -7706,19 +7859,27 @@ bool ARMAsmParser::parseDirectiveARM(SMLoc L) { return Error(L, "unexpected token in directive"); Parser.Lex(); + if (!hasARM()) + return Error(L, "target does not support ARM mode"); + if (isThumb()) SwitchMode(); getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32); return false; } +void ARMAsmParser::onLabelParsed(MCSymbol *Symbol) { + if (NextSymbolIsThumb) { + getParser().getStreamer().EmitThumbFunc(Symbol); + NextSymbolIsThumb = false; + } +} + /// parseDirectiveThumbFunc /// ::= .thumbfunc symbol_name bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) { - const MCAsmInfo &MAI = getParser().getStreamer().getContext().getAsmInfo(); - bool isMachO = MAI.hasSubsectionsViaSymbols(); - 
StringRef Name; - bool needFuncName = true; + const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo(); + bool isMachO = MAI->hasSubsectionsViaSymbols(); // Darwin asm has (optionally) function name after .thumb_func direction // ELF doesn't @@ -7727,29 +7888,19 @@ bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) { if (Tok.isNot(AsmToken::EndOfStatement)) { if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String)) return Error(L, "unexpected token in .thumb_func directive"); - Name = Tok.getIdentifier(); + MCSymbol *Func = + getParser().getContext().GetOrCreateSymbol(Tok.getIdentifier()); + getParser().getStreamer().EmitThumbFunc(Func); Parser.Lex(); // Consume the identifier token. - needFuncName = false; + return false; } } if (getLexer().isNot(AsmToken::EndOfStatement)) return Error(L, "unexpected token in directive"); - // Eat the end of statement and any blank lines that follow. - while (getLexer().is(AsmToken::EndOfStatement)) - Parser.Lex(); - - // FIXME: assuming function name will be the line following .thumb_func - // We really should be checking the next symbol definition even if there's - // stuff in between. - if (needFuncName) { - Name = Parser.getTok().getIdentifier(); - } + NextSymbolIsThumb = true; - // Mark symbol as a thumb symbol. - MCSymbol *Func = getParser().getContext().GetOrCreateSymbol(Name); - getParser().getStreamer().EmitThumbFunc(Func); return false; } @@ -7795,10 +7946,16 @@ bool ARMAsmParser::parseDirectiveCode(SMLoc L) { Parser.Lex(); if (Val == 16) { + if (!hasThumb()) + return Error(L, "target does not support Thumb mode"); + if (!isThumb()) SwitchMode(); getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16); } else { + if (!hasARM()) + return Error(L, "target does not support ARM mode"); + if (isThumb()) SwitchMode(); getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32); @@ -7855,7 +8012,267 @@ bool ARMAsmParser::parseDirectiveArch(SMLoc L) { /// parseDirectiveEabiAttr /// ::= .eabi_attribute int, int bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) { - return true; + if (Parser.getTok().isNot(AsmToken::Integer)) + return Error(L, "integer expected"); + int64_t Tag = Parser.getTok().getIntVal(); + Parser.Lex(); // eat tag integer + + if (Parser.getTok().isNot(AsmToken::Comma)) + return Error(L, "comma expected"); + Parser.Lex(); // skip comma + + L = Parser.getTok().getLoc(); + if (Parser.getTok().isNot(AsmToken::Integer)) + return Error(L, "integer expected"); + int64_t Value = Parser.getTok().getIntVal(); + Parser.Lex(); // eat value integer + + getTargetStreamer().emitAttribute(Tag, Value); + return false; +} + +/// parseDirectiveCPU +/// ::= .cpu str +bool ARMAsmParser::parseDirectiveCPU(SMLoc L) { + StringRef CPU = getParser().parseStringToEndOfStatement().trim(); + getTargetStreamer().emitTextAttribute(ARMBuildAttrs::CPU_name, CPU); + return false; +} + +/// parseDirectiveFPU +/// ::= .fpu str +bool ARMAsmParser::parseDirectiveFPU(SMLoc L) { + StringRef FPU = getParser().parseStringToEndOfStatement().trim(); + + unsigned ID = StringSwitch<unsigned>(FPU) +#define ARM_FPU_NAME(NAME, ID) .Case(NAME, ARM::ID) +#include "ARMFPUName.def" + .Default(ARM::INVALID_FPU); + + if (ID == ARM::INVALID_FPU) + return Error(L, "Unknown FPU name"); + + getTargetStreamer().emitFPU(ID); + return false; +} + +/// parseDirectiveFnStart +/// ::= .fnstart +bool ARMAsmParser::parseDirectiveFnStart(SMLoc L) { + if (FnStartLoc.isValid()) { + Error(L, ".fnstart starts before the end of previous one"); + Error(FnStartLoc, "previous 
.fnstart starts here"); + return true; + } + + FnStartLoc = L; + getTargetStreamer().emitFnStart(); + return false; +} + +/// parseDirectiveFnEnd +/// ::= .fnend +bool ARMAsmParser::parseDirectiveFnEnd(SMLoc L) { + // Check the ordering of unwind directives + if (!FnStartLoc.isValid()) + return Error(L, ".fnstart must precede .fnend directive"); + + // Reset the unwind directives parser state + resetUnwindDirectiveParserState(); + getTargetStreamer().emitFnEnd(); + return false; +} + +/// parseDirectiveCantUnwind +/// ::= .cantunwind +bool ARMAsmParser::parseDirectiveCantUnwind(SMLoc L) { + // Check the ordering of unwind directives + CantUnwindLoc = L; + if (!FnStartLoc.isValid()) + return Error(L, ".fnstart must precede .cantunwind directive"); + if (HandlerDataLoc.isValid()) { + Error(L, ".cantunwind can't be used with .handlerdata directive"); + Error(HandlerDataLoc, ".handlerdata was specified here"); + return true; + } + if (PersonalityLoc.isValid()) { + Error(L, ".cantunwind can't be used with .personality directive"); + Error(PersonalityLoc, ".personality was specified here"); + return true; + } + + getTargetStreamer().emitCantUnwind(); + return false; +} + +/// parseDirectivePersonality +/// ::= .personality name +bool ARMAsmParser::parseDirectivePersonality(SMLoc L) { + // Check the ordering of unwind directives + PersonalityLoc = L; + if (!FnStartLoc.isValid()) + return Error(L, ".fnstart must precede .personality directive"); + if (CantUnwindLoc.isValid()) { + Error(L, ".personality can't be used with .cantunwind directive"); + Error(CantUnwindLoc, ".cantunwind was specified here"); + return true; + } + if (HandlerDataLoc.isValid()) { + Error(L, ".personality must precede .handlerdata directive"); + Error(HandlerDataLoc, ".handlerdata was specified here"); + return true; + } + + // Parse the name of the personality routine + if (Parser.getTok().isNot(AsmToken::Identifier)) { + Parser.eatToEndOfStatement(); + return Error(L, "unexpected input in .personality directive."); + } + StringRef Name(Parser.getTok().getIdentifier()); + Parser.Lex(); + + MCSymbol *PR = getParser().getContext().GetOrCreateSymbol(Name); + getTargetStreamer().emitPersonality(PR); + return false; +} + +/// parseDirectiveHandlerData +/// ::= .handlerdata +bool ARMAsmParser::parseDirectiveHandlerData(SMLoc L) { + // Check the ordering of unwind directives + HandlerDataLoc = L; + if (!FnStartLoc.isValid()) + return Error(L, ".fnstart must precede .personality directive"); + if (CantUnwindLoc.isValid()) { + Error(L, ".handlerdata can't be used with .cantunwind directive"); + Error(CantUnwindLoc, ".cantunwind was specified here"); + return true; + } + + getTargetStreamer().emitHandlerData(); + return false; +} + +/// parseDirectiveSetFP +/// ::= .setfp fpreg, spreg [, offset] +bool ARMAsmParser::parseDirectiveSetFP(SMLoc L) { + // Check the ordering of unwind directives + if (!FnStartLoc.isValid()) + return Error(L, ".fnstart must precede .setfp directive"); + if (HandlerDataLoc.isValid()) + return Error(L, ".setfp must precede .handlerdata directive"); + + // Parse fpreg + SMLoc NewFPRegLoc = Parser.getTok().getLoc(); + int NewFPReg = tryParseRegister(); + if (NewFPReg == -1) + return Error(NewFPRegLoc, "frame pointer register expected"); + + // Consume comma + if (!Parser.getTok().is(AsmToken::Comma)) + return Error(Parser.getTok().getLoc(), "comma expected"); + Parser.Lex(); // skip comma + + // Parse spreg + SMLoc NewSPRegLoc = Parser.getTok().getLoc(); + int NewSPReg = tryParseRegister(); + if (NewSPReg == 
-1) + return Error(NewSPRegLoc, "stack pointer register expected"); + + if (NewSPReg != ARM::SP && NewSPReg != FPReg) + return Error(NewSPRegLoc, + "register should be either $sp or the latest fp register"); + + // Update the frame pointer register + FPReg = NewFPReg; + + // Parse offset + int64_t Offset = 0; + if (Parser.getTok().is(AsmToken::Comma)) { + Parser.Lex(); // skip comma + + if (Parser.getTok().isNot(AsmToken::Hash) && + Parser.getTok().isNot(AsmToken::Dollar)) { + return Error(Parser.getTok().getLoc(), "'#' expected"); + } + Parser.Lex(); // skip hash token. + + const MCExpr *OffsetExpr; + SMLoc ExLoc = Parser.getTok().getLoc(); + SMLoc EndLoc; + if (getParser().parseExpression(OffsetExpr, EndLoc)) + return Error(ExLoc, "malformed setfp offset"); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(OffsetExpr); + if (!CE) + return Error(ExLoc, "setfp offset must be an immediate"); + + Offset = CE->getValue(); + } + + getTargetStreamer().emitSetFP(static_cast<unsigned>(NewFPReg), + static_cast<unsigned>(NewSPReg), Offset); + return false; +} + +/// parseDirective +/// ::= .pad offset +bool ARMAsmParser::parseDirectivePad(SMLoc L) { + // Check the ordering of unwind directives + if (!FnStartLoc.isValid()) + return Error(L, ".fnstart must precede .pad directive"); + if (HandlerDataLoc.isValid()) + return Error(L, ".pad must precede .handlerdata directive"); + + // Parse the offset + if (Parser.getTok().isNot(AsmToken::Hash) && + Parser.getTok().isNot(AsmToken::Dollar)) { + return Error(Parser.getTok().getLoc(), "'#' expected"); + } + Parser.Lex(); // skip hash token. + + const MCExpr *OffsetExpr; + SMLoc ExLoc = Parser.getTok().getLoc(); + SMLoc EndLoc; + if (getParser().parseExpression(OffsetExpr, EndLoc)) + return Error(ExLoc, "malformed pad offset"); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(OffsetExpr); + if (!CE) + return Error(ExLoc, "pad offset must be an immediate"); + + getTargetStreamer().emitPad(CE->getValue()); + return false; +} + +/// parseDirectiveRegSave +/// ::= .save { registers } +/// ::= .vsave { registers } +bool ARMAsmParser::parseDirectiveRegSave(SMLoc L, bool IsVector) { + // Check the ordering of unwind directives + if (!FnStartLoc.isValid()) + return Error(L, ".fnstart must precede .save or .vsave directives"); + if (HandlerDataLoc.isValid()) + return Error(L, ".save or .vsave must precede .handlerdata directive"); + + // RAII object to make sure parsed operands are deleted. + struct CleanupObject { + SmallVector<MCParsedAsmOperand *, 1> Operands; + ~CleanupObject() { + for (unsigned I = 0, E = Operands.size(); I != E; ++I) + delete Operands[I]; + } + } CO; + + // Parse the register list + if (parseRegisterList(CO.Operands)) + return true; + ARMOperand *Op = (ARMOperand*)CO.Operands[0]; + if (!IsVector && !Op->isRegList()) + return Error(L, ".save expects GPR registers"); + if (IsVector && !Op->isDPRRegList()) + return Error(L, ".vsave expects DPR registers"); + + getTargetStreamer().emitRegSave(Op->getRegList(), IsVector); + return false; } /// Force static initialization. diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index ac937f35345e..9c7988f8a857 100644 --- a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -65,7 +65,7 @@ namespace { void setITState(char Firstcond, char Mask) { // (3 - the number of trailing zeros) is the number of then / else. 
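    // Worked example (added for illustration, per the ARM ARM IT encoding):
    // "ITTE EQ" encodes Firstcond = 0b0000 and Mask = 0b0110;
    // countTrailingZeros(0b0110) == 1, so 3 - 1 == 2 conditional slots
    // follow the leading one -- a "then" (EQ) and an "else" (NE).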
unsigned CondBit0 = Firstcond & 1; - unsigned NumTZ = CountTrailingZeros_32(Mask); + unsigned NumTZ = countTrailingZeros<uint8_t>(Mask); unsigned char CCBits = static_cast<unsigned char>(Firstcond & 0xf); assert(NumTZ <= 3 && "Invalid IT mask!"); // push condition codes onto the stack the correct order for the pops @@ -156,12 +156,17 @@ static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeGPRwithAPSRRegisterClass(MCInst &Inst, + unsigned RegNo, uint64_t Address, + const void *Decoder); static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeGPRPairRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo, @@ -236,6 +241,14 @@ static DecodeStatus DecodeBranchImmInstruction(MCInst &Inst,unsigned Insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVLDST1Instruction(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVLDST2Instruction(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVLDST3Instruction(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVLDST4Instruction(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Val, @@ -268,6 +281,8 @@ static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeInstSyncBarrierOption(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn, @@ -308,8 +323,6 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeImm0_4(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder); static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn, @@ -332,6 +345,14 @@ static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeT2LoadImm8(MCInst &Inst, unsigned Insn, + uint64_t Address, const void* Decoder); +static DecodeStatus DecodeT2LoadImm12(MCInst &Inst, unsigned Insn, + uint64_t Address, 
const void* Decoder); +static DecodeStatus DecodeT2LoadT(MCInst &Inst, unsigned Insn, + uint64_t Address, const void* Decoder); +static DecodeStatus DecodeT2LoadLabel(MCInst &Inst, unsigned Insn, + uint64_t Address, const void* Decoder); static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val, @@ -348,6 +369,8 @@ static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeQADDInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); static DecodeStatus DecodeThumbBLXOffset(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val, @@ -402,7 +425,7 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, "Asked to disassemble an ARM instruction but Subtarget is in Thumb mode!"); // We want to read exactly 4 bytes of data. - if (Region.readBytes(Address, 4, (uint8_t*)bytes, NULL) == -1) { + if (Region.readBytes(Address, 4, bytes) == -1) { Size = 0; return MCDisassembler::Fail; } @@ -431,6 +454,13 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } MI.clear(); + result = decodeInstruction(DecoderTableVFPV832, MI, insn, Address, this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + return result; + } + + MI.clear(); result = decodeInstruction(DecoderTableNEONData32, MI, insn, Address, this, STI); if (result != MCDisassembler::Fail) { @@ -467,7 +497,22 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } MI.clear(); + result = decodeInstruction(DecoderTablev8NEON32, MI, insn, Address, + this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + return result; + } + + MI.clear(); + result = decodeInstruction(DecoderTablev8Crypto32, MI, insn, Address, + this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + return result; + } + MI.clear(); Size = 0; return MCDisassembler::Fail; } @@ -492,102 +537,9 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value, bool isBranch, uint64_t InstSize, MCInst &MI, const void *Decoder) { const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder); - LLVMOpInfoCallback getOpInfo = Dis->getLLVMOpInfoCallback(); - struct LLVMOpInfo1 SymbolicOp; - memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); - SymbolicOp.Value = Value; - void *DisInfo = Dis->getDisInfoBlock(); - - if (!getOpInfo || - !getOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) { - // Clear SymbolicOp.Value from above and also all other fields. - memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); - LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback(); - if (!SymbolLookUp) - return false; - uint64_t ReferenceType; - if (isBranch) - ReferenceType = LLVMDisassembler_ReferenceType_In_Branch; - else - ReferenceType = LLVMDisassembler_ReferenceType_InOut_None; - const char *ReferenceName; - uint64_t SymbolValue = 0x00000000ffffffffULL & Value; - const char *Name = SymbolLookUp(DisInfo, SymbolValue, &ReferenceType, - Address, &ReferenceName); - if (Name) { - SymbolicOp.AddSymbol.Name = Name; - SymbolicOp.AddSymbol.Present = true; - } - // For branches always create an MCExpr so it gets printed as hex address. 
- else if (isBranch) { - SymbolicOp.Value = Value; - } - if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub) - (*Dis->CommentStream) << "symbol stub for: " << ReferenceName; - if (!Name && !isBranch) - return false; - } - - MCContext *Ctx = Dis->getMCContext(); - const MCExpr *Add = NULL; - if (SymbolicOp.AddSymbol.Present) { - if (SymbolicOp.AddSymbol.Name) { - StringRef Name(SymbolicOp.AddSymbol.Name); - MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name); - Add = MCSymbolRefExpr::Create(Sym, *Ctx); - } else { - Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, *Ctx); - } - } - - const MCExpr *Sub = NULL; - if (SymbolicOp.SubtractSymbol.Present) { - if (SymbolicOp.SubtractSymbol.Name) { - StringRef Name(SymbolicOp.SubtractSymbol.Name); - MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name); - Sub = MCSymbolRefExpr::Create(Sym, *Ctx); - } else { - Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, *Ctx); - } - } - - const MCExpr *Off = NULL; - if (SymbolicOp.Value != 0) - Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx); - - const MCExpr *Expr; - if (Sub) { - const MCExpr *LHS; - if (Add) - LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx); - else - LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx); - if (Off != 0) - Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx); - else - Expr = LHS; - } else if (Add) { - if (Off != 0) - Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx); - else - Expr = Add; - } else { - if (Off != 0) - Expr = Off; - else - Expr = MCConstantExpr::Create(0, *Ctx); - } - - if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_HI16) - MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateUpper16(Expr, *Ctx))); - else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_LO16) - MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateLower16(Expr, *Ctx))); - else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_None) - MI.addOperand(MCOperand::CreateExpr(Expr)); - else - llvm_unreachable("bad SymbolicOp.VariantKind"); - - return true; + // FIXME: Does it make sense for value to be negative? + return Dis->tryAddingSymbolicOperand(MI, (uint32_t)Value, Address, isBranch, + /* Offset */ 0, InstSize); } /// tryAddingPcLoadReferenceComment - trys to add a comment as to what is being @@ -602,17 +554,7 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value, static void tryAddingPcLoadReferenceComment(uint64_t Address, int Value, const void *Decoder) { const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder); - LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback(); - if (SymbolLookUp) { - void *DisInfo = Dis->getDisInfoBlock(); - uint64_t ReferenceType; - ReferenceType = LLVMDisassembler_ReferenceType_In_PCrel_Load; - const char *ReferenceName; - (void)SymbolLookUp(DisInfo, Value, &ReferenceType, Address, &ReferenceName); - if(ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr || - ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr) - (*Dis->CommentStream) << "literal pool for: " << ReferenceName; - } + Dis->tryAddingPcLoadReferenceComment(Value, Address); } // Thumb1 instructions don't have explicit S bits. Rather, they @@ -751,7 +693,7 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, "Asked to disassemble in Thumb mode but Subtarget is in ARM mode!"); // We want to read exactly 2 bytes of data. 
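  // NB: the readBytes calls below use the trimmed-down MemoryObject
  // interface taking just (address, size, buffer); the old "bytes copied"
  // out-parameter, previously passed as NULL, appears to have been dropped
  // upstream.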
- if (Region.readBytes(Address, 2, (uint8_t*)bytes, NULL) == -1) { + if (Region.readBytes(Address, 2, bytes) == -1) { Size = 0; return MCDisassembler::Fail; } @@ -803,7 +745,7 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } // We want to read exactly 4 bytes of data. - if (Region.readBytes(Address, 4, (uint8_t*)bytes, NULL) == -1) { + if (Region.readBytes(Address, 4, bytes) == -1) { Size = 0; return MCDisassembler::Fail; } @@ -832,23 +774,34 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, return result; } - MI.clear(); - result = decodeInstruction(DecoderTableVFP32, MI, insn32, Address, this, STI); - if (result != MCDisassembler::Fail) { - Size = 4; - UpdateThumbVFPPredicate(MI); - return result; + if (fieldFromInstruction(insn32, 28, 4) == 0xE) { + MI.clear(); + result = decodeInstruction(DecoderTableVFP32, MI, insn32, Address, this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + UpdateThumbVFPPredicate(MI); + return result; + } } MI.clear(); - result = decodeInstruction(DecoderTableNEONDup32, MI, insn32, Address, - this, STI); + result = decodeInstruction(DecoderTableVFPV832, MI, insn32, Address, this, STI); if (result != MCDisassembler::Fail) { Size = 4; - Check(result, AddThumbPredicate(MI)); return result; } + if (fieldFromInstruction(insn32, 28, 4) == 0xE) { + MI.clear(); + result = decodeInstruction(DecoderTableNEONDup32, MI, insn32, Address, + this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + Check(result, AddThumbPredicate(MI)); + return result; + } + } + if (fieldFromInstruction(insn32, 24, 8) == 0xF9) { MI.clear(); uint32_t NEONLdStInsn = insn32; @@ -876,8 +829,31 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, Check(result, AddThumbPredicate(MI)); return result; } + + MI.clear(); + uint32_t NEONCryptoInsn = insn32; + NEONCryptoInsn &= 0xF0FFFFFF; // Clear bits 27-24 + NEONCryptoInsn |= (NEONCryptoInsn & 0x10000000) >> 4; // Move bit 28 to bit 24 + NEONCryptoInsn |= 0x12000000; // Set bits 28 and 25 + result = decodeInstruction(DecoderTablev8Crypto32, MI, NEONCryptoInsn, + Address, this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + return result; + } + + MI.clear(); + uint32_t NEONv8Insn = insn32; + NEONv8Insn &= 0xF3FFFFFF; // Clear bits 27-26 + result = decodeInstruction(DecoderTablev8NEON32, MI, NEONv8Insn, Address, + this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + return result; + } } + MI.clear(); Size = 0; return MCDisassembler::Fail; } @@ -920,6 +896,21 @@ DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo, return S; } +static DecodeStatus +DecodeGPRwithAPSRRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + if (RegNo == 15) + { + Inst.addOperand(MCOperand::CreateReg(ARM::APSR_NZCV)); + return MCDisassembler::Success; + } + + Check(S, DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder)); + return S; +} + static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { if (RegNo > 7) @@ -927,6 +918,26 @@ static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo, return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder); } +static const uint16_t GPRPairDecoderTable[] = { + ARM::R0_R1, ARM::R2_R3, ARM::R4_R5, ARM::R6_R7, + ARM::R8_R9, ARM::R10_R11, ARM::R12_SP +}; + +static DecodeStatus DecodeGPRPairRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, const 
void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + if (RegNo > 13) + return MCDisassembler::Fail; + + if ((RegNo & 1) || RegNo == 0xe) + S = MCDisassembler::SoftFail; + + unsigned RegisterPair = GPRPairDecoderTable[RegNo/2]; + Inst.addOperand(MCOperand::CreateReg(RegisterPair)); + return S; +} + static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { unsigned Register = 0; @@ -959,8 +970,11 @@ static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { - if (RegNo == 13 || RegNo == 15) return MCDisassembler::Fail; - return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder); + DecodeStatus S = MCDisassembler::Success; + if (RegNo == 13 || RegNo == 15) + S = MCDisassembler::SoftFail; + Check(S, DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder)); + return S; } static const uint16_t SPRDecoderTable[] = { @@ -1030,7 +1044,7 @@ static const uint16_t QPRDecoderTable[] = { static DecodeStatus DecodeQPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { - if (RegNo > 31) + if (RegNo > 31 || (RegNo & 1) != 0) return MCDisassembler::Fail; RegNo >>= 1; @@ -1189,30 +1203,32 @@ static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - bool writebackLoad = false; - unsigned writebackReg = 0; + bool NeedDisjointWriteback = false; + unsigned WritebackReg = 0; switch (Inst.getOpcode()) { - default: - break; - case ARM::LDMIA_UPD: - case ARM::LDMDB_UPD: - case ARM::LDMIB_UPD: - case ARM::LDMDA_UPD: - case ARM::t2LDMIA_UPD: - case ARM::t2LDMDB_UPD: - writebackLoad = true; - writebackReg = Inst.getOperand(0).getReg(); - break; + default: + break; + case ARM::LDMIA_UPD: + case ARM::LDMDB_UPD: + case ARM::LDMIB_UPD: + case ARM::LDMDA_UPD: + case ARM::t2LDMIA_UPD: + case ARM::t2LDMDB_UPD: + case ARM::t2STMIA_UPD: + case ARM::t2STMDB_UPD: + NeedDisjointWriteback = true; + WritebackReg = Inst.getOperand(0).getReg(); + break; } // Empty register lists are not allowed. - if (CountPopulation_32(Val) == 0) return MCDisassembler::Fail; + if (Val == 0) return MCDisassembler::Fail; for (unsigned i = 0; i < 16; ++i) { if (Val & (1 << i)) { if (!Check(S, DecodeGPRRegisterClass(Inst, i, Address, Decoder))) return MCDisassembler::Fail; // Writeback not allowed if Rn is in the target list. - if (writebackLoad && writebackReg == Inst.end()[-1].getReg()) + if (NeedDisjointWriteback && WritebackReg == Inst.end()[-1].getReg()) Check(S, MCDisassembler::SoftFail); } } @@ -1227,6 +1243,13 @@ static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val, unsigned Vd = fieldFromInstruction(Val, 8, 5); unsigned regs = fieldFromInstruction(Val, 0, 8); + // In case of unpredictable encoding, tweak the operands. + if (regs == 0 || (Vd + regs) > 32) { + regs = Vd + regs > 32 ? 
32 - Vd : regs; + regs = std::max( 1u, regs); + S = MCDisassembler::SoftFail; + } + if (!Check(S, DecodeSPRRegisterClass(Inst, Vd, Address, Decoder))) return MCDisassembler::Fail; for (unsigned i = 0; i < (regs - 1); ++i) { @@ -1242,9 +1265,15 @@ static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val, DecodeStatus S = MCDisassembler::Success; unsigned Vd = fieldFromInstruction(Val, 8, 5); - unsigned regs = fieldFromInstruction(Val, 0, 8); + unsigned regs = fieldFromInstruction(Val, 1, 7); - regs = regs >> 1; + // In case of unpredictable encoding, tweak the operands. + if (regs == 0 || regs > 16 || (Vd + regs) > 32) { + regs = Vd + regs > 32 ? 32 - Vd : regs; + regs = std::max( 1u, regs); + regs = std::min(16u, regs); + S = MCDisassembler::SoftFail; + } if (!Check(S, DecodeDPRRegisterClass(Inst, Vd, Address, Decoder))) return MCDisassembler::Fail; @@ -1334,6 +1363,11 @@ static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn, break; } + uint64_t featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo() + .getFeatureBits(); + if ((featureBits & ARM::HasV8Ops) && (coproc != 14)) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(coproc)); Inst.addOperand(MCOperand::CreateImm(CRd)); if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) @@ -1797,6 +1831,29 @@ static DecodeStatus DecodeRFEInstruction(MCInst &Inst, unsigned Insn, return S; } +static DecodeStatus DecodeQADDInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rd = fieldFromInstruction(Insn, 12, 4); + unsigned Rm = fieldFromInstruction(Insn, 0, 4); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned pred = fieldFromInstruction(Insn, 28, 4); + + if (pred == 0xF) + return DecodeCPSInstruction(Inst, Insn, Address, Decoder); + + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) + return MCDisassembler::Fail; + return S; +} + static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { @@ -1807,6 +1864,7 @@ static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst, unsigned reglist = fieldFromInstruction(Insn, 0, 16); if (pred == 0xF) { + // Ambiguous with RFE and SRS switch (Inst.getOpcode()) { case ARM::LDMDA: Inst.setOpcode(ARM::RFEDA); @@ -1857,11 +1915,16 @@ static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst, Inst.setOpcode(ARM::SRSIB_UPD); break; default: - if (!Check(S, MCDisassembler::Fail)) return MCDisassembler::Fail; + return MCDisassembler::Fail; } // For stores (which become SRS's, the only operand is the mode. 
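    // (A valid SRS encoding additionally requires bit 22 set and bit 20
    // clear; the added check below rejects anything else that reaches this
    // path.)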
if (fieldFromInstruction(Insn, 20, 1) == 0) { + // Check SRS encoding constraints + if (!(fieldFromInstruction(Insn, 22, 1) == 1 && + fieldFromInstruction(Insn, 20, 1) == 0)) + return MCDisassembler::Fail; + Inst.addOperand( MCOperand::CreateImm(fieldFromInstruction(Insn, 0, 4))); return S; @@ -1891,6 +1954,13 @@ static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn, DecodeStatus S = MCDisassembler::Success; + // This decoder is called from multiple location that do not check + // the full encoding is valid before they do. + if (fieldFromInstruction(Insn, 5, 1) != 0 || + fieldFromInstruction(Insn, 16, 1) != 0 || + fieldFromInstruction(Insn, 20, 8) != 0x10) + return MCDisassembler::Fail; + // imod == '01' --> UNPREDICTABLE // NOTE: Even though this is technically UNPREDICTABLE, we choose to // return failure here. The '01' imod value is unprintable, so there's @@ -2106,7 +2176,7 @@ DecodeT2BInstruction(MCInst &Inst, unsigned Insn, unsigned imm10 = fieldFromInstruction(Insn, 16, 10); unsigned imm11 = fieldFromInstruction(Insn, 0, 11); unsigned tmp = (S << 23) | (I1 << 22) | (I2 << 21) | (imm10 << 11) | imm11; - int imm32 = SignExtend32<24>(tmp << 1); + int imm32 = SignExtend32<25>(tmp << 1); if (!tryAddingSymbolicOperand(Address, Address + imm32 + 4, true, 4, Inst, Decoder)) Inst.addOperand(MCOperand::CreateImm(imm32)); @@ -2432,6 +2502,57 @@ static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Insn, return S; } +static DecodeStatus DecodeVLDST1Instruction(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + unsigned type = fieldFromInstruction(Insn, 8, 4); + unsigned align = fieldFromInstruction(Insn, 4, 2); + if (type == 6 && (align & 2)) return MCDisassembler::Fail; + if (type == 7 && (align & 2)) return MCDisassembler::Fail; + if (type == 10 && align == 3) return MCDisassembler::Fail; + + unsigned load = fieldFromInstruction(Insn, 21, 1); + return load ? DecodeVLDInstruction(Inst, Insn, Address, Decoder) + : DecodeVSTInstruction(Inst, Insn, Address, Decoder); +} + +static DecodeStatus DecodeVLDST2Instruction(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + unsigned size = fieldFromInstruction(Insn, 6, 2); + if (size == 3) return MCDisassembler::Fail; + + unsigned type = fieldFromInstruction(Insn, 8, 4); + unsigned align = fieldFromInstruction(Insn, 4, 2); + if (type == 8 && align == 3) return MCDisassembler::Fail; + if (type == 9 && align == 3) return MCDisassembler::Fail; + + unsigned load = fieldFromInstruction(Insn, 21, 1); + return load ? DecodeVLDInstruction(Inst, Insn, Address, Decoder) + : DecodeVSTInstruction(Inst, Insn, Address, Decoder); +} + +static DecodeStatus DecodeVLDST3Instruction(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + unsigned size = fieldFromInstruction(Insn, 6, 2); + if (size == 3) return MCDisassembler::Fail; + + unsigned align = fieldFromInstruction(Insn, 4, 2); + if (align & 2) return MCDisassembler::Fail; + + unsigned load = fieldFromInstruction(Insn, 21, 1); + return load ? DecodeVLDInstruction(Inst, Insn, Address, Decoder) + : DecodeVSTInstruction(Inst, Insn, Address, Decoder); +} + +static DecodeStatus DecodeVLDST4Instruction(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + unsigned size = fieldFromInstruction(Insn, 6, 2); + if (size == 3) return MCDisassembler::Fail; + + unsigned load = fieldFromInstruction(Insn, 21, 1); + return load ? 
DecodeVLDInstruction(Inst, Insn, Address, Decoder) + : DecodeVSTInstruction(Inst, Insn, Address, Decoder); +} + static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3115,6 +3236,17 @@ static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val, unsigned Rm = fieldFromInstruction(Val, 2, 4); unsigned imm = fieldFromInstruction(Val, 0, 2); + // Thumb stores cannot use PC as dest register. + switch (Inst.getOpcode()) { + case ARM::t2STRHs: + case ARM::t2STRBs: + case ARM::t2STRs: + if (Rn == 15) + return MCDisassembler::Fail; + default: + break; + } + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) return MCDisassembler::Fail; if (!Check(S, DecoderGPRRegisterClass(Inst, Rm, Address, Decoder))) @@ -3128,53 +3260,282 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; + unsigned Rt = fieldFromInstruction(Insn, 12, 4); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + + if (Rn == 15) { + switch (Inst.getOpcode()) { + case ARM::t2LDRBs: + Inst.setOpcode(ARM::t2LDRBpci); + break; + case ARM::t2LDRHs: + Inst.setOpcode(ARM::t2LDRHpci); + break; + case ARM::t2LDRSHs: + Inst.setOpcode(ARM::t2LDRSHpci); + break; + case ARM::t2LDRSBs: + Inst.setOpcode(ARM::t2LDRSBpci); + break; + case ARM::t2LDRs: + Inst.setOpcode(ARM::t2LDRpci); + break; + case ARM::t2PLDs: + Inst.setOpcode(ARM::t2PLDpci); + break; + case ARM::t2PLIs: + Inst.setOpcode(ARM::t2PLIpci); + break; + default: + return MCDisassembler::Fail; + } + + return DecodeT2LoadLabel(Inst, Insn, Address, Decoder); + } + + if (Rt == 15) { + switch (Inst.getOpcode()) { + case ARM::t2LDRSHs: + return MCDisassembler::Fail; + case ARM::t2LDRHs: + // FIXME: this instruction is only available with MP extensions, + // this should be checked first but we don't have access to the + // feature bits here. 
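      // (Other decoders in this patch, e.g. DecodeCopMemInstruction, recover
      // the feature bits via ((const MCDisassembler*)Decoder)->
      // getSubtargetInfo().getFeatureBits(); presumably the same approach
      // could service this FIXME in a follow-up.)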
+ Inst.setOpcode(ARM::t2PLDWs); + break; + default: + break; + } + } + switch (Inst.getOpcode()) { case ARM::t2PLDs: case ARM::t2PLDWs: case ARM::t2PLIs: break; - default: { - unsigned Rt = fieldFromInstruction(Insn, 12, 4); - if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder))) + default: + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + } + + unsigned addrmode = fieldFromInstruction(Insn, 4, 2); + addrmode |= fieldFromInstruction(Insn, 0, 4) << 2; + addrmode |= fieldFromInstruction(Insn, 16, 4) << 6; + if (!Check(S, DecodeT2AddrModeSOReg(Inst, addrmode, Address, Decoder))) return MCDisassembler::Fail; + + return S; +} + +static DecodeStatus DecodeT2LoadImm8(MCInst &Inst, unsigned Insn, + uint64_t Address, const void* Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned Rt = fieldFromInstruction(Insn, 12, 4); + unsigned U = fieldFromInstruction(Insn, 9, 1); + unsigned imm = fieldFromInstruction(Insn, 0, 8); + imm |= (U << 8); + imm |= (Rn << 9); + + if (Rn == 15) { + switch (Inst.getOpcode()) { + case ARM::t2LDRi8: + Inst.setOpcode(ARM::t2LDRpci); + break; + case ARM::t2LDRBi8: + Inst.setOpcode(ARM::t2LDRBpci); + break; + case ARM::t2LDRSBi8: + Inst.setOpcode(ARM::t2LDRSBpci); + break; + case ARM::t2LDRHi8: + Inst.setOpcode(ARM::t2LDRHpci); + break; + case ARM::t2LDRSHi8: + Inst.setOpcode(ARM::t2LDRSHpci); + break; + case ARM::t2PLDi8: + Inst.setOpcode(ARM::t2PLDpci); + break; + case ARM::t2PLIi8: + Inst.setOpcode(ARM::t2PLIpci); + break; + default: + return MCDisassembler::Fail; + } + return DecodeT2LoadLabel(Inst, Insn, Address, Decoder); + } + + if (Rt == 15) { + switch (Inst.getOpcode()) { + case ARM::t2LDRSHi8: + return MCDisassembler::Fail; + default: + break; } } + switch (Inst.getOpcode()) { + case ARM::t2PLDi8: + case ARM::t2PLIi8: + case ARM::t2PLDWi8: + break; + default: + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + } + + if (!Check(S, DecodeT2AddrModeImm8(Inst, imm, Address, Decoder))) + return MCDisassembler::Fail; + return S; +} + +static DecodeStatus DecodeT2LoadImm12(MCInst &Inst, unsigned Insn, + uint64_t Address, const void* Decoder) { + DecodeStatus S = MCDisassembler::Success; + unsigned Rn = fieldFromInstruction(Insn, 16, 4); - if (Rn == 0xF) { + unsigned Rt = fieldFromInstruction(Insn, 12, 4); + unsigned imm = fieldFromInstruction(Insn, 0, 12); + imm |= (Rn << 13); + + if (Rn == 15) { switch (Inst.getOpcode()) { - case ARM::t2LDRBs: - Inst.setOpcode(ARM::t2LDRBpci); - break; - case ARM::t2LDRHs: - Inst.setOpcode(ARM::t2LDRHpci); - break; - case ARM::t2LDRSHs: - Inst.setOpcode(ARM::t2LDRSHpci); - break; - case ARM::t2LDRSBs: - Inst.setOpcode(ARM::t2LDRSBpci); + case ARM::t2LDRi12: + Inst.setOpcode(ARM::t2LDRpci); + break; + case ARM::t2LDRHi12: + Inst.setOpcode(ARM::t2LDRHpci); + break; + case ARM::t2LDRSHi12: + Inst.setOpcode(ARM::t2LDRSHpci); + break; + case ARM::t2LDRBi12: + Inst.setOpcode(ARM::t2LDRBpci); + break; + case ARM::t2LDRSBi12: + Inst.setOpcode(ARM::t2LDRSBpci); + break; + case ARM::t2PLDi12: + Inst.setOpcode(ARM::t2PLDpci); + break; + case ARM::t2PLIi12: + Inst.setOpcode(ARM::t2PLIpci); + break; + default: + return MCDisassembler::Fail; + } + return DecodeT2LoadLabel(Inst, Insn, Address, Decoder); + } + + if (Rt == 15) { + switch (Inst.getOpcode()) { + case ARM::t2LDRSHi12: + return MCDisassembler::Fail; + case ARM::t2LDRHi12: + Inst.setOpcode(ARM::t2PLDi12); + break; 
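      // (In the Thumb2 encoding space, LDRH immediate/literal with Rt == PC
      // is defined as the PLD preload hint, hence the opcode rewrite above.)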
+ default: + break; + } + } + + switch (Inst.getOpcode()) { + case ARM::t2PLDi12: + case ARM::t2PLDWi12: + case ARM::t2PLIi12: + break; + default: + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + } + + if (!Check(S, DecodeT2AddrModeImm12(Inst, imm, Address, Decoder))) + return MCDisassembler::Fail; + return S; +} + +static DecodeStatus DecodeT2LoadT(MCInst &Inst, unsigned Insn, + uint64_t Address, const void* Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned Rt = fieldFromInstruction(Insn, 12, 4); + unsigned imm = fieldFromInstruction(Insn, 0, 8); + imm |= (Rn << 9); + + if (Rn == 15) { + switch (Inst.getOpcode()) { + case ARM::t2LDRT: + Inst.setOpcode(ARM::t2LDRpci); + break; + case ARM::t2LDRBT: + Inst.setOpcode(ARM::t2LDRBpci); + break; + case ARM::t2LDRHT: + Inst.setOpcode(ARM::t2LDRHpci); + break; + case ARM::t2LDRSBT: + Inst.setOpcode(ARM::t2LDRSBpci); + break; + case ARM::t2LDRSHT: + Inst.setOpcode(ARM::t2LDRSHpci); + break; + default: + return MCDisassembler::Fail; + } + return DecodeT2LoadLabel(Inst, Insn, Address, Decoder); + } + + if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeT2AddrModeImm8(Inst, imm, Address, Decoder))) + return MCDisassembler::Fail; + return S; +} + +static DecodeStatus DecodeT2LoadLabel(MCInst &Inst, unsigned Insn, + uint64_t Address, const void* Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rt = fieldFromInstruction(Insn, 12, 4); + unsigned U = fieldFromInstruction(Insn, 23, 1); + int imm = fieldFromInstruction(Insn, 0, 12); + + if (Rt == 15) { + switch (Inst.getOpcode()) { + case ARM::t2LDRBpci: + case ARM::t2LDRHpci: + Inst.setOpcode(ARM::t2PLDpci); break; - case ARM::t2PLDs: - Inst.setOpcode(ARM::t2PLDi12); - Inst.addOperand(MCOperand::CreateReg(ARM::PC)); + case ARM::t2LDRSBpci: + Inst.setOpcode(ARM::t2PLIpci); break; - default: + case ARM::t2LDRSHpci: return MCDisassembler::Fail; + default: + break; } + } - int imm = fieldFromInstruction(Insn, 0, 12); - if (!fieldFromInstruction(Insn, 23, 1)) imm *= -1; - Inst.addOperand(MCOperand::CreateImm(imm)); - - return S; + switch(Inst.getOpcode()) { + case ARM::t2PLDpci: + case ARM::t2PLIpci: + break; + default: + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; } - unsigned addrmode = fieldFromInstruction(Insn, 4, 2); - addrmode |= fieldFromInstruction(Insn, 0, 4) << 2; - addrmode |= fieldFromInstruction(Insn, 16, 4) << 6; - if (!Check(S, DecodeT2AddrModeSOReg(Inst, addrmode, Address, Decoder))) - return MCDisassembler::Fail; + if (!U) { + // Special case for #-0. + if (imm == 0) + imm = INT32_MIN; + else + imm = -imm; + } + Inst.addOperand(MCOperand::CreateImm(imm)); return S; } @@ -3243,6 +3604,21 @@ static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val, unsigned Rn = fieldFromInstruction(Val, 9, 4); unsigned imm = fieldFromInstruction(Val, 0, 9); + // Thumb stores cannot use PC as dest register. + switch (Inst.getOpcode()) { + case ARM::t2STRT: + case ARM::t2STRBT: + case ARM::t2STRHT: + case ARM::t2STRi8: + case ARM::t2STRHi8: + case ARM::t2STRBi8: + if (Rn == 15) + return MCDisassembler::Fail; + break; + default: + break; + } + // Some instructions always use an additive offset. 
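// [Editor's aside — sketch, not part of the patch.] Note that the Val passed
// to DecodeT2AddrModeImm8 is not raw instruction bits: the DecodeT2Load*
// callers above pack it as Rn:U:imm8 (imm |= U << 8; imm |= Rn << 9), so the
// helper below unpacks it the same way:
//
//   unsigned Rn   = (Val >> 9) & 0xF;  // base register, bits 12:9
//   unsigned imm9 = Val & 0x1FF;       // U bit (bit 8) plus 8-bit offset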
switch (Inst.getOpcode()) { case ARM::t2LDRT: @@ -3278,6 +3654,37 @@ static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Insn, addr |= Rn << 9; unsigned load = fieldFromInstruction(Insn, 20, 1); + if (Rn == 15) { + switch (Inst.getOpcode()) { + case ARM::t2LDR_PRE: + case ARM::t2LDR_POST: + Inst.setOpcode(ARM::t2LDRpci); + break; + case ARM::t2LDRB_PRE: + case ARM::t2LDRB_POST: + Inst.setOpcode(ARM::t2LDRBpci); + break; + case ARM::t2LDRH_PRE: + case ARM::t2LDRH_POST: + Inst.setOpcode(ARM::t2LDRHpci); + break; + case ARM::t2LDRSB_PRE: + case ARM::t2LDRSB_POST: + if (Rt == 15) + Inst.setOpcode(ARM::t2PLIpci); + else + Inst.setOpcode(ARM::t2LDRSBpci); + break; + case ARM::t2LDRSH_PRE: + case ARM::t2LDRSH_POST: + Inst.setOpcode(ARM::t2LDRSHpci); + break; + default: + return MCDisassembler::Fail; + } + return DecodeT2LoadLabel(Inst, Insn, Address, Decoder); + } + if (!load) { if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) return MCDisassembler::Fail; @@ -3304,6 +3711,17 @@ static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val, unsigned Rn = fieldFromInstruction(Val, 13, 4); unsigned imm = fieldFromInstruction(Val, 0, 12); + // Thumb stores cannot use PC as dest register. + switch (Inst.getOpcode()) { + case ARM::t2STRi12: + case ARM::t2STRBi12: + case ARM::t2STRHi12: + if (Rn == 15) + return MCDisassembler::Fail; + default: + break; + } + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) return MCDisassembler::Fail; Inst.addOperand(MCOperand::CreateImm(imm)); @@ -3401,6 +3819,11 @@ static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Val, if (Val == 0xA || Val == 0xB) return MCDisassembler::Fail; + uint64_t featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo() + .getFeatureBits(); + if ((featureBits & ARM::HasV8Ops) && !(Val == 14 || Val == 15)) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(Val)); return MCDisassembler::Success; } @@ -3536,6 +3959,15 @@ static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Val, return MCDisassembler::Success; } +static DecodeStatus DecodeInstSyncBarrierOption(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + if (Val & ~0xf) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(Val)); + return MCDisassembler::Success; +} + static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { if (!Val) return MCDisassembler::Fail; @@ -3551,11 +3983,10 @@ static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn, unsigned Rn = fieldFromInstruction(Insn, 16, 4); unsigned pred = fieldFromInstruction(Insn, 28, 4); - if ((Rt & 1) || Rt == 0xE || Rn == 0xF) return MCDisassembler::Fail; + if (Rn == 0xF) + S = MCDisassembler::SoftFail; - if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) - return MCDisassembler::Fail; - if (!Check(S, DecodeGPRRegisterClass(Inst, Rt+1, Address, Decoder))) + if (!Check(S, DecodeGPRPairRegisterClass(Inst, Rt, Address, Decoder))) return MCDisassembler::Fail; if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) return MCDisassembler::Fail; @@ -3565,7 +3996,6 @@ static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn, return S; } - static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder){ DecodeStatus S = MCDisassembler::Success; @@ -3578,12 +4008,10 @@ static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn, if (!Check(S, 
DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder))) return MCDisassembler::Fail; - if ((Rt & 1) || Rt == 0xE || Rn == 0xF) return MCDisassembler::Fail; - if (Rd == Rn || Rd == Rt || Rd == Rt+1) return MCDisassembler::Fail; + if (Rn == 0xF || Rd == Rn || Rd == Rt || Rd == Rt+1) + S = MCDisassembler::SoftFail; - if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) - return MCDisassembler::Fail; - if (!Check(S, DecodeGPRRegisterClass(Inst, Rt+1, Address, Decoder))) + if (!Check(S, DecodeGPRPairRegisterClass(Inst, Rt, Address, Decoder))) return MCDisassembler::Fail; if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) return MCDisassembler::Fail; @@ -4310,10 +4738,8 @@ static DecodeStatus DecodeIT(MCInst &Inst, unsigned Insn, S = MCDisassembler::SoftFail; } - if (mask == 0x0) { - mask |= 0x8; - S = MCDisassembler::SoftFail; - } + if (mask == 0x0) + return MCDisassembler::Fail; Inst.addOperand(MCOperand::CreateImm(pred)); Inst.addOperand(MCOperand::CreateImm(mask)); @@ -4453,16 +4879,18 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, Vm |= (fieldFromInstruction(Insn, 5, 1) << 4); unsigned imm = fieldFromInstruction(Insn, 16, 6); unsigned cmode = fieldFromInstruction(Insn, 8, 4); + unsigned op = fieldFromInstruction(Insn, 5, 1); DecodeStatus S = MCDisassembler::Success; // VMOVv2f32 is ambiguous with these decodings. if (!(imm & 0x38) && cmode == 0xF) { + if (op == 1) return MCDisassembler::Fail; Inst.setOpcode(ARM::VMOVv2f32); return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder); } - if (!(imm & 0x20)) Check(S, MCDisassembler::SoftFail); + if (!(imm & 0x20)) return MCDisassembler::Fail; if (!Check(S, DecodeDPRRegisterClass(Inst, Vd, Address, Decoder))) return MCDisassembler::Fail; @@ -4481,16 +4909,18 @@ static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, Vm |= (fieldFromInstruction(Insn, 5, 1) << 4); unsigned imm = fieldFromInstruction(Insn, 16, 6); unsigned cmode = fieldFromInstruction(Insn, 8, 4); + unsigned op = fieldFromInstruction(Insn, 5, 1); DecodeStatus S = MCDisassembler::Success; // VMOVv4f32 is ambiguous with these decodings. 
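// [Editor's aside — sketch, not part of the patch; the predicate name is
// illustrative.] The test that follows mirrors DecodeVCVTD above: with
// cmode == 0xF and imm{5:3} == 0, the bits do not describe a fixed-point
// VCVT at all but a NEON modified-immediate "VMOV.F32 #imm":
//
//   bool decodesAsVMOV(unsigned imm, unsigned cmode) {
//     return (imm & 0x38) == 0 && cmode == 0xF;
//   }
//
// The added op == 1 check appears to reject the half of that overlap space
// that has no VMOV reading, rather than mis-decoding it.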
if (!(imm & 0x38) && cmode == 0xF) { + if (op == 1) return MCDisassembler::Fail; Inst.setOpcode(ARM::VMOVv4f32); return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder); } - if (!(imm & 0x20)) Check(S, MCDisassembler::SoftFail); + if (!(imm & 0x20)) return MCDisassembler::Fail; if (!Check(S, DecodeQPRRegisterClass(Inst, Vd, Address, Decoder))) return MCDisassembler::Fail; @@ -4501,15 +4931,6 @@ static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeImm0_4(MCInst &Inst, unsigned Insn, uint64_t Address, - const void *Decoder) -{ - unsigned Imm = fieldFromInstruction(Insn, 0, 3); - if (Imm > 4) return MCDisassembler::Fail; - Inst.addOperand(MCOperand::CreateImm(Imm)); - return MCDisassembler::Success; -} - static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 3bcd083a35fb..f89702853d41 100644 --- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -76,14 +76,23 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) { unsigned Opcode = MI->getOpcode(); + switch(Opcode) { + // Check for HINT instructions w/ canonical names. - if (Opcode == ARM::HINT || Opcode == ARM::t2HINT) { + case ARM::HINT: + case ARM::tHINT: + case ARM::t2HINT: switch (MI->getOperand(0).getImm()) { case 0: O << "\tnop"; break; case 1: O << "\tyield"; break; case 2: O << "\twfe"; break; case 3: O << "\twfi"; break; case 4: O << "\tsev"; break; + case 5: + if ((getAvailableFeatures() & ARM::HasV8Ops)) { + O << "\tsevl"; + break; + } // Fallthrough for non-v8 default: // Anything else should just print normally. printInstruction(MI, O); @@ -95,10 +104,9 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, O << ".w"; printAnnotation(O, Annot); return; - } // Check for MOVs and print canonical forms, instead. - if (Opcode == ARM::MOVsr) { + case ARM::MOVsr: { // FIXME: Thumb variants? const MCOperand &Dst = MI->getOperand(0); const MCOperand &MO1 = MI->getOperand(1); @@ -121,7 +129,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, return; } - if (Opcode == ARM::MOVsi) { + case ARM::MOVsi: { // FIXME: Thumb variants? const MCOperand &Dst = MI->getOperand(0); const MCOperand &MO1 = MI->getOperand(1); @@ -149,81 +157,91 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, return; } - // A8.6.123 PUSH - if ((Opcode == ARM::STMDB_UPD || Opcode == ARM::t2STMDB_UPD) && - MI->getOperand(0).getReg() == ARM::SP && - MI->getNumOperands() > 5) { - // Should only print PUSH if there are at least two registers in the list. - O << '\t' << "push"; - printPredicateOperand(MI, 2, O); - if (Opcode == ARM::t2STMDB_UPD) - O << ".w"; - O << '\t'; - printRegisterList(MI, 4, O); - printAnnotation(O, Annot); - return; - } - if (Opcode == ARM::STR_PRE_IMM && MI->getOperand(2).getReg() == ARM::SP && - MI->getOperand(3).getImm() == -4) { - O << '\t' << "push"; - printPredicateOperand(MI, 4, O); - O << "\t{"; - printRegName(O, MI->getOperand(1).getReg()); - O << "}"; - printAnnotation(O, Annot); - return; - } + case ARM::STMDB_UPD: + case ARM::t2STMDB_UPD: + if (MI->getOperand(0).getReg() == ARM::SP && MI->getNumOperands() > 5) { + // Should only print PUSH if there are at least two registers in the list. 
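// [Editor's aside — worked example, not part of the patch.] The operand
// arithmetic here: STMDB_UPD carries four fixed operands (the writeback
// result, the base register, and the two-operand predicate printed by
// printPredicateOperand(MI, 2, O)) before the register list that
// printRegisterList(MI, 4, O) starts at index 4. getNumOperands() > 5
// therefore means at least two list registers, which is exactly when the
// ARM ARM (A8.6.123) spells the instruction as "push {r4, lr}" rather than
// "stmdb sp!, {...}".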
+ O << '\t' << "push"; + printPredicateOperand(MI, 2, O); + if (Opcode == ARM::t2STMDB_UPD) + O << ".w"; + O << '\t'; + printRegisterList(MI, 4, O); + printAnnotation(O, Annot); + return; + } else + break; + + case ARM::STR_PRE_IMM: + if (MI->getOperand(2).getReg() == ARM::SP && + MI->getOperand(3).getImm() == -4) { + O << '\t' << "push"; + printPredicateOperand(MI, 4, O); + O << "\t{"; + printRegName(O, MI->getOperand(1).getReg()); + O << "}"; + printAnnotation(O, Annot); + return; + } else + break; // A8.6.122 POP - if ((Opcode == ARM::LDMIA_UPD || Opcode == ARM::t2LDMIA_UPD) && - MI->getOperand(0).getReg() == ARM::SP && - MI->getNumOperands() > 5) { - // Should only print POP if there are at least two registers in the list. - O << '\t' << "pop"; - printPredicateOperand(MI, 2, O); - if (Opcode == ARM::t2LDMIA_UPD) - O << ".w"; - O << '\t'; - printRegisterList(MI, 4, O); - printAnnotation(O, Annot); - return; - } - if (Opcode == ARM::LDR_POST_IMM && MI->getOperand(2).getReg() == ARM::SP && - MI->getOperand(4).getImm() == 4) { - O << '\t' << "pop"; - printPredicateOperand(MI, 5, O); - O << "\t{"; - printRegName(O, MI->getOperand(0).getReg()); - O << "}"; - printAnnotation(O, Annot); - return; - } - + case ARM::LDMIA_UPD: + case ARM::t2LDMIA_UPD: + if (MI->getOperand(0).getReg() == ARM::SP && MI->getNumOperands() > 5) { + // Should only print POP if there are at least two registers in the list. + O << '\t' << "pop"; + printPredicateOperand(MI, 2, O); + if (Opcode == ARM::t2LDMIA_UPD) + O << ".w"; + O << '\t'; + printRegisterList(MI, 4, O); + printAnnotation(O, Annot); + return; + } else + break; + + case ARM::LDR_POST_IMM: + if (MI->getOperand(2).getReg() == ARM::SP && + MI->getOperand(4).getImm() == 4) { + O << '\t' << "pop"; + printPredicateOperand(MI, 5, O); + O << "\t{"; + printRegName(O, MI->getOperand(0).getReg()); + O << "}"; + printAnnotation(O, Annot); + return; + } else + break; // A8.6.355 VPUSH - if ((Opcode == ARM::VSTMSDB_UPD || Opcode == ARM::VSTMDDB_UPD) && - MI->getOperand(0).getReg() == ARM::SP) { - O << '\t' << "vpush"; - printPredicateOperand(MI, 2, O); - O << '\t'; - printRegisterList(MI, 4, O); - printAnnotation(O, Annot); - return; - } + case ARM::VSTMSDB_UPD: + case ARM::VSTMDDB_UPD: + if (MI->getOperand(0).getReg() == ARM::SP) { + O << '\t' << "vpush"; + printPredicateOperand(MI, 2, O); + O << '\t'; + printRegisterList(MI, 4, O); + printAnnotation(O, Annot); + return; + } else + break; // A8.6.354 VPOP - if ((Opcode == ARM::VLDMSIA_UPD || Opcode == ARM::VLDMDIA_UPD) && - MI->getOperand(0).getReg() == ARM::SP) { - O << '\t' << "vpop"; - printPredicateOperand(MI, 2, O); - O << '\t'; - printRegisterList(MI, 4, O); - printAnnotation(O, Annot); - return; - } + case ARM::VLDMSIA_UPD: + case ARM::VLDMDIA_UPD: + if (MI->getOperand(0).getReg() == ARM::SP) { + O << '\t' << "vpop"; + printPredicateOperand(MI, 2, O); + O << '\t'; + printRegisterList(MI, 4, O); + printAnnotation(O, Annot); + return; + } else + break; - if (Opcode == ARM::tLDMIA) { + case ARM::tLDMIA: { bool Writeback = true; unsigned BaseReg = MI->getOperand(0).getReg(); for (unsigned i = 3; i < MI->getNumOperands(); ++i) { @@ -243,24 +261,16 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, return; } - // Thumb1 NOP - if (Opcode == ARM::tMOVr && MI->getOperand(0).getReg() == ARM::R8 && - MI->getOperand(1).getReg() == ARM::R8) { - O << "\tnop"; - printPredicateOperand(MI, 2, O); - printAnnotation(O, Annot); - return; - } - // Combine 2 GPRs from disassember into a GPRPair to match with instr 
def. // ldrexd/strexd require even/odd GPR pair. To enforce this constraint, // a single GPRPair reg operand is used in the .td file to replace the two // GPRs. However, when decoding them, the two GRPs cannot be automatically // expressed as a GPRPair, so we have to manually merge them. // FIXME: We would really like to be able to tablegen'erate this. - if (Opcode == ARM::LDREXD || Opcode == ARM::STREXD) { + case ARM::LDREXD: case ARM::STREXD: + case ARM::LDAEXD: case ARM::STLEXD: const MCRegisterClass& MRC = MRI.getRegClass(ARM::GPRRegClassID); - bool isStore = Opcode == ARM::STREXD; + bool isStore = Opcode == ARM::STREXD || Opcode == ARM::STLEXD; unsigned Reg = MI->getOperand(isStore ? 1 : 0).getReg(); if (MRC.contains(Reg)) { MCInst NewMI; @@ -315,15 +325,29 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - if (MO1.isExpr()) + if (MO1.isExpr()) { O << *MO1.getExpr(); - else if (MO1.isImm()) { - O << markup("<mem:") << "[pc, " - << markup("<imm:") << "#" << formatImm(MO1.getImm()) - << markup(">]>", "]"); + return; } - else - llvm_unreachable("Unknown LDR label operand?"); + + O << markup("<mem:") << "[pc, "; + + int32_t OffImm = (int32_t)MO1.getImm(); + bool isSub = OffImm < 0; + + // Special value for #-0. All others are normal. + if (OffImm == INT32_MIN) + OffImm = 0; + if (isSub) { + O << markup("<imm:") + << "#-" << formatImm(-OffImm) + << markup(">"); + } else { + O << markup("<imm:") + << "#" << formatImm(OffImm) + << markup(">"); + } + O << "]" << markup(">"); } // so_reg is a 4-operand unit corresponding to register forms of the A5.1 @@ -660,8 +684,8 @@ void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI, raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); uint32_t v = ~MO.getImm(); - int32_t lsb = CountTrailingZeros_32(v); - int32_t width = (32 - CountLeadingZeros_32 (v)) - lsb; + int32_t lsb = countTrailingZeros(v); + int32_t width = (32 - countLeadingZeros (v)) - lsb; assert(MO.isImm() && "Not a valid bf_inv_mask_imm value!"); O << markup("<imm:") << '#' << lsb << markup(">") << ", " @@ -671,7 +695,13 @@ void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI, void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O) { unsigned val = MI->getOperand(OpNum).getImm(); - O << ARM_MB::MemBOptToString(val); + O << ARM_MB::MemBOptToString(val, (getAvailableFeatures() & ARM::HasV8Ops)); +} + +void ARMInstPrinter::printInstSyncBOption(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + unsigned val = MI->getOperand(OpNum).getImm(); + O << ARM_ISB::InstSyncBOptToString(val); } void ARMInstPrinter::printShiftImmOperand(const MCInst *MI, unsigned OpNum, @@ -889,6 +919,7 @@ void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum, llvm_unreachable("Unhandled PC-relative pseudo-instruction!"); } +template<unsigned scale> void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); @@ -898,7 +929,7 @@ void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum, return; } - int32_t OffImm = (int32_t)MO.getImm(); + int32_t OffImm = (int32_t)MO.getImm() << scale; O << markup("<imm:"); if (OffImm == INT32_MIN) @@ -931,7 +962,7 @@ void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum, unsigned Mask = MI->getOperand(OpNum).getImm(); unsigned 
Firstcond = MI->getOperand(OpNum-1).getImm(); unsigned CondBit0 = Firstcond & 1; - unsigned NumTZ = CountTrailingZeros_32(Mask); + unsigned NumTZ = countTrailingZeros(Mask); assert(NumTZ <= 3 && "Invalid IT mask!"); for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) { bool T = ((Mask >> Pos) & 1) == CondBit0; @@ -1059,6 +1090,7 @@ void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum, O << "]" << markup(">"); } +template<bool AlwaysPrintImm0> void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { @@ -1069,22 +1101,25 @@ void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI, printRegName(O, MO1.getReg()); int32_t OffImm = (int32_t)MO2.getImm(); + bool isSub = OffImm < 0; // Don't print +0. - if (OffImm != 0) - O << ", "; - if (OffImm != 0 && UseMarkup) - O << "<imm:"; if (OffImm == INT32_MIN) - O << "#-0"; - else if (OffImm < 0) - O << "#-" << -OffImm; - else if (OffImm > 0) - O << "#" << OffImm; - if (OffImm != 0 && UseMarkup) - O << ">"; + OffImm = 0; + if (isSub) { + O << ", " + << markup("<imm:") + << "#-" << -OffImm + << markup(">"); + } else if (AlwaysPrintImm0 || OffImm > 0) { + O << ", " + << markup("<imm:") + << "#" << OffImm + << markup(">"); + } O << "]" << markup(">"); } +template<bool AlwaysPrintImm0> void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { @@ -1100,22 +1135,24 @@ void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI, printRegName(O, MO1.getReg()); int32_t OffImm = (int32_t)MO2.getImm(); + bool isSub = OffImm < 0; assert(((OffImm & 0x3) == 0) && "Not a valid immediate!"); // Don't print +0. - if (OffImm != 0) - O << ", "; - if (OffImm != 0 && UseMarkup) - O << "<imm:"; if (OffImm == INT32_MIN) - O << "#-0"; - else if (OffImm < 0) - O << "#-" << -OffImm; - else if (OffImm > 0) - O << "#" << OffImm; - if (OffImm != 0 && UseMarkup) - O << ">"; + OffImm = 0; + if (isSub) { + O << ", " + << markup("<imm:") + << "#-" << -OffImm + << markup(">"); + } else if (AlwaysPrintImm0 || OffImm > 0) { + O << ", " + << markup("<imm:") + << "#" << OffImm + << markup(">"); + } O << "]" << markup(">"); } @@ -1142,7 +1179,9 @@ void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI, const MCOperand &MO1 = MI->getOperand(OpNum); int32_t OffImm = (int32_t)MO1.getImm(); O << ", " << markup("<imm:"); - if (OffImm < 0) + if (OffImm == INT32_MIN) + O << "#-0"; + else if (OffImm < 0) O << "#-" << -OffImm; else O << "#" << OffImm; @@ -1157,19 +1196,14 @@ void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, assert(((OffImm & 0x3) == 0) && "Not a valid immediate!"); - // Don't print +0. 
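// [Editor's aside — minimal self-contained sketch, not part of the patch;
// the helper name is illustrative.] The rewritten printers in this hunk all
// follow one rule for negative offsets: INT32_MIN is the sentinel for the
// distinct "#-0" immediate, and everything else prints sign and magnitude.
#include <cstdint>
#include <string>
static std::string formatOffsetImm(int32_t OffImm) {
  bool isSub = OffImm < 0;
  if (OffImm == INT32_MIN) // sentinel: minus zero
    OffImm = 0;
  return isSub ? "#-" + std::to_string(-OffImm)
               : "#" + std::to_string(OffImm);
}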
- if (OffImm != 0) - O << ", "; - if (OffImm != 0 && UseMarkup) - O << "<imm:"; + O << ", " << markup("<imm:"); if (OffImm == INT32_MIN) O << "#-0"; else if (OffImm < 0) O << "#-" << -OffImm; - else if (OffImm > 0) + else O << "#" << OffImm; - if (OffImm != 0 && UseMarkup) - O << ">"; + O << markup(">"); } void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI, diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index 344104e87359..15ae8d1267f7 100644 --- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -71,10 +71,12 @@ public: void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printMemBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printInstSyncBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printShiftImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); + template <unsigned scale> void printAdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printThumbSRImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); @@ -96,8 +98,10 @@ public: template<bool AlwaysPrintImm0> void printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + template<bool AlwaysPrintImm0> void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + template<bool AlwaysPrintImm0> void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printT2AddrModeImm0_1020s4Operand(const MCInst *MI, unsigned OpNum, diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h index 62473b2bfdee..b6c85c2e9466 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h @@ -140,7 +140,7 @@ namespace ARM_AM { if ((Imm & ~255U) == 0) return 0; // Use CTZ to compute the rotate amount. - unsigned TZ = CountTrailingZeros_32(Imm); + unsigned TZ = countTrailingZeros(Imm); // Rotate amount must be even. Something like 0x200 must be rotated 8 bits, // not 9. @@ -153,7 +153,7 @@ namespace ARM_AM { // For values like 0xF000000F, we should ignore the low 6 bits, then // retry the hunt. if (Imm & 63U) { - unsigned TZ2 = CountTrailingZeros_32(Imm & ~63U); + unsigned TZ2 = countTrailingZeros(Imm & ~63U); unsigned RotAmt2 = TZ2 & ~1; if ((rotr32(Imm, RotAmt2) & ~255U) == 0) return (32-RotAmt2)&31; // HW rotates right, not left. @@ -221,7 +221,7 @@ namespace ARM_AM { if ((Imm & ~255U) == 0) return 0; // Use CTZ to compute the shift amount. - return CountTrailingZeros_32(Imm); + return countTrailingZeros(Imm); } /// isThumbImmShiftedVal - Return true if the specified value can be obtained @@ -240,7 +240,7 @@ namespace ARM_AM { if ((Imm & ~65535U) == 0) return 0; // Use CTZ to compute the shift amount. - return CountTrailingZeros_32(Imm); + return countTrailingZeros(Imm); } /// isThumbImm16ShiftedVal - Return true if the specified value can be @@ -296,7 +296,7 @@ namespace ARM_AM { /// encoding is possible. /// See ARM Reference Manual A6.3.2. 
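// [Editor's aside — portable stand-in, not part of the patch.] This hunk's
// mechanical rename swaps CountTrailingZeros_32/CountLeadingZeros_32 for
// llvm::countTrailingZeros/countLeadingZeros (llvm/Support/MathExtras.h);
// for the nonzero 32-bit inputs these call sites guarantee, the result is
// unchanged. An equivalent trailing-zero count:
#include <cstdint>
static unsigned ctz32(uint32_t V) {
  unsigned N = 0;
  while ((V & 1u) == 0) { // caller must ensure V != 0
    V >>= 1;
    ++N;
  }
  return N;
}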
static inline int getT2SOImmValRotateVal(unsigned V) { - unsigned RotAmt = CountLeadingZeros_32(V); + unsigned RotAmt = countLeadingZeros(V); if (RotAmt >= 24) return -1; @@ -328,7 +328,7 @@ namespace ARM_AM { static inline unsigned getT2SOImmValRotate(unsigned V) { if ((V & ~255U) == 0) return 0; // Use CTZ to compute the rotate amount. - unsigned RotAmt = CountTrailingZeros_32(V); + unsigned RotAmt = countTrailingZeros(V); return (32 - RotAmt) & 31; } diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index e66e98567873..5615b808fc11 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -25,9 +25,9 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCValue.h" -#include "llvm/Object/MachOFormat.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachO.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -152,7 +152,7 @@ static unsigned getRelaxedOpcode(unsigned Op) { switch (Op) { default: return Op; case ARM::tBcc: return ARM::t2Bcc; - case ARM::tLDRpciASM: return ARM::t2LDRpci; + case ARM::tLDRpci: return ARM::t2LDRpci; case ARM::tADR: return ARM::t2ADR; case ARM::tB: return ARM::t2B; } @@ -419,7 +419,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, uint32_t J2Bit = (I2Bit ^ 0x1) ^ signBit; uint32_t imm10Bits = (offset & 0x1FF800) >> 11; uint32_t imm11Bits = (offset & 0x000007FF); - + uint32_t Binary = 0; uint32_t firstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10Bits); uint32_t secondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) | @@ -434,8 +434,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, // four (see fixup_arm_thumb_cp). The 32-bit immediate value is encoded as // imm32 = SignExtend(S:I1:I2:imm10H:imm10L:00) // where I1 = NOT(J1 ^ S) and I2 = NOT(J2 ^ S). - // The value is encoded into disjoint bit positions in the destination - // opcode. x = unchanged, I = immediate value bit, S = sign extension bit, + // The value is encoded into disjoint bit positions in the destination + // opcode. x = unchanged, I = immediate value bit, S = sign extension bit, // J = either J1 or J2 bit, 0 = zero. // // BLX: xxxxxSIIIIIIIIII xxJxJIIIIIIIIII0 @@ -450,10 +450,10 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, uint32_t J2Bit = (I2Bit ^ 0x1) ^ signBit; uint32_t imm10HBits = (offset & 0xFFC00) >> 10; uint32_t imm10LBits = (offset & 0x3FF); - + uint32_t Binary = 0; uint32_t firstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10HBits); - uint32_t secondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) | + uint32_t secondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) | ((uint16_t)imm10LBits) << 1); Binary |= secondHalf << 16; Binary |= firstHalf; @@ -640,16 +640,16 @@ public: // FIXME: This should be in a separate file. 
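// [Editor's aside — toy usage example, not part of the patch; only names
// already visible in this diff are used.] The Darwin backend below maps
// triple arch names onto Mach-O CPU subtypes with llvm::StringSwitch, whose
// .Cases() folds several spellings into one value:
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/MachO.h"
static llvm::MachO::CPUSubTypeARM subtypeFor(llvm::StringRef Arch) {
  return llvm::StringSwitch<llvm::MachO::CPUSubTypeARM>(Arch)
      .Cases("armv7s", "thumbv7s", llvm::MachO::CPU_SUBTYPE_ARM_V7S)
      .Default(llvm::MachO::CPU_SUBTYPE_ARM_V7);
}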
class DarwinARMAsmBackend : public ARMAsmBackend { public: - const object::mach::CPUSubtypeARM Subtype; + const MachO::CPUSubTypeARM Subtype; DarwinARMAsmBackend(const Target &T, const StringRef TT, - object::mach::CPUSubtypeARM st) + MachO::CPUSubTypeARM st) : ARMAsmBackend(T, TT), Subtype(st) { HasDataInCodeSupport = true; } MCObjectWriter *createObjectWriter(raw_ostream &OS) const { return createARMMachObjectWriter(OS, /*Is64Bit=*/false, - object::mach::CTM_ARM, + MachO::CPU_TYPE_ARM, Subtype); } @@ -660,28 +660,33 @@ public: } // end anonymous namespace -MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT, StringRef CPU) { +MCAsmBackend *llvm::createARMAsmBackend(const Target &T, + const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU) { Triple TheTriple(TT); if (TheTriple.isOSDarwin()) { - object::mach::CPUSubtypeARM CS = - StringSwitch<object::mach::CPUSubtypeARM>(TheTriple.getArchName()) - .Cases("armv4t", "thumbv4t", object::mach::CSARM_V4T) - .Cases("armv5e", "thumbv5e",object::mach::CSARM_V5TEJ) - .Cases("armv6", "thumbv6", object::mach::CSARM_V6) - .Cases("armv6m", "thumbv6m", object::mach::CSARM_V6M) - .Cases("armv7em", "thumbv7em", object::mach::CSARM_V7EM) - .Cases("armv7f", "thumbv7f", object::mach::CSARM_V7F) - .Cases("armv7k", "thumbv7k", object::mach::CSARM_V7K) - .Cases("armv7m", "thumbv7m", object::mach::CSARM_V7M) - .Cases("armv7s", "thumbv7s", object::mach::CSARM_V7S) - .Default(object::mach::CSARM_V7); + MachO::CPUSubTypeARM CS = + StringSwitch<MachO::CPUSubTypeARM>(TheTriple.getArchName()) + .Cases("armv4t", "thumbv4t", MachO::CPU_SUBTYPE_ARM_V4T) + .Cases("armv5e", "thumbv5e", MachO::CPU_SUBTYPE_ARM_V5TEJ) + .Cases("armv6", "thumbv6", MachO::CPU_SUBTYPE_ARM_V6) + .Cases("armv6m", "thumbv6m", MachO::CPU_SUBTYPE_ARM_V6M) + .Cases("armv7em", "thumbv7em", MachO::CPU_SUBTYPE_ARM_V7EM) + .Cases("armv7f", "thumbv7f", MachO::CPU_SUBTYPE_ARM_V7F) + .Cases("armv7k", "thumbv7k", MachO::CPU_SUBTYPE_ARM_V7K) + .Cases("armv7m", "thumbv7m", MachO::CPU_SUBTYPE_ARM_V7M) + .Cases("armv7s", "thumbv7s", MachO::CPU_SUBTYPE_ARM_V7S) + .Default(MachO::CPU_SUBTYPE_ARM_V7); return new DarwinARMAsmBackend(T, TT, CS); } - if (TheTriple.isOSWindows()) +#if 0 + // FIXME: Introduce yet another checker but assert(0). + if (TheTriple.isOSBinFormatCOFF()) assert(0 && "Windows not supported on ARM"); +#endif uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS()); return new ELFARMAsmBackend(T, TT, OSABI); diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h index de48a0e0f30a..af939fc19129 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h @@ -121,46 +121,89 @@ namespace ARM_MB { // the option field for memory barrier operations. enum MemBOpt { RESERVED_0 = 0, - RESERVED_1 = 1, + OSHLD = 1, OSHST = 2, OSH = 3, RESERVED_4 = 4, - RESERVED_5 = 5, + NSHLD = 5, NSHST = 6, NSH = 7, RESERVED_8 = 8, - RESERVED_9 = 9, + ISHLD = 9, ISHST = 10, ISH = 11, RESERVED_12 = 12, - RESERVED_13 = 13, + LD = 13, ST = 14, SY = 15 }; - inline static const char *MemBOptToString(unsigned val) { + inline static const char *MemBOptToString(unsigned val, bool HasV8) { switch (val) { default: llvm_unreachable("Unknown memory operation"); case SY: return "sy"; case ST: return "st"; - case RESERVED_13: return "#0xd"; + case LD: return HasV8 ? 
"ld" : "#0xd"; case RESERVED_12: return "#0xc"; case ISH: return "ish"; case ISHST: return "ishst"; - case RESERVED_9: return "#0x9"; + case ISHLD: return HasV8 ? "ishld" : "#0x9"; case RESERVED_8: return "#0x8"; case NSH: return "nsh"; case NSHST: return "nshst"; - case RESERVED_5: return "#0x5"; + case NSHLD: return HasV8 ? "nshld" : "#0x5"; case RESERVED_4: return "#0x4"; case OSH: return "osh"; case OSHST: return "oshst"; - case RESERVED_1: return "#0x1"; + case OSHLD: return HasV8 ? "oshld" : "#0x1"; case RESERVED_0: return "#0x0"; } } } // namespace ARM_MB +namespace ARM_ISB { + enum InstSyncBOpt { + RESERVED_0 = 0, + RESERVED_1 = 1, + RESERVED_2 = 2, + RESERVED_3 = 3, + RESERVED_4 = 4, + RESERVED_5 = 5, + RESERVED_6 = 6, + RESERVED_7 = 7, + RESERVED_8 = 8, + RESERVED_9 = 9, + RESERVED_10 = 10, + RESERVED_11 = 11, + RESERVED_12 = 12, + RESERVED_13 = 13, + RESERVED_14 = 14, + SY = 15 + }; + + inline static const char *InstSyncBOptToString(unsigned val) { + switch (val) { + default: llvm_unreachable("Unkown memory operation"); + case RESERVED_0: return "#0x0"; + case RESERVED_1: return "#0x1"; + case RESERVED_2: return "#0x2"; + case RESERVED_3: return "#0x3"; + case RESERVED_4: return "#0x4"; + case RESERVED_5: return "#0x5"; + case RESERVED_6: return "#0x6"; + case RESERVED_7: return "#0x7"; + case RESERVED_8: return "#0x8"; + case RESERVED_9: return "#0x9"; + case RESERVED_10: return "#0xa"; + case RESERVED_11: return "#0xb"; + case RESERVED_12: return "#0xc"; + case RESERVED_13: return "#0xd"; + case RESERVED_14: return "#0xe"; + case SY: return "sy"; + } + } +} // namespace ARM_ISB + /// isARMLowRegister - Returns true if the register is a low register (r0-r7). /// static inline bool isARMLowRegister(unsigned Reg) { diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 6c3d2476688c..471897de5c1c 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -13,6 +13,8 @@ // //===----------------------------------------------------------------------===// +#include "ARMBuildAttrs.h" +#include "ARMFPUName.h" #include "ARMRegisterInfo.h" #include "ARMUnwindOp.h" #include "ARMUnwindOpAsm.h" @@ -27,6 +29,7 @@ #include "llvm/MC/MCELFSymbolFlags.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSection.h" @@ -36,7 +39,9 @@ #include "llvm/MC/MCValue.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ELF.h" +#include "llvm/Support/FormattedStream.h" #include "llvm/Support/raw_ostream.h" +#include <algorithm> using namespace llvm; @@ -45,8 +50,218 @@ static std::string GetAEABIUnwindPersonalityName(unsigned Index) { return (Twine("__aeabi_unwind_cpp_pr") + Twine(Index)).str(); } +static const char *GetFPUName(unsigned ID) { + switch (ID) { + default: + llvm_unreachable("Unknown FPU kind"); + break; +#define ARM_FPU_NAME(NAME, ID) case ARM::ID: return NAME; +#include "ARMFPUName.def" + } + return NULL; +} + namespace { +class ARMELFStreamer; + +class ARMTargetAsmStreamer : public ARMTargetStreamer { + formatted_raw_ostream &OS; + MCInstPrinter &InstPrinter; + + virtual void emitFnStart(); + virtual void emitFnEnd(); + virtual void emitCantUnwind(); + virtual void emitPersonality(const MCSymbol *Personality); + virtual void emitHandlerData(); + virtual void emitSetFP(unsigned FpReg, 
unsigned SpReg, int64_t Offset = 0); + virtual void emitPad(int64_t Offset); + virtual void emitRegSave(const SmallVectorImpl<unsigned> &RegList, + bool isVector); + + virtual void switchVendor(StringRef Vendor); + virtual void emitAttribute(unsigned Attribute, unsigned Value); + virtual void emitTextAttribute(unsigned Attribute, StringRef String); + virtual void emitFPU(unsigned FPU); + virtual void finishAttributeSection(); + +public: + ARMTargetAsmStreamer(formatted_raw_ostream &OS, MCInstPrinter &InstPrinter); +}; + +ARMTargetAsmStreamer::ARMTargetAsmStreamer(formatted_raw_ostream &OS, + MCInstPrinter &InstPrinter) + : OS(OS), InstPrinter(InstPrinter) {} +void ARMTargetAsmStreamer::emitFnStart() { OS << "\t.fnstart\n"; } +void ARMTargetAsmStreamer::emitFnEnd() { OS << "\t.fnend\n"; } +void ARMTargetAsmStreamer::emitCantUnwind() { OS << "\t.cantunwind\n"; } +void ARMTargetAsmStreamer::emitPersonality(const MCSymbol *Personality) { + OS << "\t.personality " << Personality->getName() << '\n'; +} +void ARMTargetAsmStreamer::emitHandlerData() { OS << "\t.handlerdata\n"; } +void ARMTargetAsmStreamer::emitSetFP(unsigned FpReg, unsigned SpReg, + int64_t Offset) { + OS << "\t.setfp\t"; + InstPrinter.printRegName(OS, FpReg); + OS << ", "; + InstPrinter.printRegName(OS, SpReg); + if (Offset) + OS << ", #" << Offset; + OS << '\n'; +} +void ARMTargetAsmStreamer::emitPad(int64_t Offset) { + OS << "\t.pad\t#" << Offset << '\n'; +} +void ARMTargetAsmStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList, + bool isVector) { + assert(RegList.size() && "RegList should not be empty"); + if (isVector) + OS << "\t.vsave\t{"; + else + OS << "\t.save\t{"; + + InstPrinter.printRegName(OS, RegList[0]); + + for (unsigned i = 1, e = RegList.size(); i != e; ++i) { + OS << ", "; + InstPrinter.printRegName(OS, RegList[i]); + } + + OS << "}\n"; +} +void ARMTargetAsmStreamer::switchVendor(StringRef Vendor) { +} +void ARMTargetAsmStreamer::emitAttribute(unsigned Attribute, unsigned Value) { + OS << "\t.eabi_attribute\t" << Attribute << ", " << Twine(Value) << "\n"; +} +void ARMTargetAsmStreamer::emitTextAttribute(unsigned Attribute, + StringRef String) { + switch (Attribute) { + default: llvm_unreachable("Unsupported Text attribute in ASM Mode"); + case ARMBuildAttrs::CPU_name: + OS << "\t.cpu\t" << String.lower() << "\n"; + break; + } +} +void ARMTargetAsmStreamer::emitFPU(unsigned FPU) { + OS << "\t.fpu\t" << GetFPUName(FPU) << "\n"; +} +void ARMTargetAsmStreamer::finishAttributeSection() { +} + +class ARMTargetELFStreamer : public ARMTargetStreamer { +private: + // This structure holds all attributes, accounting for + // their string/numeric value, so we can later emit them + // in declaration order, keeping all in the same vector. + struct AttributeItem { + enum { + HiddenAttribute = 0, + NumericAttribute, + TextAttribute + } Type; + unsigned Tag; + unsigned IntValue; + StringRef StringValue; + + static bool LessTag(const AttributeItem &LHS, const AttributeItem &RHS) { + return (LHS.Tag < RHS.Tag); + } + }; + + StringRef CurrentVendor; + unsigned FPU; + SmallVector<AttributeItem, 64> Contents; + + const MCSection *AttributeSection; + + // FIXME: this should be in a more generic place, but + // getULEBSize() is in MCAsmInfo and will be moved to MCDwarf + static size_t getULEBSize(int Value) { + size_t Size = 0; + do { + Value >>= 7; + Size += sizeof(int8_t); // Is this really necessary?
+ } while (Value); + return Size; + } + + AttributeItem *getAttributeItem(unsigned Attribute) { + for (size_t i = 0; i < Contents.size(); ++i) + if (Contents[i].Tag == Attribute) + return &Contents[i]; + return 0; + } + + void setAttributeItem(unsigned Attribute, unsigned Value, + bool OverwriteExisting) { + // Look for existing attribute item + if (AttributeItem *Item = getAttributeItem(Attribute)) { + if (!OverwriteExisting) + return; + Item->IntValue = Value; + return; + } + + // Create new attribute item + AttributeItem Item = { + AttributeItem::NumericAttribute, + Attribute, + Value, + StringRef("") + }; + Contents.push_back(Item); + } + + void setAttributeItem(unsigned Attribute, StringRef Value, + bool OverwriteExisting) { + // Look for existing attribute item + if (AttributeItem *Item = getAttributeItem(Attribute)) { + if (!OverwriteExisting) + return; + Item->StringValue = Value; + return; + } + + // Create new attribute item + AttributeItem Item = { + AttributeItem::TextAttribute, + Attribute, + 0, + Value + }; + Contents.push_back(Item); + } + + void emitFPUDefaultAttributes(); + + ARMELFStreamer &getStreamer(); + + virtual void emitFnStart(); + virtual void emitFnEnd(); + virtual void emitCantUnwind(); + virtual void emitPersonality(const MCSymbol *Personality); + virtual void emitHandlerData(); + virtual void emitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset = 0); + virtual void emitPad(int64_t Offset); + virtual void emitRegSave(const SmallVectorImpl<unsigned> &RegList, + bool isVector); + + virtual void switchVendor(StringRef Vendor); + virtual void emitAttribute(unsigned Attribute, unsigned Value); + virtual void emitTextAttribute(unsigned Attribute, StringRef String); + virtual void emitFPU(unsigned FPU); + virtual void finishAttributeSection(); + + size_t calculateContentSize() const; + +public: + ARMTargetELFStreamer() + : ARMTargetStreamer(), CurrentVendor("aeabi"), FPU(ARM::INVALID_FPU), + AttributeSection(0) { + } +}; + /// Extend the generic ELFStreamer class so that it can emit mapping symbols at /// the appropriate points in the object files. These symbols are defined in the /// ARM ELF ABI: infocenter.arm.com/help/topic/com.arm.../IHI0044D_aaelf.pdf. @@ -61,27 +276,29 @@ namespace { /// by MachO. Beware! 
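// [Editor's aside — self-contained sketch, not part of the patch.] The
// getULEBSize() defined above counts one byte per started 7-bit group,
// matching the standard ULEB128 wire format the attribute values are later
// emitted in:
#include <cstdint>
#include <vector>
static std::vector<uint8_t> encodeULEB128(uint64_t Value) {
  std::vector<uint8_t> Out;
  do {
    uint8_t Byte = Value & 0x7f; // low seven payload bits
    Value >>= 7;
    if (Value)
      Byte |= 0x80;              // continuation bit: more bytes follow
    Out.push_back(Byte);
  } while (Value);
  return Out;
}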
class ARMELFStreamer : public MCELFStreamer { public: - ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, - MCCodeEmitter *Emitter, bool IsThumb) - : MCELFStreamer(SK_ARMELFStreamer, Context, TAB, OS, Emitter), + friend class ARMTargetELFStreamer; + + ARMELFStreamer(MCContext &Context, MCTargetStreamer *TargetStreamer, + MCAsmBackend &TAB, raw_ostream &OS, MCCodeEmitter *Emitter, + bool IsThumb) + : MCELFStreamer(Context, TargetStreamer, TAB, OS, Emitter), IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None) { Reset(); } ~ARMELFStreamer() {} + virtual void FinishImpl(); + // ARM exception handling directives - virtual void EmitFnStart(); - virtual void EmitFnEnd(); - virtual void EmitCantUnwind(); - virtual void EmitPersonality(const MCSymbol *Per); - virtual void EmitHandlerData(); - virtual void EmitSetFP(unsigned NewFpReg, - unsigned NewSpReg, - int64_t Offset = 0); - virtual void EmitPad(int64_t Offset); - virtual void EmitRegSave(const SmallVectorImpl<unsigned> &RegList, - bool isVector); + void emitFnStart(); + void emitFnEnd(); + void emitCantUnwind(); + void emitPersonality(const MCSymbol *Per); + void emitHandlerData(); + void emitSetFP(unsigned NewFpReg, unsigned NewSpReg, int64_t Offset = 0); + void emitPad(int64_t Offset); + void emitRegSave(const SmallVectorImpl<unsigned> &RegList, bool isVector); virtual void ChangeSection(const MCSection *Section, const MCExpr *Subsection) { @@ -109,18 +326,17 @@ public: /// This is one of the functions used to emit data into an ELF section, so the /// ARM streamer overrides it to add the appropriate mapping symbol ($d) if /// necessary. - virtual void EmitBytes(StringRef Data, unsigned AddrSpace) { + virtual void EmitBytes(StringRef Data) { EmitDataMappingSymbol(); - MCELFStreamer::EmitBytes(Data, AddrSpace); + MCELFStreamer::EmitBytes(Data); } /// This is one of the functions used to emit data into an ELF section, so the /// ARM streamer overrides it to add the appropriate mapping symbol ($d) if /// necessary. 
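// [Editor's aside — toy model, not part of the patch.] The mapping symbols
// this class maintains ($a = ARM code, $t = Thumb code, $d = data; see the
// "ELF for the ARM Architecture" document cited above) are only emitted when
// the kind of output changes, mirroring the LastEMS bookkeeping kept below:
//
//   const char *symbolToEmit(ElfMappingSymbol Last, ElfMappingSymbol Next) {
//     if (Last == Next) return 0; // still in a run of the same kind
//     switch (Next) {
//     case EMS_ARM:   return "$a";
//     case EMS_Thumb: return "$t";
//     case EMS_Data:  return "$d";
//     default:        return 0;
//     }
//   }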
- virtual void EmitValueImpl(const MCExpr *Value, unsigned Size, - unsigned AddrSpace) { + virtual void EmitValueImpl(const MCExpr *Value, unsigned Size) { EmitDataMappingSymbol(); - MCELFStreamer::EmitValueImpl(Value, Size, AddrSpace); + MCELFStreamer::EmitValueImpl(Value, Size); } virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) { @@ -142,10 +358,6 @@ public: } } - static bool classof(const MCStreamer *S) { - return S->getKind() == SK_ARMELFStreamer; - } - private: enum ElfMappingSymbol { EMS_None, @@ -184,7 +396,7 @@ private: MCELF::SetType(SD, ELF::STT_NOTYPE); MCELF::SetBinding(SD, ELF::STB_LOCAL); SD.setExternal(false); - Symbol->setSection(*getCurrentSection().first); + AssignSection(Symbol, getCurrentSection().first); const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext()); Symbol->setVariableValue(Value); @@ -203,7 +415,8 @@ private: void Reset(); void EmitPersonalityFixup(StringRef Name); - void CollectUnwindOpcodes(); + void FlushPendingOffset(); + void FlushUnwindOpcodes(bool NoHandlerData); void SwitchToEHSection(const char *Prefix, unsigned Type, unsigned Flags, SectionKind Kind, const MCSymbol &Fn); @@ -220,17 +433,236 @@ private: MCSymbol *ExTab; MCSymbol *FnStart; const MCSymbol *Personality; - uint32_t VFPRegSave; // Register mask for {d31-d0} - uint32_t RegSave; // Register mask for {r15-r0} - int64_t SPOffset; - uint16_t FPReg; - int64_t FPOffset; + unsigned PersonalityIndex; + unsigned FPReg; // Frame pointer register + int64_t FPOffset; // Offset: (final frame pointer) - (initial $sp) + int64_t SPOffset; // Offset: (final $sp) - (initial $sp) + int64_t PendingOffset; // Offset: (final $sp) - (emitted $sp) bool UsedFP; bool CantUnwind; + SmallVector<uint8_t, 64> Opcodes; UnwindOpcodeAssembler UnwindOpAsm; }; } // end anonymous namespace +ARMELFStreamer &ARMTargetELFStreamer::getStreamer() { + ARMELFStreamer *S = static_cast<ARMELFStreamer *>(Streamer); + return *S; +} + +void ARMTargetELFStreamer::emitFnStart() { getStreamer().emitFnStart(); } +void ARMTargetELFStreamer::emitFnEnd() { getStreamer().emitFnEnd(); } +void ARMTargetELFStreamer::emitCantUnwind() { getStreamer().emitCantUnwind(); } +void ARMTargetELFStreamer::emitPersonality(const MCSymbol *Personality) { + getStreamer().emitPersonality(Personality); +} +void ARMTargetELFStreamer::emitHandlerData() { + getStreamer().emitHandlerData(); +} +void ARMTargetELFStreamer::emitSetFP(unsigned FpReg, unsigned SpReg, + int64_t Offset) { + getStreamer().emitSetFP(FpReg, SpReg, Offset); +} +void ARMTargetELFStreamer::emitPad(int64_t Offset) { + getStreamer().emitPad(Offset); +} +void ARMTargetELFStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList, + bool isVector) { + getStreamer().emitRegSave(RegList, isVector); +} +void ARMTargetELFStreamer::switchVendor(StringRef Vendor) { + assert(!Vendor.empty() && "Vendor cannot be empty."); + + if (CurrentVendor == Vendor) + return; + + if (!CurrentVendor.empty()) + finishAttributeSection(); + + assert(Contents.empty() && + ".ARM.attributes should be flushed before changing vendor"); + CurrentVendor = Vendor; + +} +void ARMTargetELFStreamer::emitAttribute(unsigned Attribute, unsigned Value) { + setAttributeItem(Attribute, Value, /* OverwriteExisting= */ true); +} +void ARMTargetELFStreamer::emitTextAttribute(unsigned Attribute, + StringRef Value) { + setAttributeItem(Attribute, Value, /* OverwriteExisting= */ true); +} +void ARMTargetELFStreamer::emitFPU(unsigned Value) { + FPU = Value; +} +void ARMTargetELFStreamer::emitFPUDefaultAttributes() { + switch 
(FPU) { + case ARM::VFP: + case ARM::VFPV2: + setAttributeItem(ARMBuildAttrs::VFP_arch, + ARMBuildAttrs::AllowFPv2, + /* OverwriteExisting= */ false); + break; + + case ARM::VFPV3: + setAttributeItem(ARMBuildAttrs::VFP_arch, + ARMBuildAttrs::AllowFPv3A, + /* OverwriteExisting= */ false); + break; + + case ARM::VFPV3_D16: + setAttributeItem(ARMBuildAttrs::VFP_arch, + ARMBuildAttrs::AllowFPv3B, + /* OverwriteExisting= */ false); + break; + + case ARM::VFPV4: + setAttributeItem(ARMBuildAttrs::VFP_arch, + ARMBuildAttrs::AllowFPv4A, + /* OverwriteExisting= */ false); + break; + + case ARM::VFPV4_D16: + setAttributeItem(ARMBuildAttrs::VFP_arch, + ARMBuildAttrs::AllowFPv4B, + /* OverwriteExisting= */ false); + break; + + case ARM::FP_ARMV8: + setAttributeItem(ARMBuildAttrs::VFP_arch, + ARMBuildAttrs::AllowFPARMv8A, + /* OverwriteExisting= */ false); + break; + + case ARM::NEON: + setAttributeItem(ARMBuildAttrs::VFP_arch, + ARMBuildAttrs::AllowFPv3A, + /* OverwriteExisting= */ false); + setAttributeItem(ARMBuildAttrs::Advanced_SIMD_arch, + ARMBuildAttrs::AllowNeon, + /* OverwriteExisting= */ false); + break; + + case ARM::NEON_VFPV4: + setAttributeItem(ARMBuildAttrs::VFP_arch, + ARMBuildAttrs::AllowFPv4A, + /* OverwriteExisting= */ false); + setAttributeItem(ARMBuildAttrs::Advanced_SIMD_arch, + ARMBuildAttrs::AllowNeon2, + /* OverwriteExisting= */ false); + break; + + case ARM::NEON_FP_ARMV8: + case ARM::CRYPTO_NEON_FP_ARMV8: + setAttributeItem(ARMBuildAttrs::VFP_arch, + ARMBuildAttrs::AllowFPARMv8A, + /* OverwriteExisting= */ false); + setAttributeItem(ARMBuildAttrs::Advanced_SIMD_arch, + ARMBuildAttrs::AllowNeonARMv8, + /* OverwriteExisting= */ false); + break; + + default: + report_fatal_error("Unknown FPU: " + Twine(FPU)); + break; + } +} +size_t ARMTargetELFStreamer::calculateContentSize() const { + size_t Result = 0; + for (size_t i = 0; i < Contents.size(); ++i) { + AttributeItem item = Contents[i]; + switch (item.Type) { + case AttributeItem::HiddenAttribute: + break; + case AttributeItem::NumericAttribute: + Result += getULEBSize(item.Tag); + Result += getULEBSize(item.IntValue); + break; + case AttributeItem::TextAttribute: + Result += getULEBSize(item.Tag); + Result += item.StringValue.size() + 1; // string + '\0' + break; + } + } + return Result; +} +void ARMTargetELFStreamer::finishAttributeSection() { + // <format-version> + // [ <section-length> "vendor-name" + // [ <file-tag> <size> <attribute>* + // | <section-tag> <size> <section-number>* 0 <attribute>* + // | <symbol-tag> <size> <symbol-number>* 0 <attribute>* + // ]+ + // ]* + + if (FPU != ARM::INVALID_FPU) + emitFPUDefaultAttributes(); + + if (Contents.empty()) + return; + + std::sort(Contents.begin(), Contents.end(), AttributeItem::LessTag); + + ARMELFStreamer &Streamer = getStreamer(); + + // Switch to .ARM.attributes section + if (AttributeSection) { + Streamer.SwitchSection(AttributeSection); + } else { + AttributeSection = + Streamer.getContext().getELFSection(".ARM.attributes", + ELF::SHT_ARM_ATTRIBUTES, + 0, + SectionKind::getMetadata()); + Streamer.SwitchSection(AttributeSection); + + // Format version + Streamer.EmitIntValue(0x41, 1); + } + + // Vendor size + Vendor name + '\0' + const size_t VendorHeaderSize = 4 + CurrentVendor.size() + 1; + + // Tag + Tag Size + const size_t TagHeaderSize = 1 + 4; + + const size_t ContentsSize = calculateContentSize(); + + Streamer.EmitIntValue(VendorHeaderSize + TagHeaderSize + ContentsSize, 4); + Streamer.EmitBytes(CurrentVendor); + Streamer.EmitIntValue(0, 1); // '\0' + + 
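// [Editor's aside — byte-layout sketch, not part of the patch.] Spelled out,
// the vendor subsection being emitted here (cf. the grammar comment at the
// top of finishAttributeSection) is:
//
//   0x41                                      format-version ('A')
//   <u32 section-length> "aeabi" 0x00         vendor header
//   0x01 (ARMBuildAttrs::File) <u32 size>     tag header
//   { <ULEB128 tag> <ULEB128 value> }*        numeric attributes
//   { <ULEB128 tag> <string> 0x00 }*          text attributes
//
// which is exactly what VendorHeaderSize, TagHeaderSize and
// calculateContentSize() add up above.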
Streamer.EmitIntValue(ARMBuildAttrs::File, 1); + Streamer.EmitIntValue(TagHeaderSize + ContentsSize, 4); + + // Size should have been accounted for already, now + // emit each field as its type (ULEB or String) + for (size_t i = 0; i < Contents.size(); ++i) { + AttributeItem item = Contents[i]; + Streamer.EmitULEB128IntValue(item.Tag); + switch (item.Type) { + default: llvm_unreachable("Invalid attribute type"); + case AttributeItem::NumericAttribute: + Streamer.EmitULEB128IntValue(item.IntValue); + break; + case AttributeItem::TextAttribute: + Streamer.EmitBytes(item.StringValue.upper()); + Streamer.EmitIntValue(0, 1); // '\0' + break; + } + } + + Contents.clear(); + FPU = ARM::INVALID_FPU; +} + +void ARMELFStreamer::FinishImpl() { + MCTargetStreamer &TS = getTargetStreamer(); + ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS); + ATS.finishAttributeSection(); + + MCELFStreamer::FinishImpl(); +} + inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix, unsigned Type, unsigned Flags, @@ -279,81 +711,37 @@ inline void ARMELFStreamer::SwitchToExIdxSection(const MCSymbol &FnStart) { } void ARMELFStreamer::Reset() { - const MCRegisterInfo &MRI = getContext().getRegisterInfo(); - ExTab = NULL; FnStart = NULL; Personality = NULL; - VFPRegSave = 0; - RegSave = 0; - FPReg = MRI.getEncodingValue(ARM::SP); + PersonalityIndex = NUM_PERSONALITY_INDEX; + FPReg = ARM::SP; FPOffset = 0; SPOffset = 0; + PendingOffset = 0; UsedFP = false; CantUnwind = false; + Opcodes.clear(); UnwindOpAsm.Reset(); } -// Add the R_ARM_NONE fixup at the same position -void ARMELFStreamer::EmitPersonalityFixup(StringRef Name) { - const MCSymbol *PersonalitySym = getContext().GetOrCreateSymbol(Name); - - const MCSymbolRefExpr *PersonalityRef = - MCSymbolRefExpr::Create(PersonalitySym, - MCSymbolRefExpr::VK_ARM_NONE, - getContext()); - - AddValueSymbols(PersonalityRef); - MCDataFragment *DF = getOrCreateDataFragment(); - DF->getFixups().push_back( - MCFixup::Create(DF->getContents().size(), PersonalityRef, - MCFixup::getKindForSize(4, false))); -} - -void ARMELFStreamer::CollectUnwindOpcodes() { - if (UsedFP) { - UnwindOpAsm.EmitSetFP(FPReg); - UnwindOpAsm.EmitSPOffset(-FPOffset); - } else { - UnwindOpAsm.EmitSPOffset(SPOffset); - } - UnwindOpAsm.EmitVFPRegSave(VFPRegSave); - UnwindOpAsm.EmitRegSave(RegSave); - UnwindOpAsm.Finalize(); -} - -void ARMELFStreamer::EmitFnStart() { +void ARMELFStreamer::emitFnStart() { assert(FnStart == 0); FnStart = getContext().CreateTempSymbol(); EmitLabel(FnStart); } -void ARMELFStreamer::EmitFnEnd() { +void ARMELFStreamer::emitFnEnd() { assert(FnStart && ".fnstart must precede .fnend"); // Emit unwind opcodes if there is no .handlerdata directive - if (!ExTab && !CantUnwind) { - CollectUnwindOpcodes(); - - unsigned PersonalityIndex = UnwindOpAsm.getPersonalityIndex(); - if (PersonalityIndex == AEABI_UNWIND_CPP_PR1 || - PersonalityIndex == AEABI_UNWIND_CPP_PR2) { - // For the __aeabi_unwind_cpp_pr1 and __aeabi_unwind_cpp_pr2, we have to - // emit the unwind opcodes in the corresponding ".ARM.extab" section, and - // then emit a reference to these unwind opcodes in the second word of - // the exception index table entry.
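// [Editor's aside — layout note, not part of the patch.] Context for the
// rewritten emitFnEnd()/FlushUnwindOpcodes() pair: each .ARM.exidx entry is
// two 32-bit words — a PREL31 reference to the function start, followed by
// either EXIDX_CANTUNWIND, a PREL31 reference into .ARM.extab, or, for
// compact model 0, the four unwind opcode bytes placed inline. That inline
// case is why the pr0 path asserts Opcodes.size() == 4u before EmitBytes().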
- SwitchToExTabSection(*FnStart); - ExTab = getContext().CreateTempSymbol(); - EmitLabel(ExTab); - EmitBytes(UnwindOpAsm.data(), 0); - } - } + if (!ExTab && !CantUnwind) + FlushUnwindOpcodes(true); // Emit the exception index table entry SwitchToExIdxSection(*FnStart); - unsigned PersonalityIndex = UnwindOpAsm.getPersonalityIndex(); if (PersonalityIndex < NUM_PERSONALITY_INDEX) EmitPersonalityFixup(GetAEABIUnwindPersonalityName(PersonalityIndex)); @@ -362,37 +750,80 @@ void ARMELFStreamer::EmitFnEnd() { MCSymbolRefExpr::VK_ARM_PREL31, getContext()); - EmitValue(FnStartRef, 4, 0); + EmitValue(FnStartRef, 4); if (CantUnwind) { - EmitIntValue(EXIDX_CANTUNWIND, 4, 0); + EmitIntValue(EXIDX_CANTUNWIND, 4); } else if (ExTab) { // Emit a reference to the unwind opcodes in the ".ARM.extab" section. const MCSymbolRefExpr *ExTabEntryRef = MCSymbolRefExpr::Create(ExTab, MCSymbolRefExpr::VK_ARM_PREL31, getContext()); - EmitValue(ExTabEntryRef, 4, 0); + EmitValue(ExTabEntryRef, 4); } else { // For the __aeabi_unwind_cpp_pr0, we have to emit the unwind opcodes in // the second word of exception index table entry. The size of the unwind // opcodes should always be 4 bytes. assert(PersonalityIndex == AEABI_UNWIND_CPP_PR0 && "Compact model must use __aeabi_cpp_unwind_pr0 as personality"); - assert(UnwindOpAsm.size() == 4u && + assert(Opcodes.size() == 4u && "Unwind opcode size for __aeabi_cpp_unwind_pr0 must be equal to 4"); - EmitBytes(UnwindOpAsm.data(), 0); + EmitBytes(StringRef(reinterpret_cast<const char*>(Opcodes.data()), + Opcodes.size())); } + // Switch to the section containing FnStart + SwitchSection(&FnStart->getSection()); + // Clean exception handling frame information Reset(); } -void ARMELFStreamer::EmitCantUnwind() { - CantUnwind = true; +void ARMELFStreamer::emitCantUnwind() { CantUnwind = true; } + +// Add the R_ARM_NONE fixup at the same position +void ARMELFStreamer::EmitPersonalityFixup(StringRef Name) { + const MCSymbol *PersonalitySym = getContext().GetOrCreateSymbol(Name); + + const MCSymbolRefExpr *PersonalityRef = MCSymbolRefExpr::Create( + PersonalitySym, MCSymbolRefExpr::VK_ARM_NONE, getContext()); + + AddValueSymbols(PersonalityRef); + MCDataFragment *DF = getOrCreateDataFragment(); + DF->getFixups().push_back(MCFixup::Create(DF->getContents().size(), + PersonalityRef, + MCFixup::getKindForSize(4, false))); +} + +void ARMELFStreamer::FlushPendingOffset() { + if (PendingOffset != 0) { + UnwindOpAsm.EmitSPOffset(-PendingOffset); + PendingOffset = 0; + } } -void ARMELFStreamer::EmitHandlerData() { +void ARMELFStreamer::FlushUnwindOpcodes(bool NoHandlerData) { + // Emit the unwind opcode to restore $sp. + if (UsedFP) { + const MCRegisterInfo *MRI = getContext().getRegisterInfo(); + int64_t LastRegSaveSPOffset = SPOffset - PendingOffset; + UnwindOpAsm.EmitSPOffset(LastRegSaveSPOffset - FPOffset); + UnwindOpAsm.EmitSetSP(MRI->getEncodingValue(FPReg)); + } else { + FlushPendingOffset(); + } + + // Finalize the unwind opcode sequence + UnwindOpAsm.Finalize(PersonalityIndex, Opcodes); + + // For compact model 0, we have to emit the unwind opcodes in the .ARM.exidx + // section. Thus, we don't have to create an entry in the .ARM.extab + // section. + if (NoHandlerData && PersonalityIndex == AEABI_UNWIND_CPP_PR0) + return; + + // Switch to .ARM.extab section. 
SwitchToExTabSection(*FnStart); // Create .ARM.extab label for offset in .ARM.exidx @@ -400,73 +831,117 @@ void ARMELFStreamer::EmitHandlerData() { ExTab = getContext().CreateTempSymbol(); EmitLabel(ExTab); - // Emit Personality - assert(Personality && ".personality directive must preceed .handlerdata"); - - const MCSymbolRefExpr *PersonalityRef = - MCSymbolRefExpr::Create(Personality, - MCSymbolRefExpr::VK_ARM_PREL31, - getContext()); + // Emit personality + if (Personality) { + const MCSymbolRefExpr *PersonalityRef = + MCSymbolRefExpr::Create(Personality, + MCSymbolRefExpr::VK_ARM_PREL31, + getContext()); - EmitValue(PersonalityRef, 4, 0); + EmitValue(PersonalityRef, 4); + } // Emit unwind opcodes - CollectUnwindOpcodes(); - EmitBytes(UnwindOpAsm.data(), 0); + EmitBytes(StringRef(reinterpret_cast<const char *>(Opcodes.data()), + Opcodes.size())); + + // According to ARM EHABI section 9.2, if the __aeabi_unwind_cpp_pr1() or + // __aeabi_unwind_cpp_pr2() is used, then the handler data must be emitted + // after the unwind opcodes. The handler data consists of several 32-bit + // words, and should be terminated by zero. + // + // In case that the .handlerdata directive is not specified by the + // programmer, we should emit zero to terminate the handler data. + if (NoHandlerData && !Personality) + EmitIntValue(0, 4); } -void ARMELFStreamer::EmitPersonality(const MCSymbol *Per) { +void ARMELFStreamer::emitHandlerData() { FlushUnwindOpcodes(false); } + +void ARMELFStreamer::emitPersonality(const MCSymbol *Per) { Personality = Per; UnwindOpAsm.setPersonality(Per); } -void ARMELFStreamer::EmitSetFP(unsigned NewFPReg, - unsigned NewSPReg, +void ARMELFStreamer::emitSetFP(unsigned NewFPReg, unsigned NewSPReg, int64_t Offset) { - assert(SPOffset == 0 && - "Current implementation assumes .setfp precedes .pad"); - - const MCRegisterInfo &MRI = getContext().getRegisterInfo(); - - uint16_t NewFPRegEncVal = MRI.getEncodingValue(NewFPReg); -#ifndef NDEBUG - uint16_t NewSPRegEncVal = MRI.getEncodingValue(NewSPReg); -#endif - - assert((NewSPReg == ARM::SP || NewSPRegEncVal == FPReg) && + assert((NewSPReg == ARM::SP || NewSPReg == FPReg) && "the operand of .setfp directive should be either $sp or $fp"); UsedFP = true; - FPReg = NewFPRegEncVal; - FPOffset = Offset; -} + FPReg = NewFPReg; -void ARMELFStreamer::EmitPad(int64_t Offset) { - SPOffset += Offset; + if (NewSPReg == ARM::SP) + FPOffset = SPOffset + Offset; + else + FPOffset += Offset; } -void ARMELFStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList, - bool IsVector) { - const MCRegisterInfo &MRI = getContext().getRegisterInfo(); +void ARMELFStreamer::emitPad(int64_t Offset) { + // Track the change of the $sp offset + SPOffset -= Offset; -#ifndef NDEBUG - unsigned Max = IsVector ? 32 : 16; -#endif - uint32_t &RegMask = IsVector ? VFPRegSave : RegSave; + // To squash multiple .pad directives, we should delay the unwind opcode + // until the .save, .vsave, .handlerdata, or .fnend directives. + PendingOffset -= Offset; +} +void ARMELFStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList, + bool IsVector) { + // Collect the registers in the register list + unsigned Count = 0; + uint32_t Mask = 0; + const MCRegisterInfo *MRI = getContext().getRegisterInfo(); for (size_t i = 0; i < RegList.size(); ++i) { - unsigned Reg = MRI.getEncodingValue(RegList[i]); - assert(Reg < Max && "Register encoded value out of range"); - RegMask |= 1u << Reg; + unsigned Reg = MRI->getEncodingValue(RegList[i]); + assert(Reg < (IsVector ? 
32U : 16U) && "Register out of range"); + unsigned Bit = (1u << Reg); + if ((Mask & Bit) == 0) { + Mask |= Bit; + ++Count; + } } + + // Track the change the $sp offset: For the .save directive, the + // corresponding push instruction will decrease the $sp by (4 * Count). + // For the .vsave directive, the corresponding vpush instruction will + // decrease $sp by (8 * Count). + SPOffset -= Count * (IsVector ? 8 : 4); + + // Emit the opcode + FlushPendingOffset(); + if (IsVector) + UnwindOpAsm.EmitVFPRegSave(Mask); + else + UnwindOpAsm.EmitRegSave(Mask); } namespace llvm { + +MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, + bool isVerboseAsm, bool useLoc, bool useCFI, + bool useDwarfDirectory, + MCInstPrinter *InstPrint, MCCodeEmitter *CE, + MCAsmBackend *TAB, bool ShowInst) { + ARMTargetAsmStreamer *S = new ARMTargetAsmStreamer(OS, *InstPrint); + + return llvm::createAsmStreamer(Ctx, S, OS, isVerboseAsm, useLoc, useCFI, + useDwarfDirectory, InstPrint, CE, TAB, + ShowInst); +} + MCELFStreamer* createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, MCCodeEmitter *Emitter, bool RelaxAll, bool NoExecStack, bool IsThumb) { - ARMELFStreamer *S = new ARMELFStreamer(Context, TAB, OS, Emitter, IsThumb); + ARMTargetELFStreamer *TS = new ARMTargetELFStreamer(); + ARMELFStreamer *S = + new ARMELFStreamer(Context, TS, TAB, OS, Emitter, IsThumb); + // FIXME: This should eventually end up somewhere else where more + // intelligent flag decisions can be made. For now we are just maintaining + // the status quo for ARM and setting EF_ARM_EABI_VER5 as the default. + S->getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5); + if (RelaxAll) S->getAssembler().setRelaxAll(true); if (NoExecStack) diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h deleted file mode 100644 index 77ae5d23628e..000000000000 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h +++ /dev/null @@ -1,27 +0,0 @@ -//===-- ARMELFStreamer.h - ELF Streamer for ARM ------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements ELF streamer information for the ARM backend. 
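Distilling the mask construction from emitRegSave above (encoded register numbers as input; sketch only): duplicate registers set the same bit, so Count, and with it the $sp adjustment, stays in sync with what the push or vpush actually does.

#include <cstdint>

// Returns the number of bytes the matching push/vpush subtracts from
// $sp: 4 per core register, 8 per VFP double register, each counted once.
static int64_t buildSaveMask(const unsigned *Regs, unsigned N, bool IsVector,
                             uint32_t &Mask) {
  unsigned Count = 0;
  Mask = 0;
  for (unsigned i = 0; i < N; ++i) {
    uint32_t Bit = 1u << Regs[i]; // Regs[i] is the encoded value
    if ((Mask & Bit) == 0) {
      Mask |= Bit;
      ++Count;
    }
  }
  return (int64_t)Count * (IsVector ? 8 : 4);
}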
-// -//===----------------------------------------------------------------------===// - -#ifndef ARM_ELF_STREAMER_H -#define ARM_ELF_STREAMER_H - -#include "llvm/MC/MCELFStreamer.h" - -namespace llvm { - - MCELFStreamer* createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - bool RelaxAll, bool NoExecStack, - bool IsThumb); -} - -#endif // ARM_ELF_STREAMER_H diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp index c1aab9c72cca..ad796e660e96 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp @@ -49,8 +49,6 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo() { Code16Directive = ".code\t16"; Code32Directive = ".code\t32"; - WeakRefDirective = "\t.weak\t"; - HasLEB128 = true; SupportsDebugInformation = true; diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h index f0b289c6f3b6..e1f716d936ad 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h @@ -15,6 +15,7 @@ #define LLVM_ARMTARGETASMINFO_H #include "llvm/MC/MCAsmInfoDarwin.h" +#include "llvm/MC/MCAsmInfoELF.h" namespace llvm { @@ -24,7 +25,7 @@ namespace llvm { explicit ARMMCAsmInfoDarwin(); }; - class ARMELFMCAsmInfo : public MCAsmInfo { + class ARMELFMCAsmInfo : public MCAsmInfoELF { virtual void anchor(); public: explicit ARMELFMCAsmInfo(); diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index 7a59a7dd5055..4382d0d97144 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -58,8 +58,7 @@ public: } bool isTargetDarwin() const { Triple TT(STI.getTargetTriple()); - Triple::OSType OS = TT.getOS(); - return OS == Triple::Darwin || OS == Triple::MacOSX || OS == Triple::IOS; + return TT.isOSDarwin(); } unsigned getMachineSoImmOpValue(unsigned SoImm) const; @@ -315,6 +314,8 @@ public: unsigned EncodedValue) const; unsigned NEONThumb2DupPostEncoder(const MCInst &MI, unsigned EncodedValue) const; + unsigned NEONThumb2V8PostEncoder(const MCInst &MI, + unsigned EncodedValue) const; unsigned VFPThumb2PostEncoder(const MCInst &MI, unsigned EncodedValue) const; @@ -389,6 +390,17 @@ unsigned ARMMCCodeEmitter::NEONThumb2DupPostEncoder(const MCInst &MI, return EncodedValue; } +/// Post-process encoded NEON v8 instructions, and rewrite them to Thumb2 form +/// if we are in Thumb2. +unsigned ARMMCCodeEmitter::NEONThumb2V8PostEncoder(const MCInst &MI, + unsigned EncodedValue) const { + if (isThumb2()) { + EncodedValue |= 0xC000000; // Set bits 27-26 + } + + return EncodedValue; +} + /// VFPThumb2PostEncoder - Post-process encoded VFP instructions and rewrite /// them to their Thumb2 form if we are currently in Thumb2 mode. unsigned ARMMCCodeEmitter:: @@ -407,7 +419,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl<MCFixup> &Fixups) const { if (MO.isReg()) { unsigned Reg = MO.getReg(); - unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg); + unsigned RegNo = CTX.getRegisterInfo()->getEncodingValue(Reg); // Q registers are encoded as 2x their register number. 
switch (Reg) { @@ -436,7 +448,7 @@ EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx, unsigned &Reg, const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx + 1); - Reg = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); + Reg = CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); int32_t SImm = MO1.getImm(); bool isAdd = true; @@ -625,8 +637,14 @@ getARMBLXTargetOpValue(const MCInst &MI, unsigned OpIdx, uint32_t ARMMCCodeEmitter:: getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const { - unsigned Val = - ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_uncondbranch, Fixups); + unsigned Val = 0; + const MCOperand MO = MI.getOperand(OpIdx); + + if(MO.isExpr()) + return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_uncondbranch, Fixups); + else + Val = MO.getImm() >> 1; + bool I = (Val & 0x800000); bool J1 = (Val & 0x400000); bool J2 = (Val & 0x200000); @@ -652,7 +670,7 @@ getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, if (MO.isExpr()) return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_adr_pcrel_12, Fixups); - int32_t offset = MO.getImm(); + int64_t offset = MO.getImm(); uint32_t Val = 0x2000; int SoImmVal; @@ -724,8 +742,8 @@ getThumbAddrModeRegRegOpValue(const MCInst &MI, unsigned OpIdx, // {2-0} = Rn const MCOperand &MO1 = MI.getOperand(OpIdx); const MCOperand &MO2 = MI.getOperand(OpIdx + 1); - unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO1.getReg()); - unsigned Rm = CTX.getRegisterInfo().getEncodingValue(MO2.getReg()); + unsigned Rn = CTX.getRegisterInfo()->getEncodingValue(MO1.getReg()); + unsigned Rm = CTX.getRegisterInfo()->getEncodingValue(MO2.getReg()); return (Rm << 3) | Rn; } @@ -741,12 +759,12 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx, // If The first operand isn't a register, we have a label reference. const MCOperand &MO = MI.getOperand(OpIdx); if (!MO.isReg()) { - Reg = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC. + Reg = CTX.getRegisterInfo()->getEncodingValue(ARM::PC); // Rn is PC. Imm12 = 0; - isAdd = false ; // 'U' bit is set as part of the fixup. if (MO.isExpr()) { const MCExpr *Expr = MO.getExpr(); + isAdd = false ; // 'U' bit is set as part of the fixup. MCFixupKind Kind; if (isThumb2()) @@ -759,8 +777,10 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx, } else { Reg = ARM::PC; int32_t Offset = MO.getImm(); - // FIXME: Handle #-0. - if (Offset < 0) { + if (Offset == INT32_MIN) { + Offset = 0; + isAdd = false; + } else if (Offset < 0) { Offset *= -1; isAdd = false; } @@ -821,7 +841,7 @@ getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx, // If The first operand isn't a register, we have a label reference. const MCOperand &MO = MI.getOperand(OpIdx); if (!MO.isReg()) { - Reg = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC. + Reg = CTX.getRegisterInfo()->getEncodingValue(ARM::PC); // Rn is PC. Imm8 = 0; isAdd = false ; // 'U' bit is set as part of the fixup. 
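For the getUnconditionalBranchTargetOpValue hunk above: Val is the byte offset shifted right by one because Thumb branch targets are halfword-aligned, and the I/J1/J2 extraction feeds the Thumb2 B.W encoding, where the ARM ARM defines I1 = NOT(J1 XOR S) and I2 = NOT(J2 XOR S). A sketch of the bit rewrite that follows (reconstructed from those encoding rules, since the hunk is cut off here):

#include <cstdint>

// Rewrite offset bits 22/21 into the J1/J2 form required by the T2
// B.W encoding; S is the sign bit of the 24-bit halfword offset.
static uint32_t fixupT2BranchJBits(uint32_t Val) {
  bool S  = Val & 0x800000;
  bool I1 = Val & 0x400000;
  bool I2 = Val & 0x200000;
  bool J1 = !(I1 ^ S); // inverse of I1 = NOT(J1 XOR S)
  bool J2 = !(I2 ^ S);
  Val &= ~0x600000u;
  if (J1) Val |= 0x400000u;
  if (J2) Val |= 0x200000u;
  return Val;
}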
@@ -857,7 +877,7 @@ getT2AddrModeImm0_1020s4OpValue(const MCInst &MI, unsigned OpIdx, // {7-0} = imm8 const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx + 1); - unsigned Reg = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); + unsigned Reg = CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); unsigned Imm8 = MO1.getImm(); return (Reg << 8) | Imm8; } @@ -940,8 +960,8 @@ getLdStSORegOpValue(const MCInst &MI, unsigned OpIdx, const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx+1); const MCOperand &MO2 = MI.getOperand(OpIdx+2); - unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); - unsigned Rm = CTX.getRegisterInfo().getEncodingValue(MO1.getReg()); + unsigned Rn = CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); + unsigned Rm = CTX.getRegisterInfo()->getEncodingValue(MO1.getReg()); unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm()); bool isAdd = ARM_AM::getAM2Op(MO2.getImm()) == ARM_AM::add; ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(MO2.getImm()); @@ -975,7 +995,7 @@ getAddrMode2OpValue(const MCInst &MI, unsigned OpIdx, // {12} isAdd // {11-0} imm12/Rm const MCOperand &MO = MI.getOperand(OpIdx); - unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); + unsigned Rn = CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); uint32_t Binary = getAddrMode2OffsetOpValue(MI, OpIdx + 1, Fixups); Binary |= Rn << 14; return Binary; @@ -998,7 +1018,7 @@ getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx, ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(Imm); Binary <<= 7; // Shift amount is bits [11:7] Binary |= getShiftOp(ShOp) << 5; // Shift type is bits [6:5] - Binary |= CTX.getRegisterInfo().getEncodingValue(MO.getReg()); // Rm is bits [3:0] + Binary |= CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); // Rm is bits [3:0] } return Binary | (isAdd << 12) | (isReg << 13); } @@ -1011,7 +1031,7 @@ getPostIdxRegOpValue(const MCInst &MI, unsigned OpIdx, const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx+1); bool isAdd = MO1.getImm() != 0; - return CTX.getRegisterInfo().getEncodingValue(MO.getReg()) | (isAdd << 4); + return CTX.getRegisterInfo()->getEncodingValue(MO.getReg()) | (isAdd << 4); } uint32_t ARMMCCodeEmitter:: @@ -1029,7 +1049,7 @@ getAddrMode3OffsetOpValue(const MCInst &MI, unsigned OpIdx, uint32_t Imm8 = ARM_AM::getAM3Offset(Imm); // if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8 if (!isImm) - Imm8 = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); + Imm8 = CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); return Imm8 | (isAdd << 8) | (isImm << 9); } @@ -1047,7 +1067,7 @@ getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx, // If The first operand isn't a register, we have a label reference. if (!MO.isReg()) { - unsigned Rn = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC. + unsigned Rn = CTX.getRegisterInfo()->getEncodingValue(ARM::PC); // Rn is PC. 
assert(MO.isExpr() && "Unexpected machine operand type!"); const MCExpr *Expr = MO.getExpr(); @@ -1057,14 +1077,14 @@ getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx, ++MCNumCPRelocations; return (Rn << 9) | (1 << 13); } - unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); + unsigned Rn = CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); unsigned Imm = MO2.getImm(); bool isAdd = ARM_AM::getAM3Op(Imm) == ARM_AM::add; bool isImm = MO1.getReg() == 0; uint32_t Imm8 = ARM_AM::getAM3Offset(Imm); // if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8 if (!isImm) - Imm8 = CTX.getRegisterInfo().getEncodingValue(MO1.getReg()); + Imm8 = CTX.getRegisterInfo()->getEncodingValue(MO1.getReg()); return (Rn << 9) | Imm8 | (isAdd << 8) | (isImm << 13); } @@ -1092,7 +1112,7 @@ getAddrModeISOpValue(const MCInst &MI, unsigned OpIdx, // {2-0} = Rn const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx + 1); - unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); + unsigned Rn = CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); unsigned Imm5 = MO1.getImm(); return ((Imm5 & 0x1f) << 3) | Rn; } @@ -1119,7 +1139,7 @@ getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx, // If The first operand isn't a register, we have a label reference. const MCOperand &MO = MI.getOperand(OpIdx); if (!MO.isReg()) { - Reg = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC. + Reg = CTX.getRegisterInfo()->getEncodingValue(ARM::PC); // Rn is PC. Imm8 = 0; isAdd = false; // 'U' bit is handled as part of the fixup. @@ -1165,7 +1185,7 @@ getSORegRegOpValue(const MCInst &MI, unsigned OpIdx, ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO2.getImm()); // Encode Rm. - unsigned Binary = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); + unsigned Binary = CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); // Encode the shift opcode. unsigned SBits = 0; @@ -1190,7 +1210,7 @@ getSORegRegOpValue(const MCInst &MI, unsigned OpIdx, // Encode the shift operation Rs. // Encode Rs bit[11:8]. assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0); - return Binary | (CTX.getRegisterInfo().getEncodingValue(Rs) << ARMII::RegRsShift); + return Binary | (CTX.getRegisterInfo()->getEncodingValue(Rs) << ARMII::RegRsShift); } unsigned ARMMCCodeEmitter:: @@ -1209,7 +1229,7 @@ getSORegImmOpValue(const MCInst &MI, unsigned OpIdx, ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO1.getImm()); // Encode Rm. - unsigned Binary = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); + unsigned Binary = CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); // Encode the shift opcode. unsigned SBits = 0; @@ -1248,9 +1268,9 @@ getT2AddrModeSORegOpValue(const MCInst &MI, unsigned OpNum, // Encoded as [Rn, Rm, imm]. // FIXME: Needs fixup support. - unsigned Value = CTX.getRegisterInfo().getEncodingValue(MO1.getReg()); + unsigned Value = CTX.getRegisterInfo()->getEncodingValue(MO1.getReg()); Value <<= 4; - Value |= CTX.getRegisterInfo().getEncodingValue(MO2.getReg()); + Value |= CTX.getRegisterInfo()->getEncodingValue(MO2.getReg()); Value <<= 2; Value |= MO3.getImm(); @@ -1264,7 +1284,7 @@ getT2AddrModeImm8OpValue(const MCInst &MI, unsigned OpNum, const MCOperand &MO2 = MI.getOperand(OpNum+1); // FIXME: Needs fixup support. 
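The return expressions in getAddrMode3OpValue above pack four fields into one immediate; written out as a helper with the same layout (illustrative name, sketch only):

// {13} 1 == imm8, 0 == register; {12-9} Rn; {8} isAdd; {7-0} imm8 or Rm.
static unsigned packAddrMode3(unsigned Rn, bool IsImm, bool IsAdd,
                              unsigned Imm8OrRm) {
  return ((unsigned)IsImm << 13) | (Rn << 9) | ((unsigned)IsAdd << 8) |
         (Imm8OrRm & 0xff);
}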
- unsigned Value = CTX.getRegisterInfo().getEncodingValue(MO1.getReg()); + unsigned Value = CTX.getRegisterInfo()->getEncodingValue(MO1.getReg()); // Even though the immediate is 8 bits long, we need 9 bits in order // to represent the (inverse of the) sign bit. @@ -1326,7 +1346,7 @@ getT2SORegOpValue(const MCInst &MI, unsigned OpIdx, ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO1.getImm()); // Encode Rm. - unsigned Binary = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); + unsigned Binary = CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); // Encode the shift opcode. unsigned SBits = 0; @@ -1359,8 +1379,8 @@ getBitfieldInvertedMaskOpValue(const MCInst &MI, unsigned Op, // msb of the mask. const MCOperand &MO = MI.getOperand(Op); uint32_t v = ~MO.getImm(); - uint32_t lsb = CountTrailingZeros_32(v); - uint32_t msb = (32 - CountLeadingZeros_32 (v)) - 1; + uint32_t lsb = countTrailingZeros(v); + uint32_t msb = (32 - countLeadingZeros (v)) - 1; assert (v != 0 && lsb < 32 && msb < 32 && "Illegal bitfield mask!"); return lsb | (msb << 5); } @@ -1382,7 +1402,7 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op, if (SPRRegs || DPRRegs) { // VLDM/VSTM - unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg); + unsigned RegNo = CTX.getRegisterInfo()->getEncodingValue(Reg); unsigned NumRegs = (MI.getNumOperands() - Op) & 0xff; Binary |= (RegNo & 0x1f) << 8; if (SPRRegs) @@ -1391,7 +1411,7 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op, Binary |= NumRegs * 2; } else { for (unsigned I = Op, E = MI.getNumOperands(); I < E; ++I) { - unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(MI.getOperand(I).getReg()); + unsigned RegNo = CTX.getRegisterInfo()->getEncodingValue(MI.getOperand(I).getReg()); Binary |= 1 << RegNo; } } @@ -1407,7 +1427,7 @@ getAddrMode6AddressOpValue(const MCInst &MI, unsigned Op, const MCOperand &Reg = MI.getOperand(Op); const MCOperand &Imm = MI.getOperand(Op + 1); - unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg.getReg()); + unsigned RegNo = CTX.getRegisterInfo()->getEncodingValue(Reg.getReg()); unsigned Align = 0; switch (Imm.getImm()) { @@ -1430,7 +1450,7 @@ getAddrMode6OneLane32AddressOpValue(const MCInst &MI, unsigned Op, const MCOperand &Reg = MI.getOperand(Op); const MCOperand &Imm = MI.getOperand(Op + 1); - unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg.getReg()); + unsigned RegNo = CTX.getRegisterInfo()->getEncodingValue(Reg.getReg()); unsigned Align = 0; switch (Imm.getImm()) { @@ -1456,7 +1476,7 @@ getAddrMode6DupAddressOpValue(const MCInst &MI, unsigned Op, const MCOperand &Reg = MI.getOperand(Op); const MCOperand &Imm = MI.getOperand(Op + 1); - unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg.getReg()); + unsigned RegNo = CTX.getRegisterInfo()->getEncodingValue(Reg.getReg()); unsigned Align = 0; switch (Imm.getImm()) { @@ -1475,7 +1495,7 @@ getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const { const MCOperand &MO = MI.getOperand(Op); if (MO.getReg() == 0) return 0x0D; - return CTX.getRegisterInfo().getEncodingValue(MO.getReg()); + return CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); } unsigned ARMMCCodeEmitter:: diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index f09fb5a94fd8..a99de0e78230 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -12,30 +12,73 @@ 
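Before moving on to ARMMCTargetDesc.cpp: the getBitfieldInvertedMaskOpValue hunk above swaps CountTrailingZeros_32/CountLeadingZeros_32 for their renamed successors without changing the math, which is compact enough to restate (compiler builtins stand in for the LLVM helpers; sketch only):

#include <cassert>
#include <cstdint>

// BFC/BFI encode the field as lsb | (msb << 5), recovered from the
// inverted mask: trailing zeros give lsb, the top set bit gives msb.
static unsigned bitfieldInvertedMask(uint32_t Imm) {
  uint32_t v = ~Imm;
  assert(v != 0 && "Illegal bitfield mask!");
  uint32_t lsb = (uint32_t)__builtin_ctz(v);       // countTrailingZeros
  uint32_t msb = 31u - (uint32_t)__builtin_clz(v); // == (32 - clz) - 1
  return lsb | (msb << 5);
}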
//===----------------------------------------------------------------------===// #include "ARMBaseInfo.h" -#include "ARMELFStreamer.h" #include "ARMMCAsmInfo.h" #include "ARMMCTargetDesc.h" #include "InstPrinter/ARMInstPrinter.h" #include "llvm/ADT/Triple.h" #include "llvm/MC/MCCodeGenInfo.h" +#include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" +using namespace llvm; + #define GET_REGINFO_MC_DESC #include "ARMGenRegisterInfo.inc" +static bool getMCRDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI, + std::string &Info) { + if (STI.getFeatureBits() & llvm::ARM::HasV7Ops && + (MI.getOperand(0).isImm() && MI.getOperand(0).getImm() == 15) && + (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) && + // Checks for the deprecated CP15ISB encoding: + // mcr p15, #0, rX, c7, c5, #4 + (MI.getOperand(3).isImm() && MI.getOperand(3).getImm() == 7)) { + if ((MI.getOperand(5).isImm() && MI.getOperand(5).getImm() == 4)) { + if (MI.getOperand(4).isImm() && MI.getOperand(4).getImm() == 5) { + Info = "deprecated since v7, use 'isb'"; + return true; + } + + // Checks for the deprecated CP15DSB encoding: + // mcr p15, #0, rX, c7, c10, #4 + if (MI.getOperand(4).isImm() && MI.getOperand(4).getImm() == 10) { + Info = "deprecated since v7, use 'dsb'"; + return true; + } + } + // Checks for the deprecated CP15DMB encoding: + // mcr p15, #0, rX, c7, c10, #5 + if (MI.getOperand(4).isImm() && MI.getOperand(4).getImm() == 10 && + (MI.getOperand(5).isImm() && MI.getOperand(5).getImm() == 5)) { + Info = "deprecated since v7, use 'dmb'"; + return true; + } + } + return false; +} + +static bool getITDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI, + std::string &Info) { + if (STI.getFeatureBits() & llvm::ARM::HasV8Ops && + MI.getOperand(1).isImm() && MI.getOperand(1).getImm() != 8) { + Info = "applying IT instruction to more than one subsequent instruction is deprecated"; + return true; + } + + return false; +} + #define GET_INSTRINFO_MC_DESC #include "ARMGenInstrInfo.inc" #define GET_SUBTARGETINFO_MC_DESC #include "ARMGenSubtargetInfo.inc" -using namespace llvm; std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) { Triple triple(TT); @@ -59,8 +102,17 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) { std::string ARMArchFeature; if (Idx) { unsigned SubVer = TT[Idx]; - if (SubVer >= '7' && SubVer <= '9') { + if (SubVer == '8') { + if (NoCPU) + // v8a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2, FeatureMP, + // FeatureHWDiv, FeatureHWDivARM, FeatureTrustZone, FeatureT2XtPk, FeatureCrypto, FeatureCRC + ARMArchFeature = "+v8,+db,+fp-armv8,+neon,+t2dsp,+mp,+hwdiv,+hwdiv-arm,+trustzone,+t2xtpk,+crypto,+crc"; + else + // Use CPU to figure out the exact features + ARMArchFeature = "+v8"; + } else if (SubVer == '7') { if (Len >= Idx+2 && TT[Idx+1] == 'm') { + isThumb = true; if (NoCPU) // v7m: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureMClass ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+mclass"; @@ -99,9 +151,10 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) { if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2') ARMArchFeature = "+v6t2"; else if (Len >= Idx+2 && TT[Idx+1] == 'm') { + isThumb = true; if (NoCPU) // v6m: FeatureNoARM, FeatureMClass - ARMArchFeature = "+v6,+noarm,+mclass"; + ARMArchFeature = 
"+v6m,+noarm,+mclass"; else ARMArchFeature = "+v6"; } else @@ -159,7 +212,7 @@ static MCRegisterInfo *createARMMCRegisterInfo(StringRef Triple) { return X; } -static MCAsmInfo *createARMMCAsmInfo(const Target &T, StringRef TT) { +static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { Triple TheTriple(TT); if (TheTriple.isOSDarwin()) @@ -212,6 +265,15 @@ static MCInstPrinter *createARMMCInstPrinter(const Target &T, return 0; } +static MCRelocationInfo *createARMMCRelocationInfo(StringRef TT, + MCContext &Ctx) { + Triple TheTriple(TT); + if (TheTriple.isEnvironmentMachO()) + return createARMMachORelocationInfo(Ctx); + // Default to the stock relocation info. + return llvm::createMCRelocationInfo(TT, Ctx); +} + namespace { class ARMMCInstrAnalysis : public MCInstrAnalysis { @@ -232,15 +294,16 @@ public: return MCInstrAnalysis::isConditionalBranch(Inst); } - uint64_t evaluateBranch(const MCInst &Inst, uint64_t Addr, - uint64_t Size) const { + bool evaluateBranch(const MCInst &Inst, uint64_t Addr, + uint64_t Size, uint64_t &Target) const { // We only handle PCRel branches for now. if (Info->get(Inst.getOpcode()).OpInfo[0].OperandType!=MCOI::OPERAND_PCREL) - return -1ULL; + return false; int64_t Imm = Inst.getOperand(0).getImm(); // FIXME: This is not right for thumb. - return Addr+Imm+8; // In ARM mode the PC is always off by 8 bytes. + Target = Addr+Imm+8; // In ARM mode the PC is always off by 8 bytes. + return true; } }; @@ -292,7 +355,17 @@ extern "C" void LLVMInitializeARMTargetMC() { TargetRegistry::RegisterMCObjectStreamer(TheARMTarget, createMCStreamer); TargetRegistry::RegisterMCObjectStreamer(TheThumbTarget, createMCStreamer); + // Register the asm streamer. + TargetRegistry::RegisterAsmStreamer(TheARMTarget, createMCAsmStreamer); + TargetRegistry::RegisterAsmStreamer(TheThumbTarget, createMCAsmStreamer); + // Register the MCInstPrinter. TargetRegistry::RegisterMCInstPrinter(TheARMTarget, createARMMCInstPrinter); TargetRegistry::RegisterMCInstPrinter(TheThumbTarget, createARMMCInstPrinter); + + // Register the MC relocation info. 
+ TargetRegistry::RegisterMCRelocationInfo(TheARMTarget, + createARMMCRelocationInfo); + TargetRegistry::RegisterMCRelocationInfo(TheThumbTarget, + createARMMCRelocationInfo); } diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h index a89981e4f060..959be8b55c3a 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h @@ -18,13 +18,17 @@ #include <string> namespace llvm { +class formatted_raw_ostream; class MCAsmBackend; class MCCodeEmitter; class MCContext; class MCInstrInfo; +class MCInstPrinter; class MCObjectWriter; class MCRegisterInfo; class MCSubtargetInfo; +class MCStreamer; +class MCRelocationInfo; class StringRef; class Target; class raw_ostream; @@ -41,12 +45,19 @@ namespace ARM_MC { StringRef FS); } +MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, + bool isVerboseAsm, bool useLoc, bool useCFI, + bool useDwarfDirectory, + MCInstPrinter *InstPrint, MCCodeEmitter *CE, + MCAsmBackend *TAB, bool ShowInst); + MCCodeEmitter *createARMMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, const MCSubtargetInfo &STI, MCContext &Ctx); -MCAsmBackend *createARMAsmBackend(const Target &T, StringRef TT, StringRef CPU); +MCAsmBackend *createARMAsmBackend(const Target &T, const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU); /// createARMELFObjectWriter - Construct an ELF Mach-O object writer. MCObjectWriter *createARMELFObjectWriter(raw_ostream &OS, @@ -58,6 +69,9 @@ MCObjectWriter *createARMMachObjectWriter(raw_ostream &OS, uint32_t CPUType, uint32_t CPUSubtype); + +/// createARMMachORelocationInfo - Construct ARM Mach-O relocation info. +MCRelocationInfo *createARMMachORelocationInfo(MCContext &Ctx); } // End llvm namespace // Defines symbolic names for ARM registers. This defines a mapping from diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp new file mode 100644 index 000000000000..807c9483bc38 --- /dev/null +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp @@ -0,0 +1,43 @@ +//===-- ARMMachORelocationInfo.cpp ----------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/ARMMCTargetDesc.h" +#include "ARMMCExpr.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCRelocationInfo.h" +#include "llvm-c/Disassembler.h" + +using namespace llvm; +using namespace object; + +namespace { +class ARMMachORelocationInfo : public MCRelocationInfo { +public: + ARMMachORelocationInfo(MCContext &Ctx) : MCRelocationInfo(Ctx) {} + + const MCExpr *createExprForCAPIVariantKind(const MCExpr *SubExpr, + unsigned VariantKind) { + switch(VariantKind) { + case LLVMDisassembler_VariantKind_ARM_HI16: + return ARMMCExpr::CreateUpper16(SubExpr, Ctx); + case LLVMDisassembler_VariantKind_ARM_LO16: + return ARMMCExpr::CreateLower16(SubExpr, Ctx); + default: + return MCRelocationInfo::createExprForCAPIVariantKind(SubExpr, + VariantKind); + } + } +}; +} // End unnamed namespace + +/// createARMMachORelocationInfo - Construct an ARM Mach-O RelocationInfo. 
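For the variant-kind mapping in ARMMachORelocationInfo above: HI16/LO16 exist because movw/movt materialize a 32-bit value in two 16-bit halves, which is exactly what the :upper16:/:lower16: expressions denote. Purely for intuition, a sketch of the split:

#include <cstdint>

// 0x12345678 -> :lower16: gives 0x5678 (movw), :upper16: gives 0x1234 (movt).
static uint16_t lower16(uint32_t V) { return (uint16_t)(V & 0xffff); }
static uint16_t upper16(uint32_t V) { return (uint16_t)(V >> 16); }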
+MCRelocationInfo *llvm::createARMMachORelocationInfo(MCContext &Ctx) { + return new ARMMachORelocationInfo(Ctx); +} diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index b9efe74b41e5..1f681bac2242 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -20,10 +20,9 @@ #include "llvm/MC/MCMachOSymbolFlags.h" #include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCValue.h" -#include "llvm/Object/MachOFormat.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachO.h" using namespace llvm; -using namespace llvm::object; namespace { class ARMMachObjectWriter : public MCMachObjectTargetWriter { @@ -63,7 +62,7 @@ public: static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType, unsigned &Log2Size) { - RelocType = unsigned(macho::RIT_Vanilla); + RelocType = unsigned(MachO::ARM_RELOC_VANILLA); Log2Size = ~0U; switch (Kind) { @@ -92,21 +91,21 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType, case ARM::fixup_arm_uncondbl: case ARM::fixup_arm_condbl: case ARM::fixup_arm_blx: - RelocType = unsigned(macho::RIT_ARM_Branch24Bit); + RelocType = unsigned(MachO::ARM_RELOC_BR24); // Report as 'long', even though that is not quite accurate. Log2Size = llvm::Log2_32(4); return true; // Handle Thumb branches. case ARM::fixup_arm_thumb_br: - RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit); + RelocType = unsigned(MachO::ARM_THUMB_RELOC_BR22); Log2Size = llvm::Log2_32(2); return true; case ARM::fixup_t2_uncondbranch: case ARM::fixup_arm_thumb_bl: case ARM::fixup_arm_thumb_blx: - RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit); + RelocType = unsigned(MachO::ARM_THUMB_RELOC_BR22); Log2Size = llvm::Log2_32(4); return true; @@ -121,23 +120,23 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType, // 1 - thumb instructions case ARM::fixup_arm_movt_hi16: case ARM::fixup_arm_movt_hi16_pcrel: - RelocType = unsigned(macho::RIT_ARM_Half); + RelocType = unsigned(MachO::ARM_RELOC_HALF); Log2Size = 1; return true; case ARM::fixup_t2_movt_hi16: case ARM::fixup_t2_movt_hi16_pcrel: - RelocType = unsigned(macho::RIT_ARM_Half); + RelocType = unsigned(MachO::ARM_RELOC_HALF); Log2Size = 3; return true; case ARM::fixup_arm_movw_lo16: case ARM::fixup_arm_movw_lo16_pcrel: - RelocType = unsigned(macho::RIT_ARM_Half); + RelocType = unsigned(MachO::ARM_RELOC_HALF); Log2Size = 0; return true; case ARM::fixup_t2_movw_lo16: case ARM::fixup_t2_movw_lo16_pcrel: - RelocType = unsigned(macho::RIT_ARM_Half); + RelocType = unsigned(MachO::ARM_RELOC_HALF); Log2Size = 2; return true; } @@ -153,7 +152,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, uint64_t &FixedValue) { uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); - unsigned Type = macho::RIT_ARM_Half; + unsigned Type = MachO::ARM_RELOC_HALF; // See <reloc.h>. const MCSymbol *A = &Target.getSymA()->getSymbol(); @@ -179,7 +178,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, "' can not be undefined in a subtraction expression"); // Select the appropriate difference relocation type. 
- Type = macho::RIT_ARM_HalfDifference; + Type = MachO::ARM_RELOC_HALF_SECTDIFF; Value2 = Writer->getSymbolAddress(B_SD, Layout); FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent()); } @@ -223,29 +222,29 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, break; } - if (Type == macho::RIT_ARM_HalfDifference) { + if (Type == MachO::ARM_RELOC_HALF_SECTDIFF) { uint32_t OtherHalf = MovtBit ? (FixedValue & 0xffff) : ((FixedValue & 0xffff0000) >> 16); - macho::RelocationEntry MRE; - MRE.Word0 = ((OtherHalf << 0) | - (macho::RIT_Pair << 24) | - (MovtBit << 28) | - (ThumbBit << 29) | - (IsPCRel << 30) | - macho::RF_Scattered); - MRE.Word1 = Value2; + MachO::any_relocation_info MRE; + MRE.r_word0 = ((OtherHalf << 0) | + (MachO::ARM_RELOC_PAIR << 24) | + (MovtBit << 28) | + (ThumbBit << 29) | + (IsPCRel << 30) | + MachO::R_SCATTERED); + MRE.r_word1 = Value2; Writer->addRelocation(Fragment->getParent(), MRE); } - macho::RelocationEntry MRE; - MRE.Word0 = ((FixupOffset << 0) | - (Type << 24) | - (MovtBit << 28) | - (ThumbBit << 29) | - (IsPCRel << 30) | - macho::RF_Scattered); - MRE.Word1 = Value; + MachO::any_relocation_info MRE; + MRE.r_word0 = ((FixupOffset << 0) | + (Type << 24) | + (MovtBit << 28) | + (ThumbBit << 29) | + (IsPCRel << 30) | + MachO::R_SCATTERED); + MRE.r_word1 = Value; Writer->addRelocation(Fragment->getParent(), MRE); } @@ -259,7 +258,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, uint64_t &FixedValue) { uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); - unsigned Type = macho::RIT_Vanilla; + unsigned Type = MachO::ARM_RELOC_VANILLA; // See <reloc.h>. const MCSymbol *A = &Target.getSymA()->getSymbol(); @@ -284,31 +283,31 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, "' can not be undefined in a subtraction expression"); // Select the appropriate difference relocation type. - Type = macho::RIT_Difference; + Type = MachO::ARM_RELOC_SECTDIFF; Value2 = Writer->getSymbolAddress(B_SD, Layout); FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent()); } // Relocations are written out in reverse order, so the PAIR comes first. - if (Type == macho::RIT_Difference || - Type == macho::RIT_Generic_LocalDifference) { - macho::RelocationEntry MRE; - MRE.Word0 = ((0 << 0) | - (macho::RIT_Pair << 24) | - (Log2Size << 28) | - (IsPCRel << 30) | - macho::RF_Scattered); - MRE.Word1 = Value2; + if (Type == MachO::ARM_RELOC_SECTDIFF || + Type == MachO::ARM_RELOC_LOCAL_SECTDIFF) { + MachO::any_relocation_info MRE; + MRE.r_word0 = ((0 << 0) | + (MachO::ARM_RELOC_PAIR << 24) | + (Log2Size << 28) | + (IsPCRel << 30) | + MachO::R_SCATTERED); + MRE.r_word1 = Value2; Writer->addRelocation(Fragment->getParent(), MRE); } - macho::RelocationEntry MRE; - MRE.Word0 = ((FixupOffset << 0) | - (Type << 24) | - (Log2Size << 28) | - (IsPCRel << 30) | - macho::RF_Scattered); - MRE.Word1 = Value; + MachO::any_relocation_info MRE; + MRE.r_word0 = ((FixupOffset << 0) | + (Type << 24) | + (Log2Size << 28) | + (IsPCRel << 30) | + MachO::R_SCATTERED); + MRE.r_word1 = Value; Writer->addRelocation(Fragment->getParent(), MRE); } @@ -326,13 +325,13 @@ bool ARMMachObjectWriter::requiresExternRelocation(MachObjectWriter *Writer, switch (RelocType) { default: return false; - case macho::RIT_ARM_Branch24Bit: + case MachO::ARM_RELOC_BR24: // PC pre-adjustment of 8 for these instructions. 
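The r_word0 packings above all follow the scattered-relocation layout from Mach-O's <reloc.h>; the HALF variant reuses the two length bits {29-28} as MovtBit/ThumbBit. A sketch of the vanilla form (the R_SCATTERED constant is restated here for self-containment):

#include <cstdint>

// {23-0} address, {27-24} type, {29-28} log2 size, {30} pcrel,
// {31} the R_SCATTERED marker.
static uint32_t packScatteredWord0(uint32_t Address, unsigned Type,
                                   unsigned Log2Size, bool IsPCRel) {
  const uint32_t R_SCATTERED = 0x80000000u;
  return (Address & 0xffffffu) | ((uint32_t)Type << 24) |
         ((uint32_t)Log2Size << 28) | ((uint32_t)IsPCRel << 30) |
         R_SCATTERED;
}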
Value -= 8; // ARM BL/BLX has a 25-bit offset. Range = 0x1ffffff; break; - case macho::RIT_ARM_ThumbBranch22Bit: + case MachO::ARM_THUMB_RELOC_BR22: // PC pre-adjustment of 4 for these instructions. Value -= 4; // Thumb BL/BLX has a 24-bit offset. @@ -361,7 +360,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, uint64_t &FixedValue) { unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); unsigned Log2Size; - unsigned RelocType = macho::RIT_Vanilla; + unsigned RelocType = MachO::ARM_RELOC_VANILLA; if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size)) // If we failed to get fixup kind info, it's because there's no legal // relocation type for the fixup kind. This happens when it's a fixup that's @@ -374,7 +373,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, // scattered relocation entry. Differences always require scattered // relocations. if (Target.getSymB()) { - if (RelocType == macho::RIT_ARM_Half) + if (RelocType == MachO::ARM_RELOC_HALF) return RecordARMScatteredHalfRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, FixedValue); return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, @@ -392,7 +391,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, // // Is this right for ARM? uint32_t Offset = Target.getConstant(); - if (IsPCRel && RelocType == macho::RIT_Vanilla) + if (IsPCRel && RelocType == MachO::ARM_RELOC_VANILLA) Offset += 1 << Log2Size; if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD)) return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, @@ -445,17 +444,17 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, } // struct relocation_info (8 bytes) - macho::RelocationEntry MRE; - MRE.Word0 = FixupOffset; - MRE.Word1 = ((Index << 0) | - (IsPCRel << 24) | - (Log2Size << 25) | - (IsExtern << 27) | - (Type << 28)); + MachO::any_relocation_info MRE; + MRE.r_word0 = FixupOffset; + MRE.r_word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (IsExtern << 27) | + (Type << 28)); // Even when it's not a scattered relocation, movw/movt always uses // a PAIR relocation. - if (Type == macho::RIT_ARM_Half) { + if (Type == MachO::ARM_RELOC_HALF) { // The other-half value only gets populated for the movt and movw // relocation entries. uint32_t Value = 0; @@ -474,11 +473,11 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, Value = FixedValue & 0xffff; break; } - macho::RelocationEntry MREPair; - MREPair.Word0 = Value; - MREPair.Word1 = ((0xffffff) | - (Log2Size << 25) | - (macho::RIT_Pair << 28)); + MachO::any_relocation_info MREPair; + MREPair.r_word0 = Value; + MREPair.r_word1 = ((0xffffff << 0) | + (Log2Size << 25) | + (MachO::ARM_RELOC_PAIR << 28)); Writer->addRelocation(Fragment->getParent(), MREPair); } diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp index 191db69fbc2b..c94337081884 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp @@ -20,6 +20,48 @@ using namespace llvm; +namespace { + /// UnwindOpcodeStreamer - The simple wrapper over SmallVector to emit bytes + /// with MSB to LSB per uint32_t ordering. For example, the first byte will + /// be placed in Vec[3], and the following bytes will be placed in 2, 1, 0, + /// 7, 6, 5, 4, 11, 10, 9, 8, and so on. 
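The byte ordering described just above is produced by a small position permutation in the class that follows; this standalone program (not LLVM code) reproduces the promised 3, 2, 1, 0, 7, 6, 5, 4, ... sequence:

#include <cstdio>

// Each 32-bit word is filled from its most significant byte down:
// prints 3 2 1 0 7 6 5 4 11 10 9 8.
int main() {
  unsigned Pos = 3;
  for (int i = 0; i < 12; ++i) {
    std::printf("%u ", Pos);
    Pos = ((Pos ^ 0x3u) + 1) ^ 0x3u; // same update as EmitByte below
  }
  std::printf("\n");
  return 0;
}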
+ class UnwindOpcodeStreamer { + private: + SmallVectorImpl<uint8_t> &Vec; + size_t Pos; + + public: + UnwindOpcodeStreamer(SmallVectorImpl<uint8_t> &V) : Vec(V), Pos(3) { + } + + /// Emit the byte in MSB to LSB per uint32_t order. + inline void EmitByte(uint8_t elem) { + Vec[Pos] = elem; + Pos = (((Pos ^ 0x3u) + 1) ^ 0x3u); + } + + /// Emit the size prefix. + inline void EmitSize(size_t Size) { + size_t SizeInWords = (Size + 3) / 4; + assert(SizeInWords <= 0x100u && + "Only 256 additional words are allowed for unwind opcodes"); + EmitByte(static_cast<uint8_t>(SizeInWords - 1)); + } + + /// Emit the personality index prefix. + inline void EmitPersonalityIndex(unsigned PI) { + assert(PI < NUM_PERSONALITY_INDEX && "Invalid personality prefix"); + EmitByte(EHT_COMPACT | PI); + } + + /// Fill the rest of bytes with FINISH opcode. + inline void FillFinishOpcode() { + while (Pos < Vec.size()) + EmitByte(UNWIND_OPCODE_FINISH); + } + }; +} + void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) { if (RegSave == 0u) return; @@ -43,28 +85,22 @@ void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) { uint32_t UnmaskedReg = RegSave & 0xfff0u & (~Mask); if (UnmaskedReg == 0u) { // Pop r[4 : (4 + n)] - Ops.push_back(UNWIND_OPCODE_POP_REG_RANGE_R4 | Range); + EmitInt8(UNWIND_OPCODE_POP_REG_RANGE_R4 | Range); RegSave &= 0x000fu; } else if (UnmaskedReg == (1u << 14)) { // Pop r[14] + r[4 : (4 + n)] - Ops.push_back(UNWIND_OPCODE_POP_REG_RANGE_R4_R14 | Range); + EmitInt8(UNWIND_OPCODE_POP_REG_RANGE_R4_R14 | Range); RegSave &= 0x000fu; } } // Two bytes opcode to save register r15-r4 - if ((RegSave & 0xfff0u) != 0) { - uint32_t Op = UNWIND_OPCODE_POP_REG_MASK_R4 | (RegSave >> 4); - Ops.push_back(static_cast<uint8_t>(Op >> 8)); - Ops.push_back(static_cast<uint8_t>(Op & 0xff)); - } + if ((RegSave & 0xfff0u) != 0) + EmitInt16(UNWIND_OPCODE_POP_REG_MASK_R4 | (RegSave >> 4)); // Opcode to save register r3-r0 - if ((RegSave & 0x000fu) != 0) { - uint32_t Op = UNWIND_OPCODE_POP_REG_MASK | (RegSave & 0x000fu); - Ops.push_back(static_cast<uint8_t>(Op >> 8)); - Ops.push_back(static_cast<uint8_t>(Op & 0xff)); - } + if ((RegSave & 0x000fu) != 0) + EmitInt16(UNWIND_OPCODE_POP_REG_MASK | (RegSave & 0x000fu)); } /// Emit unwind opcodes for .vsave directives @@ -89,10 +125,8 @@ void UnwindOpcodeAssembler::EmitVFPRegSave(uint32_t VFPRegSave) { Bit >>= 1; } - uint32_t Op = - UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D16 | ((i - 16) << 4) | Range; - Ops.push_back(static_cast<uint8_t>(Op >> 8)); - Ops.push_back(static_cast<uint8_t>(Op & 0xff)); + EmitInt16(UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D16 | + ((i - 16) << 4) | Range); } while (i > 0) { @@ -113,86 +147,75 @@ void UnwindOpcodeAssembler::EmitVFPRegSave(uint32_t VFPRegSave) { Bit >>= 1; } - uint32_t Op = UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD | (i << 4) | Range; - Ops.push_back(static_cast<uint8_t>(Op >> 8)); - Ops.push_back(static_cast<uint8_t>(Op & 0xff)); + EmitInt16(UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD | (i << 4) | Range); } } -/// Emit unwind opcodes for .setfp directives -void UnwindOpcodeAssembler::EmitSetFP(uint16_t FPReg) { - Ops.push_back(UNWIND_OPCODE_SET_VSP | FPReg); +/// Emit unwind opcodes to copy address from source register to $sp. +void UnwindOpcodeAssembler::EmitSetSP(uint16_t Reg) { + EmitInt8(UNWIND_OPCODE_SET_VSP | Reg); } -/// Emit unwind opcodes to update stack pointer +/// Emit unwind opcodes to add $sp with an offset. 
void UnwindOpcodeAssembler::EmitSPOffset(int64_t Offset) { if (Offset > 0x200) { - uint8_t Buff[10]; - size_t Size = encodeULEB128((Offset - 0x204) >> 2, Buff); - Ops.push_back(UNWIND_OPCODE_INC_VSP_ULEB128); - Ops.append(Buff, Buff + Size); + uint8_t Buff[16]; + Buff[0] = UNWIND_OPCODE_INC_VSP_ULEB128; + size_t ULEBSize = encodeULEB128((Offset - 0x204) >> 2, Buff + 1); + EmitBytes(Buff, ULEBSize + 1); } else if (Offset > 0) { if (Offset > 0x100) { - Ops.push_back(UNWIND_OPCODE_INC_VSP | 0x3fu); + EmitInt8(UNWIND_OPCODE_INC_VSP | 0x3fu); Offset -= 0x100; } - Ops.push_back(UNWIND_OPCODE_INC_VSP | - static_cast<uint8_t>((Offset - 4) >> 2)); + EmitInt8(UNWIND_OPCODE_INC_VSP | static_cast<uint8_t>((Offset - 4) >> 2)); } else if (Offset < 0) { while (Offset < -0x100) { - Ops.push_back(UNWIND_OPCODE_DEC_VSP | 0x3fu); + EmitInt8(UNWIND_OPCODE_DEC_VSP | 0x3fu); Offset += 0x100; } - Ops.push_back(UNWIND_OPCODE_DEC_VSP | - static_cast<uint8_t>(((-Offset) - 4) >> 2)); + EmitInt8(UNWIND_OPCODE_DEC_VSP | + static_cast<uint8_t>(((-Offset) - 4) >> 2)); } } -void UnwindOpcodeAssembler::AddOpcodeSizePrefix(size_t Pos) { - size_t SizeInWords = (size() + 3) / 4; - assert(SizeInWords <= 0x100u && - "Only 256 additional words are allowed for unwind opcodes"); - Ops[Pos] = static_cast<uint8_t>(SizeInWords - 1); -} +void UnwindOpcodeAssembler::Finalize(unsigned &PersonalityIndex, + SmallVectorImpl<uint8_t> &Result) { -void UnwindOpcodeAssembler::AddPersonalityIndexPrefix(size_t Pos, unsigned PI) { - assert(PI < NUM_PERSONALITY_INDEX && "Invalid personality prefix"); - Ops[Pos] = EHT_COMPACT | PI; -} + UnwindOpcodeStreamer OpStreamer(Result); -void UnwindOpcodeAssembler::EmitFinishOpcodes() { - for (size_t i = (0x4u - (size() & 0x3u)) & 0x3u; i > 0; --i) - Ops.push_back(UNWIND_OPCODE_FINISH); -} - -void UnwindOpcodeAssembler::Finalize() { if (HasPersonality) { - // Personality specified by .personality directive - Offset = 1; - AddOpcodeSizePrefix(1); + // User-specifed personality routine: [ SIZE , OP1 , OP2 , ... ] + PersonalityIndex = NUM_PERSONALITY_INDEX; + size_t TotalSize = Ops.size() + 1; + size_t RoundUpSize = (TotalSize + 3) / 4 * 4; + Result.resize(RoundUpSize); + OpStreamer.EmitSize(RoundUpSize); } else { - if (getOpcodeSize() <= 3) { + if (Ops.size() <= 3) { // __aeabi_unwind_cpp_pr0: [ 0x80 , OP1 , OP2 , OP3 ] - Offset = 1; PersonalityIndex = AEABI_UNWIND_CPP_PR0; - AddPersonalityIndexPrefix(Offset, PersonalityIndex); + Result.resize(4); + OpStreamer.EmitPersonalityIndex(PersonalityIndex); } else { // __aeabi_unwind_cpp_pr1: [ 0x81 , SIZE , OP1 , OP2 , ... ] - Offset = 0; PersonalityIndex = AEABI_UNWIND_CPP_PR1; - AddPersonalityIndexPrefix(Offset, PersonalityIndex); - AddOpcodeSizePrefix(1); + size_t TotalSize = Ops.size() + 2; + size_t RoundUpSize = (TotalSize + 3) / 4 * 4; + Result.resize(RoundUpSize); + OpStreamer.EmitPersonalityIndex(PersonalityIndex); + OpStreamer.EmitSize(RoundUpSize); } } - // Emit the padding finish opcodes if the size() is not multiple of 4. - EmitFinishOpcodes(); + // Copy the unwind opcodes + for (size_t i = OpBegins.size() - 1; i > 0; --i) + for (size_t j = OpBegins[i - 1], end = OpBegins[i]; j < end; ++j) + OpStreamer.EmitByte(Ops[j]); - // Swap the byte order - uint8_t *Ptr = Ops.begin() + Offset; - assert(size() % 4 == 0 && "Final unwind opcodes should align to 4"); - for (size_t i = 0, n = size(); i < n; i += 4) { - std::swap(Ptr[i], Ptr[i + 3]); - std::swap(Ptr[i + 1], Ptr[i + 2]); - } + // Emit the padding finish opcodes if the size is not multiple of 4. 
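EmitSPOffset above picks between three EHABI opcode shapes depending on the magnitude and sign of the adjustment, assuming word-aligned offsets as the directives guarantee. A self-contained restatement (treat the opcode constants as assumptions from the EHABI spec, not LLVM definitions):

#include <cstdint>
#include <vector>

// EHABI vsp opcodes:
//   00xxxxxx          vsp += (x << 2) + 4
//   01xxxxxx          vsp -= (x << 2) + 4
//   10110010 uleb128  vsp += 0x204 + (uleb << 2)
enum : uint8_t { INC_VSP = 0x00, DEC_VSP = 0x40, INC_VSP_ULEB128 = 0xb2 };

static void emitSPOffsetSketch(std::vector<uint8_t> &Ops, int64_t Offset) {
  if (Offset > 0x200) {
    Ops.push_back(INC_VSP_ULEB128);
    uint64_t V = (uint64_t)(Offset - 0x204) >> 2;
    do { // inline ULEB128
      uint8_t B = V & 0x7f;
      V >>= 7;
      if (V)
        B |= 0x80;
      Ops.push_back(B);
    } while (V);
  } else if (Offset > 0) {
    if (Offset > 0x100) {
      Ops.push_back(INC_VSP | 0x3f); // vsp += 0x100
      Offset -= 0x100;
    }
    Ops.push_back(INC_VSP | (uint8_t)((Offset - 4) >> 2));
  } else if (Offset < 0) {
    while (Offset < -0x100) {
      Ops.push_back(DEC_VSP | 0x3f); // vsp -= 0x100
      Offset += 0x100;
    }
    Ops.push_back(DEC_VSP | (uint8_t)((-Offset - 4) >> 2));
  }
}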
+ OpStreamer.FillFinishOpcode(); + + // Reset the assembler state + Reset(); } diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h index f6ecaeb8b29f..ac67c6efabb7 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h @@ -27,86 +27,61 @@ class MCSymbol; class UnwindOpcodeAssembler { private: - llvm::SmallVector<uint8_t, 8> Ops; - - unsigned Offset; - unsigned PersonalityIndex; + llvm::SmallVector<uint8_t, 32> Ops; + llvm::SmallVector<unsigned, 8> OpBegins; bool HasPersonality; - enum { - // The number of bytes to be preserved for the size and personality index - // prefix of unwind opcodes. - NUM_PRESERVED_PREFIX_BUF = 2 - }; - public: UnwindOpcodeAssembler() - : Ops(NUM_PRESERVED_PREFIX_BUF), Offset(NUM_PRESERVED_PREFIX_BUF), - PersonalityIndex(NUM_PERSONALITY_INDEX), HasPersonality(0) { + : HasPersonality(0) { + OpBegins.push_back(0); } /// Reset the unwind opcode assembler. void Reset() { - Ops.resize(NUM_PRESERVED_PREFIX_BUF); - Offset = NUM_PRESERVED_PREFIX_BUF; - PersonalityIndex = NUM_PERSONALITY_INDEX; + Ops.clear(); + OpBegins.clear(); + OpBegins.push_back(0); HasPersonality = 0; } - /// Get the size of the payload (including the size byte) - size_t size() const { - return Ops.size() - Offset; - } - - /// Get the beginning of the payload - const uint8_t *begin() const { - return Ops.begin() + Offset; - } - - /// Get the payload - StringRef data() const { - return StringRef(reinterpret_cast<const char *>(begin()), size()); - } - /// Set the personality index void setPersonality(const MCSymbol *Per) { HasPersonality = 1; } - /// Get the personality index - unsigned getPersonalityIndex() const { - return PersonalityIndex; - } - /// Emit unwind opcodes for .save directives void EmitRegSave(uint32_t RegSave); /// Emit unwind opcodes for .vsave directives void EmitVFPRegSave(uint32_t VFPRegSave); - /// Emit unwind opcodes for .setfp directives - void EmitSetFP(uint16_t FPReg); + /// Emit unwind opcodes to copy address from source register to $sp. + void EmitSetSP(uint16_t Reg); - /// Emit unwind opcodes to update stack pointer + /// Emit unwind opcodes to add $sp with an offset. void EmitSPOffset(int64_t Offset); /// Finalize the unwind opcode sequence for EmitBytes() - void Finalize(); + void Finalize(unsigned &PersonalityIndex, + SmallVectorImpl<uint8_t> &Result); private: - /// Get the size of the opcodes in bytes. 
- size_t getOpcodeSize() const { - return Ops.size() - NUM_PRESERVED_PREFIX_BUF; + void EmitInt8(unsigned Opcode) { + Ops.push_back(Opcode & 0xff); + OpBegins.push_back(OpBegins.back() + 1); } - /// Add the length prefix to the payload - void AddOpcodeSizePrefix(size_t Pos); - - /// Add personality index prefix in some compact format - void AddPersonalityIndexPrefix(size_t Pos, unsigned PersonalityIndex); + void EmitInt16(unsigned Opcode) { + Ops.push_back((Opcode >> 8) & 0xff); + Ops.push_back(Opcode & 0xff); + OpBegins.push_back(OpBegins.back() + 2); + } - /// Fill the words with finish opcode if it is not aligned - void EmitFinishOpcodes(); + void EmitBytes(const uint8_t *Opcode, size_t Size) { + Ops.insert(Ops.end(), Opcode, Opcode + Size); + OpBegins.push_back(OpBegins.back() + Size); + } }; } // namespace llvm diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp index 1e2a8b03e1a0..cfb33f5b8212 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -88,7 +88,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { const Thumb1InstrInfo &TII = *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo()); - unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); + unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); unsigned NumBytes = MFI->getStackSize(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); @@ -126,7 +127,6 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { case ARM::LR: if (Reg == FramePtr) FramePtrSpillFI = FI; - AFI->addGPRCalleeSavedArea1Frame(FI); GPRCS1Size += 4; break; case ARM::R8: @@ -135,16 +135,12 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { case ARM::R11: if (Reg == FramePtr) FramePtrSpillFI = FI; - if (STI.isTargetIOS()) { - AFI->addGPRCalleeSavedArea2Frame(FI); + if (STI.isTargetIOS()) GPRCS2Size += 4; - } else { - AFI->addGPRCalleeSavedArea1Frame(FI); + else GPRCS1Size += 4; - } break; default: - AFI->addDPRCalleeSavedAreaFrame(FI); DPRCSSize += 8; } } @@ -168,10 +164,17 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); NumBytes = DPRCSOffset; + int FramePtrOffsetInBlock = 0; + if (tryFoldSPUpdateIntoPushPop(MF, prior(MBBI), NumBytes)) { + FramePtrOffsetInBlock = NumBytes; + NumBytes = 0; + } + // Adjust FP so it point to the stack slot that contains the previous FP. 
if (HasFP) { + FramePtrOffsetInBlock += MFI->getObjectOffset(FramePtrSpillFI) + GPRCS1Size; AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) - .addFrameIndex(FramePtrSpillFI).addImm(0) + .addReg(ARM::SP).addImm(FramePtrOffsetInBlock / 4) .setMIFlags(MachineInstr::FrameSetup)); if (NumBytes > 508) // If offset is > 508 then sp cannot be adjusted in a single instruction, @@ -212,13 +215,6 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { AFI->setShouldRestoreSPFromFP(true); } -static bool isCalleeSavedRegister(unsigned Reg, const uint16_t *CSRegs) { - for (unsigned i = 0; CSRegs[i]; ++i) - if (Reg == CSRegs[i]) - return true; - return false; -} - static bool isCSRestore(MachineInstr *MI, const uint16_t *CSRegs) { if (MI->getOpcode() == ARM::tLDRspi && MI->getOperand(1).isFI() && @@ -249,7 +245,8 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, const Thumb1InstrInfo &TII = *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo()); - unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); + unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); int NumBytes = (int)MFI->getStackSize(); const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(); unsigned FramePtr = RegInfo->getFrameRegister(MF); @@ -294,8 +291,9 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, &MBB.front() != MBBI && prior(MBBI)->getOpcode() == ARM::tPOP) { MachineBasicBlock::iterator PMBBI = prior(MBBI); - emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes); - } else + if (!tryFoldSPUpdateIntoPushPop(MF, PMBBI, NumBytes)) + emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes); + } else if (!tryFoldSPUpdateIntoPushPop(MF, MBBI, NumBytes)) emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes); } } diff --git a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp index 095736d52a88..22a925e0ffbc 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -22,7 +22,7 @@ using namespace llvm; Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI) - : ARMBaseInstrInfo(STI), RI(*this, STI) { + : ARMBaseInstrInfo(STI), RI(STI) { } /// getNoopForMachoTarget - Return the noop instruction to use for a noop. 
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp index 7452fb776ebd..65a7221d5db7 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -40,9 +40,8 @@ extern cl::opt<bool> ReuseFrameIndexVals; using namespace llvm; -Thumb1RegisterInfo::Thumb1RegisterInfo(const ARMBaseInstrInfo &tii, - const ARMSubtarget &sti) - : ARMBaseRegisterInfo(tii, sti) { +Thumb1RegisterInfo::Thumb1RegisterInfo(const ARMSubtarget &sti) + : ARMBaseRegisterInfo(sti) { } const TargetRegisterClass* @@ -70,6 +69,7 @@ Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, ARMCC::CondCodes Pred, unsigned PredReg, unsigned MIFlags) const { MachineFunction &MF = *MBB.getParent(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); MachineConstantPool *ConstantPool = MF.getConstantPool(); const Constant *C = ConstantInt::get( Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val); @@ -426,7 +426,7 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, *this); } else { // Translate r0 = add sp, -imm to - // r0 = -imm (this is then translated into a series of instructons) + // r0 = -imm (this is then translated into a series of instructions) // r0 = add r0, sp emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl); @@ -488,6 +488,9 @@ void Thumb1RegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I, unsigned BaseReg, int64_t Offset) const { MachineInstr &MI = *I; + const ARMBaseInstrInfo &TII = + *static_cast<const ARMBaseInstrInfo*>( + MI.getParent()->getParent()->getTarget().getInstrInfo()); int Off = Offset; // ARM doesn't need the general 64-bit offsets unsigned i = 0; @@ -513,6 +516,7 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB, // off the frame pointer (if, for example, there are alloca() calls in // the function, the offset will be negative. Use R12 instead since that's // a call clobbered register that we know won't be used in Thumb1 mode. 
+  const TargetInstrInfo &TII = *MBB.getParent()->getTarget().getInstrInfo();
   DebugLoc DL;
   AddDefaultPred(BuildMI(MBB, I, DL, TII.get(ARM::tMOVr))
     .addReg(ARM::R12, RegState::Define)
@@ -558,6 +562,8 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   MachineInstr &MI = *II;
   MachineBasicBlock &MBB = *MI.getParent();
   MachineFunction &MF = *MBB.getParent();
+  const ARMBaseInstrInfo &TII =
+    *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   DebugLoc dl = MI.getDebugLoc();
   MachineInstrBuilder MIB(*MBB.getParent(), &MI);
@@ -567,11 +573,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
                MF.getFrameInfo()->getStackSize() + SPAdj;
 
-  if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex))
-    Offset -= AFI->getGPRCalleeSavedArea1Offset();
-  else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex))
-    Offset -= AFI->getGPRCalleeSavedArea2Offset();
-  else if (MF.getFrameInfo()->hasVarSizedObjects()) {
+  if (MF.getFrameInfo()->hasVarSizedObjects()) {
     assert(SPAdj == 0 && MF.getTarget().getFrameLowering()->hasFP(MF) &&
            "Unexpected");
     // There are alloca()'s in this function, must reference off the frame
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h
index ebbab36dd7b8..9689b23146a0 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -25,7 +25,7 @@ namespace llvm {
 
 struct Thumb1RegisterInfo : public ARMBaseRegisterInfo {
 public:
-  Thumb1RegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI);
+  Thumb1RegisterInfo(const ARMSubtarget &STI);
 
   const TargetRegisterClass*
   getLargestLegalSuperClass(const TargetRegisterClass *RC) const;
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
index 97c254ce75a5..0b7d3bb7754f 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -28,6 +28,7 @@ namespace {
     static char ID;
     Thumb2ITBlockPass() : MachineFunctionPass(ID) {}
 
+    bool restrictIT;
     const Thumb2InstrInfo *TII;
     const TargetRegisterInfo *TRI;
     ARMFunctionInfo *AFI;
@@ -73,15 +74,15 @@ static void TrackDefUses(MachineInstr *MI,
 
   for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) {
     unsigned Reg = LocalUses[i];
-    Uses.insert(Reg);
-    for (MCSubRegIterator Subreg(Reg, TRI); Subreg.isValid(); ++Subreg)
+    for (MCSubRegIterator Subreg(Reg, TRI, /*IncludeSelf=*/true);
+         Subreg.isValid(); ++Subreg)
       Uses.insert(*Subreg);
   }
 
   for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
     unsigned Reg = LocalDefs[i];
-    Defs.insert(Reg);
-    for (MCSubRegIterator Subreg(Reg, TRI); Subreg.isValid(); ++Subreg)
+    for (MCSubRegIterator Subreg(Reg, TRI, /*IncludeSelf=*/true);
+         Subreg.isValid(); ++Subreg)
       Defs.insert(*Subreg);
     if (Reg == ARM::CPSR)
       continue;
@@ -192,37 +193,42 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
     // Form IT block.
     ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
     unsigned Mask = 0, Pos = 3;
-    // Branches, including tricky ones like LDM_RET, need to end an IT
-    // block so check the instruction we just put in the block.
-    for (; MBBI != E && Pos &&
-           (!MI->isBranch() && !MI->isReturn()) ; ++MBBI) {
-      if (MBBI->isDebugValue())
-        continue;
-
-      MachineInstr *NMI = &*MBBI;
-      MI = NMI;
-
-      unsigned NPredReg = 0;
-      ARMCC::CondCodes NCC = getITInstrPredicate(NMI, NPredReg);
-      if (NCC == CC || NCC == OCC) {
-        Mask |= (NCC & 1) << Pos;
-        // Add implicit use of ITSTATE.
-        NMI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*ifDef*/,
-                                               true/*isImp*/, false/*isKill*/));
-        LastITMI = NMI;
-      } else {
-        if (NCC == ARMCC::AL &&
-            MoveCopyOutOfITBlock(NMI, CC, OCC, Defs, Uses)) {
-          --MBBI;
-          MBB.remove(NMI);
-          MBB.insert(InsertPos, NMI);
-          ++NumMovedInsts;
+
+    // v8 IT blocks are limited to one conditional op unless -arm-no-restrict-it
+    // is set: skip the loop
+    if (!restrictIT) {
+      // Branches, including tricky ones like LDM_RET, need to end an IT
+      // block so check the instruction we just put in the block.
+      for (; MBBI != E && Pos &&
+             (!MI->isBranch() && !MI->isReturn()) ; ++MBBI) {
+        if (MBBI->isDebugValue()) continue;
+
+        MachineInstr *NMI = &*MBBI;
+        MI = NMI;
+
+        unsigned NPredReg = 0;
+        ARMCC::CondCodes NCC = getITInstrPredicate(NMI, NPredReg);
+        if (NCC == CC || NCC == OCC) {
+          Mask |= (NCC & 1) << Pos;
+          // Add implicit use of ITSTATE.
+          NMI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*ifDef*/,
+                                                 true/*isImp*/, false/*isKill*/));
+          LastITMI = NMI;
+        } else {
+          if (NCC == ARMCC::AL &&
+              MoveCopyOutOfITBlock(NMI, CC, OCC, Defs, Uses)) {
+            --MBBI;
+            MBB.remove(NMI);
+            MBB.insert(InsertPos, NMI);
+            ++NumMovedInsts;
+            continue;
+          }
+          break;
         }
-        break;
+        TrackDefUses(NMI, Defs, Uses, TRI);
+        --Pos;
       }
-      TrackDefUses(NMI, Defs, Uses, TRI);
-      --Pos;
     }
 
     // Finalize IT mask.
@@ -250,6 +256,7 @@ bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) {
   AFI = Fn.getInfo<ARMFunctionInfo>();
   TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
   TRI = TM.getRegisterInfo();
+  restrictIT = TM.getSubtarget<ARMSubtarget>().restrictIT();
 
   if (!AFI->isThumbFunction())
     return false;
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index a1b48c226aa8..91788ac4f893 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -31,12 +31,13 @@ OldT2IfCvt("old-thumb2-ifcvt", cl::Hidden, cl::init(false));
 
 Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
-  : ARMBaseInstrInfo(STI), RI(*this, STI) {
+  : ARMBaseInstrInfo(STI), RI(STI) {
 }
 
 /// getNoopForMachoTarget - Return the noop instruction to use for a noop.
 void Thumb2InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
-  NopInst.setOpcode(ARM::tNOP);
+  NopInst.setOpcode(ARM::tHINT);
+  NopInst.addOperand(MCOperand::CreateImm(0));
   NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
   NopInst.addOperand(MCOperand::CreateReg(0));
 }
@@ -214,6 +215,13 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
                                unsigned DestReg, unsigned BaseReg, int NumBytes,
                                ARMCC::CondCodes Pred, unsigned PredReg,
                                const ARMBaseInstrInfo &TII, unsigned MIFlags) {
+  if (NumBytes == 0 && DestReg != BaseReg) {
+    BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg)
+      .addReg(BaseReg, RegState::Kill)
+      .addImm((unsigned)Pred).addReg(PredReg).setMIFlags(MIFlags);
+    return;
+  }
+
   bool isSub = NumBytes < 0;
   if (isSub) NumBytes = -NumBytes;
 
@@ -285,7 +293,7 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
         NumBytes = 0;
       } else {
         // FIXME: Move this to ARMAddressingModes.h?
-        unsigned RotAmt = CountLeadingZeros_32(ThisVal);
+        unsigned RotAmt = countLeadingZeros(ThisVal);
         ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
         NumBytes &= ~ThisVal;
         assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
@@ -302,7 +310,7 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
         NumBytes = 0;
       } else {
         // FIXME: Move this to ARMAddressingModes.h?
-        unsigned RotAmt = CountLeadingZeros_32(ThisVal);
+        unsigned RotAmt = countLeadingZeros(ThisVal);
         ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
         NumBytes &= ~ThisVal;
         assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
@@ -334,6 +342,7 @@ negativeOffsetOpcode(unsigned opcode)
   case ARM::t2STRi12:  return ARM::t2STRi8;
   case ARM::t2STRBi12: return ARM::t2STRBi8;
   case ARM::t2STRHi12: return ARM::t2STRHi8;
+  case ARM::t2PLDi12:  return ARM::t2PLDi8;
 
   case ARM::t2LDRi8:
   case ARM::t2LDRHi8:
@@ -343,6 +352,7 @@ negativeOffsetOpcode(unsigned opcode)
   case ARM::t2STRi8:
   case ARM::t2STRBi8:
   case ARM::t2STRHi8:
+  case ARM::t2PLDi8:
     return opcode;
 
   default:
@@ -364,6 +374,7 @@ positiveOffsetOpcode(unsigned opcode)
   case ARM::t2STRi8:  return ARM::t2STRi12;
   case ARM::t2STRBi8: return ARM::t2STRBi12;
   case ARM::t2STRHi8: return ARM::t2STRHi12;
+  case ARM::t2PLDi8:  return ARM::t2PLDi12;
 
   case ARM::t2LDRi12:
   case ARM::t2LDRHi12:
@@ -373,6 +384,7 @@ positiveOffsetOpcode(unsigned opcode)
   case ARM::t2STRi12:
   case ARM::t2STRBi12:
   case ARM::t2STRHi12:
+  case ARM::t2PLDi12:
     return opcode;
 
   default:
@@ -394,6 +406,7 @@ immediateOffsetOpcode(unsigned opcode)
   case ARM::t2STRs:  return ARM::t2STRi12;
   case ARM::t2STRBs: return ARM::t2STRBi12;
   case ARM::t2STRHs: return ARM::t2STRHi12;
+  case ARM::t2PLDs:  return ARM::t2PLDi12;
 
   case ARM::t2LDRi12:
   case ARM::t2LDRHi12:
@@ -403,6 +416,7 @@ immediateOffsetOpcode(unsigned opcode)
   case ARM::t2STRi12:
   case ARM::t2STRBi12:
   case ARM::t2STRHi12:
+  case ARM::t2PLDi12:
   case ARM::t2LDRi8:
   case ARM::t2LDRHi8:
   case ARM::t2LDRBi8:
@@ -411,6 +425,7 @@ immediateOffsetOpcode(unsigned opcode)
   case ARM::t2STRi8:
   case ARM::t2STRBi8:
   case ARM::t2STRHi8:
+  case ARM::t2PLDi8:
     return opcode;
 
   default:
@@ -484,7 +499,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
 
       // Otherwise, extract 8 adjacent bits from the immediate into this
       // t2ADDri/t2SUBri.
-      unsigned RotAmt = CountLeadingZeros_32(Offset);
+      unsigned RotAmt = countLeadingZeros<unsigned>(Offset);
       unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xff000000U, RotAmt);
 
       // We will handle these bits from offset, clear them.
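Note on the RotAmt computation above: a Thumb2 modified immediate encodes an 8-bit value rotated to an arbitrary position, so an oversized 32-bit addend is peeled off eight adjacent bits at a time, with the 8-bit window aligned against the most significant set bit via countLeadingZeros. A self-contained sketch of that loop follows; rotr32 and countLeadingZeros32 here are simplified stand-ins for ARM_AM::rotr32 and llvm::countLeadingZeros, and the encodability assert is omitted (illustrative code, not the LLVM implementation).

    #include <cstdint>
    #include <cstdio>

    // Rotate right by amt bits; the amt == 0 guard avoids a shift by 32.
    static uint32_t rotr32(uint32_t v, unsigned amt) {
      return amt == 0 ? v : (v >> amt) | (v << (32 - amt));
    }

    // Portable stand-in for llvm::countLeadingZeros on a 32-bit value.
    static unsigned countLeadingZeros32(uint32_t v) {
      unsigned n = 0;
      for (uint32_t mask = 0x80000000u; mask && !(v & mask); mask >>= 1)
        ++n;
      return n;
    }

    int main() {
      uint32_t bytes = 0x12345678; // too wide for one encodable chunk
      while (bytes) {
        // Align an 8-bit window with the most significant set bit,
        // exactly as the RotAmt lines in the diff above do.
        unsigned rot = countLeadingZeros32(bytes);
        uint32_t chunk = bytes & rotr32(0xff000000u, rot);
        std::printf("emit add/sub #0x%08x\n", chunk);
        bytes &= ~chunk; // handled these bits, clear them
      }
    }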
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp
index 1a7a4d450cfe..4cb827f308ef 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp
@@ -24,9 +24,8 @@
 #include "llvm/IR/Function.h"
 using namespace llvm;
 
-Thumb2RegisterInfo::Thumb2RegisterInfo(const ARMBaseInstrInfo &tii,
-                                       const ARMSubtarget &sti)
-  : ARMBaseRegisterInfo(tii, sti) {
+Thumb2RegisterInfo::Thumb2RegisterInfo(const ARMSubtarget &sti)
+  : ARMBaseRegisterInfo(sti) {
 }
 
 /// emitLoadConstPool - Emits a load from constpool to materialize the
@@ -40,6 +39,7 @@ Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
                                       ARMCC::CondCodes Pred, unsigned PredReg,
                                       unsigned MIFlags) const {
   MachineFunction &MF = *MBB.getParent();
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
   MachineConstantPool *ConstantPool = MF.getConstantPool();
   const Constant *C = ConstantInt::get(
           Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h b/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h
index 6b397e869683..b1d63fa86d01 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h
+++ b/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h
@@ -20,12 +20,12 @@
 #include "llvm/Target/TargetRegisterInfo.h"
 
 namespace llvm {
-  class ARMSubtarget;
-  class ARMBaseInstrInfo;
+
+class ARMSubtarget;
 
 struct Thumb2RegisterInfo : public ARMBaseRegisterInfo {
 public:
-  Thumb2RegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI);
+  Thumb2RegisterInfo(const ARMSubtarget &STI);
 
   /// emitLoadConstPool - Emits a load from constpool to materialize the
   /// specified immediate.
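Note on the constructor changes in this and the preceding register-info files: the Thumb register-info classes no longer cache an ARMBaseInstrInfo reference passed in at construction; each method that needs one re-derives it from the MachineFunction it is already given, which removes the circular constructor dependency between the instr-info and register-info objects. A minimal sketch of the pattern, using illustrative stand-in types rather than the real LLVM classes:

    #include <cstdio>

    struct InstrInfo {
      const char *name() const { return "thumb2-instr-info"; }
    };

    struct Target {
      InstrInfo II;
      const InstrInfo *getInstrInfo() const { return &II; }
    };

    struct Function {
      const Target *TM;
      const Target &getTarget() const { return *TM; }
    };

    // Before: the register info stored "const InstrInfo &TII;", a second
    // reference that had to be wired up in the constructor and kept in
    // sync with the target. After: derive it on demand from the function
    // being compiled, so the constructor needs only the subtarget.
    struct RegisterInfo {
      void emitLoadConstPool(const Function &MF) const {
        const InstrInfo &TII = *MF.getTarget().getInstrInfo(); // local
        std::printf("using %s\n", TII.name());
      }
    };

    int main() {
      Target T;
      Function F{&T};
      RegisterInfo().emitLoadConstPool(F);
    }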