| author | Dimitry Andric <dim@FreeBSD.org> | 2014-11-24 17:02:24 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2014-11-24 17:02:24 +0000 |
| commit | 91bc56ed825ba56b3cc264aa5c95ab84f86832ab (patch) | |
| tree | 4df130b28021d86e13bf4565ef58c1c5a5e093b4 /contrib/llvm/lib/Target/ARM | |
| parent | 9efc7e72bb1daf5d6019871d9c93a1c488a11229 (diff) | |
| parent | 5ca98fd98791947eba83a1ed3f2c8191ef7afa6c (diff) | |
Merge llvm 3.5.0 release from ^/vendor/llvm/dist, resolve conflicts, and
preserve our customizations, where necessary.
Notes:
svn path=/projects/clang350-import/; revision=274968
Diffstat (limited to 'contrib/llvm/lib/Target/ARM')
96 files changed, 9778 insertions, 5992 deletions
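Most of the mechanical churn in this merge comes from LLVM 3.5's C++11 modernization, visible throughout the hunks below: `virtual` on overriders becomes `override`, `NULL`/`0` becomes `nullptr`, `assert(0 && ...)` becomes `llvm_unreachable`, and hand-written stub loops become range-based `for`. A minimal sketch of the recurring before/after pattern, using a hypothetical pass rather than code taken from this patch:

```cpp
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;

namespace {
// Hypothetical pass, for illustration only; not part of this commit.
struct ExamplePass : public MachineFunctionPass {
  static char ID;
  ExamplePass() : MachineFunctionPass(ID) {}

  // Pre-3.5 style:  virtual bool runOnMachineFunction(MachineFunction &Fn);
  bool runOnMachineFunction(MachineFunction &Fn) override;

  // Pre-3.5 style:  virtual const char *getPassName() const { ... }
  const char *getPassName() const override { return "example pass"; }
};
} // end anonymous namespace

char ExamplePass::ID = 0;

bool ExamplePass::runOnMachineFunction(MachineFunction &Fn) {
  // Pre-3.5 style:  assert(0 && "..."); return false;
  // llvm_unreachable is noreturn, so no dummy return value is needed.
  llvm_unreachable("example pass is never actually run");
}
```

The `override` form turns a silent signature mismatch into a compile error, which is why the merge applies it wholesale; the same rename-and-retag shape repeats across every file in the diff below.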
diff --git a/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp b/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp index 3e805a2b68bc..92eaf9e1c9b3 100644 --- a/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp +++ b/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp @@ -24,36 +24,30 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "a15-sd-optimizer" #include "ARM.h" #include "ARMBaseInstrInfo.h" -#include "ARMSubtarget.h" -#include "ARMISelLowering.h" -#include "ARMTargetMachine.h" - -#include "llvm/ADT/SmallPtrSet.h" +#include "ARMBaseRegisterInfo.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" - #include <set> using namespace llvm; +#define DEBUG_TYPE "a15-sd-optimizer" + namespace { struct A15SDOptimizer : public MachineFunctionPass { static char ID; A15SDOptimizer() : MachineFunctionPass(ID) {} - virtual bool runOnMachineFunction(MachineFunction &Fn); + bool runOnMachineFunction(MachineFunction &Fn) override; - virtual const char *getPassName() const { + const char *getPassName() const override { return "ARM A15 S->D optimizer"; } @@ -97,7 +91,7 @@ namespace { unsigned createImplicitDef(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, DebugLoc DL); - + // // Various property checkers // @@ -165,7 +159,7 @@ unsigned A15SDOptimizer::getPrefSPRLane(unsigned SReg) { if (!MI) return ARM::ssub_0; MachineOperand *MO = MI->findRegisterDefOperand(SReg); - assert(MO->isReg() && "Non register operand found!"); + assert(MO->isReg() && "Non-register operand found!"); if (!MO) return ARM::ssub_0; if (MI->isCopy() && usesRegClass(MI->getOperand(1), @@ -227,9 +221,9 @@ void A15SDOptimizer::eraseInstrWithNoUses(MachineInstr *MI) { IsDead = false; break; } - for (MachineRegisterInfo::use_iterator II = MRI->use_begin(Reg), - EE = MRI->use_end(); - II != EE; ++II) { + for (MachineRegisterInfo::use_instr_iterator + II = MRI->use_instr_begin(Reg), EE = MRI->use_instr_end(); + II != EE; ++II) { // We don't care about self references. if (&*II == Def) continue; @@ -266,7 +260,7 @@ unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) { if (DPRMI && SPRMI) { // See if the first operand of this insert_subreg is IMPLICIT_DEF MachineInstr *ECDef = elideCopies(DPRMI); - if (ECDef != 0 && ECDef->isImplicitDef()) { + if (ECDef && ECDef->isImplicitDef()) { // Another corner case - if we're inserting something that is purely // a subreg copy of a DPR, just use that DPR. 
@@ -327,8 +321,7 @@ unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) { return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg()); } - assert(0 && "Unhandled update pattern!"); - return 0; + llvm_unreachable("Unhandled update pattern!"); } // Return true if this MachineInstr inserts a scalar (SPR) value into @@ -355,10 +348,10 @@ MachineInstr *A15SDOptimizer::elideCopies(MachineInstr *MI) { if (!MI->isFullCopy()) return MI; if (!TRI->isVirtualRegister(MI->getOperand(1).getReg())) - return NULL; + return nullptr; MachineInstr *Def = MRI->getVRegDef(MI->getOperand(1).getReg()); if (!Def) - return NULL; + return nullptr; return elideCopies(Def); } @@ -442,7 +435,7 @@ A15SDOptimizer::createDupLane(MachineBasicBlock &MBB, Out) .addReg(Reg) .addImm(Lane)); - + return Out; } @@ -608,7 +601,7 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) { // * INSERT_SUBREG: * If the SPR value was originally in another DPR/QPR // lane, and the other lane(s) of the DPR/QPR register // that we are inserting in are undefined, use the - // original DPR/QPR value. + // original DPR/QPR value. // * Otherwise, fall back on the same stategy as COPY. // // * REG_SEQUENCE: * If all except one of the input operands are @@ -654,7 +647,7 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) { unsigned DPRDefReg = MI->getOperand(0).getReg(); for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DPRDefReg), E = MRI->use_end(); I != E; ++I) - Uses.push_back(&I.getOperand()); + Uses.push_back(&*I); // We can optimize this. unsigned NewReg = optimizeSDPattern(MI); @@ -700,7 +693,7 @@ bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) { MI != ME;) { Modified |= runOnInstruction(MI++); } - + } for (std::set<MachineInstr *>::iterator I = DeadInstr.begin(), diff --git a/contrib/llvm/lib/Target/ARM/ARM.h b/contrib/llvm/lib/Target/ARM/ARM.h index 80e5f37eb086..55df29c1499a 100644 --- a/contrib/llvm/lib/Target/ARM/ARM.h +++ b/contrib/llvm/lib/Target/ARM/ARM.h @@ -15,19 +15,19 @@ #ifndef TARGET_ARM_H #define TARGET_ARM_H -#include "MCTargetDesc/ARMBaseInfo.h" -#include "MCTargetDesc/ARMMCTargetDesc.h" -#include "llvm/Support/DataTypes.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/CodeGen.h" namespace llvm { class ARMAsmPrinter; class ARMBaseTargetMachine; class FunctionPass; +class ImmutablePass; class JITCodeEmitter; class MachineInstr; class MCInst; +class TargetLowering; +class TargetMachine; FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM, CodeGenOpt::Level OptLevel); @@ -43,6 +43,7 @@ FunctionPass *createARMGlobalMergePass(const TargetLowering* tli); FunctionPass *createARMConstantIslandPass(); FunctionPass *createMLxExpansionPass(); FunctionPass *createThumb2ITBlockPass(); +FunctionPass *createARMOptimizeBarriersPass(); FunctionPass *createThumb2SizeReductionPass(); /// \brief Creates an ARM-specific Target Transformation Info pass. diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td index 36e5680ca4e0..7916ccc180c8 100644 --- a/contrib/llvm/lib/Target/ARM/ARM.td +++ b/contrib/llvm/lib/Target/ARM/ARM.td @@ -73,6 +73,11 @@ def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true", "Enable support for CRC instructions">; +// Cyclone has preferred instructions for zeroing VFP registers, which can +// execute in 0 cycles. 
+def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", + "Has zero-cycle zeroing instructions">; + // Some processors have FP multiply-accumulate instructions that don't // play nicely with other VFP / NEON instructions, and it's generally better // to just not use them. @@ -179,7 +184,14 @@ def ProcA5 : SubtargetFeature<"a5", "ARMProcFamily", "CortexA5", "Cortex-A5 ARM processors", [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx, FeatureVMLxForwarding, FeatureT2XtPk, - FeatureTrustZone]>; + FeatureTrustZone, FeatureMP]>; +def ProcA7 : SubtargetFeature<"a7", "ARMProcFamily", "CortexA7", + "Cortex-A7 ARM processors", + [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx, + FeatureVMLxForwarding, FeatureT2XtPk, + FeatureVFP4, FeatureMP, + FeatureHWDiv, FeatureHWDivARM, + FeatureTrustZone, FeatureVirtualization]>; def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8", "Cortex-A8 ARM processors", [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx, @@ -198,6 +210,15 @@ def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift", FeatureHWDivARM, FeatureAvoidPartialCPSR, FeatureAvoidMOVsShOp, FeatureHasSlowFPVMLx, FeatureTrustZone]>; +def ProcA12 : SubtargetFeature<"a12", "ARMProcFamily", "CortexA12", + "Cortex-A12 ARM processors", + [FeatureVMLxForwarding, + FeatureT2XtPk, FeatureVFP4, + FeatureHWDiv, FeatureHWDivARM, + FeatureAvoidPartialCPSR, + FeatureVirtualization, + FeatureTrustZone]>; + // FIXME: It has not been determined if A15 has these features. def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15", @@ -227,6 +248,26 @@ def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5", FeatureAvoidPartialCPSR, FeatureT2XtPk]>; +// FIXME: krait has currently the same features as A9 +// plus VFP4 and hardware division features. +def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait", + "Qualcomm ARM processors", + [FeatureVMLxForwarding, + FeatureT2XtPk, FeatureFP16, + FeatureAvoidPartialCPSR, + FeatureTrustZone, + FeatureVFP4, + FeatureHWDiv, + FeatureHWDivARM]>; + + +def FeatureAPCS : SubtargetFeature<"apcs", "TargetABI", "ARM_ABI_APCS", + "Use the APCS ABI">; + +def FeatureAAPCS : SubtargetFeature<"aapcs", "TargetABI", "ARM_ABI_AAPCS", + "Use the AAPCS ABI">; + + class ProcNoItin<string Name, list<SubtargetFeature> Features> : Processor<Name, NoItineraries, Features>; @@ -296,6 +337,10 @@ def : ProcessorModel<"cortex-a5", CortexA8Model, [ProcA5, HasV7Ops, FeatureNEON, FeatureDB, FeatureVFP4, FeatureDSPThumb2, FeatureHasRAS, FeatureAClass]>; +def : ProcessorModel<"cortex-a7", CortexA8Model, + [ProcA7, HasV7Ops, FeatureNEON, FeatureDB, + FeatureDSPThumb2, FeatureHasRAS, + FeatureAClass]>; def : ProcessorModel<"cortex-a8", CortexA8Model, [ProcA8, HasV7Ops, FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureHasRAS, @@ -308,11 +353,26 @@ def : ProcessorModel<"cortex-a9-mp", CortexA9Model, [ProcA9, HasV7Ops, FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureMP, FeatureHasRAS, FeatureAClass]>; + +// FIXME: A12 has currently the same Schedule model as A9 +def : ProcessorModel<"cortex-a12", CortexA9Model, + [ProcA12, HasV7Ops, FeatureNEON, FeatureDB, + FeatureDSPThumb2, FeatureMP, + FeatureHasRAS, FeatureAClass]>; + // FIXME: A15 has currently the same ProcessorModel as A9. 
def : ProcessorModel<"cortex-a15", CortexA9Model, [ProcA15, HasV7Ops, FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureHasRAS, FeatureAClass]>; + +// FIXME: krait has currently the same Schedule model as A9 +def : ProcessorModel<"krait", CortexA9Model, + [ProcKrait, HasV7Ops, + FeatureNEON, FeatureDB, + FeatureDSPThumb2, FeatureHasRAS, + FeatureAClass]>; + // FIXME: R5 has currently the same ProcessorModel as A8. def : ProcessorModel<"cortex-r5", CortexA8Model, [ProcR5, HasV7Ops, FeatureDB, @@ -347,6 +407,13 @@ def : ProcNoItin<"cortex-a57", [ProcA57, HasV8Ops, FeatureAClass, FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2]>; +// Cyclone is very similar to swift +def : ProcessorModel<"cyclone", SwiftModel, + [ProcSwift, HasV8Ops, HasV7Ops, + FeatureCrypto, FeatureFPARMv8, + FeatureDB,FeatureDSPThumb2, + FeatureHasRAS, FeatureZCZeroing]>; + //===----------------------------------------------------------------------===// // Register File Description //===----------------------------------------------------------------------===// @@ -363,17 +430,6 @@ include "ARMInstrInfo.td" def ARMInstrInfo : InstrInfo; - -//===----------------------------------------------------------------------===// -// Assembly printer -//===----------------------------------------------------------------------===// -// ARM Uses the MC printer for asm output, so make sure the TableGen -// AsmWriter bits get associated with the correct class. -def ARMAsmWriter : AsmWriter { - string AsmWriterClassName = "InstPrinter"; - bit isMCAsmWriter = 1; -} - //===----------------------------------------------------------------------===// // Declare the target which we are implementing //===----------------------------------------------------------------------===// @@ -381,6 +437,4 @@ def ARMAsmWriter : AsmWriter { def ARM : Target { // Pull in Instruction Info: let InstructionSet = ARMInstrInfo; - - let AssemblyWriters = [ARMAsmWriter]; } diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index e79f88d4b6f1..28d2610c3903 100644 --- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -12,10 +12,8 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "asm-printer" #include "ARMAsmPrinter.h" #include "ARM.h" -#include "ARMBuildAttrs.h" #include "ARMConstantPoolValue.h" #include "ARMFPUName.h" #include "ARMMachineFunctionInfo.h" @@ -26,13 +24,13 @@ #include "MCTargetDesc/ARMMCExpr.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallString.h" -#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" -#include "llvm/DebugInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/MC/MCAsmInfo.h" @@ -45,80 +43,19 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Support/ARMBuildAttributes.h" +#include "llvm/Support/COFF.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/Mangler.h" #include "llvm/Target/TargetMachine.h" #include <cctype> using namespace 
llvm; -/// EmitDwarfRegOp - Emit dwarf register operation. -void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc, - bool Indirect) const { - const TargetRegisterInfo *RI = TM.getRegisterInfo(); - if (RI->getDwarfRegNum(MLoc.getReg(), false) != -1) { - AsmPrinter::EmitDwarfRegOp(MLoc, Indirect); - return; - } - assert(MLoc.isReg() && !Indirect && - "This doesn't support offset/indirection - implement it if needed"); - unsigned Reg = MLoc.getReg(); - if (Reg >= ARM::S0 && Reg <= ARM::S31) { - assert(ARM::S0 + 31 == ARM::S31 && "Unexpected ARM S register numbering"); - // S registers are described as bit-pieces of a register - // S[2x] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 0) - // S[2x+1] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 32) - - unsigned SReg = Reg - ARM::S0; - bool odd = SReg & 0x1; - unsigned Rx = 256 + (SReg >> 1); - - OutStreamer.AddComment("DW_OP_regx for S register"); - EmitInt8(dwarf::DW_OP_regx); - - OutStreamer.AddComment(Twine(SReg)); - EmitULEB128(Rx); - - if (odd) { - OutStreamer.AddComment("DW_OP_bit_piece 32 32"); - EmitInt8(dwarf::DW_OP_bit_piece); - EmitULEB128(32); - EmitULEB128(32); - } else { - OutStreamer.AddComment("DW_OP_bit_piece 32 0"); - EmitInt8(dwarf::DW_OP_bit_piece); - EmitULEB128(32); - EmitULEB128(0); - } - } else if (Reg >= ARM::Q0 && Reg <= ARM::Q15) { - assert(ARM::Q0 + 15 == ARM::Q15 && "Unexpected ARM Q register numbering"); - // Q registers Q0-Q15 are described by composing two D registers together. - // Qx = DW_OP_regx(256+2x) DW_OP_piece(8) DW_OP_regx(256+2x+1) - // DW_OP_piece(8) - - unsigned QReg = Reg - ARM::Q0; - unsigned D1 = 256 + 2 * QReg; - unsigned D2 = D1 + 1; - - OutStreamer.AddComment("DW_OP_regx for Q register: D1"); - EmitInt8(dwarf::DW_OP_regx); - EmitULEB128(D1); - OutStreamer.AddComment("DW_OP_piece 8"); - EmitInt8(dwarf::DW_OP_piece); - EmitULEB128(8); - - OutStreamer.AddComment("DW_OP_regx for Q register: D2"); - EmitInt8(dwarf::DW_OP_regx); - EmitULEB128(D2); - OutStreamer.AddComment("DW_OP_piece 8"); - EmitInt8(dwarf::DW_OP_piece); - EmitULEB128(8); - } -} +#define DEBUG_TYPE "asm-printer" void ARMAsmPrinter::EmitFunctionBodyEnd() { // Make sure to terminate any constant pools that were at the end @@ -145,12 +82,13 @@ void ARMAsmPrinter::EmitXXStructor(const Constant *CV) { const GlobalValue *GV = dyn_cast<GlobalValue>(CV->stripPointerCasts()); assert(GV && "C++ constructor pointer was not a GlobalValue!"); - const MCExpr *E = MCSymbolRefExpr::Create(getSymbol(GV), - (Subtarget->isTargetDarwin() - ? MCSymbolRefExpr::VK_None - : MCSymbolRefExpr::VK_ARM_TARGET1), + const MCExpr *E = MCSymbolRefExpr::Create(GetARMGVSymbol(GV, + ARMII::MO_NO_FLAG), + (Subtarget->isTargetELF() + ? MCSymbolRefExpr::VK_ARM_TARGET1 + : MCSymbolRefExpr::VK_None), OutContext); - + OutStreamer.EmitValue(E, Size); } @@ -161,7 +99,28 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { AFI = MF.getInfo<ARMFunctionInfo>(); MCP = MF.getConstantPool(); - return AsmPrinter::runOnMachineFunction(MF); + SetupMachineFunction(MF); + + if (Subtarget->isTargetCOFF()) { + bool Internal = MF.getFunction()->hasInternalLinkage(); + COFF::SymbolStorageClass Scl = Internal ? 
COFF::IMAGE_SYM_CLASS_STATIC + : COFF::IMAGE_SYM_CLASS_EXTERNAL; + int Type = COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT; + + OutStreamer.BeginCOFFSymbolDef(CurrentFnSym); + OutStreamer.EmitCOFFSymbolStorageClass(Scl); + OutStreamer.EmitCOFFSymbolType(Type); + OutStreamer.EndCOFFSymbolDef(); + } + + // Have common code print out the function header with linkage info etc. + EmitFunctionHeader(); + + // Emit the rest of the function body. + EmitFunctionBody(); + + // We didn't modify anything. + return false; } void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, @@ -206,25 +165,16 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, else if ((Modifier && strcmp(Modifier, "hi16") == 0) || (TF & ARMII::MO_HI16)) O << ":upper16:"; - O << *getSymbol(GV); + O << *GetARMGVSymbol(GV, TF); printOffset(MO.getOffset(), O); if (TF == ARMII::MO_PLT) O << "(PLT)"; break; } - case MachineOperand::MO_ExternalSymbol: { - O << *GetExternalSymbolSymbol(MO.getSymbolName()); - if (TF == ARMII::MO_PLT) - O << "(PLT)"; - break; - } case MachineOperand::MO_ConstantPoolIndex: O << *GetCPISymbol(MO.getIndex()); break; - case MachineOperand::MO_JumpTableIndex: - O << *GetJTISymbol(MO.getIndex()); - break; } } @@ -232,16 +182,18 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, MCSymbol *ARMAsmPrinter:: GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const { + const DataLayout *DL = TM.getDataLayout(); SmallString<60> Name; - raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "JTI" + raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' << uid << '_' << uid2; return OutContext.GetOrCreateSymbol(Name.str()); } MCSymbol *ARMAsmPrinter::GetARMSJLJEHLabel() const { + const DataLayout *DL = TM.getDataLayout(); SmallString<60> Name; - raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "SJLJEH" + raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "SJLJEH" << getFunctionNumber(); return OutContext.GetOrCreateSymbol(Name.str()); } @@ -311,7 +263,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, if (ARM::GPRPairRegClass.contains(RegBegin)) { const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); unsigned Reg0 = TRI->getSubReg(RegBegin, ARM::gsub_0); - O << ARMInstPrinter::getRegisterName(Reg0) << ", ";; + O << ARMInstPrinter::getRegisterName(Reg0) << ", "; RegBegin = TRI->getSubReg(RegBegin, ARM::gsub_1); } O << ARMInstPrinter::getRegisterName(RegBegin); @@ -446,8 +398,22 @@ bool ARMAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, return false; } +static bool isThumb(const MCSubtargetInfo& STI) { + return (STI.getFeatureBits() & ARM::ModeThumb) != 0; +} + +void ARMAsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo, + const MCSubtargetInfo *EndInfo) const { + // If either end mode is unknown (EndInfo == NULL) or different than + // the start mode, then restore the start mode. + const bool WasThumb = isThumb(StartInfo); + if (!EndInfo || WasThumb != isThumb(*EndInfo)) { + OutStreamer.EmitAssemblerFlag(WasThumb ? 
MCAF_Code16 : MCAF_Code32); + } +} + void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { - if (Subtarget->isTargetDarwin()) { + if (Subtarget->isTargetMachO()) { Reloc::Model RelocM = TM.getRelocationModel(); if (RelocM == Reloc::PIC_ || RelocM == Reloc::DynamicNoPIC) { // Declare all the text sections up front (before the DWARF sections @@ -468,7 +434,7 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { // Now any user defined text sections from function attributes. for (Module::iterator F = M.begin(), e = M.end(); F != e; ++F) if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage()) - TextSections.insert(TLOFMacho.SectionForGlobal(F, Mang, TM)); + TextSections.insert(TLOFMacho.SectionForGlobal(F, *Mang, TM)); // Now the coalescable sections. TextSections.insert(TLOFMacho.getTextCoalSection()); TextSections.insert(TLOFMacho.getConstTextCoalSection()); @@ -480,23 +446,30 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { if (RelocM == Reloc::DynamicNoPIC) { const MCSection *sect = OutContext.getMachOSection("__TEXT", "__symbol_stub4", - MCSectionMachO::S_SYMBOL_STUBS, + MachO::S_SYMBOL_STUBS, 12, SectionKind::getText()); OutStreamer.SwitchSection(sect); } else { const MCSection *sect = OutContext.getMachOSection("__TEXT", "__picsymbolstub4", - MCSectionMachO::S_SYMBOL_STUBS, + MachO::S_SYMBOL_STUBS, 16, SectionKind::getText()); OutStreamer.SwitchSection(sect); } const MCSection *StaticInitSect = OutContext.getMachOSection("__TEXT", "__StaticInit", - MCSectionMachO::S_REGULAR | - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + MachO::S_REGULAR | + MachO::S_ATTR_PURE_INSTRUCTIONS, SectionKind::getText()); OutStreamer.SwitchSection(StaticInitSect); } + + // Compiling with debug info should not affect the code + // generation. Ensure the cstring section comes before the + // optional __DWARF secion. Otherwise, PC-relative loads would + // have to use different instruction sequences at "-g" in order to + // reach global data in the same object file. + OutStreamer.SwitchSection(getObjFileLowering().getCStringSection()); } // Use unified assembler syntax. @@ -507,9 +480,32 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { emitAttributes(); } +static void +emitNonLazySymbolPointer(MCStreamer &OutStreamer, MCSymbol *StubLabel, + MachineModuleInfoImpl::StubValueTy &MCSym) { + // L_foo$stub: + OutStreamer.EmitLabel(StubLabel); + // .indirect_symbol _foo + OutStreamer.EmitSymbolAttribute(MCSym.getPointer(), MCSA_IndirectSymbol); + + if (MCSym.getInt()) + // External to current translation unit. + OutStreamer.EmitIntValue(0, 4/*size*/); + else + // Internal to current translation unit. + // + // When we place the LSDA into the TEXT section, the type info + // pointers need to be indirect and pc-rel. We accomplish this by + // using NLPs; however, sometimes the types are local to the file. + // We need to fill in the value for the NLP in those cases. + OutStreamer.EmitValue( + MCSymbolRefExpr::Create(MCSym.getPointer(), OutStreamer.getContext()), + 4 /*size*/); +} + void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { - if (Subtarget->isTargetDarwin()) { + if (Subtarget->isTargetMachO()) { // All darwin targets use mach-o. const TargetLoweringObjectFileMachO &TLOFMacho = static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering()); @@ -523,27 +519,9 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { // Switch with ".non_lazy_symbol_pointer" directive. 
OutStreamer.SwitchSection(TLOFMacho.getNonLazySymbolPointerSection()); EmitAlignment(2); - for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - // L_foo$stub: - OutStreamer.EmitLabel(Stubs[i].first); - // .indirect_symbol _foo - MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second; - OutStreamer.EmitSymbolAttribute(MCSym.getPointer(),MCSA_IndirectSymbol); - - if (MCSym.getInt()) - // External to current translation unit. - OutStreamer.EmitIntValue(0, 4/*size*/); - else - // Internal to current translation unit. - // - // When we place the LSDA into the TEXT section, the type info - // pointers need to be indirect and pc-rel. We accomplish this by - // using NLPs; however, sometimes the types are local to the file. - // We need to fill in the value for the NLP in those cases. - OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(), - OutContext), - 4/*size*/); - } + + for (auto &Stub : Stubs) + emitNonLazySymbolPointer(OutStreamer, Stub.first, Stub.second); Stubs.clear(); OutStreamer.AddBlankLine(); @@ -551,17 +529,11 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { Stubs = MMIMacho.GetHiddenGVStubList(); if (!Stubs.empty()) { - OutStreamer.SwitchSection(getObjFileLowering().getDataSection()); + OutStreamer.SwitchSection(TLOFMacho.getNonLazySymbolPointerSection()); EmitAlignment(2); - for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - // L_foo$stub: - OutStreamer.EmitLabel(Stubs[i].first); - // .long _foo - OutStreamer.EmitValue(MCSymbolRefExpr:: - Create(Stubs[i].second.getPointer(), - OutContext), - 4/*size*/); - } + + for (auto &Stub : Stubs) + emitNonLazySymbolPointer(OutStreamer, Stub.first, Stub.second); Stubs.clear(); OutStreamer.AddBlankLine(); @@ -574,6 +546,28 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { // generates code that does this, it is always safe to set. OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols); } + + // Emit a .data.rel section containing any stubs that were created. + if (Subtarget->isTargetELF()) { + const TargetLoweringObjectFileELF &TLOFELF = + static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering()); + + MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); + + // Output stubs for external and common global variables. 
+ MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); + if (!Stubs.empty()) { + OutStreamer.SwitchSection(TLOFELF.getDataRelSection()); + const DataLayout *TD = TM.getDataLayout(); + + for (auto &stub: Stubs) { + OutStreamer.EmitLabel(stub.first); + OutStreamer.EmitSymbolValue(stub.second.getPointer(), + TD->getPointerSize(0)); + } + Stubs.clear(); + } + } } //===----------------------------------------------------------------------===// @@ -611,28 +605,33 @@ static ARMBuildAttrs::CPUArch getArchForCPU(StringRef CPU, } void ARMAsmPrinter::emitAttributes() { - MCTargetStreamer &TS = OutStreamer.getTargetStreamer(); + MCTargetStreamer &TS = *OutStreamer.getTargetStreamer(); ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS); ATS.switchVendor("aeabi"); std::string CPUString = Subtarget->getCPUString(); - if (CPUString != "generic") + // FIXME: remove krait check when GNU tools support krait cpu + if (CPUString != "generic" && CPUString != "krait") ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, CPUString); ATS.emitAttribute(ARMBuildAttrs::CPU_arch, getArchForCPU(CPUString, Subtarget)); - if (Subtarget->isAClass()) { - ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile, - ARMBuildAttrs::ApplicationProfile); - } else if (Subtarget->isRClass()) { - ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile, - ARMBuildAttrs::RealTimeProfile); - } else if (Subtarget->isMClass()){ - ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile, - ARMBuildAttrs::MicroControllerProfile); + // Tag_CPU_arch_profile must have the default value of 0 when "Architecture + // profile is not applicable (e.g. pre v7, or cross-profile code)". + if (Subtarget->hasV7Ops()) { + if (Subtarget->isAClass()) { + ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile, + ARMBuildAttrs::ApplicationProfile); + } else if (Subtarget->isRClass()) { + ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile, + ARMBuildAttrs::RealTimeProfile); + } else if (Subtarget->isMClass()) { + ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile, + ARMBuildAttrs::MicroControllerProfile); + } } ATS.emitAttribute(ARMBuildAttrs::ARM_ISA_use, Subtarget->hasARMOps() ? @@ -673,6 +672,20 @@ void ARMAsmPrinter::emitAttributes() { ATS.emitFPU(ARM::VFPV2); } + if (TM.getRelocationModel() == Reloc::PIC_) { + // PIC specific attributes. + ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_RW_data, + ARMBuildAttrs::AddressRWPCRel); + ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_RO_data, + ARMBuildAttrs::AddressROPCRel); + ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_GOT_use, + ARMBuildAttrs::AddressGOT); + } else { + // Allow direct addressing of imported data for all other relocation models. + ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_GOT_use, + ARMBuildAttrs::AddressDirect); + } + // Signal various FP modes. if (!TM.Options.UnsafeFPMath) { ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal, ARMBuildAttrs::Allowed); @@ -687,10 +700,10 @@ void ARMAsmPrinter::emitAttributes() { ATS.emitAttribute(ARMBuildAttrs::ABI_FP_number_model, ARMBuildAttrs::AllowIEE754); - // FIXME: add more flags to ARMBuildAttrs.h + // FIXME: add more flags to ARMBuildAttributes.h // 8-bytes alignment stuff. - ATS.emitAttribute(ARMBuildAttrs::ABI_align8_needed, 1); - ATS.emitAttribute(ARMBuildAttrs::ABI_align8_preserved, 1); + ATS.emitAttribute(ARMBuildAttrs::ABI_align_needed, 1); + ATS.emitAttribute(ARMBuildAttrs::ABI_align_preserved, 1); // ABI_HardFP_use attribute to indicate single precision FP. 
if (Subtarget->isFPOnlySP()) @@ -709,11 +722,39 @@ void ARMAsmPrinter::emitAttributes() { if (Subtarget->hasMPExtension()) ATS.emitAttribute(ARMBuildAttrs::MPextension_use, ARMBuildAttrs::AllowMP); - if (Subtarget->hasDivide()) { - // Check if hardware divide is only available in thumb2 or ARM as well. - ATS.emitAttribute(ARMBuildAttrs::DIV_use, - Subtarget->hasDivideInARMMode() ? ARMBuildAttrs::AllowDIVExt : - ARMBuildAttrs::AllowDIVIfExists); + // Hardware divide in ARM mode is part of base arch, starting from ARMv8. + // If only Thumb hwdiv is present, it must also be in base arch (ARMv7-R/M). + // It is not possible to produce DisallowDIV: if hwdiv is present in the base + // arch, supplying -hwdiv downgrades the effective arch, via ClearImpliedBits. + // AllowDIVExt is only emitted if hwdiv isn't available in the base arch; + // otherwise, the default value (AllowDIVIfExists) applies. + if (Subtarget->hasDivideInARMMode() && !Subtarget->hasV8Ops()) + ATS.emitAttribute(ARMBuildAttrs::DIV_use, ARMBuildAttrs::AllowDIVExt); + + if (MMI) { + if (const Module *SourceModule = MMI->getModule()) { + // ABI_PCS_wchar_t to indicate wchar_t width + // FIXME: There is no way to emit value 0 (wchar_t prohibited). + if (auto WCharWidthValue = cast_or_null<ConstantInt>( + SourceModule->getModuleFlag("wchar_size"))) { + int WCharWidth = WCharWidthValue->getZExtValue(); + assert((WCharWidth == 2 || WCharWidth == 4) && + "wchar_t width must be 2 or 4 bytes"); + ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_wchar_t, WCharWidth); + } + + // ABI_enum_size to indicate enum width + // FIXME: There is no way to emit value 0 (enums prohibited) or value 3 + // (all enums contain a value needing 32 bits to encode). + if (auto EnumWidthValue = cast_or_null<ConstantInt>( + SourceModule->getModuleFlag("min_enum_size"))) { + int EnumWidth = EnumWidthValue->getZExtValue(); + assert((EnumWidth == 1 || EnumWidth == 4) && + "Minimum enum width must be 1 or 4 bytes"); + int EnumBuildAttr = EnumWidth == 1 ? 
1 : 2; + ATS.emitAttribute(ARMBuildAttrs::ABI_enum_size, EnumBuildAttr); + } + } } if (Subtarget->hasTrustZone() && Subtarget->hasVirtualization()) @@ -729,28 +770,6 @@ void ARMAsmPrinter::emitAttributes() { ATS.finishAttributeSection(); } -void ARMAsmPrinter::emitARMAttributeSection() { - // <format-version> - // [ <section-length> "vendor-name" - // [ <file-tag> <size> <attribute>* - // | <section-tag> <size> <section-number>* 0 <attribute>* - // | <symbol-tag> <size> <symbol-number>* 0 <attribute>* - // ]+ - // ]* - - if (OutStreamer.hasRawTextSupport()) - return; - - const ARMElfTargetObjectFile &TLOFELF = - static_cast<const ARMElfTargetObjectFile &> - (getObjFileLowering()); - - OutStreamer.SwitchSection(TLOFELF.getAttributesSection()); - - // Format version - OutStreamer.EmitIntValue(0x41, 1); -} - //===----------------------------------------------------------------------===// static MCSymbol *getPICLabel(const char *Prefix, unsigned FunctionNumber, @@ -765,36 +784,57 @@ static MCSymbolRefExpr::VariantKind getModifierVariantKind(ARMCP::ARMCPModifier Modifier) { switch (Modifier) { case ARMCP::no_modifier: return MCSymbolRefExpr::VK_None; - case ARMCP::TLSGD: return MCSymbolRefExpr::VK_ARM_TLSGD; - case ARMCP::TPOFF: return MCSymbolRefExpr::VK_ARM_TPOFF; - case ARMCP::GOTTPOFF: return MCSymbolRefExpr::VK_ARM_GOTTPOFF; - case ARMCP::GOT: return MCSymbolRefExpr::VK_ARM_GOT; - case ARMCP::GOTOFF: return MCSymbolRefExpr::VK_ARM_GOTOFF; + case ARMCP::TLSGD: return MCSymbolRefExpr::VK_TLSGD; + case ARMCP::TPOFF: return MCSymbolRefExpr::VK_TPOFF; + case ARMCP::GOTTPOFF: return MCSymbolRefExpr::VK_GOTTPOFF; + case ARMCP::GOT: return MCSymbolRefExpr::VK_GOT; + case ARMCP::GOTOFF: return MCSymbolRefExpr::VK_GOTOFF; } llvm_unreachable("Invalid ARMCPModifier!"); } -MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV) { - bool isIndirect = Subtarget->isTargetDarwin() && - Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel()); - if (!isIndirect) - return getSymbol(GV); +MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV, + unsigned char TargetFlags) { + if (Subtarget->isTargetMachO()) { + bool IsIndirect = (TargetFlags & ARMII::MO_NONLAZY) && + Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel()); + + if (!IsIndirect) + return getSymbol(GV); - // FIXME: Remove this when Darwin transition to @GOT like syntax. - MCSymbol *MCSym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); - MachineModuleInfoMachO &MMIMachO = - MMI->getObjFileInfo<MachineModuleInfoMachO>(); - MachineModuleInfoImpl::StubValueTy &StubSym = - GV->hasHiddenVisibility() ? MMIMachO.getHiddenGVStubEntry(MCSym) : - MMIMachO.getGVStubEntry(MCSym); - if (StubSym.getPointer() == 0) - StubSym = MachineModuleInfoImpl:: - StubValueTy(getSymbol(GV), !GV->hasInternalLinkage()); - return MCSym; + // FIXME: Remove this when Darwin transition to @GOT like syntax. + MCSymbol *MCSym = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); + MachineModuleInfoMachO &MMIMachO = + MMI->getObjFileInfo<MachineModuleInfoMachO>(); + MachineModuleInfoImpl::StubValueTy &StubSym = + GV->hasHiddenVisibility() ? 
MMIMachO.getHiddenGVStubEntry(MCSym) + : MMIMachO.getGVStubEntry(MCSym); + if (!StubSym.getPointer()) + StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(GV), + !GV->hasInternalLinkage()); + return MCSym; + } else if (Subtarget->isTargetCOFF()) { + assert(Subtarget->isTargetWindows() && + "Windows is the only supported COFF target"); + + bool IsIndirect = (TargetFlags & ARMII::MO_DLLIMPORT); + if (!IsIndirect) + return getSymbol(GV); + + SmallString<128> Name; + Name = "__imp_"; + getNameWithPrefix(Name, GV); + + return OutContext.GetOrCreateSymbol(Name); + } else if (Subtarget->isTargetELF()) { + return getSymbol(GV); + } + llvm_unreachable("unexpected target"); } void ARMAsmPrinter:: EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { + const DataLayout *DL = TM.getDataLayout(); int Size = TM.getDataLayout()->getTypeAllocSize(MCPV->getType()); ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV); @@ -803,7 +843,7 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { if (ACPV->isLSDA()) { SmallString<128> Str; raw_svector_ostream OS(Str); - OS << MAI->getPrivateGlobalPrefix() << "_LSDA_" << getFunctionNumber(); + OS << DL->getPrivateGlobalPrefix() << "_LSDA_" << getFunctionNumber(); MCSym = OutContext.GetOrCreateSymbol(OS.str()); } else if (ACPV->isBlockAddress()) { const BlockAddress *BA = @@ -811,7 +851,11 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { MCSym = GetBlockAddressSymbol(BA); } else if (ACPV->isGlobalValue()) { const GlobalValue *GV = cast<ARMConstantPoolConstant>(ACPV)->getGV(); - MCSym = GetARMGVSymbol(GV); + + // On Darwin, const-pool entries may get the "FOO$non_lazy_ptr" mangling, so + // flag the global as MO_NONLAZY. + unsigned char TF = Subtarget->isTargetMachO() ? ARMII::MO_NONLAZY : 0; + MCSym = GetARMGVSymbol(GV, TF); } else if (ACPV->isMachineBasicBlock()) { const MachineBasicBlock *MBB = cast<ARMConstantPoolMBB>(ACPV)->getMBB(); MCSym = MBB->getSymbol(); @@ -827,7 +871,7 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { OutContext); if (ACPV->getPCAdjustment()) { - MCSymbol *PCLabel = getPICLabel(MAI->getPrivateGlobalPrefix(), + MCSymbol *PCLabel = getPICLabel(DL->getPrivateGlobalPrefix(), getFunctionNumber(), ACPV->getLabelId(), OutContext); @@ -929,10 +973,10 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) { for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) { MachineBasicBlock *MBB = JTBBs[i]; const MCExpr *MBBSymbolExpr = MCSymbolRefExpr::Create(MBB->getSymbol(), - OutContext); + OutContext); // If this isn't a TBB or TBH, the entries are direct branch instructions. 
if (OffsetWidth == 4) { - OutStreamer.EmitInstruction(MCInstBuilder(ARM::t2B) + EmitToStreamer(OutStreamer, MCInstBuilder(ARM::t2B) .addExpr(MBBSymbolExpr) .addImm(ARMCC::AL) .addReg(0)); @@ -966,7 +1010,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { assert(MI->getFlag(MachineInstr::FrameSetup) && "Only instruction which are involved into frame setup code are allowed"); - MCTargetStreamer &TS = OutStreamer.getTargetStreamer(); + MCTargetStreamer &TS = *OutStreamer.getTargetStreamer(); ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS); const MachineFunction &MF = *MI->getParent()->getParent(); const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); @@ -1030,7 +1074,8 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { RegList.push_back(SrcReg); break; } - ATS.emitRegSave(RegList, Opc == ARM::VSTMDDB_UPD); + if (MAI->getExceptionHandlingType() == ExceptionHandling::ARM) + ATS.emitRegSave(RegList, Opc == ARM::VSTMDDB_UPD); } else { // Changes of stack / frame pointer. if (SrcReg == ARM::SP) { @@ -1075,20 +1120,22 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { } } - if (DstReg == FramePtr && FramePtr != ARM::SP) - // Set-up of the frame pointer. Positive values correspond to "add" - // instruction. - ATS.emitSetFP(FramePtr, ARM::SP, -Offset); - else if (DstReg == ARM::SP) { - // Change of SP by an offset. Positive values correspond to "sub" - // instruction. - ATS.emitPad(Offset); - } else { - MI->dump(); - llvm_unreachable("Unsupported opcode for unwinding information"); + if (MAI->getExceptionHandlingType() == ExceptionHandling::ARM) { + if (DstReg == FramePtr && FramePtr != ARM::SP) + // Set-up of the frame pointer. Positive values correspond to "add" + // instruction. + ATS.emitSetFP(FramePtr, ARM::SP, -Offset); + else if (DstReg == ARM::SP) { + // Change of SP by an offset. Positive values correspond to "sub" + // instruction. + ATS.emitPad(Offset); + } else { + // Move of SP to a register. Positive values correspond to an "add" + // instruction. + ATS.emitMovSP(DstReg, -Offset); + } } } else if (DstReg == ARM::SP) { - // FIXME: .movsp goes here MI->dump(); llvm_unreachable("Unsupported opcode for unwinding information"); } @@ -1099,13 +1146,13 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { } } -extern cl::opt<bool> EnableARMEHABI; - // Simple pseudo-instructions have their lowering (with expansion to real // instructions) auto-generated. #include "ARMGenMCPseudoLowering.inc" void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { + const DataLayout *DL = TM.getDataLayout(); + // If we just ended a constant pool, mark it as such. if (InConstantPool && MI->getOpcode() != ARM::CONSTPOOL_ENTRY) { OutStreamer.EmitDataRegion(MCDR_DataRegionEnd); @@ -1113,7 +1160,8 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } // Emit unwinding stuff for frame-related instructions - if (EnableARMEHABI && MI->getFlag(MachineInstr::FrameSetup)) + if (Subtarget->isTargetEHABICompatible() && + MI->getFlag(MachineInstr::FrameSetup)) EmitUnwindingInstruction(MI); // Do any auto-generated pseudo lowerings. 
@@ -1133,7 +1181,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { case ARM::t2LEApcrel: { // FIXME: Need to also handle globals and externals MCSymbol *CPISymbol = GetCPISymbol(MI->getOperand(1).getIndex()); - OutStreamer.EmitInstruction(MCInstBuilder(MI->getOpcode() == + EmitToStreamer(OutStreamer, MCInstBuilder(MI->getOpcode() == ARM::t2LEApcrel ? ARM::t2ADR : (MI->getOpcode() == ARM::tLEApcrel ? ARM::tADR : ARM::ADR)) @@ -1150,7 +1198,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCSymbol *JTIPICSymbol = GetARMJTIPICJumpTableLabel2(MI->getOperand(1).getIndex(), MI->getOperand(2).getImm()); - OutStreamer.EmitInstruction(MCInstBuilder(MI->getOpcode() == + EmitToStreamer(OutStreamer, MCInstBuilder(MI->getOpcode() == ARM::t2LEApcrelJT ? ARM::t2ADR : (MI->getOpcode() == ARM::tLEApcrelJT ? ARM::tADR : ARM::ADR)) @@ -1164,7 +1212,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Darwin call instructions are just normal call instructions with different // clobber semantics (they clobber R9). case ARM::BX_CALL: { - OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVr) + EmitToStreamer(OutStreamer, MCInstBuilder(ARM::MOVr) .addReg(ARM::LR) .addReg(ARM::PC) // Add predicate operands. @@ -1173,19 +1221,19 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Add 's' bit operand (always reg0 for this) .addReg(0)); - OutStreamer.EmitInstruction(MCInstBuilder(ARM::BX) + EmitToStreamer(OutStreamer, MCInstBuilder(ARM::BX) .addReg(MI->getOperand(0).getReg())); return; } case ARM::tBX_CALL: { - OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVr) + EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tMOVr) .addReg(ARM::LR) .addReg(ARM::PC) // Add predicate operands. .addImm(ARMCC::AL) .addReg(0)); - OutStreamer.EmitInstruction(MCInstBuilder(ARM::tBX) + EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tBX) .addReg(MI->getOperand(0).getReg()) // Add predicate operands. .addImm(ARMCC::AL) @@ -1193,7 +1241,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } case ARM::BMOVPCRX_CALL: { - OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVr) + EmitToStreamer(OutStreamer, MCInstBuilder(ARM::MOVr) .addReg(ARM::LR) .addReg(ARM::PC) // Add predicate operands. @@ -1202,7 +1250,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Add 's' bit operand (always reg0 for this) .addReg(0)); - OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVr) + EmitToStreamer(OutStreamer, MCInstBuilder(ARM::MOVr) .addReg(ARM::PC) .addReg(MI->getOperand(0).getReg()) // Add predicate operands. @@ -1213,7 +1261,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } case ARM::BMOVPCB_CALL: { - OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVr) + EmitToStreamer(OutStreamer, MCInstBuilder(ARM::MOVr) .addReg(ARM::LR) .addReg(ARM::PC) // Add predicate operands. @@ -1222,10 +1270,12 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Add 's' bit operand (always reg0 for this) .addReg(0)); - const GlobalValue *GV = MI->getOperand(0).getGlobal(); - MCSymbol *GVSym = getSymbol(GV); + const MachineOperand &Op = MI->getOperand(0); + const GlobalValue *GV = Op.getGlobal(); + const unsigned TF = Op.getTargetFlags(); + MCSymbol *GVSym = GetARMGVSymbol(GV, TF); const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext); - OutStreamer.EmitInstruction(MCInstBuilder(ARM::Bcc) + EmitToStreamer(OutStreamer, MCInstBuilder(ARM::Bcc) .addExpr(GVSymExpr) // Add predicate operands. 
.addImm(ARMCC::AL) @@ -1239,33 +1289,28 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); unsigned TF = MI->getOperand(1).getTargetFlags(); - bool isPIC = TF == ARMII::MO_LO16_NONLAZY_PIC; const GlobalValue *GV = MI->getOperand(1).getGlobal(); - MCSymbol *GVSym = GetARMGVSymbol(GV); + MCSymbol *GVSym = GetARMGVSymbol(GV, TF); const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext); - if (isPIC) { - MCSymbol *LabelSym = getPICLabel(MAI->getPrivateGlobalPrefix(), - getFunctionNumber(), - MI->getOperand(2).getImm(), OutContext); - const MCExpr *LabelSymExpr= MCSymbolRefExpr::Create(LabelSym, OutContext); - unsigned PCAdj = (Opc == ARM::MOVi16_ga_pcrel) ? 8 : 4; - const MCExpr *PCRelExpr = - ARMMCExpr::CreateLower16(MCBinaryExpr::CreateSub(GVSymExpr, - MCBinaryExpr::CreateAdd(LabelSymExpr, + + MCSymbol *LabelSym = getPICLabel(DL->getPrivateGlobalPrefix(), + getFunctionNumber(), + MI->getOperand(2).getImm(), OutContext); + const MCExpr *LabelSymExpr= MCSymbolRefExpr::Create(LabelSym, OutContext); + unsigned PCAdj = (Opc == ARM::MOVi16_ga_pcrel) ? 8 : 4; + const MCExpr *PCRelExpr = + ARMMCExpr::CreateLower16(MCBinaryExpr::CreateSub(GVSymExpr, + MCBinaryExpr::CreateAdd(LabelSymExpr, MCConstantExpr::Create(PCAdj, OutContext), - OutContext), OutContext), OutContext); + OutContext), OutContext), OutContext); TmpInst.addOperand(MCOperand::CreateExpr(PCRelExpr)); - } else { - const MCExpr *RefExpr= ARMMCExpr::CreateLower16(GVSymExpr, OutContext); - TmpInst.addOperand(MCOperand::CreateExpr(RefExpr)); - } // Add predicate operands. TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); TmpInst.addOperand(MCOperand::CreateReg(0)); // Add 's' bit operand (always reg0 for this) TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); + EmitToStreamer(OutStreamer, TmpInst); return; } case ARM::MOVTi16_ga_pcrel: @@ -1277,32 +1322,27 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg())); unsigned TF = MI->getOperand(2).getTargetFlags(); - bool isPIC = TF == ARMII::MO_HI16_NONLAZY_PIC; const GlobalValue *GV = MI->getOperand(2).getGlobal(); - MCSymbol *GVSym = GetARMGVSymbol(GV); + MCSymbol *GVSym = GetARMGVSymbol(GV, TF); const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext); - if (isPIC) { - MCSymbol *LabelSym = getPICLabel(MAI->getPrivateGlobalPrefix(), - getFunctionNumber(), - MI->getOperand(3).getImm(), OutContext); - const MCExpr *LabelSymExpr= MCSymbolRefExpr::Create(LabelSym, OutContext); - unsigned PCAdj = (Opc == ARM::MOVTi16_ga_pcrel) ? 8 : 4; - const MCExpr *PCRelExpr = + + MCSymbol *LabelSym = getPICLabel(DL->getPrivateGlobalPrefix(), + getFunctionNumber(), + MI->getOperand(3).getImm(), OutContext); + const MCExpr *LabelSymExpr= MCSymbolRefExpr::Create(LabelSym, OutContext); + unsigned PCAdj = (Opc == ARM::MOVTi16_ga_pcrel) ? 8 : 4; + const MCExpr *PCRelExpr = ARMMCExpr::CreateUpper16(MCBinaryExpr::CreateSub(GVSymExpr, MCBinaryExpr::CreateAdd(LabelSymExpr, MCConstantExpr::Create(PCAdj, OutContext), OutContext), OutContext), OutContext); TmpInst.addOperand(MCOperand::CreateExpr(PCRelExpr)); - } else { - const MCExpr *RefExpr= ARMMCExpr::CreateUpper16(GVSymExpr, OutContext); - TmpInst.addOperand(MCOperand::CreateExpr(RefExpr)); - } // Add predicate operands. 
TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); TmpInst.addOperand(MCOperand::CreateReg(0)); // Add 's' bit operand (always reg0 for this) TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); + EmitToStreamer(OutStreamer, TmpInst); return; } case ARM::tPICADD: { @@ -1312,12 +1352,12 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // This adds the address of LPC0 to r0. // Emit the label. - OutStreamer.EmitLabel(getPICLabel(MAI->getPrivateGlobalPrefix(), + OutStreamer.EmitLabel(getPICLabel(DL->getPrivateGlobalPrefix(), getFunctionNumber(), MI->getOperand(2).getImm(), OutContext)); // Form and emit the add. - OutStreamer.EmitInstruction(MCInstBuilder(ARM::tADDhirr) + EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tADDhirr) .addReg(MI->getOperand(0).getReg()) .addReg(MI->getOperand(0).getReg()) .addReg(ARM::PC) @@ -1333,12 +1373,12 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // This adds the address of LPC0 to r0. // Emit the label. - OutStreamer.EmitLabel(getPICLabel(MAI->getPrivateGlobalPrefix(), + OutStreamer.EmitLabel(getPICLabel(DL->getPrivateGlobalPrefix(), getFunctionNumber(), MI->getOperand(2).getImm(), OutContext)); // Form and emit the add. - OutStreamer.EmitInstruction(MCInstBuilder(ARM::ADDrr) + EmitToStreamer(OutStreamer, MCInstBuilder(ARM::ADDrr) .addReg(MI->getOperand(0).getReg()) .addReg(ARM::PC) .addReg(MI->getOperand(1).getReg()) @@ -1364,7 +1404,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // a PC-relative address at the ldr instruction. // Emit the label. - OutStreamer.EmitLabel(getPICLabel(MAI->getPrivateGlobalPrefix(), + OutStreamer.EmitLabel(getPICLabel(DL->getPrivateGlobalPrefix(), getFunctionNumber(), MI->getOperand(2).getImm(), OutContext)); @@ -1382,7 +1422,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { case ARM::PICLDRSB: Opcode = ARM::LDRSB; break; case ARM::PICLDRSH: Opcode = ARM::LDRSH; break; } - OutStreamer.EmitInstruction(MCInstBuilder(Opcode) + EmitToStreamer(OutStreamer, MCInstBuilder(Opcode) .addReg(MI->getOperand(0).getReg()) .addReg(ARM::PC) .addReg(MI->getOperand(1).getReg()) @@ -1419,7 +1459,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } case ARM::t2BR_JT: { // Lower and emit the instruction itself, then the jump table following it. - OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVr) + EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tMOVr) .addReg(ARM::PC) .addReg(MI->getOperand(0).getReg()) // Add predicate operands. @@ -1432,7 +1472,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } case ARM::t2TBB_JT: { // Lower and emit the instruction itself, then the jump table following it. - OutStreamer.EmitInstruction(MCInstBuilder(ARM::t2TBB) + EmitToStreamer(OutStreamer, MCInstBuilder(ARM::t2TBB) .addReg(ARM::PC) .addReg(MI->getOperand(0).getReg()) // Add predicate operands. @@ -1447,7 +1487,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } case ARM::t2TBH_JT: { // Lower and emit the instruction itself, then the jump table following it. - OutStreamer.EmitInstruction(MCInstBuilder(ARM::t2TBH) + EmitToStreamer(OutStreamer, MCInstBuilder(ARM::t2TBH) .addReg(ARM::PC) .addReg(MI->getOperand(0).getReg()) // Add predicate operands. 
@@ -1474,7 +1514,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Add 's' bit operand (always reg0 for this) if (Opc == ARM::MOVr) TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); + EmitToStreamer(OutStreamer, TmpInst); // Make sure the Thumb jump table is 4-byte aligned. if (Opc == ARM::tMOVr) @@ -1504,7 +1544,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Add predicate operands. TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); + EmitToStreamer(OutStreamer, TmpInst); // Output the data for the jump table itself EmitJumpTable(MI); @@ -1513,7 +1553,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { case ARM::BR_JTadd: { // Lower and emit the instruction itself, then the jump table following it. // add pc, target, idx - OutStreamer.EmitInstruction(MCInstBuilder(ARM::ADDrr) + EmitToStreamer(OutStreamer, MCInstBuilder(ARM::ADDrr) .addReg(ARM::PC) .addReg(MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()) @@ -1530,7 +1570,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { case ARM::TRAP: { // Non-Darwin binutils don't yet support the "trap" mnemonic. // FIXME: Remove this special case when they do. - if (!Subtarget->isTargetDarwin()) { + if (!Subtarget->isTargetMachO()) { //.long 0xe7ffdefe @ trap uint32_t Val = 0xe7ffdefeUL; OutStreamer.AddComment("trap"); @@ -1549,7 +1589,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { case ARM::tTRAP: { // Non-Darwin binutils don't yet support the "trap" mnemonic. // FIXME: Remove this special case when they do. - if (!Subtarget->isTargetDarwin()) { + if (!Subtarget->isTargetMachO()) { //.short 57086 @ trap uint16_t Val = 0xdefe; OutStreamer.AddComment("trap"); @@ -1573,14 +1613,14 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { unsigned ValReg = MI->getOperand(1).getReg(); MCSymbol *Label = GetARMSJLJEHLabel(); OutStreamer.AddComment("eh_setjmp begin"); - OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVr) + EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tMOVr) .addReg(ValReg) .addReg(ARM::PC) // Predicate. .addImm(ARMCC::AL) .addReg(0)); - OutStreamer.EmitInstruction(MCInstBuilder(ARM::tADDi3) + EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tADDi3) .addReg(ValReg) // 's' bit operand .addReg(ARM::CPSR) @@ -1590,7 +1630,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { .addImm(ARMCC::AL) .addReg(0)); - OutStreamer.EmitInstruction(MCInstBuilder(ARM::tSTRi) + EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tSTRi) .addReg(ValReg) .addReg(SrcReg) // The offset immediate is #4. 
The operand value is scaled by 4 for the @@ -1600,7 +1640,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { .addImm(ARMCC::AL) .addReg(0)); - OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVi8) + EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tMOVi8) .addReg(ARM::R0) .addReg(ARM::CPSR) .addImm(0) @@ -1609,13 +1649,13 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { .addReg(0)); const MCExpr *SymbolExpr = MCSymbolRefExpr::Create(Label, OutContext); - OutStreamer.EmitInstruction(MCInstBuilder(ARM::tB) + EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tB) .addExpr(SymbolExpr) .addImm(ARMCC::AL) .addReg(0)); OutStreamer.AddComment("eh_setjmp end"); - OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVi8) + EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tMOVi8) .addReg(ARM::R0) .addReg(ARM::CPSR) .addImm(1) @@ -1639,7 +1679,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { unsigned ValReg = MI->getOperand(1).getReg(); OutStreamer.AddComment("eh_setjmp begin"); - OutStreamer.EmitInstruction(MCInstBuilder(ARM::ADDri) + EmitToStreamer(OutStreamer, MCInstBuilder(ARM::ADDri) .addReg(ValReg) .addReg(ARM::PC) .addImm(8) @@ -1649,7 +1689,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // 's' bit operand (always reg0 for this). .addReg(0)); - OutStreamer.EmitInstruction(MCInstBuilder(ARM::STRi12) + EmitToStreamer(OutStreamer, MCInstBuilder(ARM::STRi12) .addReg(ValReg) .addReg(SrcReg) .addImm(4) @@ -1657,7 +1697,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { .addImm(ARMCC::AL) .addReg(0)); - OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVi) + EmitToStreamer(OutStreamer, MCInstBuilder(ARM::MOVi) .addReg(ARM::R0) .addImm(0) // Predicate. @@ -1666,7 +1706,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // 's' bit operand (always reg0 for this). .addReg(0)); - OutStreamer.EmitInstruction(MCInstBuilder(ARM::ADDri) + EmitToStreamer(OutStreamer, MCInstBuilder(ARM::ADDri) .addReg(ARM::PC) .addReg(ARM::PC) .addImm(0) @@ -1677,7 +1717,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { .addReg(0)); OutStreamer.AddComment("eh_setjmp end"); - OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVi) + EmitToStreamer(OutStreamer, MCInstBuilder(ARM::MOVi) .addReg(ARM::R0) .addImm(1) // Predicate. @@ -1694,7 +1734,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // bx $scratch unsigned SrcReg = MI->getOperand(0).getReg(); unsigned ScratchReg = MI->getOperand(1).getReg(); - OutStreamer.EmitInstruction(MCInstBuilder(ARM::LDRi12) + EmitToStreamer(OutStreamer, MCInstBuilder(ARM::LDRi12) .addReg(ARM::SP) .addReg(SrcReg) .addImm(8) @@ -1702,7 +1742,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { .addImm(ARMCC::AL) .addReg(0)); - OutStreamer.EmitInstruction(MCInstBuilder(ARM::LDRi12) + EmitToStreamer(OutStreamer, MCInstBuilder(ARM::LDRi12) .addReg(ScratchReg) .addReg(SrcReg) .addImm(4) @@ -1710,7 +1750,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { .addImm(ARMCC::AL) .addReg(0)); - OutStreamer.EmitInstruction(MCInstBuilder(ARM::LDRi12) + EmitToStreamer(OutStreamer, MCInstBuilder(ARM::LDRi12) .addReg(ARM::R7) .addReg(SrcReg) .addImm(0) @@ -1718,7 +1758,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { .addImm(ARMCC::AL) .addReg(0)); - OutStreamer.EmitInstruction(MCInstBuilder(ARM::BX) + EmitToStreamer(OutStreamer, MCInstBuilder(ARM::BX) .addReg(ScratchReg) // Predicate. 
.addImm(ARMCC::AL) @@ -1733,7 +1773,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // bx $scratch unsigned SrcReg = MI->getOperand(0).getReg(); unsigned ScratchReg = MI->getOperand(1).getReg(); - OutStreamer.EmitInstruction(MCInstBuilder(ARM::tLDRi) + EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tLDRi) .addReg(ScratchReg) .addReg(SrcReg) // The offset immediate is #8. The operand value is scaled by 4 for the @@ -1743,14 +1783,14 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { .addImm(ARMCC::AL) .addReg(0)); - OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVr) + EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tMOVr) .addReg(ARM::SP) .addReg(ScratchReg) // Predicate. .addImm(ARMCC::AL) .addReg(0)); - OutStreamer.EmitInstruction(MCInstBuilder(ARM::tLDRi) + EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tLDRi) .addReg(ScratchReg) .addReg(SrcReg) .addImm(1) @@ -1758,7 +1798,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { .addImm(ARMCC::AL) .addReg(0)); - OutStreamer.EmitInstruction(MCInstBuilder(ARM::tLDRi) + EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tLDRi) .addReg(ARM::R7) .addReg(SrcReg) .addImm(0) @@ -1766,7 +1806,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { .addImm(ARMCC::AL) .addReg(0)); - OutStreamer.EmitInstruction(MCInstBuilder(ARM::tBX) + EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tBX) .addReg(ScratchReg) // Predicate. .addImm(ARMCC::AL) @@ -1778,7 +1818,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCInst TmpInst; LowerARMMachineInstrToMCInst(MI, TmpInst, *this); - OutStreamer.EmitInstruction(TmpInst); + EmitToStreamer(OutStreamer, TmpInst); } //===----------------------------------------------------------------------===// @@ -1787,6 +1827,8 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Force static initialization. 
extern "C" void LLVMInitializeARMAsmPrinter() { - RegisterAsmPrinter<ARMAsmPrinter> X(TheARMTarget); - RegisterAsmPrinter<ARMAsmPrinter> Y(TheThumbTarget); + RegisterAsmPrinter<ARMAsmPrinter> X(TheARMLETarget); + RegisterAsmPrinter<ARMAsmPrinter> Y(TheARMBETarget); + RegisterAsmPrinter<ARMAsmPrinter> A(TheThumbLETarget); + RegisterAsmPrinter<ARMAsmPrinter> B(TheThumbBETarget); } diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h index de72e063e0d5..7c103c6763f2 100644 --- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h +++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h @@ -10,14 +10,16 @@ #ifndef ARMASMPRINTER_H #define ARMASMPRINTER_H -#include "ARM.h" -#include "ARMTargetMachine.h" +#include "ARMSubtarget.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/Support/Compiler.h" +#include "llvm/Target/TargetMachine.h" namespace llvm { +class ARMFunctionInfo; class MCOperand; +class MachineConstantPool; +class MachineOperand; namespace ARM { enum DW_ISA { @@ -45,37 +47,41 @@ class LLVM_LIBRARY_VISIBILITY ARMAsmPrinter : public AsmPrinter { bool InConstantPool; public: explicit ARMAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer), AFI(NULL), MCP(NULL), InConstantPool(false) { - Subtarget = &TM.getSubtarget<ARMSubtarget>(); - } + : AsmPrinter(TM, Streamer), AFI(nullptr), MCP(nullptr), + InConstantPool(false) { + Subtarget = &TM.getSubtarget<ARMSubtarget>(); + } - virtual const char *getPassName() const LLVM_OVERRIDE { + const char *getPassName() const override { return "ARM Assembly / Object Emitter"; } void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O, - const char *Modifier = 0); + const char *Modifier = nullptr); + + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O) override; + bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O) override; - virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O) LLVM_OVERRIDE; - virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O) LLVM_OVERRIDE; + void emitInlineAsmEnd(const MCSubtargetInfo &StartInfo, + const MCSubtargetInfo *EndInfo) const override; void EmitJumpTable(const MachineInstr *MI); void EmitJump2Table(const MachineInstr *MI); - virtual void EmitInstruction(const MachineInstr *MI) LLVM_OVERRIDE; - virtual bool runOnMachineFunction(MachineFunction &F) LLVM_OVERRIDE; + void EmitInstruction(const MachineInstr *MI) override; + bool runOnMachineFunction(MachineFunction &F) override; - virtual void EmitConstantPool() LLVM_OVERRIDE { + void EmitConstantPool() override { // we emit constant pools customly! } - virtual void EmitFunctionBodyEnd() LLVM_OVERRIDE; - virtual void EmitFunctionEntryLabel() LLVM_OVERRIDE; - virtual void EmitStartOfAsmFile(Module &M) LLVM_OVERRIDE; - virtual void EmitEndOfAsmFile(Module &M) LLVM_OVERRIDE; - virtual void EmitXXStructor(const Constant *CV) LLVM_OVERRIDE; + void EmitFunctionBodyEnd() override; + void EmitFunctionEntryLabel() override; + void EmitStartOfAsmFile(Module &M) override; + void EmitEndOfAsmFile(Module &M) override; + void EmitXXStructor(const Constant *CV) override; // lowerOperand - Convert a MachineOperand into the equivalent MCOperand. 
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp); @@ -84,9 +90,6 @@ private: // Helpers for EmitStartOfAsmFile() and EmitEndOfAsmFile() void emitAttributes(); - // Helper for ELF .o only - void emitARMAttributeSection(); - // Generic helper used to emit e.g. ARMv5 mul pseudos void EmitPatchedInstruction(const MachineInstr *MI, unsigned TargetOpc); @@ -97,13 +100,9 @@ private: const MachineInstr *MI); public: - /// EmitDwarfRegOp - Emit dwarf register operation. - virtual void EmitDwarfRegOp(const MachineLocation &MLoc, bool Indirect) const - LLVM_OVERRIDE; - - virtual unsigned getISAEncoding() LLVM_OVERRIDE { + unsigned getISAEncoding() override { // ARM/Darwin adds ISA to the DWARF info for each function. - if (!Subtarget->isTargetDarwin()) + if (!Subtarget->isTargetMachO()) return 0; return Subtarget->isThumb() ? ARM::DW_ISA_ARM_thumb : ARM::DW_ISA_ARM_arm; @@ -115,13 +114,12 @@ private: MCSymbol *GetARMSJLJEHLabel() const; - MCSymbol *GetARMGVSymbol(const GlobalValue *GV); + MCSymbol *GetARMGVSymbol(const GlobalValue *GV, unsigned char TargetFlags); public: /// EmitMachineConstantPoolValue - Print a machine constantpool value to /// the .s file. - virtual void - EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) LLVM_OVERRIDE; + void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) override; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 658af8380d8d..0288db91dcbf 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -32,16 +32,19 @@ #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +using namespace llvm; + +#define DEBUG_TYPE "arm-instrinfo" + #define GET_INSTRINFO_CTOR_DTOR #include "ARMGenInstrInfo.inc" -using namespace llvm; - static cl::opt<bool> EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden, cl::desc("Enable ARM 2-addr to 3-addr conv")); @@ -100,14 +103,15 @@ ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI) // Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl // currently defaults to no prepass hazard recognizer. -ScheduleHazardRecognizer *ARMBaseInstrInfo:: -CreateTargetHazardRecognizer(const TargetMachine *TM, - const ScheduleDAG *DAG) const { +ScheduleHazardRecognizer * +ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, + const ScheduleDAG *DAG) const { if (usePreRAHazardRecognizer()) { - const InstrItineraryData *II = TM->getInstrItineraryData(); + const InstrItineraryData *II = + &static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData(); return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched"); } - return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG); + return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG); } ScheduleHazardRecognizer *ARMBaseInstrInfo:: @@ -125,14 +129,14 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, // FIXME: Thumb2 support. 
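One mechanical change worth calling out in ARMBaseInstrInfo.cpp here (and in A15SDOptimizer.cpp earlier, ARMBaseRegisterInfo.cpp and ARMCodeEmitter.cpp below): the DEBUG_TYPE definition moves from before the includes to after them. In the 3.5 tree headers may reference DEBUG_TYPE themselves, so a translation unit should not define its tag until all headers are in. The resulting file layout, sketched for a hypothetical pass:

#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Defined only after every #include, per the post-3.5 convention, so no
// header accidentally compiles against this file's debug tag.
#define DEBUG_TYPE "my-pass"

static void note(unsigned N) {
  DEBUG(dbgs() << DEBUG_TYPE << ": visited " << N << " instructions\n");
}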
if (!EnableARM3Addr) - return NULL; + return nullptr; MachineInstr *MI = MBBI; MachineFunction &MF = *MI->getParent()->getParent(); uint64_t TSFlags = MI->getDesc().TSFlags; bool isPre = false; switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) { - default: return NULL; + default: return nullptr; case ARMII::IndexModePre: isPre = true; break; @@ -144,10 +148,10 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, // operation. unsigned MemOpc = getUnindexedOpcode(MI->getOpcode()); if (MemOpc == 0) - return NULL; + return nullptr; - MachineInstr *UpdateMI = NULL; - MachineInstr *MemMI = NULL; + MachineInstr *UpdateMI = nullptr; + MachineInstr *MemMI = nullptr; unsigned AddrMode = (TSFlags & ARMII::AddrModeMask); const MCInstrDesc &MCID = MI->getDesc(); unsigned NumOps = MCID.getNumOperands(); @@ -169,7 +173,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, if (ARM_AM::getSOImmVal(Amt) == -1) // Can't encode it in a so_imm operand. This transformation will // add more than 1 instruction. Abandon! - return NULL; + return nullptr; UpdateMI = BuildMI(MF, MI->getDebugLoc(), get(isSub ? ARM::SUBri : ARM::ADDri), WBReg) .addReg(BaseReg).addImm(Amt) @@ -273,8 +277,8 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const { - TBB = 0; - FBB = 0; + TBB = nullptr; + FBB = nullptr; MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin()) @@ -283,7 +287,7 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, // Walk backwards from the end of the basic block until the branch is // analyzed or we give up. - while (isPredicated(I) || I->isTerminator()) { + while (isPredicated(I) || I->isTerminator() || I->isDebugValue()) { // Flag to be raised on unanalyzeable instructions. This is useful in cases // where we want to clean up on the end of the basic block before we bail @@ -331,12 +335,12 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, I->isReturn())) { // Forget any previous condition branch information - it no longer applies. Cond.clear(); - FBB = 0; + FBB = nullptr; // If we can modify the function, delete everything below this // unconditional branch. if (AllowModify) { - MachineBasicBlock::iterator DI = llvm::next(I); + MachineBasicBlock::iterator DI = std::next(I); while (DI != MBB.end()) { MachineInstr *InstToDelete = DI; ++DI; @@ -405,7 +409,7 @@ ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, assert((Cond.size() == 2 || Cond.size() == 0) && "ARM branch conditions have two components!"); - if (FBB == 0) { + if (!FBB) { if (Cond.empty()) { // Unconditional branch? if (isThumb) BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).addImm(ARMCC::AL).addReg(0); @@ -535,6 +539,22 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const { return true; } +namespace llvm { +template <> bool IsCPSRDead<MachineInstr>(MachineInstr *MI) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || MO.isUndef() || MO.isUse()) + continue; + if (MO.getReg() != ARM::CPSR) + continue; + if (!MO.isDead()) + return false; + } + // all definitions of CPSR are dead + return true; +} +} + /// FIXME: Works around a gcc miscompilation with -fstrict-aliasing. 
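The IsCPSRDead<MachineInstr> specialization added above backs the v8 IT-block legality check (isV8EligibleForIT in ARMBaseInfo.h is templated over MachineInstr and MCInst, hence an explicit specialization rather than a plain helper). The scan itself is a generic "no live defs of this physreg" test; a generalized sketch with the register as a parameter, an illustration rather than code from the patch:

#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;

// True when MI has no live (non-dead) definition of PhysReg; undef and use
// operands are skipped, exactly as in the CPSR-specific version above.
static bool allDefsOfRegAreDead(const MachineInstr *MI, unsigned PhysReg) {
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || MO.isUndef() || MO.isUse() || MO.getReg() != PhysReg)
      continue;
    if (!MO.isDead())
      return false;
  }
  return true;
}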
LLVM_ATTRIBUTE_NOINLINE static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT, @@ -559,15 +579,10 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { // If this machine instr is an inline asm, measure it. if (MI->getOpcode() == ARM::INLINEASM) return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI); - if (MI->isLabel()) - return 0; unsigned Opc = MI->getOpcode(); switch (Opc) { - case TargetOpcode::IMPLICIT_DEF: - case TargetOpcode::KILL: - case TargetOpcode::PROLOG_LABEL: - case TargetOpcode::EH_LABEL: - case TargetOpcode::DBG_VALUE: + default: + // pseudo-instruction sizes are zero. return 0; case TargetOpcode::BUNDLE: return getInstBundleLength(MI); @@ -611,7 +626,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { MI->getOperand(NumOps - (MI->isPredicable() ? 3 : 2)); unsigned JTI = JTOP.getIndex(); const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); - assert(MJTI != 0); + assert(MJTI != nullptr); const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); assert(JTI < JT.size()); // Thumb instructions are 2 byte aligned, but JT entries are 4 byte @@ -630,9 +645,6 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { ++NumEntries; return NumEntries * EntrySize + InstSize; } - default: - // Otherwise, pseudo-instruction sizes are zero. - return 0; } } @@ -1242,7 +1254,8 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) { static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal); unsigned PCLabelId = AFI->createPICLabelUId(); - ARMConstantPoolValue *NewCPV = 0; + ARMConstantPoolValue *NewCPV = nullptr; + // FIXME: The below assumes PIC relocation model and that the function // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR @@ -1325,10 +1338,11 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0, Opcode == ARM::t2LDRpci_pic || Opcode == ARM::tLDRpci || Opcode == ARM::tLDRpci_pic || - Opcode == ARM::MOV_ga_dyn || + Opcode == ARM::LDRLIT_ga_pcrel || + Opcode == ARM::LDRLIT_ga_pcrel_ldr || + Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr || - Opcode == ARM::t2MOV_ga_dyn || Opcode == ARM::t2MOV_ga_pcrel) { if (MI1->getOpcode() != Opcode) return false; @@ -1340,10 +1354,11 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0, if (MO0.getOffset() != MO1.getOffset()) return false; - if (Opcode == ARM::MOV_ga_dyn || + if (Opcode == ARM::LDRLIT_ga_pcrel || + Opcode == ARM::LDRLIT_ga_pcrel_ldr || + Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr || - Opcode == ARM::t2MOV_ga_dyn || Opcode == ARM::t2MOV_ga_pcrel) // Ignore the PC labels. return MO0.getGlobal() == MO1.getGlobal(); @@ -1534,7 +1549,7 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI, return false; // Terminators and labels can't be scheduled around. - if (MI->isTerminator() || MI->isLabel()) + if (MI->isTerminator() || MI->isPosition()) return true; // Treat the start of the IT block as a scheduling boundary, but schedule @@ -1650,10 +1665,10 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg); // MOVCC AL can't be inverted. Shouldn't happen. 
if (CC == ARMCC::AL || PredReg != ARM::CPSR) - return NULL; + return nullptr; MI = TargetInstrInfo::commuteInstruction(MI, NewMI); if (!MI) - return NULL; + return nullptr; // After swapping the MOVCC operands, also invert the condition. MI->getOperand(MI->findFirstPredOperandIdx()) .setImm(ARMCC::getOppositeCondition(CC)); @@ -1669,35 +1684,36 @@ static MachineInstr *canFoldIntoMOVCC(unsigned Reg, const MachineRegisterInfo &MRI, const TargetInstrInfo *TII) { if (!TargetRegisterInfo::isVirtualRegister(Reg)) - return 0; + return nullptr; if (!MRI.hasOneNonDBGUse(Reg)) - return 0; + return nullptr; MachineInstr *MI = MRI.getVRegDef(Reg); if (!MI) - return 0; + return nullptr; // MI is folded into the MOVCC by predicating it. if (!MI->isPredicable()) - return 0; + return nullptr; // Check if MI has any non-dead defs or physreg uses. This also detects // predicated instructions which will be reading CPSR. for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); // Reject frame index operands, PEI can't handle the predicated pseudos. if (MO.isFI() || MO.isCPI() || MO.isJTI()) - return 0; + return nullptr; if (!MO.isReg()) continue; // MI can't have any tied operands, that would conflict with predication. if (MO.isTied()) - return 0; + return nullptr; if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) - return 0; + return nullptr; if (MO.isDef() && !MO.isDead()) - return 0; + return nullptr; } bool DontMoveAcrossStores = true; - if (!MI->isSafeToMove(TII, /* AliasAnalysis = */ 0, DontMoveAcrossStores)) - return 0; + if (!MI->isSafeToMove(TII, /* AliasAnalysis = */ nullptr, + DontMoveAcrossStores)) + return nullptr; return MI; } @@ -1732,14 +1748,14 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, if (!DefMI) DefMI = canFoldIntoMOVCC(MI->getOperand(1).getReg(), MRI, this); if (!DefMI) - return 0; + return nullptr; // Find new register class to use. MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1); unsigned DestReg = MI->getOperand(0).getReg(); const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg()); if (!MRI.constrainRegClass(DestReg, PreviousClass)) - return 0; + return nullptr; // Create a new predicated version of DefMI. // Rfalse is the first use. @@ -1857,12 +1873,22 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, } } -bool llvm::tryFoldSPUpdateIntoPushPop(MachineFunction &MF, - MachineInstr *MI, +static bool isAnySubRegLive(unsigned Reg, const TargetRegisterInfo *TRI, + MachineInstr *MI) { + for (MCSubRegIterator Subreg(Reg, TRI, /* IncludeSelf */ true); + Subreg.isValid(); ++Subreg) + if (MI->getParent()->computeRegisterLiveness(TRI, *Subreg, MI) != + MachineBasicBlock::LQR_Dead) + return true; + return false; +} +bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, + MachineFunction &MF, MachineInstr *MI, unsigned NumBytes) { // This optimisation potentially adds lots of load and store // micro-operations, it's only really a great benefit to code-size. 
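The isAnySubRegLive helper defined above exists because, per the TODO in the next hunk, computeRegisterLiveness() did not at the time report a register live when only one of its sub-registers was. That matters for VFP pops: folding an SP update into a VPOP of D8 must be rejected if S16 or S17, the two halves of D8, is still live. The query spelled out for that one register (a sketch; the register choice is illustrative):

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
// Plus the ARM register enum (ARM::D8) from the target's generated headers.
using namespace llvm;

// Walk D8 itself and its sub-registers (S16 and S17 on ARM) and ask the
// liveness query about each lane individually.
static bool d8OrLaneLive(MachineBasicBlock *MBB, const TargetRegisterInfo *TRI,
                         MachineBasicBlock::iterator MI) {
  for (MCSubRegIterator Subreg(ARM::D8, TRI, /*IncludeSelf=*/true);
       Subreg.isValid(); ++Subreg)
    if (MBB->computeRegisterLiveness(TRI, *Subreg, MI) !=
        MachineBasicBlock::LQR_Dead)
      return true; // some lane is, or may be, live: unsafe to clobber
  return false;
}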
- if (!MF.getFunction()->hasFnAttribute(Attribute::MinSize)) + if (!MF.getFunction()->getAttributes().hasAttribute( + AttributeSet::FunctionIndex, Attribute::MinSize)) return false; // If only one register is pushed/popped, LLVM can use an LDR/STR @@ -1911,7 +1937,6 @@ bool llvm::tryFoldSPUpdateIntoPushPop(MachineFunction &MF, for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) RegList.push_back(MI->getOperand(i)); - MachineBasicBlock *MBB = MI->getParent(); const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo(); const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF); @@ -1932,9 +1957,11 @@ bool llvm::tryFoldSPUpdateIntoPushPop(MachineFunction &MF, // registers live within the function we might clobber a return value // register; the other way a register can be live here is if it's // callee-saved. + // TODO: Currently, computeRegisterLiveness() does not report "live" if a + // sub reg is live. When computeRegisterLiveness() works for sub reg, it + // can replace isAnySubRegLive(). if (isCalleeSavedRegister(CurReg, CSRegs) || - MBB->computeRegisterLiveness(TRI, CurReg, MI) != - MachineBasicBlock::LQR_Dead) { + isAnySubRegLive(CurReg, TRI, MI)) { // VFP pops don't allow holes in the register list, so any skip is fatal // for our transformation. GPR pops do, so we should just keep looking. if (IsVFPPushPop) @@ -2159,7 +2186,7 @@ static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg, // Walk down one instruction which is potentially an 'and'. const MachineInstr &Copy = *MI; MachineBasicBlock::iterator AND( - llvm::next(MachineBasicBlock::iterator(MI))); + std::next(MachineBasicBlock::iterator(MI))); if (AND == MI->getParent()->end()) return false; MI = AND; return isSuitableForMask(MI, Copy.getOperand(0).getReg(), @@ -2235,9 +2262,10 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // Masked compares sometimes use the same register as the corresponding 'and'. if (CmpMask != ~0) { if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(MI)) { - MI = 0; - for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg), - UE = MRI->use_end(); UI != UE; ++UI) { + MI = nullptr; + for (MachineRegisterInfo::use_instr_iterator + UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end(); + UI != UE; ++UI) { if (UI->getParent() != CmpInstr->getParent()) continue; MachineInstr *PotentialAND = &*UI; if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) || @@ -2261,17 +2289,17 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // One is MI, the other is a SUB instruction. // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1). // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue). - MachineInstr *Sub = NULL; + MachineInstr *Sub = nullptr; if (SrcReg2 != 0) // MI is not a candidate for CMPrr. - MI = NULL; + MI = nullptr; else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) { // Conservatively refuse to convert an instruction which isn't in the same // BB as the comparison. // For CMPri, we need to check Sub, thus we can't return here. if (CmpInstr->getOpcode() == ARM::CMPri || CmpInstr->getOpcode() == ARM::t2CMPri) - MI = NULL; + MI = nullptr; else return false; } @@ -2947,7 +2975,7 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, break; } return UOps; - } else if (Subtarget.isCortexA8()) { + } else if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { if (NumRegs < 4) return 2; // 4 registers would be issued: 2, 2. 
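At the top of this hunk the MinSize test changes spelling, from Function::hasFnAttribute to an explicit AttributeSet query naming the function index; both forms ask the same question in the 3.5 API. Wrapped as a shareable helper (the wrapper and its name are mine, not the patch's):

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
using namespace llvm;

// True when the IR function carries 'minsize' (-Oz style pressure). The
// push/pop fold above is purely a code-size win, so it keys off this.
static bool hasMinSize(const MachineFunction &MF) {
  return MF.getFunction()->getAttributes().hasAttribute(
      AttributeSet::FunctionIndex, Attribute::MinSize);
}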
@@ -2984,7 +3012,7 @@ ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData, return ItinData->getOperandCycle(DefClass, DefIdx); int DefCycle; - if (Subtarget.isCortexA8()) { + if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { // (regno / 2) + (regno % 2) + 1 DefCycle = RegNo / 2 + 1; if (RegNo % 2) @@ -3025,7 +3053,7 @@ ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData, return ItinData->getOperandCycle(DefClass, DefIdx); int DefCycle; - if (Subtarget.isCortexA8()) { + if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { // 4 registers would be issued: 1, 2, 1. // 5 registers would be issued: 1, 2, 2. DefCycle = RegNo / 2; @@ -3059,7 +3087,7 @@ ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData, return ItinData->getOperandCycle(UseClass, UseIdx); int UseCycle; - if (Subtarget.isCortexA8()) { + if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { // (regno / 2) + (regno % 2) + 1 UseCycle = RegNo / 2 + 1; if (RegNo % 2) @@ -3099,7 +3127,7 @@ ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData, return ItinData->getOperandCycle(UseClass, UseIdx); int UseCycle; - if (Subtarget.isCortexA8()) { + if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { UseCycle = RegNo / 2; if (UseCycle < 2) UseCycle = 2; @@ -3236,8 +3264,7 @@ static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI, Dist = 0; MachineBasicBlock::const_iterator I = MI; ++I; - MachineBasicBlock::const_instr_iterator II = - llvm::prior(I.getInstrIterator()); + MachineBasicBlock::const_instr_iterator II = std::prev(I.getInstrIterator()); assert(II->isInsideBundle() && "Empty bundle?"); int Idx = -1; @@ -3276,7 +3303,7 @@ static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI, if (Idx == -1) { Dist = 0; - return 0; + return nullptr; } UseIdx = Idx; @@ -3290,7 +3317,7 @@ static int adjustDefLatency(const ARMSubtarget &Subtarget, const MachineInstr *DefMI, const MCInstrDesc *DefMCID, unsigned DefAlign) { int Adjust = 0; - if (Subtarget.isCortexA8() || Subtarget.isLikeA9()) { + if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) { // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] // variants are one cycle cheaper. switch (DefMCID->getOpcode()) { @@ -3591,7 +3618,8 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, UseMCID, UseIdx, UseAlign); if (Latency > 1 && - (Subtarget.isCortexA8() || Subtarget.isLikeA9())) { + (Subtarget.isCortexA8() || Subtarget.isLikeA9() || + Subtarget.isCortexA7())) { // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] // variants are one cycle cheaper. 
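Every Cortex-A8 special case in the load/store-multiple latency model now covers Cortex-A7 as well. The "(regno / 2) + (regno % 2) + 1" comment is easy to misread, so here is the VLDM def-cycle computation from the hunk above as a pure function, with two worked values:

// Cortex-A8/A7 VLDM def-cycle model: register pairs complete together and
// odd positions land one cycle later. E.g. RegNo 4 -> 3 cycles, RegNo 5 -> 4.
static int vldmDefCycleA8A7(unsigned RegNo) {
  int DefCycle = RegNo / 2 + 1;
  if (RegNo % 2)
    ++DefCycle;
  return DefCycle;
}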
switch (DefMCID.getOpcode()) { @@ -4333,6 +4361,29 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, MI->addRegisterKilled(DReg, TRI, true); } +void ARMBaseInstrInfo::getUnconditionalBranch( + MCInst &Branch, const MCSymbolRefExpr *BranchTarget) const { + if (Subtarget.isThumb()) + Branch.setOpcode(ARM::tB); + else if (Subtarget.isThumb2()) + Branch.setOpcode(ARM::t2B); + else + Branch.setOpcode(ARM::Bcc); + + Branch.addOperand(MCOperand::CreateExpr(BranchTarget)); + Branch.addOperand(MCOperand::CreateImm(ARMCC::AL)); + Branch.addOperand(MCOperand::CreateReg(0)); +} + +void ARMBaseInstrInfo::getTrap(MCInst &MI) const { + if (Subtarget.isThumb()) + MI.setOpcode(ARM::tTRAP); + else if (Subtarget.useNaClTrap()) + MI.setOpcode(ARM::TRAPNaCl); + else + MI.setOpcode(ARM::TRAP); +} + bool ARMBaseInstrInfo::hasNOP() const { return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0; } diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index 93e59647d220..b8d675806b25 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -14,7 +14,7 @@ #ifndef ARMBASEINSTRUCTIONINFO_H #define ARMBASEINSTRUCTIONINFO_H -#include "ARM.h" +#include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -42,37 +42,37 @@ public: // if there is not such an opcode. virtual unsigned getUnindexedOpcode(unsigned Opc) const =0; - virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI, - MachineBasicBlock::iterator &MBBI, - LiveVariables *LV) const; + MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI, + MachineBasicBlock::iterator &MBBI, + LiveVariables *LV) const override; virtual const ARMBaseRegisterInfo &getRegisterInfo() const = 0; const ARMSubtarget &getSubtarget() const { return Subtarget; } ScheduleHazardRecognizer * - CreateTargetHazardRecognizer(const TargetMachine *TM, - const ScheduleDAG *DAG) const; + CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, + const ScheduleDAG *DAG) const override; ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, - const ScheduleDAG *DAG) const; + const ScheduleDAG *DAG) const override; // Branch analysis. - virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl<MachineOperand> &Cond, - bool AllowModify = false) const; - virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; - virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond, - DebugLoc DL) const; - - virtual - bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; + bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify = false) const override; + unsigned RemoveBranch(MachineBasicBlock &MBB) const override; + unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const override; + + bool + ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override; // Predication support. 
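getUnconditionalBranch and getTrap, defined at the top of this hunk, are new TargetInstrInfo hooks in 3.5, added so generic jump-table/CFI-style code can materialize a branch or trap MCInst without knowing whether it targets ARM, Thumb, or NaCl. A hedged usage sketch; the streamer plumbing around it is assumed, not shown anywhere in this patch:

#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;

// Ask the target for "branch unconditionally to Target" or "trap" in
// whatever encoding the current subtarget uses, then emit it.
static void emitBranchOrTrap(const TargetInstrInfo &TII, MCStreamer &OS,
                             const MCSubtargetInfo &STI,
                             const MCSymbolRefExpr *Target, bool Trap) {
  MCInst Inst;
  if (Trap)
    TII.getTrap(Inst);
  else
    TII.getUnconditionalBranch(Inst, Target);
  OS.EmitInstruction(Inst, STI);
}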
- bool isPredicated(const MachineInstr *MI) const; + bool isPredicated(const MachineInstr *MI) const override; ARMCC::CondCodes getPredicate(const MachineInstr *MI) const { int PIdx = MI->findFirstPredOperandIdx(); @@ -80,76 +80,73 @@ public: : ARMCC::AL; } - virtual bool PredicateInstruction(MachineInstr *MI, - const SmallVectorImpl<MachineOperand> &Pred) const; + const SmallVectorImpl<MachineOperand> &Pred) const override; - virtual bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, - const SmallVectorImpl<MachineOperand> &Pred2) const; + const SmallVectorImpl<MachineOperand> &Pred2) const override; - virtual bool DefinesPredicate(MachineInstr *MI, - std::vector<MachineOperand> &Pred) const; + bool DefinesPredicate(MachineInstr *MI, + std::vector<MachineOperand> &Pred) const override; - virtual bool isPredicable(MachineInstr *MI) const; + bool isPredicable(MachineInstr *MI) const override; /// GetInstSize - Returns the size of the specified MachineInstr. /// virtual unsigned GetInstSizeInBytes(const MachineInstr* MI) const; - virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const; - virtual unsigned isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const; - virtual unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI, - int &FrameIndex) const; - virtual unsigned isStoreToStackSlotPostFE(const MachineInstr *MI, - int &FrameIndex) const; + unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const override; + unsigned isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const override; + unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI, + int &FrameIndex) const override; + unsigned isStoreToStackSlotPostFE(const MachineInstr *MI, + int &FrameIndex) const override; - virtual void copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const; + void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + DebugLoc DL, unsigned DestReg, unsigned SrcReg, + bool KillSrc) const override; - virtual void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; + void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const override; - virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; + void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const override; - virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; + bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override; - virtual void reMaterialize(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, unsigned SubIdx, - const MachineInstr *Orig, - const TargetRegisterInfo &TRI) const; + void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + unsigned DestReg, unsigned SubIdx, + const MachineInstr *Orig, + const TargetRegisterInfo &TRI) const override; - MachineInstr *duplicate(MachineInstr *Orig, MachineFunction &MF) 
const; + MachineInstr *duplicate(MachineInstr *Orig, + MachineFunction &MF) const override; - MachineInstr *commuteInstruction(MachineInstr*, bool=false) const; + MachineInstr *commuteInstruction(MachineInstr*, + bool=false) const override; const MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB, unsigned Reg, unsigned SubIdx, unsigned State, const TargetRegisterInfo *TRI) const; - virtual bool produceSameValue(const MachineInstr *MI0, - const MachineInstr *MI1, - const MachineRegisterInfo *MRI) const; + bool produceSameValue(const MachineInstr *MI0, const MachineInstr *MI1, + const MachineRegisterInfo *MRI) const override; /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to /// determine if two loads are loading from the same base address. It should /// only return true if the base pointers are the same and the only /// differences between the two addresses is the offset. It also returns the /// offsets by reference. - virtual bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, - int64_t &Offset1, int64_t &Offset2)const; + bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, int64_t &Offset1, + int64_t &Offset2) const override; /// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads @@ -159,83 +156,86 @@ public: /// from the common base address. It returns true if it decides it's desirable /// to schedule the two loads together. "NumLoads" is the number of loads that /// have already been scheduled after Load1. - virtual bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, - int64_t Offset1, int64_t Offset2, - unsigned NumLoads) const; - - virtual bool isSchedulingBoundary(const MachineInstr *MI, - const MachineBasicBlock *MBB, - const MachineFunction &MF) const; - - virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, - unsigned NumCycles, unsigned ExtraPredCycles, - const BranchProbability &Probability) const; - - virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB, - unsigned NumT, unsigned ExtraT, - MachineBasicBlock &FMBB, - unsigned NumF, unsigned ExtraF, - const BranchProbability &Probability) const; - - virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, - unsigned NumCycles, - const BranchProbability - &Probability) const { + bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, + int64_t Offset1, int64_t Offset2, + unsigned NumLoads) const override; + + bool isSchedulingBoundary(const MachineInstr *MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const override; + + bool isProfitableToIfCvt(MachineBasicBlock &MBB, + unsigned NumCycles, unsigned ExtraPredCycles, + const BranchProbability &Probability) const override; + + bool isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumT, + unsigned ExtraT, MachineBasicBlock &FMBB, + unsigned NumF, unsigned ExtraF, + const BranchProbability &Probability) const override; + + bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, + const BranchProbability &Probability) const override { return NumCycles == 1; } - virtual bool isProfitableToUnpredicate(MachineBasicBlock &TMBB, - MachineBasicBlock &FMBB) const; + bool isProfitableToUnpredicate(MachineBasicBlock &TMBB, + MachineBasicBlock &FMBB) const override; /// analyzeCompare - For a comparison instruction, return the source registers /// in SrcReg and SrcReg2 if having two register operands, and the value it /// compares against in CmpValue. 
Return true if the comparison instruction /// can be analyzed. - virtual bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, - unsigned &SrcReg2, int &CmpMask, - int &CmpValue) const; + bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, + unsigned &SrcReg2, int &CmpMask, + int &CmpValue) const override; /// optimizeCompareInstr - Convert the instruction to set the zero flag so /// that we can remove a "comparison with zero"; Remove a redundant CMP /// instruction if the flags can be updated in the same way by an earlier /// instruction such as SUB. - virtual bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, - unsigned SrcReg2, int CmpMask, int CmpValue, - const MachineRegisterInfo *MRI) const; + bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, + unsigned SrcReg2, int CmpMask, int CmpValue, + const MachineRegisterInfo *MRI) const override; - virtual bool analyzeSelect(const MachineInstr *MI, - SmallVectorImpl<MachineOperand> &Cond, - unsigned &TrueOp, unsigned &FalseOp, - bool &Optimizable) const; + bool analyzeSelect(const MachineInstr *MI, + SmallVectorImpl<MachineOperand> &Cond, + unsigned &TrueOp, unsigned &FalseOp, + bool &Optimizable) const override; - virtual MachineInstr *optimizeSelect(MachineInstr *MI, bool) const; + MachineInstr *optimizeSelect(MachineInstr *MI, bool) const override; /// FoldImmediate - 'Reg' is known to be defined by a move immediate /// instruction, try to fold the immediate into the use instruction. - virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, - unsigned Reg, MachineRegisterInfo *MRI) const; + bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, + unsigned Reg, MachineRegisterInfo *MRI) const override; - virtual unsigned getNumMicroOps(const InstrItineraryData *ItinData, - const MachineInstr *MI) const; + unsigned getNumMicroOps(const InstrItineraryData *ItinData, + const MachineInstr *MI) const override; - virtual int getOperandLatency(const InstrItineraryData *ItinData, const MachineInstr *DefMI, unsigned DefIdx, - const MachineInstr *UseMI, unsigned UseIdx) const; - virtual + const MachineInstr *UseMI, + unsigned UseIdx) const override; int getOperandLatency(const InstrItineraryData *ItinData, SDNode *DefNode, unsigned DefIdx, - SDNode *UseNode, unsigned UseIdx) const; + SDNode *UseNode, unsigned UseIdx) const override; /// VFP/NEON execution domains. std::pair<uint16_t, uint16_t> - getExecutionDomain(const MachineInstr *MI) const; - void setExecutionDomain(MachineInstr *MI, unsigned Domain) const; + getExecutionDomain(const MachineInstr *MI) const override; + void setExecutionDomain(MachineInstr *MI, unsigned Domain) const override; unsigned getPartialRegUpdateClearance(const MachineInstr*, unsigned, - const TargetRegisterInfo*) const; + const TargetRegisterInfo*) const override; void breakPartialRegDependency(MachineBasicBlock::iterator, unsigned, - const TargetRegisterInfo *TRI) const; + const TargetRegisterInfo *TRI) const override; + + void + getUnconditionalBranch(MCInst &Branch, + const MCSymbolRefExpr *BranchTarget) const override; + + void getTrap(MCInst &MI) const override; + /// Get the number of addresses by LDM or VLDM or zero for unknown. 
unsigned getNumLDMAddresses(const MachineInstr *MI) const; @@ -264,24 +264,27 @@ private: const MCInstrDesc &UseMCID, unsigned UseIdx, unsigned UseAlign) const; - unsigned getPredicationCost(const MachineInstr *MI) const; + unsigned getPredicationCost(const MachineInstr *MI) const override; unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr *MI, - unsigned *PredCost = 0) const; + unsigned *PredCost = nullptr) const override; int getInstrLatency(const InstrItineraryData *ItinData, - SDNode *Node) const; + SDNode *Node) const override; bool hasHighOperandLatency(const InstrItineraryData *ItinData, const MachineRegisterInfo *MRI, const MachineInstr *DefMI, unsigned DefIdx, - const MachineInstr *UseMI, unsigned UseIdx) const; + const MachineInstr *UseMI, + unsigned UseIdx) const override; bool hasLowDefLatency(const InstrItineraryData *ItinData, - const MachineInstr *DefMI, unsigned DefIdx) const; + const MachineInstr *DefMI, + unsigned DefIdx) const override; /// verifyInstruction - Perform target specific instruction verification. - bool verifyInstruction(const MachineInstr *MI, StringRef &ErrInfo) const; + bool verifyInstruction(const MachineInstr *MI, + StringRef &ErrInfo) const override; private: /// Modeling special VFP / NEON fp MLA / MLS hazards. @@ -417,7 +420,8 @@ void emitThumbRegPlusImmediate(MachineBasicBlock &MBB, /// NumBytes. This can save a few bytes per function in code-size, but /// obviously generates more memory traffic. As such, it only takes /// effect in functions being optimised for size. -bool tryFoldSPUpdateIntoPushPop(MachineFunction &MF, MachineInstr *MI, +bool tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, + MachineFunction &MF, MachineInstr *MI, unsigned NumBytes); /// rewriteARMFrameIndex / rewriteT2FrameIndex - diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 8717dc0cde90..32b5f4aa2942 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -38,20 +38,29 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#define DEBUG_TYPE "arm-register-info" + #define GET_REGINFO_TARGET_DESC #include "ARMGenRegisterInfo.inc" using namespace llvm; ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMSubtarget &sti) - : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), STI(sti), - FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11), - BasePtr(ARM::R6) { + : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), STI(sti), BasePtr(ARM::R6) { + if (STI.isTargetMachO()) { + if (STI.isTargetDarwin() || STI.isThumb1Only()) + FramePtr = ARM::R7; + else + FramePtr = ARM::R11; + } else if (STI.isTargetWindows()) + FramePtr = ARM::R11; + else // ARM EABI + FramePtr = STI.isThumb() ? ARM::R7 : ARM::R11; } -const uint16_t* +const MCPhysReg* ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { - const uint16_t *RegList = (STI.isTargetIOS() && !STI.isAAPCS_ABI()) + const MCPhysReg *RegList = (STI.isTargetIOS() && !STI.isAAPCS_ABI()) ? CSR_iOS_SaveList : CSR_AAPCS_SaveList; @@ -107,7 +116,7 @@ ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const { // should return NULL if (CC == CallingConv::GHC) // This is academic becase all GHC calls are (supposed to be) tail calls - return NULL; + return nullptr; return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) ? 
CSR_iOS_ThisReturn_RegMask : CSR_AAPCS_ThisReturn_RegMask; } @@ -173,7 +182,7 @@ ARMBaseRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind const TargetRegisterClass * ARMBaseRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { if (RC == &ARM::CCRRegClass) - return 0; // Can't copy CCR registers. + return nullptr; // Can't copy CCR registers. return RC; } @@ -408,6 +417,11 @@ emitLoadConstPool(MachineBasicBlock &MBB, .setMIFlags(MIFlags); } +bool ARMBaseRegisterInfo::mayOverrideLocalAssignment() const { + // The native linux build hits a downstream codegen bug when this is enabled. + return STI.isTargetDarwin(); +} + bool ARMBaseRegisterInfo:: requiresRegisterScavenging(const MachineFunction &MF) const { return true; @@ -590,10 +604,8 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB, AddDefaultCC(MIB); } -void -ARMBaseRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I, - unsigned BaseReg, int64_t Offset) const { - MachineInstr &MI = *I; +void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, + int64_t Offset) const { MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); const ARMBaseInstrInfo &TII = @@ -765,3 +777,60 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false,true); } } + +bool ARMBaseRegisterInfo::shouldCoalesce(MachineInstr *MI, + const TargetRegisterClass *SrcRC, + unsigned SubReg, + const TargetRegisterClass *DstRC, + unsigned DstSubReg, + const TargetRegisterClass *NewRC) const { + auto MBB = MI->getParent(); + auto MF = MBB->getParent(); + const MachineRegisterInfo &MRI = MF->getRegInfo(); + // If not copying into a sub-register this should be ok because we shouldn't + // need to split the reg. + if (!DstSubReg) + return true; + // Small registers don't frequently cause a problem, so we can coalesce them. + if (NewRC->getSize() < 32 && DstRC->getSize() < 32 && SrcRC->getSize() < 32) + return true; + + auto NewRCWeight = + MRI.getTargetRegisterInfo()->getRegClassWeight(NewRC); + auto SrcRCWeight = + MRI.getTargetRegisterInfo()->getRegClassWeight(SrcRC); + auto DstRCWeight = + MRI.getTargetRegisterInfo()->getRegClassWeight(DstRC); + // If the source register class is more expensive than the destination, the + // coalescing is probably profitable. + if (SrcRCWeight.RegWeight > NewRCWeight.RegWeight) + return true; + if (DstRCWeight.RegWeight > NewRCWeight.RegWeight) + return true; + + // If the register allocator isn't constrained, we can always allow coalescing + // unfortunately we don't know yet if we will be constrained. + // The goal of this heuristic is to restrict how many expensive registers + // we allow to coalesce in a given basic block. + auto AFI = MF->getInfo<ARMFunctionInfo>(); + auto It = AFI->getCoalescedWeight(MBB); + + DEBUG(dbgs() << "\tARM::shouldCoalesce - Coalesced Weight: " + << It->second << "\n"); + DEBUG(dbgs() << "\tARM::shouldCoalesce - Reg Weight: " + << NewRCWeight.RegWeight << "\n"); + + // This number is the largest round number that which meets the criteria: + // (1) addresses PR18825 + // (2) generates better code in some test cases (like vldm-shed-a9.ll) + // (3) Doesn't regress any test cases (in-tree, test-suite, and SPEC) + // In practice the SizeMultiplier will only factor in for straight line code + // that uses a lot of NEON vectors, which isn't terribly common. 
+ unsigned SizeMultiplier = MBB->size()/100; + SizeMultiplier = SizeMultiplier ? SizeMultiplier : 1; + if (It->second < NewRCWeight.WeightLimit * SizeMultiplier) { + It->second += NewRCWeight.RegWeight; + return true; + } + return false; +} diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h index e28fff68f4e2..833d3f218480 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -14,7 +14,7 @@ #ifndef ARMBASEREGISTERINFO_H #define ARMBASEREGISTERINFO_H -#include "ARM.h" +#include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #define GET_REGINFO_HEADER @@ -42,7 +42,7 @@ static inline bool isARMArea1Register(unsigned Reg, bool isIOS) { case R4: case R5: case R6: case R7: case LR: case SP: case PC: return true; - case R8: case R9: case R10: case R11: + case R8: case R9: case R10: case R11: case R12: // For iOS we want r7 and lr to be next to each other. return !isIOS; default: @@ -53,7 +53,7 @@ static inline bool isARMArea1Register(unsigned Reg, bool isIOS) { static inline bool isARMArea2Register(unsigned Reg, bool isIOS) { using namespace ARM; switch (Reg) { - case R8: case R9: case R10: case R11: + case R8: case R9: case R10: case R11: case R12: // iOS has this second area. return isIOS; default: @@ -100,8 +100,9 @@ protected: public: /// Code Generation virtual methods... - const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const; - const uint32_t *getCallPreservedMask(CallingConv::ID) const; + const MCPhysReg * + getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override; + const uint32_t *getCallPreservedMask(CallingConv::ID) const override; const uint32_t *getNoPreservedMask() const; /// getThisReturnPreservedMask - Returns a call preserved mask specific to the @@ -113,48 +114,51 @@ public: /// Should return NULL in the case that the calling convention does not have /// this property const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const; - - BitVector getReservedRegs(const MachineFunction &MF) const; - const TargetRegisterClass* - getPointerRegClass(const MachineFunction &MF, unsigned Kind = 0) const; - const TargetRegisterClass* - getCrossCopyRegClass(const TargetRegisterClass *RC) const; + BitVector getReservedRegs(const MachineFunction &MF) const override; - const TargetRegisterClass* - getLargestLegalSuperClass(const TargetRegisterClass *RC) const; + const TargetRegisterClass * + getPointerRegClass(const MachineFunction &MF, + unsigned Kind = 0) const override; + const TargetRegisterClass * + getCrossCopyRegClass(const TargetRegisterClass *RC) const override; + + const TargetRegisterClass * + getLargestLegalSuperClass(const TargetRegisterClass *RC) const override; unsigned getRegPressureLimit(const TargetRegisterClass *RC, - MachineFunction &MF) const; + MachineFunction &MF) const override; void getRegAllocationHints(unsigned VirtReg, ArrayRef<MCPhysReg> Order, SmallVectorImpl<MCPhysReg> &Hints, const MachineFunction &MF, - const VirtRegMap *VRM) const; + const VirtRegMap *VRM) const override; void UpdateRegAllocHint(unsigned Reg, unsigned NewReg, - MachineFunction &MF) const; + MachineFunction &MF) const override; - virtual bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const; + bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const override; bool hasBasePointer(const MachineFunction &MF) const; bool canRealignStack(const MachineFunction &MF) const; - bool 
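The shouldCoalesce hook completed above is the PR18825 fix: it throttles how much expensive (large NEON register class) pressure a single basic block can accumulate through coalescing. Worked through once: in a 250-instruction block, SizeMultiplier is 250/100 = 2, so candidates are admitted until the block's accumulated weight reaches twice the class's WeightLimit. The accounting in isolation (a sketch; CoalescedWeight stands in for the ARMFunctionInfo per-block map):

// Admit a coalesce and charge its weight, or reject once the scaled
// per-block budget is spent -- the same arithmetic as the hunk above.
static bool admitCoalesce(unsigned &CoalescedWeight, unsigned BlockSize,
                          unsigned RegWeight, unsigned WeightLimit) {
  unsigned SizeMultiplier = BlockSize / 100;
  SizeMultiplier = SizeMultiplier ? SizeMultiplier : 1; // never zero
  if (CoalescedWeight < WeightLimit * SizeMultiplier) {
    CoalescedWeight += RegWeight; // admitted: remember the cost
    return true;
  }
  return false;
}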
needsStackRealignment(const MachineFunction &MF) const; - int64_t getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const; - bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const; + bool needsStackRealignment(const MachineFunction &MF) const override; + int64_t getFrameIndexInstrOffset(const MachineInstr *MI, + int Idx) const override; + bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override; void materializeFrameBaseRegister(MachineBasicBlock *MBB, unsigned BaseReg, int FrameIdx, - int64_t Offset) const; - void resolveFrameIndex(MachineBasicBlock::iterator I, - unsigned BaseReg, int64_t Offset) const; - bool isFrameOffsetLegal(const MachineInstr *MI, int64_t Offset) const; + int64_t Offset) const override; + void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, + int64_t Offset) const override; + bool isFrameOffsetLegal(const MachineInstr *MI, + int64_t Offset) const override; bool cannotEliminateFrame(const MachineFunction &MF) const; // Debug information queries. - unsigned getFrameRegister(const MachineFunction &MF) const; + unsigned getFrameRegister(const MachineFunction &MF) const override; unsigned getBaseRegister() const { return BasePtr; } bool isLowRegister(unsigned Reg) const; @@ -164,25 +168,33 @@ public: /// specified immediate. virtual void emitLoadConstPool(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, - DebugLoc dl, - unsigned DestReg, unsigned SubIdx, - int Val, - ARMCC::CondCodes Pred = ARMCC::AL, + DebugLoc dl, unsigned DestReg, unsigned SubIdx, + int Val, ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0, unsigned MIFlags = MachineInstr::NoFlags)const; /// Code Generation virtual methods... - virtual bool requiresRegisterScavenging(const MachineFunction &MF) const; + bool mayOverrideLocalAssignment() const override; + + bool requiresRegisterScavenging(const MachineFunction &MF) const override; + + bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override; - virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const; + bool requiresFrameIndexScavenging(const MachineFunction &MF) const override; - virtual bool requiresFrameIndexScavenging(const MachineFunction &MF) const; + bool requiresVirtualBaseRegisters(const MachineFunction &MF) const override; - virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const; + void eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS = nullptr) const override; - virtual void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, unsigned FIOperandNum, - RegScavenger *RS = NULL) const; + /// \brief SrcRC and DstRC will be morphed into NewRC if this returns true + bool shouldCoalesce(MachineInstr *MI, + const TargetRegisterClass *SrcRC, + unsigned SubReg, + const TargetRegisterClass *DstRC, + unsigned DstSubReg, + const TargetRegisterClass *NewRC) const override; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h b/contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h deleted file mode 100644 index b16d4ef54b6d..000000000000 --- a/contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h +++ /dev/null @@ -1,170 +0,0 @@ -//===-- ARMBuildAttrs.h - ARM Build Attributes ------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This file contains enumerations and support routines for ARM build attributes -// as defined in ARM ABI addenda document (ABI release 2.08). -// -//===----------------------------------------------------------------------===// - -#ifndef __TARGET_ARMBUILDATTRS_H__ -#define __TARGET_ARMBUILDATTRS_H__ - -namespace llvm { -namespace ARMBuildAttrs { - - enum SpecialAttr { - // This is for the .cpu asm attr. It translates into one or more - // AttrType (below) entries in the .ARM.attributes section in the ELF. - SEL_CPU - }; - - enum AttrType { - // Rest correspond to ELF/.ARM.attributes - File = 1, - Section = 2, - Symbol = 3, - CPU_raw_name = 4, - CPU_name = 5, - CPU_arch = 6, - CPU_arch_profile = 7, - ARM_ISA_use = 8, - THUMB_ISA_use = 9, - VFP_arch = 10, - WMMX_arch = 11, - Advanced_SIMD_arch = 12, - PCS_config = 13, - ABI_PCS_R9_use = 14, - ABI_PCS_RW_data = 15, - ABI_PCS_RO_data = 16, - ABI_PCS_GOT_use = 17, - ABI_PCS_wchar_t = 18, - ABI_FP_rounding = 19, - ABI_FP_denormal = 20, - ABI_FP_exceptions = 21, - ABI_FP_user_exceptions = 22, - ABI_FP_number_model = 23, - ABI_align8_needed = 24, - ABI_align8_preserved = 25, - ABI_enum_size = 26, - ABI_HardFP_use = 27, - ABI_VFP_args = 28, - ABI_WMMX_args = 29, - ABI_optimization_goals = 30, - ABI_FP_optimization_goals = 31, - compatibility = 32, - CPU_unaligned_access = 34, - FP_HP_extension = 36, - ABI_FP_16bit_format = 38, - MPextension_use = 42, // was 70, 2.08 ABI - DIV_use = 44, - nodefaults = 64, - also_compatible_with = 65, - T2EE_use = 66, - conformance = 67, - Virtualization_use = 68, - MPextension_use_old = 70 - }; - - // Magic numbers for .ARM.attributes - enum AttrMagic { - Format_Version = 0x41 - }; - - // Legal Values for CPU_arch, (=6), uleb128 - enum CPUArch { - Pre_v4 = 0, - v4 = 1, // e.g. SA110 - v4T = 2, // e.g. ARM7TDMI - v5T = 3, // e.g. ARM9TDMI - v5TE = 4, // e.g. ARM946E_S - v5TEJ = 5, // e.g. ARM926EJ_S - v6 = 6, // e.g. ARM1136J_S - v6KZ = 7, // e.g. ARM1176JZ_S - v6T2 = 8, // e.g. ARM1156T2F_S - v6K = 9, // e.g. ARM1136J_S - v7 = 10, // e.g. Cortex A8, Cortex M3 - v6_M = 11, // e.g. Cortex M1 - v6S_M = 12, // v6_M with the System extensions - v7E_M = 13, // v7_M with DSP extensions - v8 = 14 // v8, AArch32 - }; - - enum CPUArchProfile { // (=7), uleb128 - Not_Applicable = 0, // pre v7, or cross-profile code - ApplicationProfile = (0x41), // 'A' (e.g. for Cortex A8) - RealTimeProfile = (0x52), // 'R' (e.g. for Cortex R4) - MicroControllerProfile = (0x4D), // 'M' (e.g. 
for Cortex M3) - SystemProfile = (0x53) // 'S' Application or real-time profile - }; - - // The following have a lot of common use cases - enum { - Not_Allowed = 0, - Allowed = 1, - - // Tag_ARM_ISA_use (=8), uleb128 - - // Tag_THUMB_ISA_use, (=9), uleb128 - AllowThumb32 = 2, // 32-bit Thumb (implies 16-bit instructions) - - // Tag_FP_arch (=10), uleb128 (formerly Tag_VFP_arch = 10) - AllowFPv2 = 2, // v2 FP ISA permitted (implies use of the v1 FP ISA) - AllowFPv3A = 3, // v3 FP ISA permitted (implies use of the v2 FP ISA) - AllowFPv3B = 4, // v3 FP ISA permitted, but only D0-D15, S0-S31 - AllowFPv4A = 5, // v4 FP ISA permitted (implies use of v3 FP ISA) - AllowFPv4B = 6, // v4 FP ISA was permitted, but only D0-D15, S0-S31 - AllowFPARMv8A = 7, // Use of the ARM v8-A FP ISA was permitted - AllowFPARMv8B = 8, // Use of the ARM v8-A FP ISA was permitted, but only D0-D15, S0-S31 - - // Tag_WMMX_arch, (=11), uleb128 - AllowWMMXv1 = 1, // The user permitted this entity to use WMMX v1 - AllowWMMXv2 = 2, // The user permitted this entity to use WMMX v2 - - // Tag_Advanced_SIMD_arch, (=12), uleb128 - AllowNeon = 1, // SIMDv1 was permitted - AllowNeon2 = 2, // SIMDv2 was permitted (Half-precision FP, MAC operations) - AllowNeonARMv8 = 3, // ARM v8-A SIMD was permitted - - // Tag_ABI_FP_denormal, (=20), uleb128 - PreserveFPSign = 2, // sign when flushed-to-zero is preserved - - // Tag_ABI_FP_number_model, (=23), uleb128 - AllowRTABI = 2, // numbers, infinities, and one quiet NaN (see [RTABI]) - AllowIEE754 = 3, // this code to use all the IEEE 754-defined FP encodings - - // Tag_ABI_HardFP_use, (=27), uleb128 - HardFPImplied = 0, // FP use should be implied by Tag_FP_arch - HardFPSinglePrecision = 1, // Single-precision only - - // Tag_ABI_VFP_args, (=28), uleb128 - BaseAAPCS = 0, - HardFPAAPCS = 1, - - // Tag_FP_HP_extension, (=36), uleb128 - AllowHPFP = 1, // Allow use of Half Precision FP - - // Tag_MPextension_use, (=42), uleb128 - AllowMP = 1, // Allow use of MP extensions - - // Tag_DIV_use, (=44), uleb128 - AllowDIVIfExists = 0, // Allow hardware divide if available in arch, or no info exists. - DisallowDIV = 1, // Hardware divide explicitly disallowed - AllowDIVExt = 2, // Allow hardware divide as optional architecture extension above - // the base arch specified by Tag_CPU_arch and Tag_CPU_arch_profile. - - // Tag_Virtualization_use, (=68), uleb128 - AllowTZ = 1, - AllowVirtualization = 2, - AllowTZVirtualization = 3 - }; - -} // namespace ARMBuildAttrs -} // namespace llvm - -#endif // __TARGET_ARMBUILDATTRS_H__ diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.h b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h index 4f94ad2403d3..dc41c1c14bbb 100644 --- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.h +++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h @@ -28,7 +28,7 @@ namespace llvm { static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, CCState &State, bool CanFail) { - static const uint16_t RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; + static const MCPhysReg RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; // Try to get the first register. 
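The ARMBuildAttrs.h removal above is a relocation, not a deletion: in the 3.5 tree these enumerations live in include/llvm/Support/ARMBuildAttributes.h so that MC-layer and non-target code can share them. Files in this directory switch to (assuming the upstream 3.5 layout):

#include "llvm/Support/ARMBuildAttributes.h"

// Same namespace and enumerators as the deleted header, e.g.:
//   ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7,
//   ARMBuildAttrs::THUMB_ISA_use, ARMBuildAttrs::AllowThumb32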
if (unsigned Reg = State.AllocateReg(RegList, 4)) @@ -71,10 +71,10 @@ static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, CCState &State, bool CanFail) { - static const uint16_t HiRegList[] = { ARM::R0, ARM::R2 }; - static const uint16_t LoRegList[] = { ARM::R1, ARM::R3 }; - static const uint16_t ShadowRegList[] = { ARM::R0, ARM::R1 }; - static const uint16_t GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; + static const MCPhysReg HiRegList[] = { ARM::R0, ARM::R2 }; + static const MCPhysReg LoRegList[] = { ARM::R1, ARM::R3 }; + static const MCPhysReg ShadowRegList[] = { ARM::R0, ARM::R1 }; + static const MCPhysReg GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2); if (Reg == 0) { @@ -123,8 +123,8 @@ static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, CCState &State) { - static const uint16_t HiRegList[] = { ARM::R0, ARM::R2 }; - static const uint16_t LoRegList[] = { ARM::R1, ARM::R3 }; + static const MCPhysReg HiRegList[] = { ARM::R0, ARM::R2 }; + static const MCPhysReg LoRegList[] = { ARM::R1, ARM::R3 }; unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2); if (Reg == 0) @@ -160,6 +160,105 @@ static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, State); } +static const uint16_t SRegList[] = { ARM::S0, ARM::S1, ARM::S2, ARM::S3, + ARM::S4, ARM::S5, ARM::S6, ARM::S7, + ARM::S8, ARM::S9, ARM::S10, ARM::S11, + ARM::S12, ARM::S13, ARM::S14, ARM::S15 }; +static const uint16_t DRegList[] = { ARM::D0, ARM::D1, ARM::D2, ARM::D3, + ARM::D4, ARM::D5, ARM::D6, ARM::D7 }; +static const uint16_t QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 }; + +// Allocate part of an AAPCS HFA or HVA. We assume that each member of the HA +// has InConsecutiveRegs set, and that the last member also has +// InConsecutiveRegsLast set. We must process all members of the HA before +// we can allocate it, as we need to know the total number of registers that +// will be needed in order to (attempt to) allocate a contiguous block. +static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State) { + SmallVectorImpl<CCValAssign> &PendingHAMembers = State.getPendingLocs(); + // AAPCS HFAs must have 1-4 elements, all of the same type + assert(PendingHAMembers.size() < 8); + if (PendingHAMembers.size() > 0) + assert(PendingHAMembers[0].getLocVT() == LocVT); + + // Add the argument to the list to be allocated once we know the size of the + // HA + PendingHAMembers.push_back( + CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); + + if (ArgFlags.isInConsecutiveRegsLast()) { + assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 8 && + "Homogeneous aggregates must have between 1 and 4 members"); + + // Try to allocate a contiguous block of registers, each of the correct + // size to hold one member. 
+ const uint16_t *RegList; + unsigned NumRegs; + switch (LocVT.SimpleTy) { + case MVT::i32: + case MVT::f32: + RegList = SRegList; + NumRegs = 16; + break; + case MVT::f64: + RegList = DRegList; + NumRegs = 8; + break; + case MVT::v2f64: + RegList = QRegList; + NumRegs = 4; + break; + default: + llvm_unreachable("Unexpected member type for HA"); + break; + } + + unsigned RegResult = + State.AllocateRegBlock(RegList, NumRegs, PendingHAMembers.size()); + + if (RegResult) { + for (SmallVectorImpl<CCValAssign>::iterator It = PendingHAMembers.begin(); + It != PendingHAMembers.end(); ++It) { + It->convertToReg(RegResult); + State.addLoc(*It); + ++RegResult; + } + PendingHAMembers.clear(); + return true; + } + + // Register allocation failed, fall back to the stack + + // Mark all VFP regs as unavailable (AAPCS rule C.2.vfp) + for (unsigned regNo = 0; regNo < 16; ++regNo) + State.AllocateReg(SRegList[regNo]); + + unsigned Size = LocVT.getSizeInBits() / 8; + unsigned Align = Size; + + if (LocVT.SimpleTy == MVT::v2f64 || LocVT.SimpleTy == MVT::i32) { + // Vectors are always aligned to 8 bytes. If we've seen an i32 here + // it's because it's been split from a larger type, also with align 8. + Align = 8; + } + + for (auto It : PendingHAMembers) { + It.convertToMem(State.AllocateStack(Size, Align)); + State.addLoc(It); + + // Only the first member needs to be aligned. + Align = 1; + } + + // All pending members have now been allocated + PendingHAMembers.clear(); + } + + // This will be allocated by the last member of the HA + return true; +} + } // End llvm namespace #endif diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td index 9bea4b2d68e9..526089b01b6f 100644 --- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td +++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td @@ -64,6 +64,13 @@ def FastCC_ARM_APCS : CallingConv<[ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15]>>, + + // CPRCs may be allocated to co-processor registers or the stack - they + // may never be allocated to core registers. 
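The register-block logic in CC_ARM_AAPCS_Custom_HA above reduces to: buffer the aggregate's members until the one flagged InConsecutiveRegsLast arrives, try to claim a contiguous run of registers for all of them at once, and otherwise send every member to the stack while retiring the whole VFP file (AAPCS rule C.2.vfp). A minimal standalone model of that policy, assuming float members in S0-S15 only, 4-byte stack slots, and none of the alignment subtleties (plain C++, not the LLVM API):

#include <cstdio>

static bool SRegUsed[16];
static unsigned StackOffset;

// Find and claim a free contiguous block of `count` S registers; -1 if none.
static int allocateRegBlock(unsigned count) {
  for (unsigned base = 0; base + count <= 16; ++base) {
    bool free = true;
    for (unsigned i = 0; i < count && free; ++i)
      free = !SRegUsed[base + i];
    if (free) {
      for (unsigned i = 0; i < count; ++i)
        SRegUsed[base + i] = true;
      return (int)base;
    }
  }
  return -1;
}

static void allocateHFA(unsigned count) {
  int base = allocateRegBlock(count);
  if (base >= 0) {
    for (unsigned i = 0; i < count; ++i)
      std::printf("member %u -> s%d\n", i, base + (int)i);
    return;
  }
  // Fallback mirrors rule C.2.vfp: once one HA spills, no later FP argument
  // may be back-filled into registers, so mark the whole file as used.
  for (bool &used : SRegUsed)
    used = true;
  for (unsigned i = 0; i < count; ++i, StackOffset += 4)
    std::printf("member %u -> [sp, #%u]\n", i, StackOffset);
}

The CCAssignToStackWithShadow<..., [Q0, Q1, Q2, Q3]> entries below express the same shadowing declaratively: a CPRC that goes to the stack also retires the co-processor registers it would otherwise have occupied.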
+ CCIfType<[f32], CCAssignToStackWithShadow<4, 4, [Q0, Q1, Q2, Q3]>>, + CCIfType<[f64], CCAssignToStackWithShadow<8, 4, [Q0, Q1, Q2, Q3]>>, + CCIfType<[v2f64], CCAssignToStackWithShadow<16, 4, [Q0, Q1, Q2, Q3]>>, + CCDelegateTo<CC_ARM_APCS> ]>; @@ -114,10 +121,11 @@ def CC_ARM_AAPCS_Common : CallingConv<[ CCIfType<[i32], CCIf<"ArgFlags.getOrigAlign() != 8", CCAssignToReg<[R0, R1, R2, R3]>>>, - CCIfType<[i32], CCIfAlign<"8", CCAssignToStackWithShadow<4, 8, R3>>>, - CCIfType<[i32, f32], CCAssignToStack<4, 4>>, - CCIfType<[f64], CCAssignToStack<8, 8>>, - CCIfType<[v2f64], CCAssignToStack<16, 8>> + CCIfType<[i32], CCIfAlign<"8", CCAssignToStackWithShadow<4, 8, [R0, R1, R2, R3]>>>, + CCIfType<[i32], CCAssignToStackWithShadow<4, 4, [R0, R1, R2, R3]>>, + CCIfType<[f32], CCAssignToStackWithShadow<4, 4, [Q0, Q1, Q2, Q3]>>, + CCIfType<[f64], CCAssignToStackWithShadow<8, 8, [Q0, Q1, Q2, Q3]>>, + CCIfType<[v2f64], CCAssignToStackWithShadow<16, 8, [Q0, Q1, Q2, Q3]>> ]>; def RetCC_ARM_AAPCS_Common : CallingConv<[ @@ -166,6 +174,9 @@ def CC_ARM_AAPCS_VFP : CallingConv<[ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>, CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>, + // HFAs are passed in a contiguous block of registers, or on the stack + CCIfConsecutiveRegs<CCCustom<"CC_ARM_AAPCS_Custom_HA">>, + CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>, CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8, diff --git a/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp b/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp index 568ca858c4d2..5fb6ebfeaae1 100644 --- a/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp @@ -12,10 +12,10 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "jit" #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMConstantPoolValue.h" +#include "ARMMachineFunctionInfo.h" #include "ARMRelocations.h" #include "ARMSubtarget.h" #include "ARMTargetMachine.h" @@ -40,6 +40,8 @@ #endif using namespace llvm; +#define DEBUG_TYPE "jit" + STATISTIC(NumEmitted, "Number of machine instructions emitted"); namespace { @@ -57,7 +59,7 @@ namespace { bool IsPIC; bool IsThumb; - void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineModuleInfo>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -65,10 +67,10 @@ namespace { static char ID; public: ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) - : MachineFunctionPass(ID), JTI(0), + : MachineFunctionPass(ID), JTI(nullptr), II((const ARMBaseInstrInfo *)tm.getInstrInfo()), TD(tm.getDataLayout()), TM(tm), - MCE(mce), MCPEs(0), MJTEs(0), + MCE(mce), MCPEs(nullptr), MJTEs(nullptr), IsPIC(TM.getRelocationModel() == Reloc::PIC_), IsThumb(false) {} /// getBinaryCodeForInstr - This function, generated by the @@ -76,9 +78,9 @@ namespace { /// machine instructions. 
uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const; - bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; - virtual const char *getPassName() const { + const char *getPassName() const override { return "ARM Machine Code Emitter"; } @@ -207,8 +209,6 @@ namespace { const { return 0; } unsigned getThumbAddrModeRegRegOpValue(const MachineInstr &MI, unsigned Op) const { return 0; } - unsigned getT2AddrModeImm12OpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } unsigned getT2AddrModeImm8OpValue(const MachineInstr &MI, unsigned Op) const { return 0; } unsigned getT2Imm8s4OpValue(const MachineInstr &MI, unsigned Op) @@ -219,8 +219,6 @@ namespace { const { return 0; } unsigned getT2AddrModeImm8OffsetOpValue(const MachineInstr &MI, unsigned Op) const { return 0; } - unsigned getT2AddrModeImm12OffsetOpValue(const MachineInstr &MI,unsigned Op) - const { return 0; } unsigned getT2AddrModeSORegOpValue(const MachineInstr &MI, unsigned Op) const { return 0; } unsigned getT2SORegOpValue(const MachineInstr &MI, unsigned Op) @@ -238,10 +236,6 @@ namespace { const { return 0; } unsigned getBitfieldInvertedMaskOpValue(const MachineInstr &MI, unsigned Op) const { return 0; } - unsigned getSsatBitPosValue(const MachineInstr &MI, - unsigned Op) const { return 0; } - uint32_t getLdStmModeOpValue(const MachineInstr &MI, unsigned OpIdx) - const {return 0; } uint32_t getLdStSORegOpValue(const MachineInstr &MI, unsigned OpIdx) const { return 0; } @@ -270,8 +264,6 @@ namespace { return 0; } - uint32_t getAddrMode2OpValue(const MachineInstr &MI, unsigned OpIdx) - const { return 0;} uint32_t getAddrMode2OffsetOpValue(const MachineInstr &MI, unsigned OpIdx) const { return 0;} uint32_t getPostIdxRegOpValue(const MachineInstr &MI, unsigned OpIdx) @@ -282,8 +274,6 @@ namespace { const { return 0; } uint32_t getAddrModeThumbSPOpValue(const MachineInstr &MI, unsigned Op) const { return 0; } - uint32_t getAddrModeSOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } uint32_t getAddrModeISOpValue(const MachineInstr &MI, unsigned Op) const { return 0; } uint32_t getAddrModePCOpValue(const MachineInstr &MI, unsigned Op) @@ -385,7 +375,7 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) { Subtarget = &TM.getSubtarget<ARMSubtarget>(); MCPEs = &MF.getConstantPool()->getConstants(); - MJTEs = 0; + MJTEs = nullptr; if (MF.getJumpTableInfo()) MJTEs = &MF.getJumpTableInfo()->getJumpTables(); IsPIC = TM.getRelocationModel() == Reloc::PIC_; IsThumb = MF.getInfo<ARMFunctionInfo>()->isThumbFunction(); @@ -866,7 +856,8 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) { } break; } - case TargetOpcode::PROLOG_LABEL: + case TargetOpcode::CFI_INSTRUCTION: + break; case TargetOpcode::EH_LABEL: MCE.emitLabel(MI.getOperand(0).getMCSymbol()); break; diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp index cff5ce27bca6..ce264eeef90f 100644 --- a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "arm-cp-islands" #include "ARM.h" #include "ARMMachineFunctionInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" @@ -36,6 +35,8 @@ #include <algorithm> using namespace llvm; +#define DEBUG_TYPE "arm-cp-islands" + STATISTIC(NumCPEs, "Number of constpool entries"); 
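The same two-hunk pattern - delete the "#define DEBUG_TYPE ..." above the includes, reintroduce it after them - repeats across these files. The point is hygiene: a macro defined before the #includes is visible inside every header that follows, and LLVM headers may use (or expect to define) DEBUG_TYPE themselves. A minimal sketch of the convention, with a stand-in header rather than the LLVM build:

// Old layout: the macro leaks into every header below it.
// #define DEBUG_TYPE "arm-cp-islands"
// #include "llvm/Support/Debug.h"

// New layout: headers first, then the file-local macro. Uses of DEBUG_TYPE
// inside macros still expand correctly, since expansion happens at use site.
#include <cstdio>                      // stand-in for the LLVM headers

#define DEBUG_TYPE "arm-cp-islands"

int main() { std::printf("%s\n", DEBUG_TYPE); }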
STATISTIC(NumSplit, "Number of uncond branches inserted"); STATISTIC(NumCBrFixed, "Number of cond branches fixed"); @@ -266,9 +267,9 @@ namespace { static char ID; ARMConstantIslands() : MachineFunctionPass(ID) {} - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; - virtual const char *getPassName() const { + const char *getPassName() const override { return "ARM constant island placement and branch shortening pass"; } @@ -569,10 +570,10 @@ static bool BBHasFallthrough(MachineBasicBlock *MBB) { // Get the next machine basic block in the function. MachineFunction::iterator MBBI = MBB; // Can't fall off end of function. - if (llvm::next(MBBI) == MBB->getParent()->end()) + if (std::next(MBBI) == MBB->getParent()->end()) return false; - MachineBasicBlock *NextBB = llvm::next(MBBI); + MachineBasicBlock *NextBB = std::next(MBBI); for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) if (*I == NextBB) @@ -593,7 +594,7 @@ ARMConstantIslands::CPEntry if (CPEs[i].CPEMI == CPEMI) return &CPEs[i]; } - return NULL; + return nullptr; } /// getCPELogAlign - Returns the required alignment of the constant pool entry @@ -917,7 +918,7 @@ MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) { CompareMBBNumbers); MachineBasicBlock* WaterBB = *IP; if (WaterBB == OrigBB) - WaterList.insert(llvm::next(IP), NewBB); + WaterList.insert(std::next(IP), NewBB); else WaterList.insert(IP, OrigBB); NewWaterList.insert(OrigBB); @@ -1102,7 +1103,7 @@ bool ARMConstantIslands::decrementCPEReferenceCount(unsigned CPI, assert(CPE && "Unexpected!"); if (--CPE->RefCount == 0) { removeDeadCPEMI(CPEMI); - CPE->CPEMI = NULL; + CPE->CPEMI = nullptr; --NumCPEs; return true; } @@ -1135,7 +1136,7 @@ int ARMConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset) if (CPEs[i].CPEMI == CPEMI) continue; // Removing CPEs can leave empty entries, skip - if (CPEs[i].CPEMI == NULL) + if (CPEs[i].CPEMI == nullptr) continue; if (isCPEntryInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.getMaxDisp(), U.NegOk)) { @@ -1188,7 +1189,7 @@ bool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset, return false; unsigned BestGrowth = ~0u; - for (water_iterator IP = prior(WaterList.end()), B = WaterList.begin();; + for (water_iterator IP = std::prev(WaterList.end()), B = WaterList.begin();; --IP) { MachineBasicBlock* WaterBB = *IP; // Check if water is in range and is either at a lower address than the @@ -1249,7 +1250,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, if (isOffsetInRange(UserOffset, CPEOffset, U)) { DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber() << format(", expected CPE offset %#x\n", CPEOffset)); - NewMBB = llvm::next(MachineFunction::iterator(UserMBB)); + NewMBB = std::next(MachineFunction::iterator(UserMBB)); // Add an unconditional branch from UserMBB to fallthrough block. 
Record // it for branch lengthening; this new branch will not get out of range, // but if the preceding conditional branch is out of range, the targets @@ -1317,11 +1318,10 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, ++MI; unsigned CPUIndex = CPUserIndex+1; unsigned NumCPUsers = CPUsers.size(); - MachineInstr *LastIT = 0; + MachineInstr *LastIT = nullptr; for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI); Offset < BaseInsertOffset; - Offset += TII->GetInstSizeInBytes(MI), - MI = llvm::next(MI)) { + Offset += TII->GetInstSizeInBytes(MI), MI = std::next(MI)) { assert(MI != UserMBB->end() && "Fell off end of block"); if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) { CPUser &U = CPUsers[CPUIndex]; @@ -1393,7 +1393,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) { NewWaterList.insert(NewIsland); // The new CPE goes before the following block (NewMBB). - NewMBB = llvm::next(MachineFunction::iterator(WaterBB)); + NewMBB = std::next(MachineFunction::iterator(WaterBB)); } else { // No water found. @@ -1405,7 +1405,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) { // next iteration for constant pools, but in this context, we don't want // it. Check for this so it will be removed from the WaterList. // Also remove any entry from NewWaterList. - MachineBasicBlock *WaterBB = prior(MachineFunction::iterator(NewMBB)); + MachineBasicBlock *WaterBB = std::prev(MachineFunction::iterator(NewMBB)); IP = std::find(WaterList.begin(), WaterList.end(), WaterBB); if (IP != WaterList.end()) NewWaterList.erase(WaterBB); @@ -1443,7 +1443,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) { // Increase the size of the island block to account for the new entry. BBInfo[NewIsland->getNumber()].Size += Size; - adjustBBOffsetsAfter(llvm::prior(MachineFunction::iterator(NewIsland))); + adjustBBOffsetsAfter(std::prev(MachineFunction::iterator(NewIsland))); // Finally, change the CPI in the instruction operand to be ID. for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i) @@ -1492,7 +1492,7 @@ bool ARMConstantIslands::removeUnusedCPEntries() { for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) { if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) { removeDeadCPEMI(CPEs[j].CPEMI); - CPEs[j].CPEMI = NULL; + CPEs[j].CPEMI = nullptr; MadeChange = true; } } @@ -1592,7 +1592,7 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) { ++NumCBrFixed; if (BMI != MI) { - if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) && + if (std::next(MachineBasicBlock::iterator(MI)) == std::prev(MBB->end()) && BMI->getOpcode() == Br.UncondBr) { // Last MI in the BB is an unconditional branch. Can we simply invert the // condition and swap destinations: @@ -1622,7 +1622,7 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) { MBB->back().eraseFromParent(); // BBInfo[SplitBB].Offset is wrong temporarily, fixed below } - MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB)); + MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(MBB)); DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber() << " also invert condition and change dest. to BB#" @@ -1845,7 +1845,7 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() { // FIXME: After the tables are shrunk, can we get rid some of the // constantpool tables? 
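Most of the churn in this pass is the LLVM 3.5 move from the pre-C++11 helpers llvm::next/llvm::prior to std::next/std::prev. A tiny self-contained illustration of the one-for-one mapping (plain C++11, no LLVM headers):

#include <iterator>
#include <list>

int main() {
  std::list<int> blocks = {10, 20, 30};
  auto it = blocks.begin();
  auto nxt = std::next(it);   // was llvm::next(it)
  auto prv = std::prev(nxt);  // was llvm::prior(nxt)
  return (*nxt == 20 && *prv == 10) ? 0 : 1;
}

std::next/std::prev work on any bidirectional iterator, including ilist-based ones such as MachineFunction::iterator, which is why the substitution is mechanical throughout the file.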
MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); - if (MJTI == 0) return false; + if (!MJTI) return false; const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) { @@ -1971,7 +1971,7 @@ bool ARMConstantIslands::reorderThumb2JumpTables() { bool MadeChange = false; MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); - if (MJTI == 0) return false; + if (!MJTI) return false; const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) { @@ -2013,11 +2013,11 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) { // try to move it; otherwise, create a new block following the jump // table that branches back to the actual target. This is a very simple // heuristic. FIXME: We can definitely improve it. - MachineBasicBlock *TBB = 0, *FBB = 0; + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector<MachineOperand, 4> Cond; SmallVector<MachineOperand, 4> CondPrior; MachineFunction::iterator BBi = BB; - MachineFunction::iterator OldPrior = prior(BBi); + MachineFunction::iterator OldPrior = std::prev(BBi); // If the block terminator isn't analyzable, don't try to move the block bool B = TII->AnalyzeBranch(*BB, TBB, FBB, Cond); @@ -2033,7 +2033,7 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) { // Update numbering to account for the block being moved. MF->RenumberBlocks(); ++NumJTMoved; - return NULL; + return nullptr; } // Create a new MBB for the code after the jump BB. diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h index 7ae7bf46f19d..c7a84155bd48 100644 --- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h +++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h @@ -103,12 +103,12 @@ public: bool isLSDA() const { return Kind == ARMCP::CPLSDA; } bool isMachineBasicBlock() const{ return Kind == ARMCP::CPMachineBasicBlock; } - virtual unsigned getRelocationInfo() const { return 2; } + unsigned getRelocationInfo() const override { return 2; } - virtual int getExistingMachineCPValue(MachineConstantPool *CP, - unsigned Alignment); + int getExistingMachineCPValue(MachineConstantPool *CP, + unsigned Alignment) override; - virtual void addSelectionDAGCSEId(FoldingSetNodeID &ID); + void addSelectionDAGCSEId(FoldingSetNodeID &ID) override; /// hasSameValue - Return true if this ARM constpool value can share the same /// constantpool entry as another ARM constpool value. @@ -120,7 +120,7 @@ public: this->Modifier == A->Modifier; } - virtual void print(raw_ostream &O) const; + void print(raw_ostream &O) const override; void print(raw_ostream *O) const { if (O) print(*O); } void dump() const; }; @@ -164,16 +164,16 @@ public: const GlobalValue *getGV() const; const BlockAddress *getBlockAddress() const; - virtual int getExistingMachineCPValue(MachineConstantPool *CP, - unsigned Alignment); + int getExistingMachineCPValue(MachineConstantPool *CP, + unsigned Alignment) override; /// hasSameValue - Return true if this ARM constpool value can share the same /// constantpool entry as another ARM constpool value. 
- virtual bool hasSameValue(ARMConstantPoolValue *ACPV); + bool hasSameValue(ARMConstantPoolValue *ACPV) override; - virtual void addSelectionDAGCSEId(FoldingSetNodeID &ID); + void addSelectionDAGCSEId(FoldingSetNodeID &ID) override; - virtual void print(raw_ostream &O) const; + void print(raw_ostream &O) const override; static bool classof(const ARMConstantPoolValue *APV) { return APV->isGlobalValue() || APV->isBlockAddress() || APV->isLSDA(); } @@ -198,16 +198,16 @@ public: const char *getSymbol() const { return S.c_str(); } - virtual int getExistingMachineCPValue(MachineConstantPool *CP, - unsigned Alignment); + int getExistingMachineCPValue(MachineConstantPool *CP, + unsigned Alignment) override; - virtual void addSelectionDAGCSEId(FoldingSetNodeID &ID); + void addSelectionDAGCSEId(FoldingSetNodeID &ID) override; /// hasSameValue - Return true if this ARM constpool value can share the same /// constantpool entry as another ARM constpool value. - virtual bool hasSameValue(ARMConstantPoolValue *ACPV); + bool hasSameValue(ARMConstantPoolValue *ACPV) override; - virtual void print(raw_ostream &O) const; + void print(raw_ostream &O) const override; static bool classof(const ARMConstantPoolValue *ACPV) { return ACPV->isExtSymbol(); @@ -234,16 +234,16 @@ public: const MachineBasicBlock *getMBB() const { return MBB; } - virtual int getExistingMachineCPValue(MachineConstantPool *CP, - unsigned Alignment); + int getExistingMachineCPValue(MachineConstantPool *CP, + unsigned Alignment) override; - virtual void addSelectionDAGCSEId(FoldingSetNodeID &ID); + void addSelectionDAGCSEId(FoldingSetNodeID &ID) override; /// hasSameValue - Return true if this ARM constpool value can share the same /// constantpool entry as another ARM constpool value. - virtual bool hasSameValue(ARMConstantPoolValue *ACPV); + bool hasSameValue(ARMConstantPoolValue *ACPV) override; - virtual void print(raw_ostream &O) const; + void print(raw_ostream &O) const override; static bool classof(const ARMConstantPoolValue *ACPV) { return ACPV->isMachineBasicBlock(); diff --git a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 3e62b649e0dc..51d3dbb5bd8e 100644 --- a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -14,21 +14,25 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "arm-pseudo" #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" +#include "ARMConstantPoolValue.h" #include "ARMMachineFunctionInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" // FIXME: for debug only. remove! 
#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "arm-pseudo" + static cl::opt<bool> VerifyARMPseudo("verify-arm-pseudo-expand", cl::Hidden, cl::desc("Verify machine code after expanding ARM pseudos")); @@ -44,9 +48,9 @@ namespace { const ARMSubtarget *STI; ARMFunctionInfo *AFI; - virtual bool runOnMachineFunction(MachineFunction &Fn); + bool runOnMachineFunction(MachineFunction &Fn) override; - virtual const char *getPassName() const { + const char *getPassName() const override { return "ARM pseudo instruction expansion pass"; } @@ -343,7 +347,7 @@ static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) { std::lower_bound(NEONLdStTable, NEONLdStTable + NumEntries, Opcode); if (I != NEONLdStTable + NumEntries && I->PseudoOpc == Opcode) return I; - return NULL; + return nullptr; } /// GetDSubRegs - Get 4 D subregisters of a Q, QQ, or QQQQ register, @@ -479,6 +483,8 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) { if (SrcIsKill && !SrcIsUndef) // Add an implicit kill for the super-reg. MIB->addRegisterKilled(SrcReg, TRI, true); + else if (!SrcIsUndef) + MIB.addReg(SrcReg, RegState::Implicit); // Add implicit uses for src reg. TransferImpOps(MI, MIB, MIB); // Transfer memoperands. @@ -604,12 +610,45 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI, MIB.addOperand(MI.getOperand(OpIdx++)); MIB.addOperand(MI.getOperand(OpIdx++)); - if (SrcIsKill) // Add an implicit kill for the super-reg. - MIB->addRegisterKilled(SrcReg, TRI, true); + // Add an implicit kill and use for the super-reg. + MIB.addReg(SrcReg, RegState::Implicit | getKillRegState(SrcIsKill)); TransferImpOps(MI, MIB, MIB); MI.eraseFromParent(); } +static bool IsAnAddressOperand(const MachineOperand &MO) { + // This check is overly conservative. Unless we are certain that the machine + // operand is not a symbol reference, we return that it is a symbol reference. + // This is important as the load pair may not be split up Windows. + switch (MO.getType()) { + case MachineOperand::MO_Register: + case MachineOperand::MO_Immediate: + case MachineOperand::MO_CImmediate: + case MachineOperand::MO_FPImmediate: + return false; + case MachineOperand::MO_MachineBasicBlock: + return true; + case MachineOperand::MO_FrameIndex: + return false; + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_TargetIndex: + case MachineOperand::MO_JumpTableIndex: + case MachineOperand::MO_ExternalSymbol: + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_BlockAddress: + return true; + case MachineOperand::MO_RegisterMask: + case MachineOperand::MO_RegisterLiveOut: + return false; + case MachineOperand::MO_Metadata: + case MachineOperand::MO_MCSymbol: + return true; + case MachineOperand::MO_CFIIndex: + return false; + } + llvm_unreachable("unhandled machine operand type"); +} + void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) { MachineInstr &MI = *MBBI; @@ -620,10 +659,14 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, bool DstIsDead = MI.getOperand(0).isDead(); bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm; const MachineOperand &MO = MI.getOperand(isCC ? 
2 : 1); + bool RequiresBundling = STI->isTargetWindows() && IsAnAddressOperand(MO); MachineInstrBuilder LO16, HI16; if (!STI->hasV6T2Ops() && (Opcode == ARM::MOVi32imm || Opcode == ARM::MOVCCi32imm)) { + // FIXME Windows CE supports older ARM CPUs + assert(!STI->isTargetWindows() && "Windows on ARM requires ARMv7+"); + // Expand into a movi + orr. LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg); HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri)) @@ -660,17 +703,29 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) .addReg(DstReg); - if (MO.isImm()) { + switch (MO.getType()) { + case MachineOperand::MO_Immediate: { unsigned Imm = MO.getImm(); unsigned Lo16 = Imm & 0xffff; unsigned Hi16 = (Imm >> 16) & 0xffff; LO16 = LO16.addImm(Lo16); HI16 = HI16.addImm(Hi16); - } else { + break; + } + case MachineOperand::MO_ExternalSymbol: { + const char *ES = MO.getSymbolName(); + unsigned TF = MO.getTargetFlags(); + LO16 = LO16.addExternalSymbol(ES, TF | ARMII::MO_LO16); + HI16 = HI16.addExternalSymbol(ES, TF | ARMII::MO_HI16); + break; + } + default: { const GlobalValue *GV = MO.getGlobal(); unsigned TF = MO.getTargetFlags(); LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16); HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16); + break; + } } LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); @@ -678,6 +733,9 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, LO16.addImm(Pred).addReg(PredReg); HI16.addImm(Pred).addReg(PredReg); + if (RequiresBundling) + finalizeBundle(MBB, &*LO16, &*MBBI); + TransferImpOps(MI, LO16, HI16); MI.eraseFromParent(); } @@ -869,10 +927,16 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, } case ARM::tTPsoft: case ARM::TPsoft: { - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, MI.getDebugLoc(), - TII->get(Opcode == ARM::tTPsoft ? ARM::tBL : ARM::BL)) - .addExternalSymbol("__aeabi_read_tp", 0); + MachineInstrBuilder MIB; + if (Opcode == ARM::tTPsoft) + MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get( ARM::tBL)) + .addImm((unsigned)ARMCC::AL).addReg(0) + .addExternalSymbol("__aeabi_read_tp", 0); + else + MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get( ARM::BL)) + .addExternalSymbol("__aeabi_read_tp", 0); MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); TransferImpOps(MI, MIB, MIB); @@ -900,10 +964,61 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, return true; } - case ARM::MOV_ga_dyn: + case ARM::LDRLIT_ga_abs: + case ARM::LDRLIT_ga_pcrel: + case ARM::LDRLIT_ga_pcrel_ldr: + case ARM::tLDRLIT_ga_abs: + case ARM::tLDRLIT_ga_pcrel: { + unsigned DstReg = MI.getOperand(0).getReg(); + bool DstIsDead = MI.getOperand(0).isDead(); + const MachineOperand &MO1 = MI.getOperand(1); + const GlobalValue *GV = MO1.getGlobal(); + bool IsARM = + Opcode != ARM::tLDRLIT_ga_pcrel && Opcode != ARM::tLDRLIT_ga_abs; + bool IsPIC = + Opcode != ARM::LDRLIT_ga_abs && Opcode != ARM::tLDRLIT_ga_abs; + unsigned LDRLITOpc = IsARM ? ARM::LDRi12 : ARM::tLDRpci; + unsigned PICAddOpc = + IsARM + ? (Opcode == ARM::LDRLIT_ga_pcrel_ldr ? ARM::PICADD : ARM::PICLDR) + : ARM::tPICADD; + + // We need a new const-pool entry to load from. + MachineConstantPool *MCP = MBB.getParent()->getConstantPool(); + unsigned ARMPCLabelIndex = 0; + MachineConstantPoolValue *CPV; + + if (IsPIC) { + unsigned PCAdj = IsARM ? 
8 : 4; + ARMPCLabelIndex = AFI->createPICLabelUId(); + CPV = ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, + ARMCP::CPValue, PCAdj); + } else + CPV = ARMConstantPoolConstant::Create(GV, ARMCP::no_modifier); + + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LDRLITOpc), DstReg) + .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, 4)); + if (IsARM) + MIB.addImm(0); + AddDefaultPred(MIB); + + if (IsPIC) { + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(PICAddOpc)) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg) + .addImm(ARMPCLabelIndex); + + if (IsARM) + AddDefaultPred(MIB); + } + + MI.eraseFromParent(); + return true; + } case ARM::MOV_ga_pcrel: case ARM::MOV_ga_pcrel_ldr: - case ARM::t2MOV_ga_dyn: case ARM::t2MOV_ga_pcrel: { // Expand into movw + movw. Also "add pc" / ldr [pc] in PIC mode. unsigned LabelId = AFI->createPICLabelUId(); @@ -912,14 +1027,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, const MachineOperand &MO1 = MI.getOperand(1); const GlobalValue *GV = MO1.getGlobal(); unsigned TF = MO1.getTargetFlags(); - bool isARM = (Opcode != ARM::t2MOV_ga_pcrel && Opcode!=ARM::t2MOV_ga_dyn); - bool isPIC = (Opcode != ARM::MOV_ga_dyn && Opcode != ARM::t2MOV_ga_dyn); + bool isARM = Opcode != ARM::t2MOV_ga_pcrel; unsigned LO16Opc = isARM ? ARM::MOVi16_ga_pcrel : ARM::t2MOVi16_ga_pcrel; unsigned HI16Opc = isARM ? ARM::MOVTi16_ga_pcrel :ARM::t2MOVTi16_ga_pcrel; - unsigned LO16TF = isPIC - ? ARMII::MO_LO16_NONLAZY_PIC : ARMII::MO_LO16_NONLAZY; - unsigned HI16TF = isPIC - ? ARMII::MO_HI16_NONLAZY_PIC : ARMII::MO_HI16_NONLAZY; + unsigned LO16TF = TF | ARMII::MO_LO16; + unsigned HI16TF = TF | ARMII::MO_HI16; unsigned PICAddOpc = isARM ? (Opcode == ARM::MOV_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD) : ARM::tPICADD; @@ -927,16 +1039,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, TII->get(LO16Opc), DstReg) .addGlobalAddress(GV, MO1.getOffset(), TF | LO16TF) .addImm(LabelId); - MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), - TII->get(HI16Opc), DstReg) + + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc), DstReg) .addReg(DstReg) .addGlobalAddress(GV, MO1.getOffset(), TF | HI16TF) .addImm(LabelId); - if (!isPIC) { - TransferImpOps(MI, MIB1, MIB2); - MI.eraseFromParent(); - return true; - } MachineInstrBuilder MIB3 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(PICAddOpc)) @@ -1032,33 +1139,6 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MI.eraseFromParent(); return true; } - case ARM::VDUPfqf: - case ARM::VDUPfdf:{ - unsigned NewOpc = Opcode == ARM::VDUPfqf ? ARM::VDUPLN32q : - ARM::VDUPLN32d; - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)); - unsigned OpIdx = 0; - unsigned SrcReg = MI.getOperand(1).getReg(); - unsigned Lane = TRI->getEncodingValue(SrcReg) & 1; - unsigned DReg = TRI->getMatchingSuperReg(SrcReg, - Lane & 1 ? ARM::ssub_1 : ARM::ssub_0, - &ARM::DPR_VFP2RegClass); - // The lane is [0,1] for the containing DReg superregister. - // Copy the dst/src register operands. - MIB.addOperand(MI.getOperand(OpIdx++)); - MIB.addReg(DReg); - ++OpIdx; - // Add the lane select operand. - MIB.addImm(Lane); - // Add the predicate operands. 
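ExpandMOV32BitImm above splits a 32-bit value across a movw/movt pair; for symbolic operands on Windows the pair is additionally bundled, the usual explanation being that PE/COFF describes both instructions with a single MOV32T-style relocation and expects them to stay adjacent (hence IsAnAddressOperand erring on the side of "is a symbol reference"). The arithmetic of the split, as a standalone check mirroring the MO_Immediate case (plain C++, not the LLVM emitter):

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  uint32_t Imm = 0xDEADBEEF;
  uint32_t Lo16 = Imm & 0xffff;         // movw rd, #Lo16 (zero-extends into rd)
  uint32_t Hi16 = (Imm >> 16) & 0xffff; // movt rd, #Hi16 (keeps the low half)
  assert(((Hi16 << 16) | Lo16) == Imm);
  std::printf("movw #0x%04x / movt #0x%04x\n", Lo16, Hi16);
}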
- MIB.addOperand(MI.getOperand(OpIdx++)); - MIB.addOperand(MI.getOperand(OpIdx++)); - - TransferImpOps(MI, MIB, MIB); - MI.eraseFromParent(); - return true; - } case ARM::VLD2q8Pseudo: case ARM::VLD2q16Pseudo: @@ -1253,7 +1333,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); while (MBBI != E) { - MachineBasicBlock::iterator NMBBI = llvm::next(MBBI); + MachineBasicBlock::iterator NMBBI = std::next(MBBI); Modified |= ExpandMI(MBB, MBBI); MBBI = NMBBI; } diff --git a/contrib/llvm/lib/Target/ARM/ARMFPUName.def b/contrib/llvm/lib/Target/ARM/ARMFPUName.def index 9a1bbe703d99..1fef3b3bc5e2 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFPUName.def +++ b/contrib/llvm/lib/Target/ARM/ARMFPUName.def @@ -28,5 +28,6 @@ ARM_FPU_NAME("neon", NEON) ARM_FPU_NAME("neon-vfpv4", NEON_VFPV4) ARM_FPU_NAME("neon-fp-armv8", NEON_FP_ARMV8) ARM_FPU_NAME("crypto-neon-fp-armv8", CRYPTO_NEON_FP_ARMV8) +ARM_FPU_NAME("softvfp", SOFTVFP) #undef ARM_FPU_NAME diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp index a4004f32db37..e2d90cd9306c 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp @@ -14,11 +14,12 @@ //===----------------------------------------------------------------------===// #include "ARM.h" -#include "ARMBaseInstrInfo.h" +#include "ARMBaseRegisterInfo.h" #include "ARMCallingConv.h" #include "ARMConstantPoolValue.h" +#include "ARMISelLowering.h" +#include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" -#include "ARMTargetMachine.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/Analysis.h" @@ -30,18 +31,18 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" -#include "llvm/Support/CallSite.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" @@ -73,11 +74,12 @@ namespace { } } Address; -class ARMFastISel : public FastISel { +class ARMFastISel final : public FastISel { /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can /// make the right decision when generating code for different targets. const ARMSubtarget *Subtarget; + Module &M; const TargetMachine &TM; const TargetInstrInfo &TII; const TargetLowering &TLI; @@ -91,6 +93,7 @@ class ARMFastISel : public FastISel { explicit ARMFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) : FastISel(funcInfo, libInfo), + M(const_cast<Module&>(*funcInfo.Fn->getParent())), TM(funcInfo.MF->getTarget()), TII(*TM.getInstrInfo()), TLI(*TM.getTargetLowering()) { @@ -102,8 +105,6 @@ class ARMFastISel : public FastISel { // Code from FastISel.cpp. 
private: - unsigned FastEmitInst_(unsigned MachineInstOpcode, - const TargetRegisterClass *RC); unsigned FastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill); @@ -120,10 +121,6 @@ class ARMFastISel : public FastISel { const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, uint64_t Imm); - unsigned FastEmitInst_rf(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - const ConstantFP *FPImm); unsigned FastEmitInst_rri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, @@ -132,22 +129,15 @@ class ARMFastISel : public FastISel { unsigned FastEmitInst_i(unsigned MachineInstOpcode, const TargetRegisterClass *RC, uint64_t Imm); - unsigned FastEmitInst_ii(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - uint64_t Imm1, uint64_t Imm2); - - unsigned FastEmitInst_extractsubreg(MVT RetVT, - unsigned Op0, bool Op0IsKill, - uint32_t Idx); // Backend specific FastISel code. private: - virtual bool TargetSelectInstruction(const Instruction *I); - virtual unsigned TargetMaterializeConstant(const Constant *C); - virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI); - virtual bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, - const LoadInst *LI); - virtual bool FastLowerArguments(); + bool TargetSelectInstruction(const Instruction *I) override; + unsigned TargetMaterializeConstant(const Constant *C) override; + unsigned TargetMaterializeAlloca(const AllocaInst *AI) override; + bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, + const LoadInst *LI) override; + bool FastLowerArguments() override; private: #include "ARMGenFastISel.inc" @@ -176,8 +166,6 @@ class ARMFastISel : public FastISel { // Utility routines. private: - unsigned constrainOperandRegClass(const MCInstrDesc &II, unsigned OpNum, - unsigned Op); bool isTypeLegal(Type *Ty, MVT &VT); bool isLoadTypeLegal(Type *Ty, MVT &VT); bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, @@ -201,6 +189,8 @@ class ARMFastISel : public FastISel { unsigned ARMSelectCallOp(bool UseReg); unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, MVT VT); + const TargetLowering *getTargetLowering() { return TM.getTargetLowering(); } + // Call handling routines. private: CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, @@ -293,32 +283,6 @@ ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) { return MIB; } -unsigned ARMFastISel::constrainOperandRegClass(const MCInstrDesc &II, - unsigned Op, unsigned OpNum) { - if (TargetRegisterInfo::isVirtualRegister(Op)) { - const TargetRegisterClass *RegClass = - TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF); - if (!MRI.constrainRegClass(Op, RegClass)) { - // If it's not legal to COPY between the register classes, something - // has gone very wrong before we got here. 
- unsigned NewOp = createResultReg(RegClass); - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(TargetOpcode::COPY), NewOp).addReg(Op)); - return NewOp; - } - } - return Op; -} - -unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode, - const TargetRegisterClass* RC) { - unsigned ResultReg = createResultReg(RC); - const MCInstrDesc &II = TII.get(MachineInstOpcode); - - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)); - return ResultReg; -} - unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill) { @@ -329,12 +293,12 @@ unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode, // for this instruction. Op0 = constrainOperandRegClass(II, Op0, 1); if (II.getNumDefs() >= 1) { - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) - .addReg(Op0, Op0IsKill * RegState::Kill)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, + ResultReg).addReg(Op0, Op0IsKill * RegState::Kill)); } else { - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(Op0, Op0IsKill * RegState::Kill)); - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg) .addReg(II.ImplicitDefs[0])); } @@ -354,14 +318,15 @@ unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode, Op1 = constrainOperandRegClass(II, Op1, 2); if (II.getNumDefs() >= 1) { - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addReg(Op1, Op1IsKill * RegState::Kill)); + AddOptionalDefs( + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill)); } else { - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill)); - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg) .addReg(II.ImplicitDefs[0])); } @@ -383,16 +348,17 @@ unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode, Op2 = constrainOperandRegClass(II, Op1, 3); if (II.getNumDefs() >= 1) { - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addReg(Op1, Op1IsKill * RegState::Kill) - .addReg(Op2, Op2IsKill * RegState::Kill)); + AddOptionalDefs( + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addReg(Op2, Op2IsKill * RegState::Kill)); } else { - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) .addReg(Op2, Op2IsKill * RegState::Kill)); - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg) .addReg(II.ImplicitDefs[0])); } @@ -410,39 +376,15 @@ unsigned ARMFastISel::FastEmitInst_ri(unsigned 
MachineInstOpcode, // for this instruction. Op0 = constrainOperandRegClass(II, Op0, 1); if (II.getNumDefs() >= 1) { - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addImm(Imm)); + AddOptionalDefs( + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addImm(Imm)); } else { - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addImm(Imm)); - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(TargetOpcode::COPY), ResultReg) - .addReg(II.ImplicitDefs[0])); - } - return ResultReg; -} - -unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - const ConstantFP *FPImm) { - unsigned ResultReg = createResultReg(RC); - const MCInstrDesc &II = TII.get(MachineInstOpcode); - - // Make sure the input operand is sufficiently constrained to be legal - // for this instruction. - Op0 = constrainOperandRegClass(II, Op0, 1); - if (II.getNumDefs() >= 1) { - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addFPImm(FPImm)); - } else { - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addFPImm(FPImm)); - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg) .addReg(II.ImplicitDefs[0])); } @@ -462,16 +404,17 @@ unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode, Op0 = constrainOperandRegClass(II, Op0, 1); Op1 = constrainOperandRegClass(II, Op1, 2); if (II.getNumDefs() >= 1) { - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addReg(Op1, Op1IsKill * RegState::Kill) - .addImm(Imm)); + AddOptionalDefs( + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addImm(Imm)); } else { - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) .addImm(Imm)); - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg) .addReg(II.ImplicitDefs[0])); } @@ -485,58 +428,25 @@ unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode, const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) { - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) - .addImm(Imm)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, + ResultReg).addImm(Imm)); } else { - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addImm(Imm)); - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg) .addReg(II.ImplicitDefs[0])); } return ResultReg; } -unsigned ARMFastISel::FastEmitInst_ii(unsigned MachineInstOpcode, - const 
TargetRegisterClass *RC, - uint64_t Imm1, uint64_t Imm2) { - unsigned ResultReg = createResultReg(RC); - const MCInstrDesc &II = TII.get(MachineInstOpcode); - - if (II.getNumDefs() >= 1) { - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) - .addImm(Imm1).addImm(Imm2)); - } else { - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) - .addImm(Imm1).addImm(Imm2)); - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(TargetOpcode::COPY), - ResultReg) - .addReg(II.ImplicitDefs[0])); - } - return ResultReg; -} - -unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT, - unsigned Op0, bool Op0IsKill, - uint32_t Idx) { - unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); - assert(TargetRegisterInfo::isVirtualRegister(Op0) && - "Cannot yet extract from physregs"); - - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, - DL, TII.get(TargetOpcode::COPY), ResultReg) - .addReg(Op0, getKillRegState(Op0IsKill), Idx)); - return ResultReg; -} - // TODO: Don't worry about 64-bit now, but when this is fixed remove the // checks from the various callers. unsigned ARMFastISel::ARMMoveToFPReg(MVT VT, unsigned SrcReg) { if (VT == MVT::f64) return 0; unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT)); - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM::VMOVSR), MoveReg) .addReg(SrcReg)); return MoveReg; @@ -546,7 +456,7 @@ unsigned ARMFastISel::ARMMoveToIntReg(MVT VT, unsigned SrcReg) { if (VT == MVT::i64) return 0; unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT)); - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM::VMOVRS), MoveReg) .addReg(SrcReg)); return MoveReg; @@ -572,9 +482,8 @@ unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) { Opc = ARM::FCONSTS; } unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), - DestReg) - .addImm(Imm)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(Opc), DestReg).addImm(Imm)); return DestReg; } @@ -582,20 +491,20 @@ unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) { if (!Subtarget->hasVFP2()) return false; // MachineConstantPool wants an explicit alignment. - unsigned Align = TD.getPrefTypeAlignment(CFP->getType()); + unsigned Align = DL.getPrefTypeAlignment(CFP->getType()); if (Align == 0) { // TODO: Figure out if this is correct. - Align = TD.getTypeAllocSize(CFP->getType()); + Align = DL.getTypeAllocSize(CFP->getType()); } unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align); unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS; // The extra reg is for addrmode5. - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), - DestReg) - .addConstantPoolIndex(Idx) - .addReg(0)); + AddOptionalDefs( + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) + .addConstantPoolIndex(Idx) + .addReg(0)); return DestReg; } @@ -612,7 +521,7 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) { const TargetRegisterClass *RC = isThumb2 ? 
&ARM::rGPRRegClass : &ARM::GPRRegClass; unsigned ImmReg = createResultReg(RC); - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg) .addImm(CI->getZExtValue())); return ImmReg; @@ -626,7 +535,7 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) { if (UseImm) { unsigned Opc = isThumb2 ? ARM::t2MVNi : ARM::MVNi; unsigned ImmReg = createResultReg(TLI.getRegClassFor(MVT::i32)); - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg) .addImm(Imm)); return ImmReg; @@ -640,24 +549,25 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) { unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); // MachineConstantPool wants an explicit alignment. - unsigned Align = TD.getPrefTypeAlignment(C->getType()); + unsigned Align = DL.getPrefTypeAlignment(C->getType()); if (Align == 0) { // TODO: Figure out if this is correct. - Align = TD.getTypeAllocSize(C->getType()); + Align = DL.getTypeAllocSize(C->getType()); } unsigned Idx = MCP.getConstantPoolIndex(C, Align); if (isThumb2) - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM::t2LDRpci), DestReg) .addConstantPoolIndex(Idx)); - else + else { // The extra immediate is for addrmode2. DestReg = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg, 0); - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM::LDRcp), DestReg) .addConstantPoolIndex(Idx) .addImm(0)); + } return DestReg; } @@ -673,37 +583,36 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { (const TargetRegisterClass*)&ARM::GPRRegClass; unsigned DestReg = createResultReg(RC); - // FastISel TLS support on non-Darwin is broken, punt to SelectionDAG. + // FastISel TLS support on non-MachO is broken, punt to SelectionDAG. const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV); bool IsThreadLocal = GVar && GVar->isThreadLocal(); - if (!Subtarget->isTargetDarwin() && IsThreadLocal) return 0; + if (!Subtarget->isTargetMachO() && IsThreadLocal) return 0; // Use movw+movt when possible, it avoids constant pool entries. - // Darwin targets don't support movt with Reloc::Static, see - // ARMTargetLowering::LowerGlobalAddressDarwin. Other targets only support - // static movt relocations. - if (Subtarget->useMovt() && - Subtarget->isTargetDarwin() == (RelocM != Reloc::Static)) { + // Non-darwin targets only support static movt relocations in FastISel. + if (Subtarget->useMovt(*FuncInfo.MF) && + (Subtarget->isTargetMachO() || RelocM == Reloc::Static)) { unsigned Opc; + unsigned char TF = 0; + if (Subtarget->isTargetMachO()) + TF = ARMII::MO_NONLAZY; + switch (RelocM) { case Reloc::PIC_: Opc = isThumb2 ? ARM::t2MOV_ga_pcrel : ARM::MOV_ga_pcrel; break; - case Reloc::DynamicNoPIC: - Opc = isThumb2 ? ARM::t2MOV_ga_dyn : ARM::MOV_ga_dyn; - break; default: Opc = isThumb2 ? ARM::t2MOVi32imm : ARM::MOVi32imm; break; } - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), - DestReg).addGlobalAddress(GV)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(Opc), DestReg).addGlobalAddress(GV, 0, TF)); } else { // MachineConstantPool wants an explicit alignment. 
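ARMMaterializeInt above tries a 16-bit move first, then falls back to MVN when the bitwise complement of the value fits an ARM modified-immediate even though the value itself does not. The idea in isolation (standalone C++, not the LLVM encoder; 0xFFFFFF00 is an assumed example value):

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t Imm = 0xFFFFFF00;  // no 16-bit or rotated-8-bit encoding of its own
  uint32_t Not = ~Imm;        // 0x000000FF - a trivially encodable immediate
  // mvn rd, #0xFF writes ~0xFF = 0xFFFFFF00 into rd: one instruction, no
  // constant-pool load needed.
  std::printf("mvn rd, #0x%x -> rd = 0x%x\n", Not, ~Not);
}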
- unsigned Align = TD.getPrefTypeAlignment(GV->getType()); + unsigned Align = DL.getPrefTypeAlignment(GV->getType()); if (Align == 0) { // TODO: Figure out if this is correct. - Align = TD.getTypeAllocSize(GV->getType()); + Align = DL.getTypeAllocSize(GV->getType()); } if (Subtarget->isTargetELF() && RelocM == Reloc::PIC_) @@ -722,18 +631,18 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { MachineInstrBuilder MIB; if (isThumb2) { unsigned Opc = (RelocM!=Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic; - MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) - .addConstantPoolIndex(Idx); + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), + DestReg).addConstantPoolIndex(Idx); if (RelocM == Reloc::PIC_) MIB.addImm(Id); AddOptionalDefs(MIB); } else { // The extra immediate is for addrmode2. DestReg = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg, 0); - MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp), - DestReg) - .addConstantPoolIndex(Idx) - .addImm(0); + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(ARM::LDRcp), DestReg) + .addConstantPoolIndex(Idx) + .addImm(0); AddOptionalDefs(MIB); if (RelocM == Reloc::PIC_) { @@ -741,7 +650,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT)); MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, - DL, TII.get(Opc), NewDestReg) + DbgLoc, TII.get(Opc), NewDestReg) .addReg(DestReg) .addImm(Id); AddOptionalDefs(MIB); @@ -754,15 +663,15 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { MachineInstrBuilder MIB; unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT)); if (isThumb2) - MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM::t2LDRi12), NewDestReg) .addReg(DestReg) .addImm(0); else - MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRi12), - NewDestReg) - .addReg(DestReg) - .addImm(0); + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(ARM::LDRi12), NewDestReg) + .addReg(DestReg) + .addImm(0); DestReg = NewDestReg; AddOptionalDefs(MIB); } @@ -802,10 +711,12 @@ unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) { // This will get lowered later into the correct offsets and registers // via rewriteXFrameIndex. if (SI != FuncInfo.StaticAllocaMap.end()) { + unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri; const TargetRegisterClass* RC = TLI.getRegClassFor(VT); unsigned ResultReg = createResultReg(RC); - unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri; - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + ResultReg = constrainOperandRegClass(TII.get(Opc), ResultReg, 0); + + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addFrameIndex(SI->second) .addImm(0)); @@ -841,7 +752,7 @@ bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) { // Computes the address to get to an object. bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) { // Some boilerplate from the X86 FastISel. 
- const User *U = NULL; + const User *U = nullptr; unsigned Opcode = Instruction::UserOp1; if (const Instruction *I = dyn_cast<Instruction>(Obj)) { // Don't walk into other basic blocks unless the object is an alloca from @@ -889,11 +800,11 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) { i != e; ++i, ++GTI) { const Value *Op = *i; if (StructType *STy = dyn_cast<StructType>(*GTI)) { - const StructLayout *SL = TD.getStructLayout(STy); + const StructLayout *SL = DL.getStructLayout(STy); unsigned Idx = cast<ConstantInt>(Op)->getZExtValue(); TmpOffset += SL->getElementOffset(Idx); } else { - uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType()); + uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType()); for (;;) { if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { // Constant-offset addressing. @@ -979,7 +890,7 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3) { (const TargetRegisterClass*)&ARM::GPRRegClass; unsigned ResultReg = createResultReg(RC); unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri; - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addFrameIndex(Addr.Base.FI) .addImm(0)); @@ -1130,7 +1041,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, if (allocReg) ResultReg = createResultReg(RC); assert (ResultReg > 255 && "Expected an allocated virtual register."); - MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg); AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad, useAM3); @@ -1138,7 +1049,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, // load. Now we must move from the GRP to the FP register. if (needVMOV) { unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::f32)); - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM::VMOVSR), MoveReg) .addReg(ResultReg)); ResultReg = MoveReg; @@ -1180,7 +1091,7 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr, (const TargetRegisterClass*)&ARM::GPRRegClass); unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri; SrcReg = constrainOperandRegClass(TII.get(Opc), SrcReg, 1); - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), Res) .addReg(SrcReg).addImm(1)); SrcReg = Res; @@ -1227,7 +1138,7 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr, // Unaligned stores need special handling. Floats require word-alignment. if (Alignment && Alignment < 4) { unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32)); - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM::VMOVRS), MoveReg) .addReg(SrcReg)); SrcReg = MoveReg; @@ -1252,7 +1163,7 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr, // Create the base instruction, then add the operands. 
SrcReg = constrainOperandRegClass(TII.get(StrOpc), SrcReg, 0); - MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(StrOpc)) .addReg(SrcReg); AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore, useAM3); @@ -1363,9 +1274,9 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { return false; unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc)) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc)) .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR); - FastEmitBranch(FBB, DL); + FastEmitBranch(FBB, DbgLoc); FuncInfo.MBB->addSuccessor(TBB); return true; } @@ -1376,7 +1287,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri; unsigned OpReg = getRegForValue(TI->getOperand(0)); OpReg = constrainOperandRegClass(TII.get(TstOpc), OpReg, 0); - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TstOpc)) .addReg(OpReg).addImm(1)); @@ -1387,10 +1298,10 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { } unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc)) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc)) .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR); - FastEmitBranch(FBB, DL); + FastEmitBranch(FBB, DbgLoc); FuncInfo.MBB->addSuccessor(TBB); return true; } @@ -1398,7 +1309,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { dyn_cast<ConstantInt>(BI->getCondition())) { uint64_t Imm = CI->getZExtValue(); MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; - FastEmitBranch(Target, DL); + FastEmitBranch(Target, DbgLoc); return true; } @@ -1414,8 +1325,10 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { // the one-bit value left in the virtual register. unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri; CmpReg = constrainOperandRegClass(TII.get(TstOpc), CmpReg, 0); - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TstOpc)) - .addReg(CmpReg).addImm(1)); + AddOptionalDefs( + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TstOpc)) + .addReg(CmpReg) + .addImm(1)); unsigned CCMode = ARMCC::NE; if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { @@ -1424,9 +1337,9 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { } unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc)) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc)) .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR); - FastEmitBranch(FBB, DL); + FastEmitBranch(FBB, DbgLoc); FuncInfo.MBB->addSuccessor(TBB); return true; } @@ -1436,8 +1349,8 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) { if (AddrReg == 0) return false; unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX; - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc)) - .addReg(AddrReg)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(Opc)).addReg(AddrReg)); const IndirectBrInst *IB = cast<IndirectBrInst>(I); for (unsigned i = 0, e = IB->getNumSuccessors(); i != e; ++i) @@ -1470,7 +1383,7 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, const APInt &CIVal = ConstInt->getValue(); Imm = (isZExt) ? 
(int)CIVal.getZExtValue() : (int)CIVal.getSExtValue(); // For INT_MIN/LONG_MIN (i.e., 0x80000000) we need to use a cmp, rather - // then a cmn, because there is no way to represent 2147483648 as a + // then a cmn, because there is no way to represent 2147483648 as a // signed 32-bit int. if (Imm < 0 && Imm != (int)0x80000000) { isNegativeImm = true; @@ -1542,11 +1455,11 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, SrcReg1 = constrainOperandRegClass(II, SrcReg1, 0); if (!UseImm) { SrcReg2 = constrainOperandRegClass(II, SrcReg2, 1); - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(SrcReg1).addReg(SrcReg2)); } else { MachineInstrBuilder MIB; - MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(SrcReg1); // Only add immediate for icmp as the immediate for fcmp is an implicit 0.0. @@ -1558,7 +1471,7 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, // For floating point we need to move the result to a comparison register // that we can then use for branches. if (Ty->isFloatTy() || Ty->isDoubleTy()) - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM::FMSTAT))); return true; } @@ -1586,7 +1499,7 @@ bool ARMFastISel::SelectCmp(const Instruction *I) { Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0); unsigned ZeroReg = TargetMaterializeConstant(Zero); // ARMEmitCmp emits a FMSTAT when necessary, so it's always safe to use CPSR. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), DestReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovCCOpc), DestReg) .addReg(ZeroReg).addImm(1) .addImm(ARMPred).addReg(ARM::CPSR); @@ -1606,7 +1519,7 @@ bool ARMFastISel::SelectFPExt(const Instruction *I) { if (Op == 0) return false; unsigned Result = createResultReg(&ARM::DPRRegClass); - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM::VCVTDS), Result) .addReg(Op)); UpdateValueMap(I, Result); @@ -1625,7 +1538,7 @@ bool ARMFastISel::SelectFPTrunc(const Instruction *I) { if (Op == 0) return false; unsigned Result = createResultReg(&ARM::SPRRegClass); - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM::VCVTSD), Result) .addReg(Op)); UpdateValueMap(I, Result); @@ -1670,9 +1583,8 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) { else return false; unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT)); - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), - ResultReg) - .addReg(FP)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(Opc), ResultReg).addReg(FP)); UpdateValueMap(I, ResultReg); return true; } @@ -1697,9 +1609,8 @@ bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) { // f64->s32/u32 or f32->s32/u32 both need an intermediate f32 reg. 
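// --- Aside: the cmp-vs-cmn choice in ARMEmitCmp above, as plain C++ ---
// A negative immediate N is normally compared via "cmn" with #-N, but
// INT_MIN is excluded: 2147483648 has no signed 32-bit representation,
// so negating the immediate would overflow. Standalone restatement of
// the isNegativeImm test:
#include <cstdio>

static bool canUseCmn(int Imm) {
  return Imm < 0 && Imm != (int)0x80000000;
}

int main() {
  printf("%d\n", canUseCmn(-42));             // 1: emit cmn with #42
  printf("%d\n", canUseCmn((int)0x80000000)); // 0: must stay a cmp
  return 0;
}
// --- end aside ---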
unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32)); - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), - ResultReg) - .addReg(Op)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(Opc), ResultReg).addReg(Op)); // This result needs to be in an integer register, but the conversion only // takes place in fp-regs. @@ -1746,8 +1657,10 @@ bool ARMFastISel::SelectSelect(const Instruction *I) { unsigned CmpOpc = isThumb2 ? ARM::t2CMPri : ARM::CMPri; CondReg = constrainOperandRegClass(TII.get(CmpOpc), CondReg, 0); - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) - .addReg(CondReg).addImm(0)); + AddOptionalDefs( + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc)) + .addReg(CondReg) + .addImm(0)); unsigned MovCCOpc; const TargetRegisterClass *RC; @@ -1765,12 +1678,20 @@ bool ARMFastISel::SelectSelect(const Instruction *I) { if (!UseImm) { Op2Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op2Reg, 1); Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 2); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg) - .addReg(Op2Reg).addReg(Op1Reg).addImm(ARMCC::NE).addReg(ARM::CPSR); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovCCOpc), + ResultReg) + .addReg(Op2Reg) + .addReg(Op1Reg) + .addImm(ARMCC::NE) + .addReg(ARM::CPSR); } else { Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 1); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg) - .addReg(Op1Reg).addImm(Imm).addImm(ARMCC::EQ).addReg(ARM::CPSR); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovCCOpc), + ResultReg) + .addReg(Op1Reg) + .addImm(Imm) + .addImm(ARMCC::EQ) + .addReg(ARM::CPSR); } UpdateValueMap(I, ResultReg); return true; @@ -1859,7 +1780,7 @@ bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { unsigned ResultReg = createResultReg(&ARM::GPRnopcRegClass); SrcReg1 = constrainOperandRegClass(TII.get(Opc), SrcReg1, 1); SrcReg2 = constrainOperandRegClass(TII.get(Opc), SrcReg2, 2); - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addReg(SrcReg1).addReg(SrcReg2)); UpdateValueMap(I, ResultReg); @@ -1901,7 +1822,7 @@ bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) { if (Op2 == 0) return false; unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy)); - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addReg(Op1).addReg(Op2)); UpdateValueMap(I, ResultReg); @@ -2013,7 +1934,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, // Issue CALLSEQ_START unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) .addImm(NumBytes)); @@ -2058,9 +1979,8 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, // Now copy/store arg to correct locations. 
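// --- Aside: what the VMOVRRD just below does for an f64 argument ---
// An f64 argument assigned to core registers travels as two words;
// VMOVRRD moves the low and high halves of a D register into
// VA.getLocReg() and NextVA.getLocReg(). Bit-level model of that
// split (little-endian assumed; not part of the patch):
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  double D = 1.0;
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits));
  uint32_t Lo = (uint32_t)Bits;         // -> first location register
  uint32_t Hi = (uint32_t)(Bits >> 32); // -> NextVA's register
  printf("lo=0x%08x hi=0x%08x\n", Lo, Hi); // 1.0 -> lo=0, hi=0x3ff00000
  return 0;
}
// --- end aside ---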
if (VA.isRegLoc() && !VA.needsCustom()) { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - VA.getLocReg()) - .addReg(Arg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(Arg); RegArgs.push_back(VA.getLocReg()); } else if (VA.needsCustom()) { // TODO: We need custom lowering for vector (v2f64) args. @@ -2072,7 +1992,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, assert(VA.isRegLoc() && NextVA.isRegLoc() && "We only handle register args!"); - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM::VMOVRRD), VA.getLocReg()) .addReg(NextVA.getLocReg(), RegState::Define) .addReg(Arg)); @@ -2099,7 +2019,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, unsigned &NumBytes, bool isVarArg) { // Issue CALLSEQ_END unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp)) .addImm(NumBytes).addImm(0)); @@ -2116,7 +2036,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, MVT DestVT = RVLocs[0].getValVT(); const TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT); unsigned ResultReg = createResultReg(DstRC); - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM::VMOVDRR), ResultReg) .addReg(RVLocs[0].getLocReg()) .addReg(RVLocs[1].getLocReg())); @@ -2137,7 +2057,8 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, const TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT); unsigned ResultReg = createResultReg(DstRC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(RVLocs[0].getLocReg()); UsedRegs.push_back(RVLocs[0].getLocReg()); @@ -2214,15 +2135,15 @@ bool ARMFastISel::SelectRet(const Instruction *I) { // Avoid a cross-class copy. This is very unlikely. if (!SrcRC->contains(DstReg)) return false; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - DstReg).addReg(SrcReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg); // Add register to return instruction. RetRegs.push_back(VA.getLocReg()); } unsigned RetOpc = isThumb2 ? 
ARM::tBX_RET : ARM::BX_RET; - MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(RetOpc)); AddOptionalDefs(MIB); for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) @@ -2243,8 +2164,9 @@ unsigned ARMFastISel::getLibcallReg(const Twine &Name) { EVT LCREVT = TLI.getValueType(GVTy); if (!LCREVT.isSimple()) return 0; - GlobalValue *GV = new GlobalVariable(Type::getInt32Ty(*Context), false, - GlobalValue::ExternalLinkage, 0, Name); + GlobalValue *GV = new GlobalVariable(M, Type::getInt32Ty(*Context), false, + GlobalValue::ExternalLinkage, nullptr, + Name); assert(GV->getType() == GVTy && "We miscomputed the type for the global!"); return ARMMaterializeGV(GV, LCREVT.getSimpleVT()); } @@ -2295,7 +2217,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { if (!isTypeLegal(ArgTy, ArgVT)) return false; ISD::ArgFlagsTy Flags; - unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy); + unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); Flags.setOrigAlign(OriginalAlignment); Args.push_back(Op); @@ -2320,7 +2242,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { // Issue the call. unsigned CallOpc = ARMSelectCallOp(EnableARMLongCalls); MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, - DL, TII.get(CallOpc)); + DbgLoc, TII.get(CallOpc)); // BL / BLX don't take a predicate, but tBL / tBLX do. if (isThumb2) AddDefaultPred(MIB); @@ -2348,7 +2270,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { } bool ARMFastISel::SelectCall(const Instruction *I, - const char *IntrMemName = 0) { + const char *IntrMemName = nullptr) { const CallInst *CI = cast<CallInst>(I); const Value *Callee = CI->getCalledValue(); @@ -2428,7 +2350,7 @@ bool ARMFastISel::SelectCall(const Instruction *I, if (Arg == 0) return false; - unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy); + unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); Flags.setOrigAlign(OriginalAlignment); Args.push_back(*i); @@ -2461,7 +2383,7 @@ bool ARMFastISel::SelectCall(const Instruction *I, // Issue the call. unsigned CallOpc = ARMSelectCallOp(UseReg); MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, - DL, TII.get(CallOpc)); + DbgLoc, TII.get(CallOpc)); unsigned char OpFlags = 0; @@ -2578,7 +2500,7 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { unsigned Depth = cast<ConstantInt>(I.getOperand(0))->getZExtValue(); while (Depth--) { DestReg = createResultReg(RC); - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(LdrOpc), DestReg) .addReg(SrcReg).addImm(0)); SrcReg = DestReg; @@ -2635,7 +2557,7 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { return SelectCall(&I, "memset"); } case Intrinsic::trap: { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get( + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get( Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP)); return true; } @@ -2788,7 +2710,7 @@ unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, unsigned ImmEnc = ImmIsSO ? 
ARM_AM::getSORegOpc(ShiftAM, Imm) : Imm; bool isKill = 1 == Instr; MachineInstrBuilder MIB = BuildMI( - *FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opcode), ResultReg); + *FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opcode), ResultReg); if (setsCPSR) MIB.addReg(ARM::CPSR, RegState::Define); SrcReg = constrainOperandRegClass(TII.get(Opcode), SrcReg, 1 + setsCPSR); @@ -2866,7 +2788,7 @@ bool ARMFastISel::SelectShift(const Instruction *I, unsigned ResultReg = createResultReg(&ARM::GPRnopcRegClass); if(ResultReg == 0) return false; - MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addReg(Reg1); @@ -3027,7 +2949,7 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, // Load value. if (isThumb2) { DestReg1 = constrainOperandRegClass(TII.get(ARM::t2LDRpci), DestReg1, 0); - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM::t2LDRpci), DestReg1) .addConstantPoolIndex(Idx)); Opc = UseGOTOFF ? ARM::t2ADDrr : ARM::t2LDRs; @@ -3035,7 +2957,7 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, // The extra immediate is for addrmode2. DestReg1 = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg1, 0); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, - DL, TII.get(ARM::LDRcp), DestReg1) + DbgLoc, TII.get(ARM::LDRcp), DestReg1) .addConstantPoolIndex(Idx).addImm(0)); Opc = UseGOTOFF ? ARM::ADDrr : ARM::LDRrs; } @@ -3051,7 +2973,7 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, DestReg1 = constrainOperandRegClass(TII.get(Opc), DestReg1, 1); GlobalBaseReg = constrainOperandRegClass(TII.get(Opc), GlobalBaseReg, 2); MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, - DL, TII.get(Opc), DestReg2) + DbgLoc, TII.get(Opc), DestReg2) .addReg(DestReg1) .addReg(GlobalBaseReg); if (!UseGOTOFF) @@ -3125,7 +3047,8 @@ bool ARMFastISel::FastLowerArguments() { // Without this, EmitLiveInCopies may eliminate the livein if its only // use is a bitcast (which isn't turned into an instruction). unsigned ResultReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(DstReg, getKillRegState(true)); UpdateValueMap(I, ResultReg); } @@ -3141,7 +3064,7 @@ namespace llvm { const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>(); // Thumb2 support on iOS; ARM support on iOS, Linux and NaCl. 
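// --- Aside: the fast-isel gating below, restated as a predicate ---
// The patch widens the Mach-O case from isTargetIOS() to
// isTargetMachO(); Linux and NaCl still get fast-isel only in ARM
// mode. Hedged restatement (parameter names invented):
static bool useFastISelModel(bool MachO, bool Linux, bool NaCl,
                             bool Thumb, bool Thumb1Only) {
  bool Use = false;
  Use |= MachO && !Thumb1Only; // ARM or Thumb2 on Mach-O targets
  Use |= Linux && !Thumb;      // ARM mode only
  Use |= NaCl && !Thumb;       // ARM mode only
  return Use;
}
// --- end aside ---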
bool UseFastISel = false; - UseFastISel |= Subtarget->isTargetIOS() && !Subtarget->isThumb1Only(); + UseFastISel |= Subtarget->isTargetMachO() && !Subtarget->isThumb1Only(); UseFastISel |= Subtarget->isTargetLinux() && !Subtarget->isThumb(); UseFastISel |= Subtarget->isTargetNaCl() && !Subtarget->isThumb(); @@ -3153,6 +3076,6 @@ namespace llvm { TM.Options.NoFramePointerElim = true; return new ARMFastISel(funcInfo, libInfo); } - return 0; + return nullptr; } } diff --git a/contrib/llvm/lib/Target/ARM/ARMFeatures.h b/contrib/llvm/lib/Target/ARM/ARMFeatures.h index dafc4b3a82bd..e191a3c779f1 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFeatures.h +++ b/contrib/llvm/lib/Target/ARM/ARMFeatures.h @@ -1,4 +1,4 @@ -//===-- ARMFeatures.h - Checks for ARM instruction features ------*- C++ -*-===// +//===-- ARMFeatures.h - Checks for ARM instruction features -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -14,37 +14,27 @@ #ifndef TARGET_ARM_FEATURES_H #define TARGET_ARM_FEATURES_H -#include "ARM.h" +#include "MCTargetDesc/ARMMCTargetDesc.h" namespace llvm { template<typename InstrType> // could be MachineInstr or MCInst -inline bool isV8EligibleForIT(InstrType *Instr, int BLXOperandIndex = 0) { +bool IsCPSRDead(InstrType *Instr); + +template<typename InstrType> // could be MachineInstr or MCInst +inline bool isV8EligibleForIT(InstrType *Instr) { switch (Instr->getOpcode()) { default: return false; case ARM::tADC: case ARM::tADDi3: case ARM::tADDi8: - case ARM::tADDrSPi: case ARM::tADDrr: case ARM::tAND: case ARM::tASRri: case ARM::tASRrr: case ARM::tBIC: - case ARM::tCMNz: - case ARM::tCMPi8: - case ARM::tCMPr: case ARM::tEOR: - case ARM::tLDRBi: - case ARM::tLDRBr: - case ARM::tLDRHi: - case ARM::tLDRHr: - case ARM::tLDRSB: - case ARM::tLDRSH: - case ARM::tLDRi: - case ARM::tLDRr: - case ARM::tLDRspi: case ARM::tLSLri: case ARM::tLSLrr: case ARM::tLSRri: @@ -56,6 +46,24 @@ inline bool isV8EligibleForIT(InstrType *Instr, int BLXOperandIndex = 0) { case ARM::tROR: case ARM::tRSB: case ARM::tSBC: + case ARM::tSUBi3: + case ARM::tSUBi8: + case ARM::tSUBrr: + // Outside of an IT block, these set CPSR. 
+ return IsCPSRDead(Instr); + case ARM::tADDrSPi: + case ARM::tCMNz: + case ARM::tCMPi8: + case ARM::tCMPr: + case ARM::tLDRBi: + case ARM::tLDRBr: + case ARM::tLDRHi: + case ARM::tLDRHr: + case ARM::tLDRSB: + case ARM::tLDRSH: + case ARM::tLDRi: + case ARM::tLDRr: + case ARM::tLDRspi: case ARM::tSTRBi: case ARM::tSTRBr: case ARM::tSTRHi: @@ -63,21 +71,17 @@ inline bool isV8EligibleForIT(InstrType *Instr, int BLXOperandIndex = 0) { case ARM::tSTRi: case ARM::tSTRr: case ARM::tSTRspi: - case ARM::tSUBi3: - case ARM::tSUBi8: - case ARM::tSUBrr: case ARM::tTST: return true; // there are some "conditionally deprecated" opcodes case ARM::tADDspr: + case ARM::tBLXr: return Instr->getOperand(2).getReg() != ARM::PC; // ADD PC, SP and BLX PC were always unpredictable, // now on top of it they're deprecated case ARM::tADDrSP: case ARM::tBX: return Instr->getOperand(0).getReg() != ARM::PC; - case ARM::tBLXr: - return Instr->getOperand(BLXOperandIndex).getReg() != ARM::PC; case ARM::tADDhirr: return Instr->getOperand(0).getReg() != ARM::PC && Instr->getOperand(2).getReg() != ARM::PC; diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp index d32bdbc58939..a67b3600c4fe 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -14,15 +14,18 @@ #include "ARMFrameLowering.h" #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" +#include "ARMConstantPoolValue.h" #include "ARMMachineFunctionInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Function.h" +#include "llvm/MC/MCContext.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetOptions.h" @@ -36,6 +39,10 @@ static MachineBasicBlock::iterator skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs); +ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti) + : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4), + STI(sti) {} + /// hasFP - Return true if the specified function should have a dedicated frame /// pointer register. This is true if the function has variable sized allocas /// or if frame pointer elimination is disabled. @@ -84,7 +91,7 @@ ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const { static bool isCSRestore(MachineInstr *MI, const ARMBaseInstrInfo &TII, - const uint16_t *CSRegs) { + const MCPhysReg *CSRegs) { // Integer spill area is handled with "pop". if (isPopOpcode(MI->getOpcode())) { // The first two operands are predicates. The last two are @@ -129,24 +136,47 @@ static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB, MIFlags, Pred, PredReg); } +static int sizeOfSPAdjustment(const MachineInstr *MI) { + assert(MI->getOpcode() == ARM::VSTMDDB_UPD); + int count = 0; + // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+ + // pred) so the list starts at 4. 
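// --- Aside: the loop below in closed form ---
// Operands 0-3 of a VSTMDDB_UPD are fixed (sp def, sp use, and the
// two predicate operands); every remaining operand names one spilled
// D register, 8 bytes each. So for "vstmdb sp!, {d8-d11}" the loop
// yields (8 - 4) * 8 = 32 bytes of SP adjustment:
static int sizeOfSPAdjustmentModel(int NumOperands) {
  return (NumOperands - 4) * 8; // 8 bytes per D register in the list
}
// --- end aside ---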
+ for (int i = MI->getNumOperands() - 1; i >= 4; --i) + count += 8; + return count; +} + +static bool WindowsRequiresStackProbe(const MachineFunction &MF, + size_t StackSizeInBytes) { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + if (MFI->getStackProtectorIndex() > 0) + return StackSizeInBytes >= 4080; + return StackSizeInBytes >= 4096; +} + void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + MachineModuleInfo &MMI = MF.getMMI(); + MCContext &Context = MMI.getContext(); + const TargetMachine &TM = MF.getTarget(); + const MCRegisterInfo *MRI = Context.getRegisterInfo(); const ARMBaseRegisterInfo *RegInfo = - static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo()); + static_cast<const ARMBaseRegisterInfo*>(TM.getRegisterInfo()); const ARMBaseInstrInfo &TII = - *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); + *static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo()); assert(!AFI->isThumb1OnlyFunction() && "This emitPrologue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); - unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); + unsigned Align = TM.getFrameLowering()->getStackAlignment(); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); unsigned NumBytes = MFI->getStackSize(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); unsigned FramePtr = RegInfo->getFrameRegister(MF); + int CFAOffset = 0; // Determine the sizes of each callee-save spill areas and record which frame // belongs to which callee-save spill areas. @@ -159,22 +189,46 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { if (MF.getFunction()->getCallingConv() == CallingConv::GHC) return; - // Allocate the vararg register save area. This is not counted in NumBytes. - if (ArgRegsSaveSize) + // Allocate the vararg register save area. + if (ArgRegsSaveSize) { emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize, MachineInstr::FrameSetup); + CFAOffset -= ArgRegsSaveSize; + unsigned CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } - if (!AFI->hasStackFrame()) { - if (NumBytes != 0) - emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes, + if (!AFI->hasStackFrame() && + (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) { + if (NumBytes - ArgRegsSaveSize != 0) { + emitSPUpdate(isARM, MBB, MBBI, dl, TII, -(NumBytes - ArgRegsSaveSize), MachineInstr::FrameSetup); + CFAOffset -= NumBytes - ArgRegsSaveSize; + unsigned CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } return; } + // Determine spill area sizes. 
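// --- Aside: WindowsRequiresStackProbe above, restated ---
// Windows/ARM frames at least a page deep must probe the stack via
// __chkstk (emitted later in this emitPrologue) so the guard page is
// touched. With a stack protector slot the threshold drops to 4080
// bytes; the 16 bytes of headroom is the patch's choice, its reason
// not stated here.
#include <cstddef>

static bool windowsRequiresStackProbeModel(bool HasStackProtector,
                                           size_t StackSizeInBytes) {
  return StackSizeInBytes >= (HasStackProtector ? 4080u : 4096u);
}
// --- end aside ---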
for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); int FI = CSI[i].getFrameIdx(); switch (Reg) { + case ARM::R8: + case ARM::R9: + case ARM::R10: + case ARM::R11: + case ARM::R12: + if (STI.isTargetDarwin()) { + GPRCS2Size += 4; + break; + } + // fallthrough case ARM::R0: case ARM::R1: case ARM::R2: @@ -188,18 +242,6 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { FramePtrSpillFI = FI; GPRCS1Size += 4; break; - case ARM::R8: - case ARM::R9: - case ARM::R10: - case ARM::R11: - case ARM::R12: - if (Reg == FramePtr) - FramePtrSpillFI = FI; - if (STI.isTargetIOS()) - GPRCS2Size += 4; - else - GPRCS1Size += 4; - break; default: // This is a DPR. Exclude the aligned DPRCS2 spills. if (Reg == ARM::D8) @@ -210,18 +252,21 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { } // Move past area 1. - MachineBasicBlock::iterator LastPush = MBB.end(), FramePtrPush; + MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push, + DPRCSPush; if (GPRCS1Size > 0) - FramePtrPush = LastPush = MBBI++; + GPRCS1Push = LastPush = MBBI++; // Determine starting offsets of spill areas. bool HasFP = hasFP(MF); - unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); + unsigned DPRCSOffset = NumBytes - (ArgRegsSaveSize + GPRCS1Size + + GPRCS2Size + DPRCSSize); unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; int FramePtrOffsetInPush = 0; if (HasFP) { - FramePtrOffsetInPush = MFI->getObjectOffset(FramePtrSpillFI) + GPRCS1Size; + FramePtrOffsetInPush = MFI->getObjectOffset(FramePtrSpillFI) + + GPRCS1Size + ArgRegsSaveSize; AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes); } @@ -230,13 +275,12 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); // Move past area 2. - if (GPRCS2Size > 0) { - LastPush = MBBI++; - } + if (GPRCS2Size > 0) + GPRCS2Push = LastPush = MBBI++; // Move past area 3. if (DPRCSSize > 0) { - LastPush = MBBI++; + DPRCSPush = MBBI; // Since vpush register list cannot have gaps, there may be multiple vpush // instructions in the prologue. 
while (MBBI->getOpcode() == ARM::VSTMDDB_UPD) @@ -254,11 +298,60 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { } else NumBytes = DPRCSOffset; + if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) { + uint32_t NumWords = NumBytes >> 2; + + if (NumWords < 65536) + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4) + .addImm(NumWords) + .setMIFlags(MachineInstr::FrameSetup)); + else + BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R4) + .addImm(NumWords) + .setMIFlags(MachineInstr::FrameSetup); + + switch (TM.getCodeModel()) { + case CodeModel::Small: + case CodeModel::Medium: + case CodeModel::Default: + case CodeModel::Kernel: + BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL)) + .addImm((unsigned)ARMCC::AL).addReg(0) + .addExternalSymbol("__chkstk") + .addReg(ARM::R4, RegState::Implicit) + .setMIFlags(MachineInstr::FrameSetup); + break; + case CodeModel::Large: + case CodeModel::JITDefault: + BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12) + .addExternalSymbol("__chkstk") + .setMIFlags(MachineInstr::FrameSetup); + + BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr)) + .addImm((unsigned)ARMCC::AL).addReg(0) + .addReg(ARM::R12, RegState::Kill) + .addReg(ARM::R4, RegState::Implicit) + .setMIFlags(MachineInstr::FrameSetup); + break; + } + + AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), + ARM::SP) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::R4, RegState::Kill) + .setMIFlags(MachineInstr::FrameSetup))); + NumBytes = 0; + } + + unsigned adjustedGPRCS1Size = GPRCS1Size; if (NumBytes) { // Adjust SP after all the callee-save spills. - if (tryFoldSPUpdateIntoPushPop(MF, LastPush, NumBytes)) { - if (LastPush == FramePtrPush) + if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, NumBytes)) { + if (LastPush == GPRCS1Push) { FramePtrOffsetInPush += NumBytes; + adjustedGPRCS1Size += NumBytes; + NumBytes = 0; + } } else emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes, MachineInstr::FrameSetup); @@ -275,17 +368,138 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { AFI->setShouldRestoreSPFromFP(true); } + if (adjustedGPRCS1Size > 0) { + CFAOffset -= adjustedGPRCS1Size; + unsigned CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); + MachineBasicBlock::iterator Pos = ++GPRCS1Push; + BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + for (const auto &Entry : CSI) { + unsigned Reg = Entry.getReg(); + int FI = Entry.getFrameIdx(); + switch (Reg) { + case ARM::R8: + case ARM::R9: + case ARM::R10: + case ARM::R11: + case ARM::R12: + if (STI.isTargetDarwin()) + break; + // fallthrough + case ARM::R0: + case ARM::R1: + case ARM::R2: + case ARM::R3: + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + case ARM::LR: + CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( + nullptr, MRI->getDwarfRegNum(Reg, true), MFI->getObjectOffset(FI))); + BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + break; + } + } + } + // Set FP to point to the stack slot that contains the previous FP. // For iOS, FP is R7, which has now been stored in spill area 1. // Otherwise, if this is not iOS, all the callee-saved registers go // into spill area 1, including the FP in R11. In either case, it // is in area one and the adjustment needs to take place just after // that push. 
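// --- Aside: the CFAOffset bookkeeping used throughout this prologue ---
// Each SP-decrementing step (vararg save area, pushes, local
// allocation) subtracts its size from a running CFAOffset, then a CFI
// def_cfa_offset records the new distance so unwinders can find the
// CFA. Minimal trace with invented sizes:
#include <cstdio>

int main() {
  int CFAOffset = 0;
  const int ArgRegsSaveSize = 8, GPRCS1Size = 20, Locals = 32;
  CFAOffset -= ArgRegsSaveSize; // after the vararg register save
  printf("cfa offset now %d\n", -CFAOffset);
  CFAOffset -= GPRCS1Size;      // after push {r4-r7, lr}
  printf("cfa offset now %d\n", -CFAOffset);
  CFAOffset -= Locals;          // after allocating locals
  printf("cfa offset now %d\n", -CFAOffset); // CFA = SP + 60 here
  return 0;
}
// --- end aside ---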
- if (HasFP) - emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, ++FramePtrPush, dl, TII, + if (HasFP) { + emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, GPRCS1Push, dl, TII, FramePtr, ARM::SP, FramePtrOffsetInPush, MachineInstr::FrameSetup); + if (FramePtrOffsetInPush) { + CFAOffset += FramePtrOffsetInPush; + unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa( + nullptr, MRI->getDwarfRegNum(FramePtr, true), CFAOffset)); + BuildMI(MBB, GPRCS1Push, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } else { + unsigned CFIIndex = + MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister( + nullptr, MRI->getDwarfRegNum(FramePtr, true))); + BuildMI(MBB, GPRCS1Push, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } + } + + if (GPRCS2Size > 0) { + MachineBasicBlock::iterator Pos = ++GPRCS2Push; + if (!HasFP) { + CFAOffset -= GPRCS2Size; + unsigned CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); + BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } + for (const auto &Entry : CSI) { + unsigned Reg = Entry.getReg(); + int FI = Entry.getFrameIdx(); + switch (Reg) { + case ARM::R8: + case ARM::R9: + case ARM::R10: + case ARM::R11: + case ARM::R12: + if (STI.isTargetDarwin()) { + unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); + unsigned Offset = MFI->getObjectOffset(FI); + unsigned CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset)); + BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } + break; + } + } + } + + if (DPRCSSize > 0) { + // Since vpush register list cannot have gaps, there may be multiple vpush + // instructions in the prologue. + do { + MachineBasicBlock::iterator Push = DPRCSPush++; + if (!HasFP) { + CFAOffset -= sizeOfSPAdjustment(Push); + unsigned CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); + BuildMI(MBB, DPRCSPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } + } while (DPRCSPush->getOpcode() == ARM::VSTMDDB_UPD); + + for (const auto &Entry : CSI) { + unsigned Reg = Entry.getReg(); + int FI = Entry.getFrameIdx(); + if ((Reg >= ARM::D0 && Reg <= ARM::D31) && + (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) { + unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); + unsigned Offset = MFI->getObjectOffset(FI); + unsigned CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset)); + BuildMI(MBB, DPRCSPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } + } + } + + if (NumBytes) { + if (!HasFP) { + CFAOffset -= NumBytes; + unsigned CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } + } if (STI.isTargetELF() && hasFP(MF)) MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - @@ -378,11 +592,11 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, return; if (!AFI->hasStackFrame()) { - if (NumBytes != 0) - emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); + if (NumBytes - ArgRegsSaveSize != 0) + emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes - ArgRegsSaveSize); } else { // Unwind MBBI to point to first LDR / VLDRD. 
- const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF); + const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); if (MBBI != MBB.begin()) { do { --MBBI; @@ -392,7 +606,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, } // Move SP to start of FP callee save spill area. - NumBytes -= (AFI->getGPRCalleeSavedArea1Size() + + NumBytes -= (ArgRegsSaveSize + + AFI->getGPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea2Size() + AFI->getDPRCalleeSavedAreaSize()); @@ -430,7 +645,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, ARM::SP) .addReg(FramePtr)); } - } else if (NumBytes && !tryFoldSPUpdateIntoPushPop(MF, MBBI, NumBytes)) + } else if (NumBytes && + !tryFoldSPUpdateIntoPushPop(STI, MF, MBBI, NumBytes)) emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); // Increment past our save areas. @@ -453,7 +669,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, // Jump to label or value in register. if (RetOpcode == ARM::TCRETURNdi) { unsigned TCOpcode = STI.isThumb() ? - (STI.isTargetIOS() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) : + (STI.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) : ARM::TAILJMPd; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode)); if (JumpTarget.isGlobal()) @@ -473,7 +689,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, addReg(JumpTarget.getReg(), RegState::Kill); } - MachineInstr *NewMI = prior(MBBI); + MachineInstr *NewMI = std::prev(MBBI); for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i) NewMI->addOperand(MBBI->getOperand(i)); @@ -598,7 +814,7 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, unsigned LastReg = 0; for (; i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); - if (!(Func)(Reg, STI.isTargetIOS())) continue; + if (!(Func)(Reg, STI.isTargetDarwin())) continue; // D-registers in the aligned area DPRCS2 are NOT spilled here. if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs) @@ -644,6 +860,11 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, AddDefaultPred(MIB); } Regs.clear(); + + // Put any subsequent vpush instructions before this one: they will refer to + // higher register numbers so need to be pushed first in order to preserve + // monotonicity. + --MI; } } @@ -671,7 +892,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, bool DeleteRet = false; for (; i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); - if (!(Func)(Reg, STI.isTargetIOS())) continue; + if (!(Func)(Reg, STI.isTargetDarwin())) continue; // The aligned reloads from area DPRCS2 are not inserted here. if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs) @@ -727,6 +948,10 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, AddDefaultPred(MIB); } Regs.clear(); + + // Put any subsequent vpop instructions after this one: they will refer to + // higher register numbers so need to be popped afterwards. + ++MI; } } @@ -858,7 +1083,7 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, } // The last spill instruction inserted should kill the scratch register r4. - llvm::prior(MI)->addRegisterKilled(ARM::R4, TRI); + std::prev(MI)->addRegisterKilled(ARM::R4, TRI); } /// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an @@ -968,7 +1193,7 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB, .addReg(ARM::R4).addImm(2*(NextReg-R4BaseReg))); // Last store kills r4. 
- llvm::prior(MI)->addRegisterKilled(ARM::R4, TRI); + std::prev(MI)->addRegisterKilled(ARM::R4, TRI); } bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, @@ -1036,12 +1261,9 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, static unsigned GetFunctionSizeInBytes(const MachineFunction &MF, const ARMBaseInstrInfo &TII) { unsigned FnSize = 0; - for (MachineFunction::const_iterator MBBI = MF.begin(), E = MF.end(); - MBBI != E; ++MBBI) { - const MachineBasicBlock &MBB = *MBBI; - for (MachineBasicBlock::const_iterator I = MBB.begin(),E = MBB.end(); - I != E; ++I) - FnSize += TII.GetInstSizeInBytes(I); + for (auto &MBB : MF) { + for (auto &MI : MBB) + FnSize += TII.GetInstSizeInBytes(&MI); } return FnSize; } @@ -1054,21 +1276,21 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF, const TargetFrameLowering *TFI) { const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned Limit = (1 << 12) - 1; - for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) { - for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); - I != E; ++I) { - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { - if (!I->getOperand(i).isFI()) continue; + for (auto &MBB : MF) { + for (auto &MI : MBB) { + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + if (!MI.getOperand(i).isFI()) + continue; // When using ADDri to get the address of a stack object, 255 is the // largest offset guaranteed to fit in the immediate offset. - if (I->getOpcode() == ARM::ADDri) { + if (MI.getOpcode() == ARM::ADDri) { Limit = std::min(Limit, (1U << 8) - 1); break; } // Otherwise check the addressing mode. - switch (I->getDesc().TSFlags & ARMII::AddrModeMask) { + switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) { case ARMII::AddrMode3: case ARMII::AddrModeT2_i8: Limit = std::min(Limit, (1U << 8) - 1); @@ -1205,7 +1427,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Don't spill FP if the frame can be eliminated. This is determined // by scanning the callee-save registers to see if any is used. - const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF); + const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; bool Spilled = false; @@ -1220,7 +1442,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, if (Spilled) { NumGPRSpills++; - if (!STI.isTargetIOS()) { + if (!STI.isTargetDarwin()) { if (Reg == ARM::LR) LRSpilled = true; CS1Spilled = true; @@ -1242,7 +1464,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, break; } } else { - if (!STI.isTargetIOS()) { + if (!STI.isTargetDarwin()) { UnspilledCS1GPRs.push_back(Reg); continue; } @@ -1317,6 +1539,10 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, if (hasFP(MF)) { MRI.setPhysRegUsed(FramePtr); + auto FPPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(), + FramePtr); + if (FPPos != UnspilledCS1GPRs.end()) + UnspilledCS1GPRs.erase(FPPos); NumGPRSpills++; } @@ -1444,3 +1670,370 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MBB.erase(I); } +/// Get the minimum constant for ARM that is greater than or equal to the +/// argument. In ARM, constants can have any value that can be produced by +/// rotating an 8-bit value to the right by an even number of bits within a +/// 32-bit word. 
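// --- Aside: the encodability rule just described, as a checker ---
// A 32-bit value is an ARM modified immediate iff rotating it left by
// some even amount yields a value that fits in 8 bits (undoing the
// even right-rotation of an 8-bit byte). alignToARMConstant, defined
// next, rounds a size up to the nearest such value:
#include <cstdint>

static bool isARMModifiedImm(uint32_t V) {
  for (unsigned R = 0; R < 32; R += 2) {
    uint32_t Rot = (V << R) | (V >> ((32 - R) & 31)); // rotl(V, R)
    if (Rot <= 0xFFu)
      return true;
  }
  return false;
}
// --- end aside ---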
+static uint32_t alignToARMConstant(uint32_t Value) { + unsigned Shifted = 0; + + if (Value == 0) + return 0; + + while (!(Value & 0xC0000000)) { + Value = Value << 2; + Shifted += 2; + } + + bool Carry = (Value & 0x00FFFFFF); + Value = ((Value & 0xFF000000) >> 24) + Carry; + + if (Value & 0x0000100) + Value = Value & 0x000001FC; + + if (Shifted > 24) + Value = Value >> (Shifted - 24); + else + Value = Value << (24 - Shifted); + + return Value; +} + +// The stack limit in the TCB is set to this many bytes above the actual +// stack limit. +static const uint64_t kSplitStackAvailable = 256; + +// Adjust the function prologue to enable split stacks. This currently only +// supports android and linux. +// +// The ABI of the segmented stack prologue is a little arbitrarily chosen, but +// must be well defined in order to allow for consistent implementations of the +// __morestack helper function. The ABI is also not a normal ABI in that it +// doesn't follow the normal calling conventions because this allows the +// prologue of each function to be optimized further. +// +// Currently, the ABI looks like (when calling __morestack) +// +// * r4 holds the minimum stack size requested for this function call +// * r5 holds the stack size of the arguments to the function +// * the beginning of the function is 3 instructions after the call to +// __morestack +// +// Implementations of __morestack should use r4 to allocate a new stack, r5 to +// place the arguments on to the new stack, and the 3-instruction knowledge to +// jump directly to the body of the function when working on the new stack. +// +// An old (and possibly no longer compatible) implementation of __morestack for +// ARM can be found at [1]. +// +// [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S +void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { + unsigned Opcode; + unsigned CFIIndex; + const ARMSubtarget *ST = &MF.getTarget().getSubtarget<ARMSubtarget>(); + bool Thumb = ST->isThumb(); + + // Sadly, this currently doesn't support varargs, platforms other than + // android/linux. Note that thumb1/thumb2 are support for android/linux. + if (MF.getFunction()->isVarArg()) + report_fatal_error("Segmented stacks do not support vararg functions."); + if (!ST->isTargetAndroid() && !ST->isTargetLinux()) + report_fatal_error("Segmented stacks not supported on this platform."); + + MachineBasicBlock &prologueMBB = MF.front(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineModuleInfo &MMI = MF.getMMI(); + MCContext &Context = MMI.getContext(); + const MCRegisterInfo *MRI = Context.getRegisterInfo(); + const ARMBaseInstrInfo &TII = + *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); + ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>(); + DebugLoc DL; + + uint64_t StackSize = MFI->getStackSize(); + + // Do not generate a prologue for functions with a stack of size zero + if (StackSize == 0) + return; + + // Use R4 and R5 as scratch registers. + // We save R4 and R5 before use and restore them before leaving the function. 
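// --- Aside: the guard this prologue builds, as a predicate ---
// The emitted code compares the stack limit (read from the TCB below)
// against either sp itself (small frames) or sp minus the aligned
// frame size, and calls __morestack when the limit is not below that
// probe point. Standalone model (names invented):
#include <cstdint>
#include <cstdio>

static bool needMoreStackModel(uint32_t StackLimit, uint32_t SP,
                               uint32_t AlignedStackSize) {
  const uint32_t kAvail = 256; // kSplitStackAvailable
  uint32_t Probe =
      AlignedStackSize < kAvail ? SP : SP - AlignedStackSize;
  // The emitted "blo PostStackMBB" skips the call when limit < probe.
  return StackLimit >= Probe;
}

int main() {
  printf("%d\n", needMoreStackModel(0x1000, 0x8000, 128)); // 0: fits
  printf("%d\n", needMoreStackModel(0x1000, 0x1100, 512)); // 1: grow
  return 0;
}
// --- end aside ---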
+ unsigned ScratchReg0 = ARM::R4; + unsigned ScratchReg1 = ARM::R5; + uint64_t AlignedStackSize; + + MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock(); + MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock(); + MachineBasicBlock *AllocMBB = MF.CreateMachineBasicBlock(); + MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock(); + MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock(); + + for (MachineBasicBlock::livein_iterator i = prologueMBB.livein_begin(), + e = prologueMBB.livein_end(); + i != e; ++i) { + AllocMBB->addLiveIn(*i); + GetMBB->addLiveIn(*i); + McrMBB->addLiveIn(*i); + PrevStackMBB->addLiveIn(*i); + PostStackMBB->addLiveIn(*i); + } + + MF.push_front(PostStackMBB); + MF.push_front(AllocMBB); + MF.push_front(GetMBB); + MF.push_front(McrMBB); + MF.push_front(PrevStackMBB); + + // The required stack size that is aligned to ARM constant criterion. + AlignedStackSize = alignToARMConstant(StackSize); + + // When the frame size is less than 256 we just compare the stack + // boundary directly to the value of the stack pointer, per gcc. + bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable; + + // We will use two of the callee save registers as scratch registers so we + // need to save those registers onto the stack. + // We will use SR0 to hold stack limit and SR1 to hold the stack size + // requested and arguments for __morestack(). + // SR0: Scratch Register #0 + // SR1: Scratch Register #1 + // push {SR0, SR1} + if (Thumb) { + AddDefaultPred(BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH))) + .addReg(ScratchReg0).addReg(ScratchReg1); + } else { + AddDefaultPred(BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD)) + .addReg(ARM::SP, RegState::Define).addReg(ARM::SP)) + .addReg(ScratchReg0).addReg(ScratchReg1); + } + + // Emit the relevant DWARF information about the change in stack pointer as + // well as where to find both r4 and r5 (the callee-save registers) + CFIIndex = + MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -8)); + BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( + nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4)); + BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( + nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8)); + BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + + // mov SR1, sp + if (Thumb) { + AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1) + .addReg(ARM::SP)); + } else if (CompareStackPointer) { + AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1) + .addReg(ARM::SP)).addReg(0); + } + + // sub SR1, sp, #StackSize + if (!CompareStackPointer && Thumb) { + AddDefaultPred( + AddDefaultCC(BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)) + .addReg(ScratchReg1).addImm(AlignedStackSize)); + } else if (!CompareStackPointer) { + AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1) + .addReg(ARM::SP).addImm(AlignedStackSize)).addReg(0); + } + + if (Thumb && ST->isThumb1Only()) { + unsigned PCLabelId = ARMFI->createPICLabelUId(); + ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create( + MF.getFunction()->getContext(), "__STACK_LIMIT", PCLabelId, 0); + MachineConstantPool *MCP = MF.getConstantPool(); + unsigned CPI = MCP->getConstantPoolIndex(NewCPV, MF.getAlignment()); + + // ldr SR0, [pc, 
offset(STACK_LIMIT)] + AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0) + .addConstantPoolIndex(CPI)); + + // ldr SR0, [SR0] + AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0) + .addReg(ScratchReg0).addImm(0)); + } else { + // Get TLS base address from the coprocessor + // mrc p15, #0, SR0, c13, c0, #3 + AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::MRC), ScratchReg0) + .addImm(15) + .addImm(0) + .addImm(13) + .addImm(0) + .addImm(3)); + + // Use the last tls slot on android and a private field of the TCP on linux. + assert(ST->isTargetAndroid() || ST->isTargetLinux()); + unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1; + + // Get the stack limit from the right offset + // ldr SR0, [sr0, #4 * TlsOffset] + AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::LDRi12), ScratchReg0) + .addReg(ScratchReg0).addImm(4 * TlsOffset)); + } + + // Compare stack limit with stack size requested. + // cmp SR0, SR1 + Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr; + AddDefaultPred(BuildMI(GetMBB, DL, TII.get(Opcode)) + .addReg(ScratchReg0) + .addReg(ScratchReg1)); + + // This jump is taken if StackLimit < SP - stack required. + Opcode = Thumb ? ARM::tBcc : ARM::Bcc; + BuildMI(GetMBB, DL, TII.get(Opcode)).addMBB(PostStackMBB) + .addImm(ARMCC::LO) + .addReg(ARM::CPSR); + + + // Calling __morestack(StackSize, Size of stack arguments). + // __morestack knows that the stack size requested is in SR0(r4) + // and amount size of stack arguments is in SR1(r5). + + // Pass first argument for the __morestack by Scratch Register #0. + // The amount size of stack required + if (Thumb) { + AddDefaultPred(AddDefaultCC(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), + ScratchReg0)).addImm(AlignedStackSize)); + } else { + AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0) + .addImm(AlignedStackSize)).addReg(0); + } + // Pass second argument for the __morestack by Scratch Register #1. + // The amount size of stack consumed to save function arguments. + if (Thumb) { + AddDefaultPred( + AddDefaultCC(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)) + .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))); + } else { + AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1) + .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))) + .addReg(0); + } + + // push {lr} - Save return address of this function. + if (Thumb) { + AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH))) + .addReg(ARM::LR); + } else { + AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD)) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::SP)) + .addReg(ARM::LR); + } + + // Emit the DWARF info about the change in stack as well as where to find the + // previous link register + CFIIndex = + MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -12)); + BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( + nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12)); + BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + + // Call __morestack(). + if (Thumb) { + AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tBL))) + .addExternalSymbol("__morestack"); + } else { + BuildMI(AllocMBB, DL, TII.get(ARM::BL)) + .addExternalSymbol("__morestack"); + } + + // pop {lr} - Restore return address of this original function. 
+ if (Thumb) { + if (ST->isThumb1Only()) { + AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))) + .addReg(ScratchReg0); + AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR) + .addReg(ScratchReg0)); + } else { + AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST)) + .addReg(ARM::LR, RegState::Define) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::SP) + .addImm(4)); + } + } else { + AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD)) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::SP)) + .addReg(ARM::LR); + } + + // Restore SR0 and SR1 in case of __morestack() was called. + // __morestack() will skip PostStackMBB block so we need to restore + // scratch registers from here. + // pop {SR0, SR1} + if (Thumb) { + AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))) + .addReg(ScratchReg0) + .addReg(ScratchReg1); + } else { + AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD)) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::SP)) + .addReg(ScratchReg0) + .addReg(ScratchReg1); + } + + // Update the CFA offset now that we've popped + CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0)); + BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + + // bx lr - Return from this function. + Opcode = Thumb ? ARM::tBX_RET : ARM::BX_RET; + AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(Opcode))); + + // Restore SR0 and SR1 in case of __morestack() was not called. + // pop {SR0, SR1} + if (Thumb) { + AddDefaultPred(BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP))) + .addReg(ScratchReg0) + .addReg(ScratchReg1); + } else { + AddDefaultPred(BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD)) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::SP)) + .addReg(ScratchReg0) + .addReg(ScratchReg1); + } + + // Update the CFA offset now that we've popped + CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0)); + BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + + // Tell debuggers that r4 and r5 are now the same as they were in the + // previous function, that they're the "Same Value". 
+ CFIIndex = MMI.addFrameInst(MCCFIInstruction::createSameValue( + nullptr, MRI->getDwarfRegNum(ScratchReg0, true))); + BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + CFIIndex = MMI.addFrameInst(MCCFIInstruction::createSameValue( + nullptr, MRI->getDwarfRegNum(ScratchReg1, true))); + BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + + // Organizing MBB lists + PostStackMBB->addSuccessor(&prologueMBB); + + AllocMBB->addSuccessor(PostStackMBB); + + GetMBB->addSuccessor(PostStackMBB); + GetMBB->addSuccessor(AllocMBB); + + McrMBB->addSuccessor(GetMBB); + + PrevStackMBB->addSuccessor(McrMBB); + +#ifdef XDEBUG + MF.verify(); +#endif +} diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h index d95a2cb9fd42..709afbcdc681 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h +++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h @@ -14,8 +14,6 @@ #ifndef ARM_FRAMEINFO_H #define ARM_FRAMEINFO_H -#include "ARM.h" -#include "ARMSubtarget.h" #include "llvm/Target/TargetFrameLowering.h" namespace llvm { @@ -26,38 +24,36 @@ protected: const ARMSubtarget &STI; public: - explicit ARMFrameLowering(const ARMSubtarget &sti) - : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 8), - STI(sti) { - } + explicit ARMFrameLowering(const ARMSubtarget &sti); /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. - void emitPrologue(MachineFunction &MF) const; - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + void emitPrologue(MachineFunction &MF) const override; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const; + const TargetRegisterInfo *TRI) const override; bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const; + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const override; - bool hasFP(const MachineFunction &MF) const; - bool hasReservedCallFrame(const MachineFunction &MF) const; - bool canSimplifyCallFramePseudos(const MachineFunction &MF) const; + bool hasFP(const MachineFunction &MF) const override; + bool hasReservedCallFrame(const MachineFunction &MF) const override; + bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override; int getFrameIndexReference(const MachineFunction &MF, int FI, - unsigned &FrameReg) const; - int ResolveFrameIndexReference(const MachineFunction &MF, - int FI, + unsigned &FrameReg) const override; + int ResolveFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg, int SPAdj) const; - int getFrameIndexOffset(const MachineFunction &MF, int FI) const; + int getFrameIndexOffset(const MachineFunction &MF, int FI) const override; void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const; + RegScavenger *RS) const override; + + void adjustForSegmentedStacks(MachineFunction &MF) const override; private: void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, @@ -71,10 +67,10 @@ public: bool(*Func)(unsigned, bool), unsigned NumAlignedDPRCS2Regs) const; - virtual void eliminateCallFramePseudoInstr( - 
MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI) const; + void + eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const override; }; } // End llvm namespace diff --git a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp index c69d313fd9ce..0885c4e5721a 100644 --- a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp @@ -57,7 +57,7 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { (LastMCID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) { MachineBasicBlock::iterator I = LastMI; if (I != LastMI->getParent()->begin()) { - I = llvm::prior(I); + I = std::prev(I); DefMI = &*I; } } @@ -77,7 +77,7 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { } void ARMHazardRecognizer::Reset() { - LastMI = 0; + LastMI = nullptr; FpMLxStalls = 0; ScoreboardHazardRecognizer::Reset(); } @@ -95,7 +95,7 @@ void ARMHazardRecognizer::EmitInstruction(SUnit *SU) { void ARMHazardRecognizer::AdvanceCycle() { if (FpMLxStalls && --FpMLxStalls == 0) // Stalled for 4 cycles but still can't schedule any other instructions. - LastMI = 0; + LastMI = nullptr; ScoreboardHazardRecognizer::AdvanceCycle(); } diff --git a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h index e1dcec3d1cc8..a8198e26703e 100644 --- a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h +++ b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h @@ -35,13 +35,13 @@ public: ARMHazardRecognizer(const InstrItineraryData *ItinData, const ScheduleDAG *DAG) : ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched"), - LastMI(0) {} + LastMI(nullptr) {} - virtual HazardType getHazardType(SUnit *SU, int Stalls); - virtual void Reset(); - virtual void EmitInstruction(SUnit *SU); - virtual void AdvanceCycle(); - virtual void RecedeCycle(); + HazardType getHazardType(SUnit *SU, int Stalls) override; + void Reset() override; + void EmitInstruction(SUnit *SU) override; + void AdvanceCycle() override; + void RecedeCycle() override; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 6d9b18877f72..38547cfae2e0 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "arm-isel" #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMTargetMachine.h" @@ -32,12 +31,13 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; +#define DEBUG_TYPE "arm-isel" + static cl::opt<bool> DisableShifterOp("disable-shifter-op", cl::Hidden, cl::desc("Disable isel of shifter-op"), @@ -60,24 +60,26 @@ enum AddrMode2Type { }; class ARMDAGToDAGISel : public SelectionDAGISel { - ARMBaseTargetMachine &TM; - /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can /// make the right decision when generating code for different targets. 
const ARMSubtarget *Subtarget; public: - explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, - CodeGenOpt::Level OptLevel) - : SelectionDAGISel(tm, OptLevel), TM(tm), - Subtarget(&TM.getSubtarget<ARMSubtarget>()) { + explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel) + : SelectionDAGISel(tm, OptLevel) {} + + bool runOnMachineFunction(MachineFunction &MF) override { + // Reset the subtarget each time through. + Subtarget = &MF.getTarget().getSubtarget<ARMSubtarget>(); + SelectionDAGISel::runOnMachineFunction(MF); + return true; } - virtual const char *getPassName() const { + const char *getPassName() const override { return "ARM Instruction Selection"; } - virtual void PreprocessISelDAG(); + void PreprocessISelDAG() override; /// getI32Imm - Return a target constant of type i32 with the specified /// value. @@ -85,7 +87,7 @@ public: return CurDAG->getTargetConstant(Imm, MVT::i32); } - SDNode *Select(SDNode *N); + SDNode *Select(SDNode *N) override; bool hasNoVMLxHazardUse(SDNode *N) const; @@ -253,13 +255,10 @@ private: SDNode *SelectConcatVector(SDNode *N); - SDNode *SelectAtomic(SDNode *N, unsigned Op8, unsigned Op16, unsigned Op32, unsigned Op64); - /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. - virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, - std::vector<SDValue> &OutOps); + bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, + std::vector<SDValue> &OutOps) override; // Form pairs of consecutive R, S, D, or Q registers. SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1); @@ -401,7 +400,7 @@ void ARMDAGToDAGISel::PreprocessISelDAG() { N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32, N1, CurDAG->getConstant(TZ, MVT::i32)); CurDAG->UpdateNodeOperands(N, N0, N1); - } + } } /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS @@ -414,8 +413,8 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const { if (!CheckVMLxHazard) return true; - if (!Subtarget->isCortexA8() && !Subtarget->isCortexA9() && - !Subtarget->isSwift()) + if (!Subtarget->isCortexA7() && !Subtarget->isCortexA8() && + !Subtarget->isCortexA9() && !Subtarget->isSwift()) return true; if (!N->hasOneUse()) @@ -425,8 +424,8 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const { if (Use->getOpcode() == ISD::CopyToReg) return true; if (Use->isMachineOpcode()) { - const ARMBaseInstrInfo *TII = - static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo()); + const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>( + CurDAG->getTarget().getInstrInfo()); const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode()); if (MCID.mayStore()) @@ -534,8 +533,7 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, } if (N.getOpcode() == ARMISD::Wrapper && - !(Subtarget->useMovt() && - N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { + N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { Base = N.getOperand(0); } else Base = N; @@ -702,8 +700,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, Base = CurDAG->getTargetFrameIndex(FI, getTargetLowering()->getPointerTy()); } else if (N.getOpcode() == ARMISD::Wrapper && - !(Subtarget->useMovt() && - N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { + N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { Base = N.getOperand(0); } Offset = CurDAG->getRegister(0, MVT::i32); @@ -963,8 +960,7 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, Base = 
CurDAG->getTargetFrameIndex(FI, getTargetLowering()->getPointerTy()); } else if (N.getOpcode() == ARMISD::Wrapper && - !(Subtarget->useMovt() && - N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { + N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { Base = N.getOperand(0); } Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), @@ -1141,8 +1137,7 @@ ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, if (!CurDAG->isBaseWithConstantOffset(N)) { if (N.getOpcode() == ARMISD::Wrapper && - !(Subtarget->useMovt() && - N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { + N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { Base = N.getOperand(0); } else { Base = N; @@ -1278,8 +1273,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, } if (N.getOpcode() == ARMISD::Wrapper && - !(Subtarget->useMovt() && - N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { + N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { Base = N.getOperand(0); if (Base.getOpcode() == ISD::TargetConstantPool) return false; // We want to select t2LDRpci instead. @@ -1412,7 +1406,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N, bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm) { - // This *must* succeed since it's used for the irreplacable ldrex and strex + // This *must* succeed since it's used for the irreplaceable ldrex and strex // instructions. Base = N; OffImm = CurDAG->getTargetConstant(0, MVT::i32); @@ -1449,7 +1443,7 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) { LoadSDNode *LD = cast<LoadSDNode>(N); ISD::MemIndexedMode AM = LD->getAddressingMode(); if (AM == ISD::UNINDEXED) - return NULL; + return nullptr; EVT LoadedVT = LD->getMemoryVT(); SDValue Offset, AMOpc; @@ -1515,14 +1509,14 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) { } } - return NULL; + return nullptr; } SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) { LoadSDNode *LD = cast<LoadSDNode>(N); ISD::MemIndexedMode AM = LD->getAddressingMode(); if (AM == ISD::UNINDEXED) - return NULL; + return nullptr; EVT LoadedVT = LD->getMemoryVT(); bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD; @@ -1549,7 +1543,7 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) { Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST; break; default: - return NULL; + return nullptr; } Match = true; } @@ -1563,7 +1557,7 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) { MVT::Other, Ops); } - return NULL; + return nullptr; } /// \brief Form a GPRPair pseudo register from a pair of GPR regs. @@ -1708,10 +1702,10 @@ static bool isVSTfixed(unsigned Opc) case ARM::VST1d16wb_fixed : return true; case ARM::VST1d32wb_fixed : return true; case ARM::VST1d64wb_fixed : return true; - case ARM::VST1q8wb_fixed : return true; - case ARM::VST1q16wb_fixed : return true; - case ARM::VST1q32wb_fixed : return true; - case ARM::VST1q64wb_fixed : return true; + case ARM::VST1q8wb_fixed : return true; + case ARM::VST1q16wb_fixed : return true; + case ARM::VST1q32wb_fixed : return true; + case ARM::VST1q64wb_fixed : return true; case ARM::VST1d64TPseudoWB_fixed : return true; case ARM::VST1d64QPseudoWB_fixed : return true; case ARM::VST2d8wb_fixed : return true; @@ -1785,7 +1779,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, SDValue MemAddr, Align; unsigned AddrOpIdx = isUpdating ? 
1 : 2; if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) - return NULL; + return nullptr; SDValue Chain = N->getOperand(0); EVT VT = N->getValueType(0); @@ -1904,7 +1898,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1)); if (isUpdating) ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2)); - return NULL; + return nullptr; } SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, @@ -1918,7 +1912,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, unsigned AddrOpIdx = isUpdating ? 1 : 2; unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1) if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) - return NULL; + return nullptr; MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); @@ -2064,7 +2058,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, unsigned AddrOpIdx = isUpdating ? 1 : 2; unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1) if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) - return NULL; + return nullptr; MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); @@ -2169,7 +2163,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1)); if (isUpdating) ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2)); - return NULL; + return nullptr; } SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, @@ -2180,7 +2174,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, SDValue MemAddr, Align; if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align)) - return NULL; + return nullptr; MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); @@ -2252,7 +2246,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1)); if (isUpdating) ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2)); - return NULL; + return nullptr; } SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, @@ -2291,7 +2285,7 @@ SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned) { if (!Subtarget->hasV6T2Ops()) - return NULL; + return nullptr; unsigned Opc = isSigned ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX) @@ -2304,7 +2298,7 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, // The immediate is a mask of the low bits iff imm & (imm+1) == 0 if (And_imm & (And_imm + 1)) - return NULL; + return nullptr; unsigned Srl_imm = 0; if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, @@ -2324,7 +2318,7 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, SDValue Ops[] = { N->getOperand(0).getOperand(0), CurDAG->getTargetConstant(LSB, MVT::i32), getAL(CurDAG), Reg0, Reg0 }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); } // ARM models shift instructions as MOVsi with shifter operand. 
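Several hunks above drop the trailing element count from calls such as CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5). In the LLVM 3.5 API these operand lists are taken as an ArrayRef, which captures the pointer and the length together when a fixed-size array is passed, so the hand-maintained counts can no longer drift out of sync with the array. Below is a minimal, self-contained sketch of that idiom; ArrayRefSketch and sumOperands are invented stand-ins, not the real llvm::ArrayRef.

#include <cstddef>
#include <iostream>

// Minimal stand-in for llvm::ArrayRef: a pointer plus a length, with a
// constructor template that deduces the length from a fixed-size array.
// This is the idiom that lets the patch drop the trailing operand counts;
// the real llvm::ArrayRef is richer.
template <typename T> class ArrayRefSketch {
  const T *Data;
  std::size_t Length;

public:
  template <std::size_t N>
  /*implicit*/ ArrayRefSketch(const T (&Arr)[N]) : Data(Arr), Length(N) {}
  std::size_t size() const { return Length; }
  const T &operator[](std::size_t I) const { return Data[I]; }
};

// Before the change, callers passed (Ops, 5) and had to keep the count in
// sync by hand; now the array reference carries its own size.
static int sumOperands(ArrayRefSketch<int> Ops) {
  int Sum = 0;
  for (std::size_t I = 0, E = Ops.size(); I != E; ++I)
    Sum += Ops[I];
  return Sum;
}

int main() {
  int Ops[] = {1, 2, 3, 4, 5};           // length deduced at the call site
  std::cout << sumOperands(Ops) << "\n"; // prints 15
}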
@@ -2334,17 +2328,17 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, MVT::i32); SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc, getAL(CurDAG), Reg0, Reg0 }; - return CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops, 5); + return CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops); } SDValue Ops[] = { N->getOperand(0).getOperand(0), CurDAG->getTargetConstant(LSB, MVT::i32), CurDAG->getTargetConstant(Width, MVT::i32), - getAL(CurDAG), Reg0 }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); + getAL(CurDAG), Reg0 }; + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); } } - return NULL; + return nullptr; } // Otherwise, we're looking for a shift of a shift @@ -2358,16 +2352,16 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, unsigned Width = 32 - Srl_imm - 1; int LSB = Srl_imm - Shl_imm; if (LSB < 0) - return NULL; + return nullptr; SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(0).getOperand(0), CurDAG->getTargetConstant(LSB, MVT::i32), CurDAG->getTargetConstant(Width, MVT::i32), getAL(CurDAG), Reg0 }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); } } - return NULL; + return nullptr; } /// Target-specific DAG combining for ISD::XOR. @@ -2386,10 +2380,10 @@ SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){ EVT VT = N->getValueType(0); if (Subtarget->isThumb1Only()) - return NULL; + return nullptr; if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA) - return NULL; + return nullptr; SDValue ADDSrc0 = XORSrc0.getOperand(0); SDValue ADDSrc1 = XORSrc0.getOperand(1); @@ -2400,13 +2394,13 @@ SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){ unsigned Size = XType.getSizeInBits() - 1; if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 && - XType.isInteger() && SRAConstant != NULL && + XType.isInteger() && SRAConstant != nullptr && Size == SRAConstant->getZExtValue()) { unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS; return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0); } - return NULL; + return nullptr; } SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) { @@ -2418,44 +2412,12 @@ SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) { return createDRegPairNode(VT, N->getOperand(0), N->getOperand(1)); } -SDNode *ARMDAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8, - unsigned Op16,unsigned Op32, - unsigned Op64) { - // Mostly direct translation to the given operations, except that we preserve - // the AtomicOrdering for use later on. - AtomicSDNode *AN = cast<AtomicSDNode>(Node); - EVT VT = AN->getMemoryVT(); - - unsigned Op; - SDVTList VTs = CurDAG->getVTList(AN->getValueType(0), MVT::Other); - if (VT == MVT::i8) - Op = Op8; - else if (VT == MVT::i16) - Op = Op16; - else if (VT == MVT::i32) - Op = Op32; - else if (VT == MVT::i64) { - Op = Op64; - VTs = CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other); - } else - llvm_unreachable("Unexpected atomic operation"); - - SmallVector<SDValue, 6> Ops; - for (unsigned i = 1; i < AN->getNumOperands(); ++i) - Ops.push_back(AN->getOperand(i)); - - Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32)); - Ops.push_back(AN->getOperand(0)); // Chain moves to the end - - return CurDAG->SelectNodeTo(Node, Op, VTs, &Ops[0], Ops.size()); -} - SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDLoc dl(N); if (N->isMachineOpcode()) { N->setNodeId(-1); - return NULL; // Already selected. + return nullptr; // Already selected. 
} switch (N->getOpcode()) { @@ -2477,19 +2439,21 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case ISD::Constant: { unsigned Val = cast<ConstantSDNode>(N)->getZExtValue(); bool UseCP = true; - if (Subtarget->hasThumb2()) + if (Subtarget->useMovt(*MF)) // Thumb2-aware targets have the MOVT instruction, so all immediates can // be done with MOV + MOVT, at worst. - UseCP = 0; + UseCP = false; else { if (Subtarget->isThumb()) { - UseCP = (Val > 255 && // MOV - ~Val > 255 && // MOV + MVN - !ARM_AM::isThumbImmShiftedVal(Val)); // MOV + LSL + UseCP = (Val > 255 && // MOV + ~Val > 255 && // MOV + MVN + !ARM_AM::isThumbImmShiftedVal(Val) && // MOV + LSL + !(Subtarget->hasV6T2Ops() && Val <= 0xffff)); // MOVW } else - UseCP = (ARM_AM::getSOImmVal(Val) == -1 && // MOV - ARM_AM::getSOImmVal(~Val) == -1 && // MVN - !ARM_AM::isSOImmTwoPartVal(Val)); // two instrs. + UseCP = (ARM_AM::getSOImmVal(Val) == -1 && // MOV + ARM_AM::getSOImmVal(~Val) == -1 && // MVN + !ARM_AM::isSOImmTwoPartVal(Val) && // two instrs. + !(Subtarget->hasV6T2Ops() && Val <= 0xffff)); // MOVW } if (UseCP) { @@ -2499,7 +2463,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { getTargetLowering()->getPointerTy()); SDNode *ResNode; - if (Subtarget->isThumb1Only()) { + if (Subtarget->isThumb()) { SDValue Pred = getAL(CurDAG); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() }; @@ -2517,7 +2481,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { Ops); } ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0)); - return NULL; + return nullptr; } // Other cases are autogenerated. @@ -2531,14 +2495,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { if (Subtarget->isThumb1Only()) { SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, Ops, 4); + return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, Ops); } else { unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ? ARM::t2ADDri : ARM::ADDri); SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); } } case ISD::SRL: @@ -2565,10 +2529,10 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); if (Subtarget->isThumb()) { SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 }; - return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops, 6); + return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops); } else { SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 }; - return CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops, 7); + return CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops); } } if (isPowerOf2_32(RHSV+1)) { // 2^n-1? 
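The ISD::Constant hunk just above widens the Thumb immediate test: in addition to MOV (an 8-bit value), MOV + MVN, and MOV + LSL of a shifted 8-bit value, a v6T2 target can materialize any 16-bit immediate with a single MOVW, so UseCP is set only when every cheap inline sequence fails and a constant-pool load is genuinely needed. A rough sketch of that decision follows; isThumbImmShiftedVal here is a simplified re-implementation written for the sketch, standing in for the real ARM_AM predicate in ARMAddressingModes.h.

#include <cstdint>
#include <cstdio>

// Simplified re-implementation for this sketch: true if V fits in an 8-bit
// value shifted left by 0..24 bits.
static bool isThumbImmShiftedVal(uint32_t V) {
  for (unsigned Shift = 0; Shift <= 24; ++Shift)
    if ((V & ~(0xFFu << Shift)) == 0)
      return true;
  return false;
}

// Mirrors the Thumb branch of the UseCP computation above: return true only
// when no cheap inline sequence can build Val and a literal load is required.
static bool thumbNeedsConstantPool(uint32_t Val, bool HasV6T2) {
  if (Val <= 255) return false;                // MOV
  if (~Val <= 255) return false;               // MOV + MVN
  if (isThumbImmShiftedVal(Val)) return false; // MOV + LSL
  if (HasV6T2 && Val <= 0xffff) return false;  // MOVW (the newly added case)
  return true;                                 // constant-pool load
}

int main() {
  printf("%d\n", thumbNeedsConstantPool(0x12340000, /*HasV6T2=*/true)); // 1
  printf("%d\n", thumbNeedsConstantPool(0x00001234, /*HasV6T2=*/true)); // 0: MOVW
}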
@@ -2581,10 +2545,10 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); if (Subtarget->isThumb()) { SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 }; - return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops, 6); + return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops); } else { SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 }; - return CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops, 7); + return CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops); } } } @@ -2699,7 +2663,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } } case ISD::LOAD: { - SDNode *ResNode = 0; + SDNode *ResNode = nullptr; if (Subtarget->isThumb() && Subtarget->hasThumb2()) ResNode = SelectT2IndexedLoad(N); else @@ -2746,13 +2710,13 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } ReplaceUses(SDValue(N, 0), SDValue(Chain.getNode(), Chain.getResNo())); - return NULL; + return nullptr; } case ARMISD::VZIP: { unsigned Opc = 0; EVT VT = N->getValueType(0); switch (VT.getSimpleVT().SimpleTy) { - default: return NULL; + default: return nullptr; case MVT::v8i8: Opc = ARM::VZIPd8; break; case MVT::v4i16: Opc = ARM::VZIPd16; break; case MVT::v2f32: @@ -2772,7 +2736,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { unsigned Opc = 0; EVT VT = N->getValueType(0); switch (VT.getSimpleVT().SimpleTy) { - default: return NULL; + default: return nullptr; case MVT::v8i8: Opc = ARM::VUZPd8; break; case MVT::v4i16: Opc = ARM::VUZPd16; break; case MVT::v2f32: @@ -2792,7 +2756,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { unsigned Opc = 0; EVT VT = N->getValueType(0); switch (VT.getSimpleVT().SimpleTy) { - default: return NULL; + default: return nullptr; case MVT::v8i8: Opc = ARM::VTRNd8; break; case MVT::v4i16: Opc = ARM::VTRNd16; break; case MVT::v2f32: @@ -2873,7 +2837,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed, ARM::VLD1q64wb_fixed }; - return SelectVLD(N, true, 1, DOpcodes, QOpcodes, 0); + return SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr); } case ARMISD::VLD2_UPD: { @@ -2884,7 +2848,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed, ARM::VLD2q16PseudoWB_fixed, ARM::VLD2q32PseudoWB_fixed }; - return SelectVLD(N, true, 2, DOpcodes, QOpcodes, 0); + return SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr); } case ARMISD::VLD3_UPD: { @@ -2951,7 +2915,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ARM::VST1q16wb_fixed, ARM::VST1q32wb_fixed, ARM::VST1q64wb_fixed }; - return SelectVST(N, true, 1, DOpcodes, QOpcodes, 0); + return SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr); } case ARMISD::VST2_UPD: { @@ -2962,7 +2926,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed, ARM::VST2q16PseudoWB_fixed, ARM::VST2q32PseudoWB_fixed }; - return SelectVST(N, true, 2, DOpcodes, QOpcodes, 0); + return SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr); } case ARMISD::VST3_UPD: { @@ -3027,13 +2991,16 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { default: break; + case Intrinsic::arm_ldaexd: case Intrinsic::arm_ldrexd: { - SDValue MemAddr = N->getOperand(2); SDLoc dl(N); SDValue Chain = N->getOperand(0); - + SDValue MemAddr = N->getOperand(2); bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2(); - unsigned NewOpc = isThumb ? ARM::t2LDREXD :ARM::LDREXD; + + bool IsAcquire = IntNo == Intrinsic::arm_ldaexd; + unsigned NewOpc = isThumb ? (IsAcquire ? 
ARM::t2LDAEXD : ARM::t2LDREXD) + : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD); // arm_ldrexd returns a i64 value in {i32, i32} std::vector<EVT> ResTys; @@ -3083,9 +3050,9 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ReplaceUses(SDValue(N, 1), Result); } ReplaceUses(SDValue(N, 2), OutChain); - return NULL; + return nullptr; } - + case Intrinsic::arm_stlexd: case Intrinsic::arm_strexd: { SDLoc dl(N); SDValue Chain = N->getOperand(0); @@ -3111,7 +3078,9 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { Ops.push_back(CurDAG->getRegister(0, MVT::i32)); Ops.push_back(Chain); - unsigned NewOpc = isThumb ? ARM::t2STREXD : ARM::STREXD; + bool IsRelease = IntNo == Intrinsic::arm_stlexd; + unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD) + : (IsRelease ? ARM::STLEXD : ARM::STREXD); SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); // Transfer memoperands. @@ -3127,7 +3096,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ARM::VLD1d32, ARM::VLD1d64 }; static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, ARM::VLD1q32, ARM::VLD1q64}; - return SelectVLD(N, false, 1, DOpcodes, QOpcodes, 0); + return SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr); } case Intrinsic::arm_neon_vld2: { @@ -3135,7 +3104,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ARM::VLD2d32, ARM::VLD1q64 }; static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo, ARM::VLD2q32Pseudo }; - return SelectVLD(N, false, 2, DOpcodes, QOpcodes, 0); + return SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr); } case Intrinsic::arm_neon_vld3: { @@ -3198,7 +3167,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ARM::VST1d32, ARM::VST1d64 }; static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16, ARM::VST1q32, ARM::VST1q64 }; - return SelectVST(N, false, 1, DOpcodes, QOpcodes, 0); + return SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr); } case Intrinsic::arm_neon_vst2: { @@ -3206,7 +3175,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ARM::VST2d32, ARM::VST1q64 }; static uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo, ARM::VST2q32Pseudo }; - return SelectVST(N, false, 2, DOpcodes, QOpcodes, 0); + return SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr); } case Intrinsic::arm_neon_vst3: { @@ -3320,91 +3289,6 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case ISD::CONCAT_VECTORS: return SelectConcatVector(N); - - case ISD::ATOMIC_LOAD: - if (cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i64) - return SelectAtomic(N, 0, 0, 0, ARM::ATOMIC_LOAD_I64); - else - break; - - case ISD::ATOMIC_STORE: - if (cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i64) - return SelectAtomic(N, 0, 0, 0, ARM::ATOMIC_STORE_I64); - else - break; - - case ISD::ATOMIC_LOAD_ADD: - return SelectAtomic(N, - ARM::ATOMIC_LOAD_ADD_I8, - ARM::ATOMIC_LOAD_ADD_I16, - ARM::ATOMIC_LOAD_ADD_I32, - ARM::ATOMIC_LOAD_ADD_I64); - case ISD::ATOMIC_LOAD_SUB: - return SelectAtomic(N, - ARM::ATOMIC_LOAD_SUB_I8, - ARM::ATOMIC_LOAD_SUB_I16, - ARM::ATOMIC_LOAD_SUB_I32, - ARM::ATOMIC_LOAD_SUB_I64); - case ISD::ATOMIC_LOAD_AND: - return SelectAtomic(N, - ARM::ATOMIC_LOAD_AND_I8, - ARM::ATOMIC_LOAD_AND_I16, - ARM::ATOMIC_LOAD_AND_I32, - ARM::ATOMIC_LOAD_AND_I64); - case ISD::ATOMIC_LOAD_OR: - return SelectAtomic(N, - ARM::ATOMIC_LOAD_OR_I8, - ARM::ATOMIC_LOAD_OR_I16, - ARM::ATOMIC_LOAD_OR_I32, - ARM::ATOMIC_LOAD_OR_I64); - case ISD::ATOMIC_LOAD_XOR: - return SelectAtomic(N, - ARM::ATOMIC_LOAD_XOR_I8, - ARM::ATOMIC_LOAD_XOR_I16, - ARM::ATOMIC_LOAD_XOR_I32, - ARM::ATOMIC_LOAD_XOR_I64); - case 
ISD::ATOMIC_LOAD_NAND: - return SelectAtomic(N, - ARM::ATOMIC_LOAD_NAND_I8, - ARM::ATOMIC_LOAD_NAND_I16, - ARM::ATOMIC_LOAD_NAND_I32, - ARM::ATOMIC_LOAD_NAND_I64); - case ISD::ATOMIC_LOAD_MIN: - return SelectAtomic(N, - ARM::ATOMIC_LOAD_MIN_I8, - ARM::ATOMIC_LOAD_MIN_I16, - ARM::ATOMIC_LOAD_MIN_I32, - ARM::ATOMIC_LOAD_MIN_I64); - case ISD::ATOMIC_LOAD_MAX: - return SelectAtomic(N, - ARM::ATOMIC_LOAD_MAX_I8, - ARM::ATOMIC_LOAD_MAX_I16, - ARM::ATOMIC_LOAD_MAX_I32, - ARM::ATOMIC_LOAD_MAX_I64); - case ISD::ATOMIC_LOAD_UMIN: - return SelectAtomic(N, - ARM::ATOMIC_LOAD_UMIN_I8, - ARM::ATOMIC_LOAD_UMIN_I16, - ARM::ATOMIC_LOAD_UMIN_I32, - ARM::ATOMIC_LOAD_UMIN_I64); - case ISD::ATOMIC_LOAD_UMAX: - return SelectAtomic(N, - ARM::ATOMIC_LOAD_UMAX_I8, - ARM::ATOMIC_LOAD_UMAX_I16, - ARM::ATOMIC_LOAD_UMAX_I32, - ARM::ATOMIC_LOAD_UMAX_I64); - case ISD::ATOMIC_SWAP: - return SelectAtomic(N, - ARM::ATOMIC_SWAP_I8, - ARM::ATOMIC_SWAP_I16, - ARM::ATOMIC_SWAP_I32, - ARM::ATOMIC_SWAP_I64); - case ISD::ATOMIC_CMP_SWAP: - return SelectAtomic(N, - ARM::ATOMIC_CMP_SWAP_I8, - ARM::ATOMIC_CMP_SWAP_I16, - ARM::ATOMIC_CMP_SWAP_I32, - ARM::ATOMIC_CMP_SWAP_I64); } return SelectCode(N); @@ -3425,7 +3309,8 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ // them into a GPRPair. SDLoc dl(N); - SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1) : SDValue(0,0); + SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1) + : SDValue(nullptr,0); SmallVector<bool, 8> OpChanged; // Glue node will be appended late. @@ -3507,7 +3392,7 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ // Update the original glue user. std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1); Ops.push_back(T1.getValue(1)); - CurDAG->UpdateNodeOperands(GU, &Ops[0], Ops.size()); + CurDAG->UpdateNodeOperands(GU, Ops); GU = T1.getNode(); } else { @@ -3554,11 +3439,10 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ if (Glue.getNode()) AsmNodeOperands.push_back(Glue); if (!Changed) - return NULL; + return nullptr; SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N), - CurDAG->getVTList(MVT::Other, MVT::Glue), &AsmNodeOperands[0], - AsmNodeOperands.size()); + CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands); New->setNodeId(-1); return New.getNode(); } diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp index f3be81877e52..a76531a3869a 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -12,9 +12,7 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "arm-isel" #include "ARMISelLowering.h" -#include "ARM.h" #include "ARMCallingConv.h" #include "ARMConstantPoolValue.h" #include "ARMMachineFunctionInfo.h" @@ -38,29 +36,26 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Type.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" #include <utility> using namespace llvm; +#define DEBUG_TYPE "arm-isel" + STATISTIC(NumTailCalls, "Number of tail calls"); STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt"); 
STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments"); -// This option should go away when tail calls fully work. -static cl::opt<bool> -EnableARMTailCalls("arm-tail-calls", cl::Hidden, - cl::desc("Generate tail calls (TEMPORARY OPTION)."), - cl::init(false)); - cl::opt<bool> EnableARMLongCalls("arm-long-calls", cl::Hidden, cl::desc("Generate calls via indirect call instructions"), @@ -87,7 +82,7 @@ namespace { } // The APCS parameter registers. -static const uint16_t GPRArgRegs[] = { +static const MCPhysReg GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; @@ -160,25 +155,26 @@ void ARMTargetLowering::addQRTypeForNEON(MVT VT) { addTypeForNEON(VT, MVT::v2f64, MVT::v4i32); } -static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) { - if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin()) +static TargetLoweringObjectFile *createTLOF(const Triple &TT) { + if (TT.isOSBinFormatMachO()) return new TargetLoweringObjectFileMachO(); - + if (TT.isOSWindows()) + return new TargetLoweringObjectFileCOFF(); return new ARMElfTargetObjectFile(); } ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) - : TargetLowering(TM, createTLOF(TM)) { + : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))) { Subtarget = &TM.getSubtarget<ARMSubtarget>(); RegInfo = TM.getRegisterInfo(); Itins = TM.getInstrItineraryData(); setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); - if (Subtarget->isTargetIOS()) { + if (Subtarget->isTargetMachO()) { // Uses VFP for Thumb libfuncs if available. if (Subtarget->isThumb() && Subtarget->hasVFP2() && - Subtarget->hasARMOps()) { + Subtarget->hasARMOps() && !TM.Options.UseSoftFloat) { // Single-precision floating-point arithmetic. setLibcallName(RTLIB::ADD_F32, "__addsf3vfp"); setLibcallName(RTLIB::SUB_F32, "__subsf3vfp"); @@ -254,172 +250,134 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } // These libcalls are not available in 32-bit. 
- setLibcallName(RTLIB::SHL_I128, 0); - setLibcallName(RTLIB::SRL_I128, 0); - setLibcallName(RTLIB::SRA_I128, 0); - - if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetDarwin()) { - // Double-precision floating-point arithmetic helper functions - // RTABI chapter 4.1.2, Table 2 - setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd"); - setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv"); - setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul"); - setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub"); - setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS); - - // Double-precision floating-point comparison helper functions - // RTABI chapter 4.1.2, Table 3 - setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq"); - setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE); - setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq"); - setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ); - setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt"); - setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE); - setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple"); - setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE); - setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge"); - setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE); - setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt"); - setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE); - setLibcallName(RTLIB::UO_F64, "__aeabi_dcmpun"); - setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE); - setLibcallName(RTLIB::O_F64, "__aeabi_dcmpun"); - setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ); - setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS); - - // Single-precision floating-point arithmetic helper functions - // RTABI chapter 4.1.2, Table 4 - setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd"); - setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv"); - setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul"); - setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub"); - setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS); - - // Single-precision floating-point comparison helper functions - // RTABI chapter 4.1.2, Table 5 - setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq"); - setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE); - setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq"); - setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ); - setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt"); - setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE); - setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple"); - setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE); - setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge"); - setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE); - setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt"); - setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE); - setLibcallName(RTLIB::UO_F32, "__aeabi_fcmpun"); - setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE); - setLibcallName(RTLIB::O_F32, "__aeabi_fcmpun"); - 
setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ); - setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS); - - // Floating-point to integer conversions. - // RTABI chapter 4.1.2, Table 6 - setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz"); - setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz"); - setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz"); - setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz"); - setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz"); - setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz"); - setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz"); - setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz"); - setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS); - - // Conversions between floating types. - // RTABI chapter 4.1.2, Table 7 - setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f"); - setLibcallName(RTLIB::FPEXT_F32_F64, "__aeabi_f2d"); - setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS); - - // Integer to floating-point conversions. 
- // RTABI chapter 4.1.2, Table 8 - setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d"); - setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d"); - setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d"); - setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d"); - setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f"); - setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f"); - setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f"); - setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f"); - setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS); - - // Long long helper functions - // RTABI chapter 4.2, Table 9 - setLibcallName(RTLIB::MUL_I64, "__aeabi_lmul"); - setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl"); - setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr"); - setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr"); - setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS); - - // Integer division functions - // RTABI chapter 4.3.1 - setLibcallName(RTLIB::SDIV_I8, "__aeabi_idiv"); - setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv"); - setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv"); - setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod"); - setLibcallName(RTLIB::UDIV_I8, "__aeabi_uidiv"); - setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv"); - setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv"); - setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod"); - setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS); - - // Memory operations - // RTABI chapter 4.3.4 - setLibcallName(RTLIB::MEMCPY, "__aeabi_memcpy"); - setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove"); - setLibcallName(RTLIB::MEMSET, "__aeabi_memset"); - setLibcallCallingConv(RTLIB::MEMCPY, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::MEMMOVE, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::MEMSET, CallingConv::ARM_AAPCS); + setLibcallName(RTLIB::SHL_I128, nullptr); + setLibcallName(RTLIB::SRL_I128, nullptr); + setLibcallName(RTLIB::SRA_I128, nullptr); + + if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetMachO() && + !Subtarget->isTargetWindows()) { + static const struct { + const RTLIB::Libcall Op; + const char * const Name; + const CallingConv::ID CC; + const ISD::CondCode Cond; + } LibraryCalls[] = { + // 
Double-precision floating-point arithmetic helper functions + // RTABI chapter 4.1.2, Table 2 + { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Double-precision floating-point comparison helper functions + // RTABI chapter 4.1.2, Table 3 + { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ }, + { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ }, + + // Single-precision floating-point arithmetic helper functions + // RTABI chapter 4.1.2, Table 4 + { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Single-precision floating-point comparison helper functions + // RTABI chapter 4.1.2, Table 5 + { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ }, + { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ }, + + // Floating-point to integer conversions. + // RTABI chapter 4.1.2, Table 6 + { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Conversions between floating types. + // RTABI chapter 4.1.2, Table 7 + { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Integer to floating-point conversions. 
+ // RTABI chapter 4.1.2, Table 8 + { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Long long helper functions + // RTABI chapter 4.2, Table 9 + { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Integer division functions + // RTABI chapter 4.3.1 + { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Memory operations + // RTABI chapter 4.3.4 + { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + }; + + for (const auto &LC : LibraryCalls) { + setLibcallName(LC.Op, LC.Name); + setLibcallCallingConv(LC.Op, LC.CC); + if (LC.Cond != ISD::SETCC_INVALID) + setCmpLibcallCC(LC.Op, LC.Cond); + } + } + + if (Subtarget->isTargetWindows()) { + static const struct { + const RTLIB::Libcall Op; + const char * const Name; + const CallingConv::ID CC; + } LibraryCalls[] = { + { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP }, + }; + + for (const auto &LC : LibraryCalls) { + setLibcallName(LC.Op, LC.Name); + setLibcallCallingConv(LC.Op, LC.CC); + } } // Use divmod compiler-rt calls for iOS 5.0 and later. 
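The ARMISelLowering hunk above folds long runs of setLibcallName / setLibcallCallingConv / setCmpLibcallCC calls into static tables walked by a range-for loop, with ISD::SETCC_INVALID as a sentinel for entries that carry no comparison condition. Below is a minimal sketch of the same table-driven registration pattern; the Op and CC enums and the two setters are hypothetical stand-ins for RTLIB::Libcall and the TargetLowering methods.

#include <map>
#include <string>

// Hypothetical stand-ins for RTLIB::Libcall and the TargetLowering setters;
// only the registration pattern is the point here.
enum class Op { AddF64, DivF64, MulF64, SubF64 };
enum class CC { ARM_AAPCS };

static std::map<Op, std::string> Names;
static std::map<Op, CC> Convs;

static void setName(Op O, const char *N) { Names[O] = N; }
static void setCallingConv(Op O, CC C) { Convs[O] = C; }

static void registerAEABILibcalls() {
  // One row per helper: the name and calling convention live side by side
  // instead of being spread over separate setter calls.
  static const struct {
    Op Opcode;
    const char *Name;
    CC Conv;
  } LibraryCalls[] = {
      {Op::AddF64, "__aeabi_dadd", CC::ARM_AAPCS},
      {Op::DivF64, "__aeabi_ddiv", CC::ARM_AAPCS},
      {Op::MulF64, "__aeabi_dmul", CC::ARM_AAPCS},
      {Op::SubF64, "__aeabi_dsub", CC::ARM_AAPCS},
  };

  for (const auto &LC : LibraryCalls) {
    setName(LC.Opcode, LC.Name);
    setCallingConv(LC.Opcode, LC.Conv);
  }
}

int main() { registerAEABILibcalls(); }

Keeping each helper's name and calling convention in one row means adding or auditing an AEABI routine touches a single line, which is presumably the motivation for the rewrite.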
@@ -438,8 +396,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) addRegisterClass(MVT::f32, &ARM::SPRRegClass); if (!Subtarget->isFPOnlySP()) addRegisterClass(MVT::f64, &ARM::DPRRegClass); - - setTruncStoreAction(MVT::f64, MVT::f32, Expand); } for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; @@ -451,6 +407,13 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand); setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand); setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand); + + setOperationAction(ISD::MULHS, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::MULHU, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand); + + setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand); } setOperationAction(ISD::ConstantFP, MVT::f32, Custom); @@ -617,8 +580,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) computeRegisterProperties(); - // ARM does not have f32 extending load. + // ARM does not have floating-point extending loads. setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand); + + // ... or truncating stores + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + setTruncStoreAction(MVT::f32, MVT::f16, Expand); + setTruncStoreAction(MVT::f64, MVT::f16, Expand); // ARM does not have i1 sign extending load. setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); @@ -638,6 +607,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } } + setOperationAction(ISD::SADDO, MVT::i32, Custom); + setOperationAction(ISD::UADDO, MVT::i32, Custom); + setOperationAction(ISD::SSUBO, MVT::i32, Custom); + setOperationAction(ISD::USUBO, MVT::i32, Custom); + // i64 operation support. setOperationAction(ISD::MUL, MVT::i64, Expand); setOperationAction(ISD::MULHU, MVT::i32, Expand); @@ -733,39 +707,31 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - if (!Subtarget->isTargetDarwin()) { - // Non-Darwin platforms may return values in these registers via the + if (!Subtarget->isTargetMachO()) { + // Non-MachO platforms may return values in these registers via the // personality function. setExceptionPointerRegister(ARM::R0); setExceptionSelectorRegister(ARM::R1); } - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); + if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment()) + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); + else + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); + // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use // the default expansion. if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) { - // ATOMIC_FENCE needs custom lowering; the other 32-bit ones are legal and - // handled normally. + // ATOMIC_FENCE needs custom lowering; the others should have been expanded + // to ldrex/strex loops already. 
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); - // Custom lowering for 64-bit ops - setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom); + // On v8, we have particularly efficient implementations of atomic fences // if they can be combined with nearby atomic loads and stores. if (!Subtarget->hasV8Ops()) { // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc. setInsertFencesForAtomic(true); } - setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom); } else { // If there's anything we can use as a barrier, go through custom lowering // for ATOMIC_FENCE. @@ -863,13 +829,20 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); } - // Special handling for half-precision FP. + + // v8 adds f64 <-> f16 conversion. Before that it should be expanded. + if (!Subtarget->hasV8Ops()) { + setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); + setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); + } + + // fp16 is a special v7 extension that adds f16 <-> f32 conversions. if (!Subtarget->hasFP16()) { - setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand); - setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand); + setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); + setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); } } - + // Combine sin / cos into one node or libcall if possible. if (Subtarget->hasSinCos()) { setLibcallName(RTLIB::SINCOS_F32, "sincosf"); @@ -920,44 +893,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setMinFunctionAlignment(Subtarget->isThumb() ? 
1 : 2); } -static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord, - bool isThumb2, unsigned &LdrOpc, - unsigned &StrOpc) { - static const unsigned LoadBares[4][2] = {{ARM::LDREXB, ARM::t2LDREXB}, - {ARM::LDREXH, ARM::t2LDREXH}, - {ARM::LDREX, ARM::t2LDREX}, - {ARM::LDREXD, ARM::t2LDREXD}}; - static const unsigned LoadAcqs[4][2] = {{ARM::LDAEXB, ARM::t2LDAEXB}, - {ARM::LDAEXH, ARM::t2LDAEXH}, - {ARM::LDAEX, ARM::t2LDAEX}, - {ARM::LDAEXD, ARM::t2LDAEXD}}; - static const unsigned StoreBares[4][2] = {{ARM::STREXB, ARM::t2STREXB}, - {ARM::STREXH, ARM::t2STREXH}, - {ARM::STREX, ARM::t2STREX}, - {ARM::STREXD, ARM::t2STREXD}}; - static const unsigned StoreRels[4][2] = {{ARM::STLEXB, ARM::t2STLEXB}, - {ARM::STLEXH, ARM::t2STLEXH}, - {ARM::STLEX, ARM::t2STLEX}, - {ARM::STLEXD, ARM::t2STLEXD}}; - - const unsigned (*LoadOps)[2], (*StoreOps)[2]; - if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent) - LoadOps = LoadAcqs; - else - LoadOps = LoadBares; - - if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent) - StoreOps = StoreRels; - else - StoreOps = StoreBares; - - assert(isPowerOf2_32(Size) && Size <= 8 && - "unsupported size for atomic binary op!"); - - LdrOpc = LoadOps[Log2_32(Size)][isThumb2]; - StrOpc = StoreOps[Log2_32(Size)][isThumb2]; -} - // FIXME: It might make sense to define the representative register class as the // nearest super-register that has a non-null superset. For example, DPR_VFP2 is // a super-register of SPR, and DPR is a superset if DPR_VFP2. Consequently, @@ -970,7 +905,7 @@ static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord, // and extractions. std::pair<const TargetRegisterClass*, uint8_t> ARMTargetLowering::findRepresentativeClass(MVT VT) const{ - const TargetRegisterClass *RRC = 0; + const TargetRegisterClass *RRC = nullptr; uint8_t Cost = 1; switch (VT.SimpleTy) { default: @@ -1007,9 +942,8 @@ ARMTargetLowering::findRepresentativeClass(MVT VT) const{ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { - default: return 0; + default: return nullptr; case ARMISD::Wrapper: return "ARMISD::Wrapper"; - case ARMISD::WrapperDYN: return "ARMISD::WrapperDYN"; case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC"; case ARMISD::WrapperJT: return "ARMISD::WrapperJT"; case ARMISD::CALL: return "ARMISD::CALL"; @@ -1064,6 +998,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::PRELOAD: return "ARMISD::PRELOAD"; + case ARMISD::WIN__CHKSTK: return "ARMISD:::WIN__CHKSTK"; + case ARMISD::VCEQ: return "ARMISD::VCEQ"; case ARMISD::VCEQZ: return "ARMISD::VCEQZ"; case ARMISD::VCGE: return "ARMISD::VCGE"; @@ -1079,10 +1015,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VSHL: return "ARMISD::VSHL"; case ARMISD::VSHRs: return "ARMISD::VSHRs"; case ARMISD::VSHRu: return "ARMISD::VSHRu"; - case ARMISD::VSHLLs: return "ARMISD::VSHLLs"; - case ARMISD::VSHLLu: return "ARMISD::VSHLLu"; - case ARMISD::VSHLLi: return "ARMISD::VSHLLi"; - case ARMISD::VSHRN: return "ARMISD::VSHRN"; case ARMISD::VRSHRs: return "ARMISD::VRSHRs"; case ARMISD::VRSHRu: return "ARMISD::VRSHRu"; case ARMISD::VRSHRN: return "ARMISD::VRSHRN"; @@ -1266,40 +1198,58 @@ static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, #include "ARMGenCallingConv.inc" -/// CCAssignFnForNode - Selects the correct CCAssignFn for a the -/// given CallingConvention value. 
-CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, - bool Return, - bool isVarArg) const { +/// getEffectiveCallingConv - Get the effective calling convention, taking into +/// account presence of floating point hardware and calling convention +/// limitations, such as support for variadic functions. +CallingConv::ID +ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC, + bool isVarArg) const { switch (CC) { default: llvm_unreachable("Unsupported calling convention"); - case CallingConv::Fast: - if (Subtarget->hasVFP2() && !isVarArg) { - if (!Subtarget->isAAPCS_ABI()) - return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS); - // For AAPCS ABI targets, just use VFP variant of the calling convention. - return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); - } - // Fallthrough - case CallingConv::C: { - // Use target triple & subtarget features to do actual dispatch. + case CallingConv::ARM_AAPCS: + case CallingConv::ARM_APCS: + case CallingConv::GHC: + return CC; + case CallingConv::ARM_AAPCS_VFP: + return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP; + case CallingConv::C: if (!Subtarget->isAAPCS_ABI()) - return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); - else if (Subtarget->hasVFP2() && + return CallingConv::ARM_APCS; + else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && getTargetMachine().Options.FloatABIType == FloatABI::Hard && !isVarArg) - return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); - return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); + return CallingConv::ARM_AAPCS_VFP; + else + return CallingConv::ARM_AAPCS; + case CallingConv::Fast: + if (!Subtarget->isAAPCS_ABI()) { + if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg) + return CallingConv::Fast; + return CallingConv::ARM_APCS; + } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg) + return CallingConv::ARM_AAPCS_VFP; + else + return CallingConv::ARM_AAPCS; } - case CallingConv::ARM_AAPCS_VFP: - if (!isVarArg) - return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); - // Fallthrough - case CallingConv::ARM_AAPCS: - return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); +} + +/// CCAssignFnForNode - Selects the correct CCAssignFn for the given +/// CallingConvention. +CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, + bool Return, + bool isVarArg) const { + switch (getEffectiveCallingConv(CC, isVarArg)) { + default: + llvm_unreachable("Unsupported calling convention"); case CallingConv::ARM_APCS: return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); + case CallingConv::ARM_AAPCS: + return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); + case CallingConv::ARM_AAPCS_VFP: + return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); + case CallingConv::Fast: + return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS); case CallingConv::GHC: return (Return ? 
RetCC_ARM_APCS : CC_ARM_APCS_GHC); } @@ -1348,6 +1298,8 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, InFlag); Chain = Hi.getValue(1); InFlag = Hi.getValue(2); + if (!Subtarget->isLittle()) + std::swap (Lo, Hi); Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); if (VA.getLocVT() == MVT::v2f64) { @@ -1363,6 +1315,8 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); Chain = Hi.getValue(1); InFlag = Hi.getValue(2); + if (!Subtarget->isLittle()) + std::swap (Lo, Hi); Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, DAG.getConstant(1, MVT::i32)); @@ -1413,16 +1367,17 @@ void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG, SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Arg); - RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd)); + unsigned id = Subtarget->isLittle() ? 0 : 1; + RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id))); if (NextVA.isRegLoc()) - RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1))); + RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id))); else { assert(NextVA.isMemLoc()); - if (StackPtr.getNode() == 0) + if (!StackPtr.getNode()) StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); - MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1), + MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id), dl, DAG, NextVA, Flags)); } @@ -1450,14 +1405,19 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); bool isThisReturn = false; bool isSibCall = false; + // Disable tail calls if they're not supported. - if (!EnableARMTailCalls && !Subtarget->supportsTailCall()) + if (!Subtarget->supportsTailCall() || MF.getTarget().Options.DisableTailCalls) isTailCall = false; + if (isTailCall) { // Check if it's really possible to do a tail call. isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(), Outs, OutVals, Ins, DAG); + if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall()) + report_fatal_error("failed to perform tail call elimination on a call " + "site marked musttail"); // We don't support GuaranteedTailCallOpt for ARM, only automatically // detected sibcalls. if (isTailCall) { @@ -1602,7 +1562,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode}; MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs, - Ops, array_lengthof(Ops))); + Ops)); } } else if (!isSibCall) { assert(VA.isMemLoc()); @@ -1613,8 +1573,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } if (!MemOpChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOpChains[0], MemOpChains.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into the appropriate regs. 
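[Editor's note] The Subtarget->isLittle() swaps introduced above all encode the same rule: when an f64 is passed or returned in a pair of GPRs, the register order is fixed by the calling convention while the word order follows the target's endianness, so big-endian targets place the high word in the first register of the pair. A minimal standalone sketch of that ordering (the struct and function names here are hypothetical, not from this patch):

    #include <cstdint>
    #include <cstring>

    struct GPRPair { uint32_t first, second; };   // e.g. {r0, r1}

    GPRPair splitF64(double d, bool isLittle) {
      uint64_t bits;
      std::memcpy(&bits, &d, sizeof bits);        // bit-cast f64 -> u64
      uint32_t lo = static_cast<uint32_t>(bits);
      uint32_t hi = static_cast<uint32_t>(bits >> 32);
      // Little-endian: low word goes in the first register of the pair;
      // big-endian: high word does, hence the Lo/Hi swaps in the lowering.
      return isLittle ? GPRPair{lo, hi} : GPRPair{hi, lo};
    }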
@@ -1656,8 +1615,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); if (EnableARMLongCalls) { - assert (getTargetMachine().getRelocationModel() == Reloc::Static - && "long-calls with non-static relocation model!"); + assert((Subtarget->isTargetWindows() || + getTargetMachine().getRelocationModel() == Reloc::Static) && + "long-calls with non-static relocation model!"); // Handle a global address or an external symbol. If it's not one of // those, the target's already in a register, so we don't need to do // anything extra. @@ -1695,25 +1655,29 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, const GlobalValue *GV = G->getGlobal(); isDirect = true; bool isExt = GV->isDeclaration() || GV->isWeakForLinker(); - bool isStub = (isExt && Subtarget->isTargetDarwin()) && + bool isStub = (isExt && Subtarget->isTargetMachO()) && getTargetMachine().getRelocationModel() != Reloc::Static; isARMFunc = !Subtarget->isThumb() || isStub; // ARM call to a local ARM function is predicable. isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking); // tBX takes a register source operand. - if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { - unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); - ARMConstantPoolValue *CPV = - ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 4); - SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); - CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - Callee = DAG.getLoad(getPointerTy(), dl, - DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); - SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); - Callee = DAG.getNode(ARMISD::PIC_ADD, dl, - getPointerTy(), Callee, PICLabel); + if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { + assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?"); + Callee = DAG.getNode(ARMISD::WrapperPIC, dl, getPointerTy(), + DAG.getTargetGlobalAddress(GV, dl, getPointerTy())); + } else if (Subtarget->isTargetCOFF()) { + assert(Subtarget->isTargetWindows() && + "Windows is the only supported COFF target"); + unsigned TargetFlags = GV->hasDLLImportStorageClass() + ? ARMII::MO_DLLIMPORT + : ARMII::MO_NO_FLAG; + Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), /*Offset=*/0, + TargetFlags); + if (GV->hasDLLImportStorageClass()) + Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), + DAG.getNode(ARMISD::Wrapper, dl, getPointerTy(), + Callee), MachinePointerInfo::getGOT(), + false, false, false, 0); } else { // On ELF targets for PIC code, direct calls should go through the PLT unsigned OpFlags = 0; @@ -1724,7 +1688,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { isDirect = true; - bool isStub = Subtarget->isTargetDarwin() && + bool isStub = Subtarget->isTargetMachO() && getTargetMachine().getRelocationModel() != Reloc::Static; isARMFunc = !Subtarget->isThumb() || isStub; // tBX takes a register source operand. @@ -1755,8 +1719,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // FIXME: handle tail calls differently. unsigned CallOpc; - bool HasMinSizeAttr = MF.getFunction()->getAttributes(). 
- hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); + bool HasMinSizeAttr = MF.getFunction()->getAttributes().hasAttribute( + AttributeSet::FunctionIndex, Attribute::MinSize); if (Subtarget->isThumb()) { if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) CallOpc = ARMISD::CALL_NOLINK; @@ -1811,10 +1775,10 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); if (isTailCall) - return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size()); + return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops); // Returns a chain and a flag for retval copy to use. - Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); + Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); InFlag = Chain.getValue(1); Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), @@ -1841,22 +1805,6 @@ ARMTargetLowering::HandleByVal( State->getCallOrPrologue() == Call) && "unhandled ParmContext"); - // For in-prologue parameters handling, we also introduce stack offset - // for byval registers: see CallingConvLower.cpp, CCState::HandleByVal. - // This behaviour outsides AAPCS rules (5.5 Parameters Passing) of how - // NSAA should be evaluted (NSAA means "next stacked argument address"). - // So: NextStackOffset = NSAAOffset + SizeOfByValParamsStoredInRegs. - // Then: NSAAOffset = NextStackOffset - SizeOfByValParamsStoredInRegs. - unsigned NSAAOffset = State->getNextStackOffset(); - if (State->getCallOrPrologue() != Call) { - for (unsigned i = 0, e = State->getInRegsParamsCount(); i != e; ++i) { - unsigned RB, RE; - State->getInRegsParamInfo(i, RB, RE); - assert(NSAAOffset >= (RE-RB)*4 && - "Stack offset for byval regs doesn't introduced anymore?"); - NSAAOffset -= (RE-RB)*4; - } - } if ((ARM::R0 <= reg) && (reg <= ARM::R3)) { if (Subtarget->isAAPCS_ABI() && Align > 4) { unsigned AlignInRegs = Align / 4; @@ -1871,6 +1819,7 @@ ARMTargetLowering::HandleByVal( // all remained GPR regs. In that case we can't split parameter, we must // send it to stack. We also must set NCRN to R4, so waste all // remained registers. + const unsigned NSAAOffset = State->getNextStackOffset(); if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) { while (State->AllocateReg(GPRArgRegs, 4)) ; @@ -1890,18 +1839,14 @@ ARMTargetLowering::HandleByVal( // allocate remained amount of registers we need. for (unsigned i = reg+1; i != ByValRegEnd; ++i) State->AllocateReg(GPRArgRegs, 4); - // At a call site, a byval parameter that is split between - // registers and memory needs its size truncated here. In a - // function prologue, such byval parameters are reassembled in - // memory, and are not truncated. - if (State->getCallOrPrologue() == Call) { - // Make remained size equal to 0 in case, when - // the whole structure may be stored into registers. - if (size < excess) - size = 0; - else - size -= excess; - } + // A byval parameter that is split between registers and memory needs its + // size truncated here. + // In the case where the entire structure fits in registers, we set the + // size in memory to zero. 
+ if (size < excess) + size = 0; + else + size -= excess; } } } @@ -2138,8 +2083,7 @@ static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps, RetOps.insert(RetOps.begin() + 1, DAG.getConstant(LROffset, MVT::i32, false)); - return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, - RetOps.data(), RetOps.size()); + return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps); } SDValue @@ -2163,6 +2107,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain, SDValue Flag; SmallVector<SDValue, 4> RetOps; RetOps.push_back(Chain); // Operand #0 = Chain (updated below) + bool isLittleEndian = Subtarget->isLittle(); // Copy the result values into the output registers. for (unsigned i = 0, realRVLocIdx = 0; @@ -2189,12 +2134,15 @@ ARMTargetLowering::LowerReturn(SDValue Chain, SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Half); - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag); + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + HalfGPRs.getValue(isLittleEndian ? 0 : 1), + Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), - HalfGPRs.getValue(1), Flag); + HalfGPRs.getValue(isLittleEndian ? 1 : 0), + Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc @@ -2206,12 +2154,15 @@ ARMTargetLowering::LowerReturn(SDValue Chain, // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is // available. SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, - DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1); - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag); + DAG.getVTList(MVT::i32, MVT::i32), Arg); + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + fmrrd.getValue(isLittleEndian ? 0 : 1), + Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1), + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + fmrrd.getValue(isLittleEndian ? 1 : 0), Flag); } else Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); @@ -2240,8 +2191,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain, return LowerInterruptReturn(RetOps, dl, DAG); } - return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, - RetOps.data(), RetOps.size()); + return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps); } bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { @@ -2310,10 +2260,10 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { } bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { - if (!EnableARMTailCalls && !Subtarget->supportsTailCall()) + if (!Subtarget->supportsTailCall()) return false; - if (!CI->isTailCall()) + if (!CI->isTailCall() || getTargetMachine().Options.DisableTailCalls) return false; return !Subtarget->isThumb1Only(); @@ -2403,13 +2353,14 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, Entry.Node = Argument; Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext()); Args.push_back(Entry); + // FIXME: is there useful debug info available here? 
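[Editor's note] The simplified byval truncation above (size < excess ? 0 : size - excess) now runs for call sites and function prologues alike, where it previously applied only at call sites. A hedged sketch of the arithmetic, with a hypothetical helper name:

    #include <cstdint>

    // A byval argument of `size` bytes with `regsLeft` unallocated GPRs has
    // `excess` bytes passed in r0-r3; only the remainder is laid out on the
    // stack, and an argument that fits entirely in registers needs no stack
    // bytes at all.
    uint32_t byvalBytesOnStack(uint32_t size, uint32_t regsLeft) {
      uint32_t excess = regsLeft * 4;            // 4 bytes per GPR
      return size < excess ? 0 : size - excess;  // mirrors `size -= excess`
    }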
- TargetLowering::CallLoweringInfo CLI(Chain, - (Type *) Type::getInt32Ty(*DAG.getContext()), - false, false, false, false, - 0, CallingConv::C, /*isTailCall=*/false, - /*doesNotRet=*/false, /*isReturnValueUsed=*/true, - DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(Chain) + .setCallee(CallingConv::C, Type::getInt32Ty(*DAG.getContext()), + DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args), + 0); + std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); return CallResult.first; } @@ -2516,7 +2467,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, // If we have T2 ops, we can materialize the address directly via movt/movw // pair. This is always cheaper. - if (Subtarget->useMovt()) { + if (Subtarget->useMovt(DAG.getMachineFunction())) { ++NumMovwMovt; // FIXME: Once remat is capable of dealing with instructions with register // operands, expand this into two nodes. @@ -2538,56 +2489,46 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); - // FIXME: Enable this for static codegen when tool issues are fixed. Also - // update ARMFastISel::ARMMaterializeGV. - if (Subtarget->useMovt() && RelocM != Reloc::Static) { + if (Subtarget->useMovt(DAG.getMachineFunction())) ++NumMovwMovt; - // FIXME: Once remat is capable of dealing with instructions with register - // operands, expand this into two nodes. - if (RelocM == Reloc::Static) - return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, - DAG.getTargetGlobalAddress(GV, dl, PtrVT)); - - unsigned Wrapper = (RelocM == Reloc::PIC_) - ? ARMISD::WrapperPIC : ARMISD::WrapperDYN; - SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, - DAG.getTargetGlobalAddress(GV, dl, PtrVT)); - if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) - Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, - MachinePointerInfo::getGOT(), - false, false, false, 0); - return Result; - } - unsigned ARMPCLabelIndex = 0; - SDValue CPAddr; - if (RelocM == Reloc::Static) { - CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); - } else { - ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>(); - ARMPCLabelIndex = AFI->createPICLabelUId(); - unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8); - ARMConstantPoolValue *CPV = - ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, - PCAdj); - CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); - } - CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + // FIXME: Once remat is capable of dealing with instructions with register + // operands, expand this into multiple nodes + unsigned Wrapper = + RelocM == Reloc::PIC_ ? 
ARMISD::WrapperPIC : ARMISD::Wrapper; - SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); - SDValue Chain = Result.getValue(1); - - if (RelocM == Reloc::PIC_) { - SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); - Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); - } + SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY); + SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G); if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) - Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(), - false, false, false, 0); + Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, + MachinePointerInfo::getGOT(), false, false, false, 0); + return Result; +} + +SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op, + SelectionDAG &DAG) const { + assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported"); + assert(Subtarget->useMovt(DAG.getMachineFunction()) && + "Windows on ARM expects to use movw/movt"); + + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + const ARMII::TOF TargetFlags = + (GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG); + EVT PtrVT = getPointerTy(); + SDValue Result; + SDLoc DL(Op); + ++NumMovwMovt; + + // FIXME: Once remat is capable of dealing with instructions with register + // operands, expand this into two nodes. + Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, + DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0, + TargetFlags)); + if (GV->hasDLLImportStorageClass()) + Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result, + MachinePointerInfo::getGOT(), false, false, false, 0); return Result; } @@ -2636,6 +2577,11 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, SDLoc dl(Op); switch (IntNo) { default: return SDValue(); // Don't custom lower most intrinsics. + case Intrinsic::arm_rbit: { + assert(Op.getOperand(1).getValueType() == MVT::i32 && + "RBIT intrinsic must have i32 type!"); + return DAG.getNode(ARMISD::RBIT, dl, MVT::i32, Op.getOperand(1)); + } case Intrinsic::arm_thread_pointer: { EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); @@ -2779,7 +2725,8 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, Reg = MF.addLiveIn(NextVA.getLocReg(), RC); ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); } - + if (!Subtarget->isLittle()) + std::swap (ArgValue, ArgValue2); return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); } @@ -2807,11 +2754,11 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, ArgRegsSize = NumGPRs * 4; // If parameter is split between stack and GPRs... - if (NumGPRs && Align == 8 && + if (NumGPRs && Align > 4 && (ArgRegsSize < ArgSize || InRegsParamRecordIdx >= CCInfo.getInRegsParamsCount())) { - // Add padding for part of param recovered from GPRs, so - // its last byte must be at address K*8 - 1. + // Add padding for part of param recovered from GPRs. For example, + // if Align == 8, its last byte must be at address K*8 - 1. 
// We need to do it, since remained (stack) part of parameter has // stack alignment, and we need to "attach" "GPRs head" without gaps // to it: @@ -2821,8 +2768,7 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, // ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned Padding = - ((ArgRegsSize + AFI->getArgRegsSaveSize() + Align - 1) & ~(Align-1)) - - (ArgRegsSize + AFI->getArgRegsSaveSize()); + OffsetToAlignment(ArgRegsSize + AFI->getArgRegsSaveSize(), Align); ArgRegsSaveSize = ArgRegsSize + Padding; } else // We don't need to extend regs save size for byval parameters if they @@ -2846,10 +2792,12 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, unsigned OffsetFromOrigArg, unsigned ArgOffset, unsigned ArgSize, - bool ForceMutable) const { + bool ForceMutable, + unsigned ByValStoreOffset, + unsigned TotalArgRegsSaveSize) const { // Currently, two use-cases possible: - // Case #1. Non var-args function, and we meet first byval parameter. + // Case #1. Non-var-args function, and we meet first byval parameter. // Setup first unallocated register as first byval register; // eat all remained registers // (these two actions are performed by HandleByVal method). @@ -2883,7 +2831,6 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, // Note: once stack area for byval/varargs registers // was initialized, it can't be initialized again. if (ArgRegsSaveSize) { - unsigned Padding = ArgRegsSaveSize - ArgRegsSize; if (Padding) { @@ -2892,11 +2839,18 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, AFI->setStoredByValParamsPadding(Padding); } - int FrameIndex = MFI->CreateFixedObject( - ArgRegsSaveSize, - Padding + ArgOffset, - false); + int FrameIndex = MFI->CreateFixedObject(ArgRegsSaveSize, + Padding + + ByValStoreOffset - + (int64_t)TotalArgRegsSaveSize, + false); SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy()); + if (Padding) { + MFI->CreateFixedObject(Padding, + ArgOffset + ByValStoreOffset - + (int64_t)ArgRegsSaveSize, + false); + } SmallVector<SDValue, 4> MemOps; for (unsigned i = 0; firstRegToSaveIndex < lastRegToSaveIndex; @@ -2921,13 +2875,18 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize()); if (!MemOps.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOps[0], MemOps.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); return FrameIndex; - } else + } else { + if (ArgSize == 0) { + // We cannot allocate a zero-byte object for the first variadic argument, + // so just make up a size. + ArgSize = 4; + } // This will point to the next argument passed via stack. return MFI->CreateFixedObject( - 4, AFI->getStoredByValParamsPadding() + ArgOffset, !ForceMutable); + ArgSize, ArgOffset, !ForceMutable); + } } // Setup stack frame, the va_list pointer will start from. @@ -2935,6 +2894,7 @@ void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, SDLoc dl, SDValue &Chain, unsigned ArgOffset, + unsigned TotalArgRegsSaveSize, bool ForceMutable) const { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); @@ -2945,8 +2905,9 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, // If there is no regs to be stored, just point address after last // argument passed via stack. 
int FrameIndex = - StoreByValRegs(CCInfo, DAG, dl, Chain, 0, CCInfo.getInRegsParamsCount(), - 0, ArgOffset, 0, ForceMutable); + StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr, + CCInfo.getInRegsParamsCount(), 0, ArgOffset, 0, ForceMutable, + 0, TotalArgRegsSaveSize); AFI->setVarArgsFrameIndex(FrameIndex); } @@ -2983,6 +2944,51 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // We also increase this value in case of varargs function. AFI->setArgRegsSaveSize(0); + unsigned ByValStoreOffset = 0; + unsigned TotalArgRegsSaveSize = 0; + unsigned ArgRegsSaveSizeMaxAlign = 4; + + // Calculate the amount of stack space that we need to allocate to store + // byval and variadic arguments that are passed in registers. + // We need to know this before we allocate the first byval or variadic + // argument, as they will be allocated a stack slot below the CFA (Canonical + // Frame Address, the stack pointer at entry to the function). + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + if (VA.isMemLoc()) { + int index = VA.getValNo(); + if (index != lastInsIndex) { + ISD::ArgFlagsTy Flags = Ins[index].Flags; + if (Flags.isByVal()) { + unsigned ExtraArgRegsSize; + unsigned ExtraArgRegsSaveSize; + computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsProceed(), + Flags.getByValSize(), + ExtraArgRegsSize, ExtraArgRegsSaveSize); + + TotalArgRegsSaveSize += ExtraArgRegsSaveSize; + if (Flags.getByValAlign() > ArgRegsSaveSizeMaxAlign) + ArgRegsSaveSizeMaxAlign = Flags.getByValAlign(); + CCInfo.nextInRegsParam(); + } + lastInsIndex = index; + } + } + } + CCInfo.rewindByValRegsInfo(); + lastInsIndex = -1; + if (isVarArg) { + unsigned ExtraArgRegsSize; + unsigned ExtraArgRegsSaveSize; + computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsCount(), 0, + ExtraArgRegsSize, ExtraArgRegsSaveSize); + TotalArgRegsSaveSize += ExtraArgRegsSaveSize; + } + // If the arg regs save area contains N-byte aligned values, the + // bottom of it must be at least N-byte aligned. + TotalArgRegsSaveSize = RoundUpToAlignment(TotalArgRegsSaveSize, ArgRegsSaveSizeMaxAlign); + TotalArgRegsSaveSize = std::min(TotalArgRegsSaveSize, 16U); + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx); @@ -3081,18 +3087,23 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // a tail call. 
if (Flags.isByVal()) { unsigned CurByValIndex = CCInfo.getInRegsParamsProceed(); + + ByValStoreOffset = RoundUpToAlignment(ByValStoreOffset, Flags.getByValAlign()); int FrameIndex = StoreByValRegs( CCInfo, DAG, dl, Chain, CurOrigArg, CurByValIndex, Ins[VA.getValNo()].PartOffset, VA.getLocMemOffset(), Flags.getByValSize(), - true /*force mutable frames*/); + true /*force mutable frames*/, + ByValStoreOffset, + TotalArgRegsSaveSize); + ByValStoreOffset += Flags.getByValSize(); + ByValStoreOffset = std::min(ByValStoreOffset, 16U); InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy())); CCInfo.nextInRegsParam(); } else { - unsigned FIOffset = VA.getLocMemOffset() + - AFI->getStoredByValParamsPadding(); + unsigned FIOffset = VA.getLocMemOffset(); int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8, FIOffset, true); @@ -3110,7 +3121,10 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // varargs if (isVarArg) VarArgStyleRegisters(CCInfo, DAG, dl, Chain, - CCInfo.getNextStackOffset()); + CCInfo.getNextStackOffset(), + TotalArgRegsSaveSize); + + AFI->setArgumentStackSize(CCInfo.getNextStackOffset()); return Chain; } @@ -3224,11 +3238,96 @@ ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const { return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp); } +std::pair<SDValue, SDValue> +ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG, + SDValue &ARMcc) const { + assert(Op.getValueType() == MVT::i32 && "Unsupported value type"); + + SDValue Value, OverflowCmp; + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + + + // FIXME: We are currently always generating CMPs because we don't support + // generating CMN through the backend. This is not as good as the natural + // CMP case because it causes a register dependency and cannot be folded + // later. + + switch (Op.getOpcode()) { + default: + llvm_unreachable("Unknown overflow instruction!"); + case ISD::SADDO: + ARMcc = DAG.getConstant(ARMCC::VC, MVT::i32); + Value = DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), LHS, RHS); + OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, Value, LHS); + break; + case ISD::UADDO: + ARMcc = DAG.getConstant(ARMCC::HS, MVT::i32); + Value = DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), LHS, RHS); + OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, Value, LHS); + break; + case ISD::SSUBO: + ARMcc = DAG.getConstant(ARMCC::VC, MVT::i32); + Value = DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(), LHS, RHS); + OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, LHS, RHS); + break; + case ISD::USUBO: + ARMcc = DAG.getConstant(ARMCC::HS, MVT::i32); + Value = DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(), LHS, RHS); + OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, LHS, RHS); + break; + } // switch (...) + + return std::make_pair(Value, OverflowCmp); +} + + +SDValue +ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { + // Let legalize expand this if it isn't a legal type yet. + if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType())) + return SDValue(); + + SDValue Value, OverflowCmp; + SDValue ARMcc; + std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc); + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + // We use 0 and 1 as false and true values. 
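// [Editor's sketch, not part of the patch: the condition codes chosen in
//  getARMXALUOOp() above correspond to these plain-C++ overflow tests --
//    UADDO:  uint32_t sum = a + b; bool ovf = sum < a;  // CMP sum, a; LO
//    USUBO:  bool ovf = a < b;                          // CMP a, b;   LO
//    SADDO/SSUBO: branch on the V flag, where VC means no signed overflow.
//  HS is the inverse of LO, i.e. it holds exactly when the unsigned
//  operation does not wrap.]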
+ SDValue TVal = DAG.getConstant(1, MVT::i32); + SDValue FVal = DAG.getConstant(0, MVT::i32); + EVT VT = Op.getValueType(); + + SDValue Overflow = DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, TVal, FVal, + ARMcc, CCR, OverflowCmp); + + SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); + return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), VTs, Value, Overflow); +} + + SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Cond = Op.getOperand(0); SDValue SelectTrue = Op.getOperand(1); SDValue SelectFalse = Op.getOperand(2); SDLoc dl(Op); + unsigned Opc = Cond.getOpcode(); + + if (Cond.getResNo() == 1 && + (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || + Opc == ISD::USUBO)) { + if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0))) + return SDValue(); + + SDValue Value, OverflowCmp; + SDValue ARMcc; + std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc); + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + EVT VT = Op.getValueType(); + + return DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, SelectTrue, SelectFalse, + ARMcc, CCR, OverflowCmp); + + } // Convert: // @@ -3279,7 +3378,7 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { static ISD::CondCode getInverseCCForVSEL(ISD::CondCode CC) { if (CC == ISD::SETNE) return ISD::SETEQ; - return ISD::getSetCCSwappedOperands(CC); + return ISD::getSetCCInverse(CC, true); } static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, @@ -3530,7 +3629,7 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { ARMcc = DAG.getConstant(CondCode, MVT::i32); SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest }; - return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7); + return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops); } return SDValue(); @@ -3570,11 +3669,11 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp }; - SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); + SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops); if (CondCode2 != ARMCC::AL) { ARMcc = DAG.getConstant(CondCode2, MVT::i32); SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) }; - Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); + Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops); } return Res; } @@ -3771,7 +3870,7 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { // Bitcast operand 1 to i32. if (SrcVT == MVT::f64) Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), - &Tmp1, 1).getValue(1); + Tmp1).getValue(1); Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1); // Or in the signbit with integer operations. @@ -3787,7 +3886,7 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { // f64: Or the high part with signbit and then combine two parts. 
Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), - &Tmp0, 1); + Tmp0); SDValue Lo = Tmp0.getValue(0); SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2); Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1); @@ -3799,6 +3898,9 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ MachineFrameInfo *MFI = MF.getFrameInfo(); MFI->setReturnAddressIsTaken(true); + if (verifyReturnAddressArgumentIsConstant(Op, DAG)) + return SDValue(); + EVT VT = Op.getValueType(); SDLoc dl(Op); unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); @@ -3816,14 +3918,16 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ } SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { - MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + const ARMBaseRegisterInfo &ARI = + *static_cast<const ARMBaseRegisterInfo*>(RegInfo); + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); MFI->setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); SDLoc dl(Op); // FIXME probably not meaningful unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); - unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin()) - ? ARM::R7 : ARM::R11; + unsigned FrameReg = ARI.getFrameRegister(MF); SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, @@ -3832,6 +3936,18 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { return FrameAddr; } +// FIXME? Maybe this could be a TableGen attribute on some registers and +// this table could be generated automatically from RegInfo. +unsigned ARMTargetLowering::getRegisterByName(const char* RegName, + EVT VT) const { + unsigned Reg = StringSwitch<unsigned>(RegName) + .Case("sp", ARM::SP) + .Default(0); + if (Reg) + return Reg; + report_fatal_error("Invalid register name global variable"); +} + /// ExpandBITCAST - If the target supports VFP, this function is called to /// expand a bit convert where either the source or destination type is i64 to /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 @@ -3861,8 +3977,15 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) { // Turn f64->i64 into VMOVRRD. if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) { - SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, - DAG.getVTList(MVT::i32, MVT::i32), &Op, 1); + SDValue Cvt; + if (TLI.isBigEndian() && SrcVT.isVector() && + SrcVT.getVectorNumElements() > 1) + Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), + DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op)); + else + Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), Op); // Merge the pieces into a single i64 value. 
return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); } @@ -3918,7 +4041,7 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, CCR, Cmp); SDValue Ops[2] = { Lo, Hi }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getMergeValues(Ops, dl); } /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two @@ -3952,7 +4075,7 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, CCR, Cmp); SDValue Ops[2] = { Lo, Hi }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getMergeValues(Ops, dl); } SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, @@ -4157,7 +4280,7 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and // captures the result into a carry flag. unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG; - Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), &Hi, 1); + Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi); // The low part is an ARMISD::RRX operand, which shifts the carry in. Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1)); @@ -4380,7 +4503,6 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, // Value = 0x0000nnff: Op=x, Cmode=1100. OpCmode = 0xc; Imm = SplatBits >> 8; - SplatBits |= 0xff; break; } @@ -4389,7 +4511,6 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, // Value = 0x00nnffff: Op=x, Cmode=1101. OpCmode = 0xd; Imm = SplatBits >> 16; - SplatBits |= 0xffff; break; } @@ -4418,9 +4539,13 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, BitMask <<= 8; ImmMask <<= 1; } + + if (DAG.getTargetLoweringInfo().isBigEndian()) + // swap higher and lower 32 bit word + Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4); + // Op=1, Cmode=1110. OpCmode = 0x1e; - SplatBits = Val; VT = is128Bits ? 
MVT::v2i64 : MVT::v1i64; break; } @@ -4917,7 +5042,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, Ops.push_back(N); Ops.push_back(Op.getOperand(I)); Ops.push_back(DAG.getConstant(I, MVT::i32)); - N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, &Ops[0], 3); + N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops); } } return N; @@ -4928,7 +5053,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op.getOperand(i))); EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); - SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts); + SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops); Val = LowerBUILD_VECTOR(Val, DAG, ST); if (Val.getNode()) return DAG.getNode(ISD::BITCAST, dl, VT, Val); @@ -4964,7 +5089,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, SmallVector<SDValue, 8> Ops; for (unsigned i = 0; i < NumElts; ++i) Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i))); - SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); + SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops); return DAG.getNode(ISD::BITCAST, dl, VT, Val); } @@ -5271,12 +5396,10 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, if (V2.getNode()->getOpcode() == ISD::UNDEF) return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1, - DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, - &VTBLMask[0], 8)); + DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask)); return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2, - DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, - &VTBLMask[0], 8)); + DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask)); } static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op, @@ -5429,7 +5552,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { DAG.getConstant(ShuffleMask[i] & (NumElts-1), MVT::i32))); } - SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); + SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops); return DAG.getNode(ISD::BITCAST, dl, VT, Val); } @@ -5666,7 +5789,7 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) { Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32)); } return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), - MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts); + MVT::getVectorVT(TruncVT, NumElts), Ops); } static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) { @@ -6004,12 +6127,12 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { ? 
"__sincos_stret" : "__sincosf_stret"; SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy()); - TargetLowering:: - CallLoweringInfo CLI(DAG.getEntryNode(), Type::getVoidTy(*DAG.getContext()), - false, false, false, false, 0, - CallingConv::C, /*isTaillCall=*/false, - /*doesNotRet=*/false, /*isReturnValueUsed*/false, - Callee, Args, DAG, dl); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) + .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), Callee, + std::move(Args), 0) + .setDiscardResult(); + std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet, @@ -6031,40 +6154,11 @@ static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) { if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic) return Op; - // Aquire/Release load/store is not legal for targets without a + // Acquire/Release load/store is not legal for targets without a // dmb or equivalent available. return SDValue(); } -static void -ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl<SDValue>& Results, - SelectionDAG &DAG) { - SDLoc dl(Node); - assert (Node->getValueType(0) == MVT::i64 && - "Only know how to expand i64 atomics"); - AtomicSDNode *AN = cast<AtomicSDNode>(Node); - - SmallVector<SDValue, 6> Ops; - Ops.push_back(Node->getOperand(0)); // Chain - Ops.push_back(Node->getOperand(1)); // Ptr - for(unsigned i=2; i<Node->getNumOperands(); i++) { - // Low part - Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, - Node->getOperand(i), DAG.getIntPtrConstant(0))); - // High part - Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, - Node->getOperand(i), DAG.getIntPtrConstant(1))); - } - SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); - SDValue Result = - DAG.getAtomic(Node->getOpcode(), dl, MVT::i64, Tys, Ops.data(), Ops.size(), - cast<MemSDNode>(Node)->getMemOperand(), AN->getOrdering(), - AN->getSynchScope()); - SDValue OpsF[] = { Result.getValue(0), Result.getValue(1) }; - Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2)); - Results.push_back(Result.getValue(2)); -} - static void ReplaceREADCYCLECOUNTER(SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG, @@ -6085,8 +6179,7 @@ static void ReplaceREADCYCLECOUNTER(SDNode *N, }; Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, - DAG.getVTList(MVT::i32, MVT::Other), &Ops[0], - array_lengthof(Ops)); + DAG.getVTList(MVT::i32, MVT::Other), Ops); OutChain = Cycles32.getValue(1); } else { // Intrinsic is defined to return 0 on unsupported platforms. Technically @@ -6109,8 +6202,15 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::GlobalAddress: - return Subtarget->isTargetDarwin() ? 
LowerGlobalAddressDarwin(Op, DAG) : - LowerGlobalAddressELF(Op, DAG); + switch (Subtarget->getTargetTriple().getObjectFormat()) { + default: llvm_unreachable("unknown object format"); + case Triple::COFF: + return LowerGlobalAddressWindows(Op, DAG); + case Triple::ELF: + return LowerGlobalAddressELF(Op, DAG); + case Triple::MachO: + return LowerGlobalAddressDarwin(Op, DAG); + } case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); @@ -6155,11 +6255,20 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ADDE: case ISD::SUBC: case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); + case ISD::SADDO: + case ISD::UADDO: + case ISD::SSUBO: + case ISD::USUBO: + return LowerXALUO(Op, DAG); case ISD::ATOMIC_LOAD: case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG); case ISD::FSINCOS: return LowerFSINCOS(Op, DAG); case ISD::SDIVREM: case ISD::UDIVREM: return LowerDivRem(Op, DAG); + case ISD::DYNAMIC_STACKALLOC: + if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment()) + return LowerDYNAMIC_STACKALLOC(Op, DAG); + llvm_unreachable("Don't know how to custom lower this!"); } } @@ -6182,22 +6291,6 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, case ISD::READCYCLECOUNTER: ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget); return; - case ISD::ATOMIC_STORE: - case ISD::ATOMIC_LOAD: - case ISD::ATOMIC_LOAD_ADD: - case ISD::ATOMIC_LOAD_AND: - case ISD::ATOMIC_LOAD_NAND: - case ISD::ATOMIC_LOAD_OR: - case ISD::ATOMIC_LOAD_SUB: - case ISD::ATOMIC_LOAD_XOR: - case ISD::ATOMIC_SWAP: - case ISD::ATOMIC_CMP_SWAP: - case ISD::ATOMIC_LOAD_MIN: - case ISD::ATOMIC_LOAD_UMIN: - case ISD::ATOMIC_LOAD_MAX: - case ISD::ATOMIC_LOAD_UMAX: - ReplaceATOMIC_OP_64(N, Results, DAG); - return; } if (Res.getNode()) Results.push_back(Res); @@ -6207,538 +6300,6 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, // ARM Scheduler Hooks //===----------------------------------------------------------------------===// -MachineBasicBlock * -ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, - MachineBasicBlock *BB, - unsigned Size) const { - unsigned dest = MI->getOperand(0).getReg(); - unsigned ptr = MI->getOperand(1).getReg(); - unsigned oldval = MI->getOperand(2).getReg(); - unsigned newval = MI->getOperand(3).getReg(); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(4).getImm()); - DebugLoc dl = MI->getDebugLoc(); - bool isThumb2 = Subtarget->isThumb2(); - - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - unsigned scratch = MRI.createVirtualRegister(isThumb2 ? 
- (const TargetRegisterClass*)&ARM::rGPRRegClass : - (const TargetRegisterClass*)&ARM::GPRRegClass); - - if (isThumb2) { - MRI.constrainRegClass(dest, &ARM::rGPRRegClass); - MRI.constrainRegClass(oldval, &ARM::rGPRRegClass); - MRI.constrainRegClass(newval, &ARM::rGPRRegClass); - } - - unsigned ldrOpc, strOpc; - getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc); - - MachineFunction *MF = BB->getParent(); - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; // insert the new blocks after the current block - - MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, loop1MBB); - MF->insert(It, loop2MBB); - MF->insert(It, exitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - exitMBB->splice(exitMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); - - // thisMBB: - // ... - // fallthrough --> loop1MBB - BB->addSuccessor(loop1MBB); - - // loop1MBB: - // ldrex dest, [ptr] - // cmp dest, oldval - // bne exitMBB - BB = loop1MBB; - MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); - if (ldrOpc == ARM::t2LDREX) - MIB.addImm(0); - AddDefaultPred(MIB); - AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) - .addReg(dest).addReg(oldval)); - BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) - .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); - BB->addSuccessor(loop2MBB); - BB->addSuccessor(exitMBB); - - // loop2MBB: - // strex scratch, newval, [ptr] - // cmp scratch, #0 - // bne loop1MBB - BB = loop2MBB; - MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval).addReg(ptr); - if (strOpc == ARM::t2STREX) - MIB.addImm(0); - AddDefaultPred(MIB); - AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) - .addReg(scratch).addImm(0)); - BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) - .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR); - BB->addSuccessor(loop1MBB); - BB->addSuccessor(exitMBB); - - // exitMBB: - // ... - BB = exitMBB; - - MI->eraseFromParent(); // The instruction is gone now. - - return BB; -} - -MachineBasicBlock * -ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, - unsigned Size, unsigned BinOpcode) const { - // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction *MF = BB->getParent(); - MachineFunction::iterator It = BB; - ++It; - - unsigned dest = MI->getOperand(0).getReg(); - unsigned ptr = MI->getOperand(1).getReg(); - unsigned incr = MI->getOperand(2).getReg(); - AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm()); - DebugLoc dl = MI->getDebugLoc(); - bool isThumb2 = Subtarget->isThumb2(); - - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - if (isThumb2) { - MRI.constrainRegClass(dest, &ARM::rGPRRegClass); - MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); - MRI.constrainRegClass(incr, &ARM::rGPRRegClass); - } - - unsigned ldrOpc, strOpc; - getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc); - - MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, loopMBB); - MF->insert(It, exitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - exitMBB->splice(exitMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); - - const TargetRegisterClass *TRC = isThumb2 ? - (const TargetRegisterClass*)&ARM::rGPRRegClass : - (const TargetRegisterClass*)&ARM::GPRRegClass; - unsigned scratch = MRI.createVirtualRegister(TRC); - unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC); - - // thisMBB: - // ... - // fallthrough --> loopMBB - BB->addSuccessor(loopMBB); - - // loopMBB: - // ldrex dest, ptr - // <binop> scratch2, dest, incr - // strex scratch, scratch2, ptr - // cmp scratch, #0 - // bne- loopMBB - // fallthrough --> exitMBB - BB = loopMBB; - MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); - if (ldrOpc == ARM::t2LDREX) - MIB.addImm(0); - AddDefaultPred(MIB); - if (BinOpcode) { - // operand order needs to go the other way for NAND - if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr) - AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). - addReg(incr).addReg(dest)).addReg(0); - else - AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). - addReg(dest).addReg(incr)).addReg(0); - } - - MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr); - if (strOpc == ARM::t2STREX) - MIB.addImm(0); - AddDefaultPred(MIB); - AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) - .addReg(scratch).addImm(0)); - BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) - .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); - - BB->addSuccessor(loopMBB); - BB->addSuccessor(exitMBB); - - // exitMBB: - // ... - BB = exitMBB; - - MI->eraseFromParent(); // The instruction is gone now. 
- - return BB; -} - -MachineBasicBlock * -ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, - MachineBasicBlock *BB, - unsigned Size, - bool signExtend, - ARMCC::CondCodes Cond) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction *MF = BB->getParent(); - MachineFunction::iterator It = BB; - ++It; - - unsigned dest = MI->getOperand(0).getReg(); - unsigned ptr = MI->getOperand(1).getReg(); - unsigned incr = MI->getOperand(2).getReg(); - unsigned oldval = dest; - AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm()); - DebugLoc dl = MI->getDebugLoc(); - bool isThumb2 = Subtarget->isThumb2(); - - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - if (isThumb2) { - MRI.constrainRegClass(dest, &ARM::rGPRRegClass); - MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); - MRI.constrainRegClass(incr, &ARM::rGPRRegClass); - } - - unsigned ldrOpc, strOpc, extendOpc; - getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc); - switch (Size) { - default: llvm_unreachable("unsupported size for AtomicBinaryMinMax!"); - case 1: - extendOpc = isThumb2 ? ARM::t2SXTB : ARM::SXTB; - break; - case 2: - extendOpc = isThumb2 ? ARM::t2SXTH : ARM::SXTH; - break; - case 4: - extendOpc = 0; - break; - } - - MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, loopMBB); - MF->insert(It, exitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - exitMBB->splice(exitMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); - - const TargetRegisterClass *TRC = isThumb2 ? - (const TargetRegisterClass*)&ARM::rGPRRegClass : - (const TargetRegisterClass*)&ARM::GPRRegClass; - unsigned scratch = MRI.createVirtualRegister(TRC); - unsigned scratch2 = MRI.createVirtualRegister(TRC); - - // thisMBB: - // ... - // fallthrough --> loopMBB - BB->addSuccessor(loopMBB); - - // loopMBB: - // ldrex dest, ptr - // (sign extend dest, if required) - // cmp dest, incr - // cmov.cond scratch2, incr, dest - // strex scratch, scratch2, ptr - // cmp scratch, #0 - // bne- loopMBB - // fallthrough --> exitMBB - BB = loopMBB; - MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); - if (ldrOpc == ARM::t2LDREX) - MIB.addImm(0); - AddDefaultPred(MIB); - - // Sign extend the value, if necessary. - if (signExtend && extendOpc) { - oldval = MRI.createVirtualRegister(isThumb2 ? &ARM::rGPRRegClass - : &ARM::GPRnopcRegClass); - if (!isThumb2) - MRI.constrainRegClass(dest, &ARM::GPRnopcRegClass); - AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval) - .addReg(dest) - .addImm(0)); - } - - // Build compare and cmov instructions. - AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) - .addReg(oldval).addReg(incr)); - BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2) - .addReg(incr).addReg(oldval).addImm(Cond).addReg(ARM::CPSR); - - MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr); - if (strOpc == ARM::t2STREX) - MIB.addImm(0); - AddDefaultPred(MIB); - AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) - .addReg(scratch).addImm(0)); - BuildMI(BB, dl, TII->get(isThumb2 ? 
ARM::t2Bcc : ARM::Bcc)) - .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); - - BB->addSuccessor(loopMBB); - BB->addSuccessor(exitMBB); - - // exitMBB: - // ... - BB = exitMBB; - - MI->eraseFromParent(); // The instruction is gone now. - - return BB; -} - -MachineBasicBlock * -ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, - unsigned Op1, unsigned Op2, - bool NeedsCarry, bool IsCmpxchg, - bool IsMinMax, ARMCC::CondCodes CC) const { - // This also handles ATOMIC_SWAP and ATOMIC_STORE, indicated by Op1==0. - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction *MF = BB->getParent(); - MachineFunction::iterator It = BB; - ++It; - - bool isStore = (MI->getOpcode() == ARM::ATOMIC_STORE_I64); - unsigned offset = (isStore ? -2 : 0); - unsigned destlo = MI->getOperand(0).getReg(); - unsigned desthi = MI->getOperand(1).getReg(); - unsigned ptr = MI->getOperand(offset+2).getReg(); - unsigned vallo = MI->getOperand(offset+3).getReg(); - unsigned valhi = MI->getOperand(offset+4).getReg(); - unsigned OrdIdx = offset + (IsCmpxchg ? 7 : 5); - AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(OrdIdx).getImm()); - DebugLoc dl = MI->getDebugLoc(); - bool isThumb2 = Subtarget->isThumb2(); - - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - if (isThumb2) { - MRI.constrainRegClass(destlo, &ARM::rGPRRegClass); - MRI.constrainRegClass(desthi, &ARM::rGPRRegClass); - MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); - MRI.constrainRegClass(vallo, &ARM::rGPRRegClass); - MRI.constrainRegClass(valhi, &ARM::rGPRRegClass); - } - - unsigned ldrOpc, strOpc; - getExclusiveOperation(8, Ord, isThumb2, ldrOpc, strOpc); - - MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *contBB = 0, *cont2BB = 0; - if (IsCmpxchg || IsMinMax) - contBB = MF->CreateMachineBasicBlock(LLVM_BB); - if (IsCmpxchg) - cont2BB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - - MF->insert(It, loopMBB); - if (IsCmpxchg || IsMinMax) MF->insert(It, contBB); - if (IsCmpxchg) MF->insert(It, cont2BB); - MF->insert(It, exitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - exitMBB->splice(exitMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); - - const TargetRegisterClass *TRC = isThumb2 ? - (const TargetRegisterClass*)&ARM::tGPRRegClass : - (const TargetRegisterClass*)&ARM::GPRRegClass; - unsigned storesuccess = MRI.createVirtualRegister(TRC); - - // thisMBB: - // ... - // fallthrough --> loopMBB - BB->addSuccessor(loopMBB); - - // loopMBB: - // ldrexd r2, r3, ptr - // <binopa> r0, r2, incr - // <binopb> r1, r3, incr - // strexd storesuccess, r0, r1, ptr - // cmp storesuccess, #0 - // bne- loopMBB - // fallthrough --> exitMBB - BB = loopMBB; - - if (!isStore) { - // Load - if (isThumb2) { - AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc)) - .addReg(destlo, RegState::Define) - .addReg(desthi, RegState::Define) - .addReg(ptr)); - } else { - unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc)) - .addReg(GPRPair0, RegState::Define).addReg(ptr)); - // Copy r2/r3 into dest. (This copy will normally be coalesced.) 
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo) - .addReg(GPRPair0, 0, ARM::gsub_0); - BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi) - .addReg(GPRPair0, 0, ARM::gsub_1); - } - } - - unsigned StoreLo, StoreHi; - if (IsCmpxchg) { - // Add early exit - for (unsigned i = 0; i < 2; i++) { - AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : - ARM::CMPrr)) - .addReg(i == 0 ? destlo : desthi) - .addReg(i == 0 ? vallo : valhi)); - BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) - .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); - BB->addSuccessor(exitMBB); - BB->addSuccessor(i == 0 ? contBB : cont2BB); - BB = (i == 0 ? contBB : cont2BB); - } - - // Copy to physregs for strexd - StoreLo = MI->getOperand(5).getReg(); - StoreHi = MI->getOperand(6).getReg(); - } else if (Op1) { - // Perform binary operation - unsigned tmpRegLo = MRI.createVirtualRegister(TRC); - AddDefaultPred(BuildMI(BB, dl, TII->get(Op1), tmpRegLo) - .addReg(destlo).addReg(vallo)) - .addReg(NeedsCarry ? ARM::CPSR : 0, getDefRegState(NeedsCarry)); - unsigned tmpRegHi = MRI.createVirtualRegister(TRC); - AddDefaultPred(BuildMI(BB, dl, TII->get(Op2), tmpRegHi) - .addReg(desthi).addReg(valhi)) - .addReg(IsMinMax ? ARM::CPSR : 0, getDefRegState(IsMinMax)); - - StoreLo = tmpRegLo; - StoreHi = tmpRegHi; - } else { - // Copy to physregs for strexd - StoreLo = vallo; - StoreHi = valhi; - } - if (IsMinMax) { - // Compare and branch to exit block. - BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) - .addMBB(exitMBB).addImm(CC).addReg(ARM::CPSR); - BB->addSuccessor(exitMBB); - BB->addSuccessor(contBB); - BB = contBB; - StoreLo = vallo; - StoreHi = valhi; - } - - // Store - if (isThumb2) { - MRI.constrainRegClass(StoreLo, &ARM::rGPRRegClass); - MRI.constrainRegClass(StoreHi, &ARM::rGPRRegClass); - AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess) - .addReg(StoreLo).addReg(StoreHi).addReg(ptr)); - } else { - // Marshal a pair... - unsigned StorePair = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair); - BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1) - .addReg(UndefPair) - .addReg(StoreLo) - .addImm(ARM::gsub_0); - BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), StorePair) - .addReg(r1) - .addReg(StoreHi) - .addImm(ARM::gsub_1); - - // ...and store it - AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess) - .addReg(StorePair).addReg(ptr)); - } - // Cmp+jump - AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) - .addReg(storesuccess).addImm(0)); - BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) - .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); - - BB->addSuccessor(loopMBB); - BB->addSuccessor(exitMBB); - - // exitMBB: - // ... - BB = exitMBB; - - MI->eraseFromParent(); // The instruction is gone now. 
- - return BB; -} - -MachineBasicBlock * -ARMTargetLowering::EmitAtomicLoad64(MachineInstr *MI, MachineBasicBlock *BB) const { - - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - - unsigned destlo = MI->getOperand(0).getReg(); - unsigned desthi = MI->getOperand(1).getReg(); - unsigned ptr = MI->getOperand(2).getReg(); - AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm()); - DebugLoc dl = MI->getDebugLoc(); - bool isThumb2 = Subtarget->isThumb2(); - - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - if (isThumb2) { - MRI.constrainRegClass(destlo, &ARM::rGPRRegClass); - MRI.constrainRegClass(desthi, &ARM::rGPRRegClass); - MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); - } - unsigned ldrOpc, strOpc; - getExclusiveOperation(8, Ord, isThumb2, ldrOpc, strOpc); - - MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(ldrOpc)); - - if (isThumb2) { - MIB.addReg(destlo, RegState::Define) - .addReg(desthi, RegState::Define) - .addReg(ptr); - - } else { - unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - MIB.addReg(GPRPair0, RegState::Define).addReg(ptr); - - // Copy GPRPair0 into dest. (This copy will normally be coalesced.) - BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), destlo) - .addReg(GPRPair0, 0, ARM::gsub_0); - BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), desthi) - .addReg(GPRPair0, 0, ARM::gsub_1); - } - AddDefaultPred(MIB); - - MI->eraseFromParent(); // The instruction is gone now. - - return BB; -} - /// SetupEntryBlockForSjLj - Insert code into the entry block that creates and /// registers the function context. void ARMTargetLowering:: @@ -7193,7 +6754,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { } // N.B. the order the invoke BBs are processed in doesn't matter here. - const uint16_t *SavedRegs = RI.getCalleeSavedRegs(MF); + const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF); SmallVector<MachineBasicBlock*, 64> MBBLPads; for (SmallPtrSet<MachineBasicBlock*, 64>::iterator I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) { @@ -7390,8 +6951,8 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI, MachineFunction *MF = BB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); unsigned UnitSize = 0; - const TargetRegisterClass *TRC = 0; - const TargetRegisterClass *VecTRC = 0; + const TargetRegisterClass *TRC = nullptr; + const TargetRegisterClass *VecTRC = nullptr; bool IsThumb1 = Subtarget->isThumb1Only(); bool IsThumb2 = Subtarget->isThumb2(); @@ -7425,7 +6986,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI, ? (const TargetRegisterClass *)&ARM::DPairRegClass : UnitSize == 8 ? (const TargetRegisterClass *)&ARM::DPRRegClass - : 0; + : nullptr; unsigned BytesLeft = SizeVal % UnitSize; unsigned LoopSize = SizeVal - BytesLeft; @@ -7493,8 +7054,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI, // Transfer the remainder of BB and its successor edges to exitMBB. exitMBB->splice(exitMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); + std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); // Load an immediate to varEnd. 
@@ -7608,6 +7168,72 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI, } MachineBasicBlock * +ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI, + MachineBasicBlock *MBB) const { + const TargetMachine &TM = getTargetMachine(); + const TargetInstrInfo &TII = *TM.getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + + assert(Subtarget->isTargetWindows() && + "__chkstk is only supported on Windows"); + assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode"); + + // __chkstk takes the number of words to allocate on the stack in R4, and + // returns the stack adjustment in number of bytes in R4. This will not + // clober any other registers (other than the obvious lr). + // + // Although, technically, IP should be considered a register which may be + // clobbered, the call itself will not touch it. Windows on ARM is a pure + // thumb-2 environment, so there is no interworking required. As a result, we + // do not expect a veneer to be emitted by the linker, clobbering IP. + // + // Each module receives its own copy of __chkstk, so no import thunk is + // required, again, ensuring that IP is not clobbered. + // + // Finally, although some linkers may theoretically provide a trampoline for + // out of range calls (which is quite common due to a 32M range limitation of + // branches for Thumb), we can generate the long-call version via + // -mcmodel=large, alleviating the need for the trampoline which may clobber + // IP. + + switch (TM.getCodeModel()) { + case CodeModel::Small: + case CodeModel::Medium: + case CodeModel::Default: + case CodeModel::Kernel: + BuildMI(*MBB, MI, DL, TII.get(ARM::tBL)) + .addImm((unsigned)ARMCC::AL).addReg(0) + .addExternalSymbol("__chkstk") + .addReg(ARM::R4, RegState::Implicit | RegState::Kill) + .addReg(ARM::R4, RegState::Implicit | RegState::Define) + .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead); + break; + case CodeModel::Large: + case CodeModel::JITDefault: { + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + unsigned Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass); + + BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg) + .addExternalSymbol("__chkstk"); + BuildMI(*MBB, MI, DL, TII.get(ARM::tBLXr)) + .addImm((unsigned)ARMCC::AL).addReg(0) + .addReg(Reg, RegState::Kill) + .addReg(ARM::R4, RegState::Implicit | RegState::Kill) + .addReg(ARM::R4, RegState::Implicit | RegState::Define) + .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead); + break; + } + } + + AddDefaultCC(AddDefaultPred(BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), + ARM::SP) + .addReg(ARM::SP).addReg(ARM::R4))); + + MI->eraseFromParent(); + return MBB; +} + +MachineBasicBlock * ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); @@ -7670,131 +7296,6 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MI->eraseFromParent(); return BB; } - case ARM::ATOMIC_LOAD_ADD_I8: - return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); - case ARM::ATOMIC_LOAD_ADD_I16: - return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); - case ARM::ATOMIC_LOAD_ADD_I32: - return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); - - case ARM::ATOMIC_LOAD_AND_I8: - return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); - case ARM::ATOMIC_LOAD_AND_I16: - return EmitAtomicBinary(MI, BB, 2, isThumb2 ? 
ARM::t2ANDrr : ARM::ANDrr); - case ARM::ATOMIC_LOAD_AND_I32: - return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); - - case ARM::ATOMIC_LOAD_OR_I8: - return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); - case ARM::ATOMIC_LOAD_OR_I16: - return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); - case ARM::ATOMIC_LOAD_OR_I32: - return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); - - case ARM::ATOMIC_LOAD_XOR_I8: - return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr); - case ARM::ATOMIC_LOAD_XOR_I16: - return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr); - case ARM::ATOMIC_LOAD_XOR_I32: - return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr); - - case ARM::ATOMIC_LOAD_NAND_I8: - return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr); - case ARM::ATOMIC_LOAD_NAND_I16: - return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr); - case ARM::ATOMIC_LOAD_NAND_I32: - return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr); - - case ARM::ATOMIC_LOAD_SUB_I8: - return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); - case ARM::ATOMIC_LOAD_SUB_I16: - return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); - case ARM::ATOMIC_LOAD_SUB_I32: - return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); - - case ARM::ATOMIC_LOAD_MIN_I8: - return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::LT); - case ARM::ATOMIC_LOAD_MIN_I16: - return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::LT); - case ARM::ATOMIC_LOAD_MIN_I32: - return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::LT); - - case ARM::ATOMIC_LOAD_MAX_I8: - return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::GT); - case ARM::ATOMIC_LOAD_MAX_I16: - return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::GT); - case ARM::ATOMIC_LOAD_MAX_I32: - return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::GT); - - case ARM::ATOMIC_LOAD_UMIN_I8: - return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::LO); - case ARM::ATOMIC_LOAD_UMIN_I16: - return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::LO); - case ARM::ATOMIC_LOAD_UMIN_I32: - return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::LO); - - case ARM::ATOMIC_LOAD_UMAX_I8: - return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::HI); - case ARM::ATOMIC_LOAD_UMAX_I16: - return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::HI); - case ARM::ATOMIC_LOAD_UMAX_I32: - return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::HI); - - case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0); - case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0); - case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0); - - case ARM::ATOMIC_CMP_SWAP_I8: return EmitAtomicCmpSwap(MI, BB, 1); - case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2); - case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4); - - case ARM::ATOMIC_LOAD_I64: - return EmitAtomicLoad64(MI, BB); - - case ARM::ATOMIC_LOAD_ADD_I64: - return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr, - isThumb2 ? ARM::t2ADCrr : ARM::ADCrr, - /*NeedsCarry*/ true); - case ARM::ATOMIC_LOAD_SUB_I64: - return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, - isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, - /*NeedsCarry*/ true); - case ARM::ATOMIC_LOAD_OR_I64: - return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr, - isThumb2 ? 
ARM::t2ORRrr : ARM::ORRrr); - case ARM::ATOMIC_LOAD_XOR_I64: - return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2EORrr : ARM::EORrr, - isThumb2 ? ARM::t2EORrr : ARM::EORrr); - case ARM::ATOMIC_LOAD_AND_I64: - return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr, - isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); - case ARM::ATOMIC_STORE_I64: - case ARM::ATOMIC_SWAP_I64: - return EmitAtomicBinary64(MI, BB, 0, 0, false); - case ARM::ATOMIC_CMP_SWAP_I64: - return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, - isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, - /*NeedsCarry*/ false, /*IsCmpxchg*/true); - case ARM::ATOMIC_LOAD_MIN_I64: - return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, - isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, - /*NeedsCarry*/ true, /*IsCmpxchg*/false, - /*IsMinMax*/ true, ARMCC::LT); - case ARM::ATOMIC_LOAD_MAX_I64: - return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, - isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, - /*NeedsCarry*/ true, /*IsCmpxchg*/false, - /*IsMinMax*/ true, ARMCC::GE); - case ARM::ATOMIC_LOAD_UMIN_I64: - return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, - isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, - /*NeedsCarry*/ true, /*IsCmpxchg*/false, - /*IsMinMax*/ true, ARMCC::LO); - case ARM::ATOMIC_LOAD_UMAX_I64: - return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, - isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, - /*NeedsCarry*/ true, /*IsCmpxchg*/false, - /*IsMinMax*/ true, ARMCC::HS); case ARM::tMOVCCr_pseudo: { // To "insert" a SELECT_CC instruction, we actually have to insert the @@ -7820,8 +7321,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // Transfer the remainder of BB and its successor edges to sinkMBB. sinkMBB->splice(sinkMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); + std::next(MachineBasicBlock::iterator(MI)), BB->end()); sinkMBB->transferSuccessorsAndUpdatePHIs(BB); BB->addSuccessor(copy0MBB); @@ -7854,7 +7354,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case ARM::BCCi64: case ARM::BCCZi64: { // If there is an unconditional branch to the other successor, remove it. - BB->erase(llvm::next(MachineBasicBlock::iterator(MI)), BB->end()); + BB->erase(std::next(MachineBasicBlock::iterator(MI)), BB->end()); // Compare both parts that make up the double comparison separately for // equality. @@ -7939,8 +7439,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // Transfer the remainder of BB and its successor edges to sinkMBB. SinkBB->splice(SinkBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); + std::next(MachineBasicBlock::iterator(MI)), BB->end()); SinkBB->transferSuccessorsAndUpdatePHIs(BB); BB->addSuccessor(RSBBB); @@ -7983,6 +7482,8 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case ARM::COPY_STRUCT_BYVAL_I32: ++NumLoopByVals; return EmitStructByval(MI, BB); + case ARM::WIN__CHKSTK: + return EmitLowered__chkstk(MI, BB); } } @@ -8273,7 +7774,9 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, // Get widened type and narrowed type. 
MVT widenType; unsigned numElem = VT.getVectorNumElements(); - switch (VT.getVectorElementType().getSimpleVT().SimpleTy) { + + EVT inputLaneType = Vec.getValueType().getVectorElementType(); + switch (inputLaneType.getSimpleVT().SimpleTy) { case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break; case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break; case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break; @@ -8281,9 +7784,9 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, llvm_unreachable("Invalid vector element type for padd optimization."); } - SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), - widenType, &Ops[0], Ops.size()); - return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, tmp); + SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), widenType, Ops); + unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE; + return DAG.getNode(ExtOp, SDLoc(N), VT, tmp); } static SDValue findMUL_LOHI(SDValue V) { @@ -8341,7 +7844,7 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, // Look for the glued ADDE. SDNode* AddeNode = AddcNode->getGluedUser(); - if (AddeNode == NULL) + if (!AddeNode) return SDValue(); // Make sure it is really an ADDE. @@ -8376,9 +7879,9 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, // Figure out the high and low input values to the MLAL node. SDValue* HiMul = &MULOp; - SDValue* HiAdd = NULL; - SDValue* LoMul = NULL; - SDValue* LowAdd = NULL; + SDValue* HiAdd = nullptr; + SDValue* LoMul = nullptr; + SDValue* LowAdd = nullptr; if (IsLeftOperandMUL) HiAdd = &AddeOp1; @@ -8395,7 +7898,7 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, LowAdd = &AddcOp0; } - if (LoMul == NULL) + if (!LoMul) return SDValue(); if (LoMul->getNode() != HiMul->getNode()) @@ -8412,8 +7915,7 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, Ops.push_back(*HiAdd); SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcNode), - DAG.getVTList(MVT::i32, MVT::i32), - &Ops[0], Ops.size()); + DAG.getVTList(MVT::i32, MVT::i32), Ops); // Replace the ADDs' nodes uses by the MLA node's values. SDValue HiMLALResult(MLALNode.getNode(), 1); @@ -8937,6 +8439,8 @@ static SDValue PerformVMOVRRDCombine(SDNode *N, std::min(4U, LD->getAlignment() / 2)); DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1)); + if (DCI.DAG.getTargetLoweringInfo().isBigEndian()) + std::swap (NewLD1, NewLD2); SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2); DCI.RemoveFromWorklist(LD); DAG.DeleteNode(LD); @@ -9004,7 +8508,8 @@ static SDValue PerformSTORECombine(SDNode *N, SDLoc DL(St); SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal); SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1); - for (unsigned i = 0; i < NumElems; ++i) ShuffleVec[i] = i * SizeRatio; + for (unsigned i = 0; i < NumElems; ++i) + ShuffleVec[i] = TLI.isBigEndian() ? (i+1) * SizeRatio - 1 : i * SizeRatio; // Can't shuffle using an illegal type. 
if (!TLI.isTypeLegal(WideVecVT)) return SDValue(); @@ -9050,8 +8555,7 @@ static SDValue PerformSTORECombine(SDNode *N, Increment); Chains.push_back(Ch); } - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &Chains[0], - Chains.size()); + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); } if (!ISD::isNormalStore(St)) @@ -9062,16 +8566,18 @@ static SDValue PerformSTORECombine(SDNode *N, if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR && StVal.getNode()->hasOneUse()) { SelectionDAG &DAG = DCI.DAG; + bool isBigEndian = DAG.getTargetLoweringInfo().isBigEndian(); SDLoc DL(St); SDValue BasePtr = St->getBasePtr(); SDValue NewST1 = DAG.getStore(St->getChain(), DL, - StVal.getNode()->getOperand(0), BasePtr, - St->getPointerInfo(), St->isVolatile(), + StVal.getNode()->getOperand(isBigEndian ? 1 : 0 ), + BasePtr, St->getPointerInfo(), St->isVolatile(), St->isNonTemporal(), St->getAlignment()); SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, DAG.getConstant(4, MVT::i32)); - return DAG.getStore(NewST1.getValue(0), DL, StVal.getNode()->getOperand(1), + return DAG.getStore(NewST1.getValue(0), DL, + StVal.getNode()->getOperand(isBigEndian ? 0 : 1), OffsetPtr, St->getPointerInfo(), St->isVolatile(), St->isNonTemporal(), std::min(4U, St->getAlignment() / 2)); @@ -9147,7 +8653,7 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N, DCI.AddToWorklist(V.getNode()); } EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts); - SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops.data(), NumElts); + SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops); return DAG.getNode(ISD::BITCAST, dl, VT, BV); } @@ -9230,7 +8736,7 @@ PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { // Fold obvious case. V = V.getOperand(0); else { - V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V); + V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V); // Make the DAGCombiner fold the bitcasts. DCI.AddToWorklist(V.getNode()); } @@ -9426,7 +8932,7 @@ static SDValue CombineBaseUpdate(SDNode *N, Tys[n] = VecTy; Tys[n++] = MVT::i32; Tys[n] = MVT::Other; - SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs+2); + SDVTList SDTys = DAG.getVTList(ArrayRef<EVT>(Tys, NumResultVecs+2)); SmallVector<SDValue, 8> Ops; Ops.push_back(N->getOperand(0)); // incoming chain Ops.push_back(N->getOperand(AddrOpIdx)); @@ -9436,8 +8942,7 @@ static SDValue CombineBaseUpdate(SDNode *N, } MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N); SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, - Ops.data(), Ops.size(), - MemInt->getMemoryVT(), + Ops, MemInt->getMemoryVT(), MemInt->getMemOperand()); // Update the uses. @@ -9506,11 +9011,11 @@ static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { for (n = 0; n < NumVecs; ++n) Tys[n] = VT; Tys[n] = MVT::Other; - SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1); + SDVTList SDTys = DAG.getVTList(ArrayRef<EVT>(Tys, NumVecs+1)); SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) }; MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD); SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys, - Ops, 2, VLDMemInt->getMemoryVT(), + Ops, VLDMemInt->getMemoryVT(), VLDMemInt->getMemOperand()); // Update the uses. @@ -9759,9 +9264,6 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { // loads from a constant pool. 
case Intrinsic::arm_neon_vshifts: case Intrinsic::arm_neon_vshiftu: - case Intrinsic::arm_neon_vshiftls: - case Intrinsic::arm_neon_vshiftlu: - case Intrinsic::arm_neon_vshiftn: case Intrinsic::arm_neon_vrshifts: case Intrinsic::arm_neon_vrshiftu: case Intrinsic::arm_neon_vrshiftn: @@ -9792,12 +9294,6 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { } return SDValue(); - case Intrinsic::arm_neon_vshiftls: - case Intrinsic::arm_neon_vshiftlu: - if (isVShiftLImm(N->getOperand(2), VT, true, Cnt)) - break; - llvm_unreachable("invalid shift count for vshll intrinsic"); - case Intrinsic::arm_neon_vrshifts: case Intrinsic::arm_neon_vrshiftu: if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) @@ -9815,7 +9311,6 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { break; llvm_unreachable("invalid shift count for vqshlu intrinsic"); - case Intrinsic::arm_neon_vshiftn: case Intrinsic::arm_neon_vrshiftn: case Intrinsic::arm_neon_vqshiftns: case Intrinsic::arm_neon_vqshiftnu: @@ -9838,16 +9333,6 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { case Intrinsic::arm_neon_vshiftu: // Opcode already set above. break; - case Intrinsic::arm_neon_vshiftls: - case Intrinsic::arm_neon_vshiftlu: - if (Cnt == VT.getVectorElementType().getSizeInBits()) - VShiftOpc = ARMISD::VSHLLi; - else - VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ? - ARMISD::VSHLLs : ARMISD::VSHLLu); - break; - case Intrinsic::arm_neon_vshiftn: - VShiftOpc = ARMISD::VSHRN; break; case Intrinsic::arm_neon_vrshifts: VShiftOpc = ARMISD::VRSHRs; break; case Intrinsic::arm_neon_vrshiftu: @@ -10128,7 +9613,7 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { if (Res.getNode()) { APInt KnownZero, KnownOne; - DAG.ComputeMaskedBits(SDValue(N,0), KnownZero, KnownOne); + DAG.computeKnownBits(SDValue(N,0), KnownZero, KnownOne); // Capture demanded bits information that would be otherwise lost. if (KnownZero == 0xfffffffe) Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, @@ -10211,7 +9696,8 @@ bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc, return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE); } -bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const { +bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, unsigned, + bool *Fast) const { // The AllowsUnaliged flag models the SCTLR.A setting in ARM cpus bool AllowsUnaligned = Subtarget->allowsUnalignedMem(); @@ -10233,7 +9719,7 @@ bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const case MVT::v2f64: { // For any little-endian targets with neon, we can support unaligned ld/st // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8. 
- // A big-endian target may also explictly support unaligned accesses + // A big-endian target may also explicitly support unaligned accesses if (Subtarget->hasNEON() && (AllowsUnaligned || isLittleEndian())) { if (Fast) *Fast = true; @@ -10265,11 +9751,11 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size, bool Fast; if (Size >= 16 && (memOpAlign(SrcAlign, DstAlign, 16) || - (allowsUnalignedMemoryAccesses(MVT::v2f64, &Fast) && Fast))) { + (allowsUnalignedMemoryAccesses(MVT::v2f64, 0, &Fast) && Fast))) { return MVT::v2f64; } else if (Size >= 8 && (memOpAlign(SrcAlign, DstAlign, 8) || - (allowsUnalignedMemoryAccesses(MVT::f64, &Fast) && Fast))) { + (allowsUnalignedMemoryAccesses(MVT::f64, 0, &Fast) && Fast))) { return MVT::f64; } } @@ -10714,11 +10200,11 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, return true; } -void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, - const SelectionDAG &DAG, - unsigned Depth) const { +void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) const { unsigned BitWidth = KnownOne.getBitWidth(); KnownZero = KnownOne = APInt(BitWidth, 0); switch (Op.getOpcode()) { @@ -10734,15 +10220,29 @@ void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, break; case ARMISD::CMOV: { // Bits are known zero/one if known on the LHS and RHS. - DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + DAG.computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); if (KnownZero == 0 && KnownOne == 0) return; APInt KnownZeroRHS, KnownOneRHS; - DAG.ComputeMaskedBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1); + DAG.computeKnownBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1); KnownZero &= KnownZeroRHS; KnownOne &= KnownOneRHS; return; } + case ISD::INTRINSIC_W_CHAIN: { + ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1)); + Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue()); + switch (IntID) { + default: return; + case Intrinsic::arm_ldaex: + case Intrinsic::arm_ldrex: { + EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT(); + unsigned MemBits = VT.getScalarType().getSizeInBits(); + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); + return; + } + } + } } } @@ -10818,7 +10318,7 @@ ARMTargetLowering::getSingleConstraintMatchWeight( Value *CallOperandVal = info.CallOperandVal; // If we don't have a value, we can't do a match, // but allow it at the lowest weight. - if (CallOperandVal == NULL) + if (!CallOperandVal) return CW_Default; Type *type = CallOperandVal->getType(); // Look at the constraint type. @@ -10897,7 +10397,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector<SDValue>&Ops, SelectionDAG &DAG) const { - SDValue Result(0, 0); + SDValue Result; // Currently only support length 1 constraints. 
if (Constraint.length() != 1) return; @@ -11096,16 +10596,41 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { Type *RetTy = (Type*)StructType::get(Ty, Ty, NULL); SDLoc dl(Op); - TargetLowering:: - CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, true, - 0, getLibcallCallingConv(LC), /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, dl); - std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(InChain) + .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) + .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned); + std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI); return CallInfo.first; } +SDValue +ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { + assert(Subtarget->isTargetWindows() && "unsupported target platform"); + SDLoc DL(Op); + + // Get the inputs. + SDValue Chain = Op.getOperand(0); + SDValue Size = Op.getOperand(1); + + SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size, + DAG.getConstant(2, MVT::i32)); + + SDValue Flag; + Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Flag); + Flag = Chain.getValue(1); + + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Flag); + + SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32); + Chain = NewSP.getValue(1); + + SDValue Ops[2] = { NewSP, Chain }; + return DAG.getMergeValues(Ops, DL); +} + bool ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The ARM target isn't yet aware of offsets. @@ -11191,6 +10716,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.writeMem = true; return true; } + case Intrinsic::arm_ldaex: case Intrinsic::arm_ldrex: { PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType()); Info.opc = ISD::INTRINSIC_W_CHAIN; @@ -11203,6 +10729,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.writeMem = false; return true; } + case Intrinsic::arm_stlex: case Intrinsic::arm_strex: { PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType()); Info.opc = ISD::INTRINSIC_W_CHAIN; @@ -11215,6 +10742,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.writeMem = true; return true; } + case Intrinsic::arm_stlexd: case Intrinsic::arm_strexd: { Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i64; @@ -11226,6 +10754,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.writeMem = true; return true; } + case Intrinsic::arm_ldaexd: case Intrinsic::arm_ldrexd: { Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i64; @@ -11243,3 +10772,178 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, return false; } + +/// \brief Returns true if it is beneficial to convert a load of a constant +/// to just the constant itself. +bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, + Type *Ty) const { + assert(Ty->isIntegerTy()); + + unsigned Bits = Ty->getPrimitiveSizeInBits(); + if (Bits == 0 || Bits > 32) + return false; + return true; +} + +bool ARMTargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const { + // Loads and stores less than 64-bits are already atomic; ones above that + // are doomed anyway, so defer to the default libcall and blame the OS when + // things go wrong. 
Cortex M doesn't have ldrexd/strexd though, so don't emit + // anything for those. + bool IsMClass = Subtarget->isMClass(); + if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits(); + return Size == 64 && !IsMClass; + } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { + return LI->getType()->getPrimitiveSizeInBits() == 64 && !IsMClass; + } + + // For the real atomic operations, we have ldrex/strex up to 32 bits, + // and up to 64 bits on the non-M profiles + unsigned AtomicLimit = IsMClass ? 32 : 64; + return Inst->getType()->getPrimitiveSizeInBits() <= AtomicLimit; +} + +Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, + AtomicOrdering Ord) const { + Module *M = Builder.GetInsertBlock()->getParent()->getParent(); + Type *ValTy = cast<PointerType>(Addr->getType())->getElementType(); + bool IsAcquire = + Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent; + + // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd + // intrinsic must return {i32, i32} and we have to recombine them into a + // single i64 here. + if (ValTy->getPrimitiveSizeInBits() == 64) { + Intrinsic::ID Int = + IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd; + Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int); + + Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext())); + Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi"); + + Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo"); + Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi"); + if (!Subtarget->isLittle()) + std::swap (Lo, Hi); + Lo = Builder.CreateZExt(Lo, ValTy, "lo64"); + Hi = Builder.CreateZExt(Hi, ValTy, "hi64"); + return Builder.CreateOr( + Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 32)), "val64"); + } + + Type *Tys[] = { Addr->getType() }; + Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex; + Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int, Tys); + + return Builder.CreateTruncOrBitCast( + Builder.CreateCall(Ldrex, Addr), + cast<PointerType>(Addr->getType())->getElementType()); +} + +Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val, + Value *Addr, + AtomicOrdering Ord) const { + Module *M = Builder.GetInsertBlock()->getParent()->getParent(); + bool IsRelease = + Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent; + + // Since the intrinsics must have legal type, the i64 intrinsics take two + // parameters: "i32, i32". We must marshal Val into the appropriate form + // before the call. + if (Val->getType()->getPrimitiveSizeInBits() == 64) { + Intrinsic::ID Int = + IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd; + Function *Strex = Intrinsic::getDeclaration(M, Int); + Type *Int32Ty = Type::getInt32Ty(M->getContext()); + + Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo"); + Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi"); + if (!Subtarget->isLittle()) + std::swap (Lo, Hi); + Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext())); + return Builder.CreateCall3(Strex, Lo, Hi, Addr); + } + + Intrinsic::ID Int = IsRelease ? 
Intrinsic::arm_stlex : Intrinsic::arm_strex; + Type *Tys[] = { Addr->getType() }; + Function *Strex = Intrinsic::getDeclaration(M, Int, Tys); + + return Builder.CreateCall2( + Strex, Builder.CreateZExtOrBitCast( + Val, Strex->getFunctionType()->getParamType(0)), + Addr); +} + +enum HABaseType { + HA_UNKNOWN = 0, + HA_FLOAT, + HA_DOUBLE, + HA_VECT64, + HA_VECT128 +}; + +static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base, + uint64_t &Members) { + if (const StructType *ST = dyn_cast<StructType>(Ty)) { + for (unsigned i = 0; i < ST->getNumElements(); ++i) { + uint64_t SubMembers = 0; + if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers)) + return false; + Members += SubMembers; + } + } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) { + uint64_t SubMembers = 0; + if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers)) + return false; + Members += SubMembers * AT->getNumElements(); + } else if (Ty->isFloatTy()) { + if (Base != HA_UNKNOWN && Base != HA_FLOAT) + return false; + Members = 1; + Base = HA_FLOAT; + } else if (Ty->isDoubleTy()) { + if (Base != HA_UNKNOWN && Base != HA_DOUBLE) + return false; + Members = 1; + Base = HA_DOUBLE; + } else if (const VectorType *VT = dyn_cast<VectorType>(Ty)) { + Members = 1; + switch (Base) { + case HA_FLOAT: + case HA_DOUBLE: + return false; + case HA_VECT64: + return VT->getBitWidth() == 64; + case HA_VECT128: + return VT->getBitWidth() == 128; + case HA_UNKNOWN: + switch (VT->getBitWidth()) { + case 64: + Base = HA_VECT64; + return true; + case 128: + Base = HA_VECT128; + return true; + default: + return false; + } + } + } + + return (Members > 0 && Members <= 4); +} + +/// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate. +bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters( + Type *Ty, CallingConv::ID CallConv, bool isVarArg) const { + if (getEffectiveCallingConv(CallConv, isVarArg) != + CallingConv::ARM_AAPCS_VFP) + return false; + + HABaseType Base = HA_UNKNOWN; + uint64_t Members = 0; + bool result = isHomogeneousAggregate(Ty, Base, Members); + DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump(); dbgs() << "\n"); + return result; +} diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h index 90facddeb02b..1ace0f330f1a 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h @@ -15,17 +15,15 @@ #ifndef ARMISELLOWERING_H #define ARMISELLOWERING_H -#include "ARM.h" -#include "ARMSubtarget.h" +#include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/CodeGen/CallingConvLower.h" -#include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetRegisterInfo.h" #include <vector> namespace llvm { class ARMConstantPoolValue; + class ARMSubtarget; namespace ARMISD { // ARM Specific DAG Nodes @@ -35,8 +33,6 @@ namespace llvm { Wrapper, // Wrapper - A wrapper node for TargetConstantPool, // TargetExternalSymbol, and TargetGlobalAddress. - WrapperDYN, // WrapperDYN - A wrapper node for TargetGlobalAddress in - // DYN mode. WrapperPIC, // WrapperPIC - A wrapper node for TargetGlobalAddress in // PIC mode. WrapperJT, // WrapperJT - A wrapper node for TargetJumpTable @@ -99,6 +95,8 @@ namespace llvm { PRELOAD, // Preload + WIN__CHKSTK, // Windows' __chkstk call to do stack probing. + VCEQ, // Vector compare equal. VCEQZ, // Vector compare equal to zero. VCGE, // Vector compare greater than or equal. 
@@ -115,10 +113,6 @@ namespace llvm { VSHL, // ...left VSHRs, // ...right (signed) VSHRu, // ...right (unsigned) - VSHLLs, // ...left long (signed) - VSHLLu, // ...left long (unsigned) - VSHLLi, // ...left long (with maximum shift count) - VSHRN, // ...right narrow // Vector rounding shift by immediate: VRSHRs, // ...right (signed) @@ -240,116 +234,114 @@ namespace llvm { public: explicit ARMTargetLowering(TargetMachine &TM); - virtual unsigned getJumpTableEncoding() const; + unsigned getJumpTableEncoding() const override; - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; /// ReplaceNodeResults - Replace the results of node with an illegal result /// type with new values built out of custom code. /// - virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG) const; + void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, + SelectionDAG &DAG) const override; - virtual const char *getTargetNodeName(unsigned Opcode) const; + const char *getTargetNodeName(unsigned Opcode) const override; - virtual bool isSelectSupported(SelectSupportKind Kind) const { + bool isSelectSupported(SelectSupportKind Kind) const override { // ARM does not support scalar condition selects on vectors. return (Kind != ScalarCondVectorVal); } /// getSetCCResultType - Return the value type to use for ISD::SETCC. - virtual EVT getSetCCResultType(LLVMContext &Context, EVT VT) const; + EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override; - virtual MachineBasicBlock * + MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB) const; + MachineBasicBlock *MBB) const override; - virtual void - AdjustInstrPostInstrSelection(MachineInstr *MI, SDNode *Node) const; + void AdjustInstrPostInstrSelection(MachineInstr *MI, + SDNode *Node) const override; SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const; - virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; - bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const; + bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override; /// allowsUnalignedMemoryAccesses - Returns true if the target allows /// unaligned memory accesses of the specified type. Returns whether it /// is "fast" by reference in the second argument. - virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const; + bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AddrSpace, + bool *Fast) const override; - virtual EVT getOptimalMemOpType(uint64_t Size, - unsigned DstAlign, unsigned SrcAlign, - bool IsMemset, bool ZeroMemset, - bool MemcpyStrSrc, - MachineFunction &MF) const; + EVT getOptimalMemOpType(uint64_t Size, + unsigned DstAlign, unsigned SrcAlign, + bool IsMemset, bool ZeroMemset, + bool MemcpyStrSrc, + MachineFunction &MF) const override; using TargetLowering::isZExtFree; - virtual bool isZExtFree(SDValue Val, EVT VT2) const; + bool isZExtFree(SDValue Val, EVT VT2) const override; - virtual bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const; + bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override; /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. 
- virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const; + bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const; /// isLegalICmpImmediate - Return true if the specified immediate is legal /// icmp immediate, that is the target has icmp instructions which can /// compare a register against the immediate without having to materialize /// the immediate into a register. - virtual bool isLegalICmpImmediate(int64_t Imm) const; + bool isLegalICmpImmediate(int64_t Imm) const override; /// isLegalAddImmediate - Return true if the specified immediate is legal /// add immediate, that is the target has add instructions which can /// add a register and the immediate without having to materialize /// the immediate into a register. - virtual bool isLegalAddImmediate(int64_t Imm) const; + bool isLegalAddImmediate(int64_t Imm) const override; /// getPreIndexedAddressParts - returns true by value, base pointer and /// offset pointer and addressing mode by reference if the node's address /// can be legally represented as pre-indexed load / store address. - virtual bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, - SDValue &Offset, - ISD::MemIndexedMode &AM, - SelectionDAG &DAG) const; + bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const override; /// getPostIndexedAddressParts - returns true by value, base pointer and /// offset pointer and addressing mode by reference if this node can be /// combined with a load / store to form a post-indexed load / store. - virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, - SDValue &Base, SDValue &Offset, - ISD::MemIndexedMode &AM, - SelectionDAG &DAG) const; + bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, + SDValue &Offset, ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const override; - virtual void computeMaskedBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, - const SelectionDAG &DAG, - unsigned Depth) const; + void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) const override; - virtual bool ExpandInlineAsm(CallInst *CI) const; + bool ExpandInlineAsm(CallInst *CI) const override; - ConstraintType getConstraintType(const std::string &Constraint) const; + ConstraintType + getConstraintType(const std::string &Constraint) const override; /// Examine constraint string and operand type and determine a weight value. /// The operand object must already have been set up with the operand type. ConstraintWeight getSingleConstraintMatchWeight( - AsmOperandInfo &info, const char *constraint) const; + AsmOperandInfo &info, const char *constraint) const override; std::pair<unsigned, const TargetRegisterClass*> getRegForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const; + MVT VT) const override; /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. If hasMemory is /// true it means one of the asm constraint of the inline asm instruction /// being processed is 'm'. 
- virtual void LowerAsmOperandForConstraint(SDValue Op, - std::string &Constraint, - std::vector<SDValue> &Ops, - SelectionDAG &DAG) const; + void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, + std::vector<SDValue> &Ops, + SelectionDAG &DAG) const override; const ARMSubtarget* getSubtarget() const { return Subtarget; @@ -357,39 +349,58 @@ namespace llvm { /// getRegClassFor - Return the register class that should be used for the /// specified value type. - virtual const TargetRegisterClass *getRegClassFor(MVT VT) const; + const TargetRegisterClass *getRegClassFor(MVT VT) const override; /// getMaximalGlobalOffset - Returns the maximal possible offset which can /// be used for loads / stores from the global. - virtual unsigned getMaximalGlobalOffset() const; + unsigned getMaximalGlobalOffset() const override; /// Returns true if a cast between SrcAS and DestAS is a noop. - virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { + bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { // Addrspacecasts are always noops. return true; } /// createFastISel - This method returns a target specific FastISel object, /// or null if the target does not support "fast" ISel. - virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo, - const TargetLibraryInfo *libInfo) const; + FastISel *createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) const override; - Sched::Preference getSchedulingPreference(SDNode *N) const; + Sched::Preference getSchedulingPreference(SDNode *N) const override; - bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const; - bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; + bool + isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const override; + bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; /// isFPImmLegal - Returns true if the target can instruction select the /// specified FP immediate natively. If false, the legalizer will /// materialize the FP immediate as a load from a constant pool. - virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const; + bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; + + bool getTgtMemIntrinsic(IntrinsicInfo &Info, + const CallInst &I, + unsigned Intrinsic) const override; + + /// \brief Returns true if it is beneficial to convert a load of a constant + /// to just the constant itself. + bool shouldConvertConstantLoadToIntImm(const APInt &Imm, + Type *Ty) const override; + + /// \brief Returns true if an argument of type Ty needs to be passed in a + /// contiguous block of registers in calling convention CallConv. 
+ bool functionArgumentNeedsConsecutiveRegisters( + Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override; + + Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr, + AtomicOrdering Ord) const override; + Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val, + Value *Addr, AtomicOrdering Ord) const override; + + bool shouldExpandAtomicInIR(Instruction *Inst) const override; - virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info, - const CallInst &I, - unsigned Intrinsic) const; protected: std::pair<const TargetRegisterClass*, uint8_t> - findRepresentativeClass(MVT VT) const; + findRepresentativeClass(MVT VT) const override; private: /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can @@ -407,6 +418,7 @@ namespace llvm { void addTypeForNEON(MVT VT, MVT PromotedLdStVT, MVT PromotedBitwiseVT); void addDRTypeForNEON(MVT VT); void addQRTypeForNEON(MVT VT); + std::pair<SDValue, SDValue> getARMXALUOOp(SDValue Op, SelectionDAG &DAG, SDValue &ARMcc) const; typedef SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPassVector; void PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG, @@ -420,6 +432,8 @@ namespace llvm { SDValue &Root, SelectionDAG &DAG, SDLoc dl) const; + CallingConv::ID getEffectiveCallingConv(CallingConv::ID CC, + bool isVarArg) const; CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return, bool isVarArg) const; SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, @@ -433,6 +447,7 @@ namespace llvm { SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddressWindows(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG) const; @@ -441,6 +456,7 @@ namespace llvm { TLSModel::Model model) const; SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; @@ -456,6 +472,9 @@ namespace llvm { const ARMSubtarget *ST) const; SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; + + unsigned getRegisterByName(const char* RegName, EVT VT) const override; /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be @@ -466,7 +485,7 @@ namespace llvm { /// lower a pair of fmul and fadd to the latter so it's not clear that there /// would be a gain or that the gain would be worthwhile enough to risk /// correctness bugs. 
- virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const { return false; } + bool isFMAFasterThanFMulAndFAdd(EVT VT) const override { return false; } SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const; @@ -477,12 +496,12 @@ namespace llvm { SmallVectorImpl<SDValue> &InVals, bool isThisReturn, SDValue ThisVal) const; - virtual SDValue + SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const; + SmallVectorImpl<SDValue> &InVals) const override; int StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, SDLoc dl, SDValue &Chain, @@ -491,11 +510,14 @@ namespace llvm { unsigned OffsetFromOrigArg, unsigned ArgOffset, unsigned ArgSize, - bool ForceMutable) const; + bool ForceMutable, + unsigned ByValStoreOffset, + unsigned TotalArgRegsSaveSize) const; void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, SDLoc dl, SDValue &Chain, unsigned ArgOffset, + unsigned TotalArgRegsSaveSize, bool ForceMutable = false) const; void computeRegArea(CCState &CCInfo, MachineFunction &MF, @@ -504,12 +526,12 @@ namespace llvm { unsigned &ArgRegsSize, unsigned &ArgRegsSaveSize) const; - virtual SDValue + SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, - SmallVectorImpl<SDValue> &InVals) const; + SmallVectorImpl<SDValue> &InVals) const override; /// HandleByVal - Target-specific cleanup for ByVal support. - virtual void HandleByVal(CCState *, unsigned &, unsigned) const; + void HandleByVal(CCState *, unsigned &, unsigned) const override; /// IsEligibleForTailCallOptimization - Check whether the call is eligible /// for tail call optimization. Targets which want to do tail call @@ -524,21 +546,21 @@ namespace llvm { const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG& DAG) const; - virtual bool CanLowerReturn(CallingConv::ID CallConv, - MachineFunction &MF, bool isVarArg, - const SmallVectorImpl<ISD::OutputArg> &Outs, - LLVMContext &Context) const; + bool CanLowerReturn(CallingConv::ID CallConv, + MachineFunction &MF, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + LLVMContext &Context) const override; - virtual SDValue + SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, - SDLoc dl, SelectionDAG &DAG) const; + SDLoc dl, SelectionDAG &DAG) const override; - virtual bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const; + bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override; - virtual bool mayBeEmittedAsTailCall(CallInst *CI) const; + bool mayBeEmittedAsTailCall(CallInst *CI) const override; SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &ARMcc, SelectionDAG &DAG, SDLoc dl) const; @@ -548,29 +570,6 @@ namespace llvm { SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const; - MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI, - MachineBasicBlock *BB, - unsigned Size) const; - MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, - MachineBasicBlock *BB, - unsigned Size, - unsigned BinOpcode) const; - MachineBasicBlock *EmitAtomicBinary64(MachineInstr *MI, - MachineBasicBlock *BB, - unsigned Op1, - unsigned Op2, - bool NeedsCarry = false, - bool IsCmpxchg = false, - bool IsMinMax = false, - ARMCC::CondCodes CC = ARMCC::AL) const; - MachineBasicBlock * EmitAtomicBinaryMinMax(MachineInstr *MI, - MachineBasicBlock *BB, - unsigned Size, - bool signExtend, - ARMCC::CondCodes Cond) 
const; - MachineBasicBlock *EmitAtomicLoad64(MachineInstr *MI, - MachineBasicBlock *BB) const; - void SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, MachineBasicBlock *DispatchBB, int FI) const; @@ -582,6 +581,9 @@ namespace llvm { MachineBasicBlock *EmitStructByval(MachineInstr *MI, MachineBasicBlock *MBB) const; + + MachineBasicBlock *EmitLowered__chkstk(MachineInstr *MI, + MachineBasicBlock *MBB) const; }; enum NEONModImmType { @@ -590,7 +592,6 @@ namespace llvm { OtherModImm }; - namespace ARM { FastISel *createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo); diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td index f93504fddb8e..59e9260b81ea 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td @@ -212,25 +212,25 @@ def msr_mask : Operand<i32> { // 32 imm6<5> = '1', 32 - <imm> is encoded in imm6<4:0> // 64 64 - <imm> is encoded in imm6<5:0> def shr_imm8_asm_operand : ImmAsmOperand { let Name = "ShrImm8"; } -def shr_imm8 : Operand<i32> { +def shr_imm8 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm <= 8; }]> { let EncoderMethod = "getShiftRight8Imm"; let DecoderMethod = "DecodeShiftRight8Imm"; let ParserMatchClass = shr_imm8_asm_operand; } def shr_imm16_asm_operand : ImmAsmOperand { let Name = "ShrImm16"; } -def shr_imm16 : Operand<i32> { +def shr_imm16 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm <= 16; }]> { let EncoderMethod = "getShiftRight16Imm"; let DecoderMethod = "DecodeShiftRight16Imm"; let ParserMatchClass = shr_imm16_asm_operand; } def shr_imm32_asm_operand : ImmAsmOperand { let Name = "ShrImm32"; } -def shr_imm32 : Operand<i32> { +def shr_imm32 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm <= 32; }]> { let EncoderMethod = "getShiftRight32Imm"; let DecoderMethod = "DecodeShiftRight32Imm"; let ParserMatchClass = shr_imm32_asm_operand; } def shr_imm64_asm_operand : ImmAsmOperand { let Name = "ShrImm64"; } -def shr_imm64 : Operand<i32> { +def shr_imm64 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm <= 64; }]> { let EncoderMethod = "getShiftRight64Imm"; let DecoderMethod = "DecodeShiftRight64Imm"; let ParserMatchClass = shr_imm64_asm_operand; @@ -329,10 +329,10 @@ class InstThumb<AddrMode am, int sz, IndexMode im, // Pseudo-instructions for alternate assembly syntax (never used by codegen). // These are aliases that require C++ handling to convert to the target // instruction, while InstAliases can be handled directly by tblgen. -class AsmPseudoInst<string asm, dag iops> +class AsmPseudoInst<string asm, dag iops, dag oops = (outs)> : InstTemplate<AddrModeNone, 0, IndexModeNone, Pseudo, GenericDomain, "", NoItinerary> { - let OutOperandList = (outs); + let OutOperandList = oops; let InOperandList = iops; let Pattern = []; let isCodeGenOnly = 0; // So we get asm matcher for it. 
@@ -340,16 +340,16 @@ class AsmPseudoInst<string asm, dag iops> let isPseudo = 1; } -class ARMAsmPseudo<string asm, dag iops> : AsmPseudoInst<asm, iops>, - Requires<[IsARM]>; -class tAsmPseudo<string asm, dag iops> : AsmPseudoInst<asm, iops>, - Requires<[IsThumb]>; -class t2AsmPseudo<string asm, dag iops> : AsmPseudoInst<asm, iops>, - Requires<[IsThumb2]>; -class VFP2AsmPseudo<string asm, dag iops> : AsmPseudoInst<asm, iops>, - Requires<[HasVFP2]>; -class NEONAsmPseudo<string asm, dag iops> : AsmPseudoInst<asm, iops>, - Requires<[HasNEON]>; +class ARMAsmPseudo<string asm, dag iops, dag oops = (outs)> + : AsmPseudoInst<asm, iops, oops>, Requires<[IsARM]>; +class tAsmPseudo<string asm, dag iops, dag oops = (outs)> + : AsmPseudoInst<asm, iops, oops>, Requires<[IsThumb]>; +class t2AsmPseudo<string asm, dag iops, dag oops = (outs)> + : AsmPseudoInst<asm, iops, oops>, Requires<[IsThumb2]>; +class VFP2AsmPseudo<string asm, dag iops, dag oops = (outs)> + : AsmPseudoInst<asm, iops, oops>, Requires<[HasVFP2]>; +class NEONAsmPseudo<string asm, dag iops, dag oops = (outs)> + : AsmPseudoInst<asm, iops, oops>, Requires<[HasNEON]>; // Pseudo instructions for the code generator. class PseudoInst<dag oops, dag iops, InstrItinClass itin, list<dag> pattern> @@ -477,6 +477,10 @@ class AXI<dag oops, dag iops, Format f, InstrItinClass itin, string asm, list<dag> pattern> : XI<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin, asm, "", pattern>; +class AXIM<dag oops, dag iops, AddrMode am, Format f, InstrItinClass itin, + string asm, list<dag> pattern> + : XI<oops, iops, am, 4, IndexModeNone, f, itin, + asm, "", pattern>; class AInoP<dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, list<dag> pattern> : InoP<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin, @@ -2025,7 +2029,7 @@ class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, // Same as N2V but not predicated. 
class N2Vnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6, dag oops, dag iops, InstrItinClass itin, string OpcodeStr, - string Dt, ValueType ResTy, ValueType OpTy, list<dag> pattern> + string Dt, list<dag> pattern> : NeonInp<oops, iops, AddrModeNone, IndexModeNone, N2RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vm", "", pattern> { bits<5> Vd; @@ -2134,8 +2138,7 @@ class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4, class N3Vnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4, dag oops, dag iops,Format f, InstrItinClass itin, - string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, - SDPatternOperator IntOp, bit Commutable, list<dag> pattern> + string OpcodeStr, string Dt, list<dag> pattern> : NeonInp<oops, iops, AddrModeNone, IndexModeNone, f, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", pattern> { bits<5> Vd; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp index df867b49ab57..f235ac225848 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp @@ -97,7 +97,7 @@ namespace { static char ID; ARMCGBR() : MachineFunctionPass(ID) {} - virtual bool runOnMachineFunction(MachineFunction &MF) { + bool runOnMachineFunction(MachineFunction &MF) override { ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); if (AFI->getGlobalBaseReg() == 0) return false; @@ -146,11 +146,11 @@ namespace { return true; } - virtual const char *getPassName() const { + const char *getPassName() const override { return "ARM PIC Global Base Reg Initialization"; } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); } diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h index 5d3e059b7038..b09958aaa950 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h @@ -14,10 +14,8 @@ #ifndef ARMINSTRUCTIONINFO_H #define ARMINSTRUCTIONINFO_H -#include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMRegisterInfo.h" -#include "ARMSubtarget.h" namespace llvm { class ARMSubtarget; @@ -28,17 +26,17 @@ public: explicit ARMInstrInfo(const ARMSubtarget &STI); /// getNoopForMachoTarget - Return the noop instruction to use for a noop. - void getNoopForMachoTarget(MCInst &NopInst) const; + void getNoopForMachoTarget(MCInst &NopInst) const override; // Return the non-pre/post incrementing version of 'Opc'. Return 0 // if there is not such an opcode. - unsigned getUnindexedOpcode(unsigned Opc) const; + unsigned getUnindexedOpcode(unsigned Opc) const override; /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As /// such, whenever a client has an instance of instruction info, it should /// always be able to get register info as well (through this method). /// - const ARMRegisterInfo &getRegisterInfo() const { return RI; } + const ARMRegisterInfo &getRegisterInfo() const override { return RI; } }; } diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td index 7a14b8ebf11d..a02d997f3c4e 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -95,7 +95,6 @@ def ARMSmlal : SDNode<"ARMISD::SMLAL", SDT_ARM64bitmlal>; // Node definitions. 
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>; -def ARMWrapperDYN : SDNode<"ARMISD::WrapperDYN", SDTIntUnaryOp>; def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>; def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>; @@ -187,7 +186,8 @@ def ARMvminnm : SDNode<"ARMISD::VMINNM", SDT_ARMVMINNM, []>; def HasV4T : Predicate<"Subtarget->hasV4TOps()">, AssemblerPredicate<"HasV4TOps", "armv4t">; def NoV4T : Predicate<"!Subtarget->hasV4TOps()">; -def HasV5T : Predicate<"Subtarget->hasV5TOps()">; +def HasV5T : Predicate<"Subtarget->hasV5TOps()">, + AssemblerPredicate<"HasV5TOps", "armv5t">; def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">, AssemblerPredicate<"HasV5TEOps", "armv5te">; def HasV6 : Predicate<"Subtarget->hasV6Ops()">, @@ -244,6 +244,7 @@ def HasMP : Predicate<"Subtarget->hasMPExtension()">, def HasTrustZone : Predicate<"Subtarget->hasTrustZone()">, AssemblerPredicate<"FeatureTrustZone", "TrustZone">; +def HasZCZ : Predicate<"Subtarget->hasZeroCycleZeroing()">; def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">; def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">; def IsThumb : Predicate<"Subtarget->isThumb()">, @@ -261,14 +262,16 @@ def IsARM : Predicate<"!Subtarget->isThumb()">, AssemblerPredicate<"!ModeThumb", "arm-mode">; def IsIOS : Predicate<"Subtarget->isTargetIOS()">; def IsNotIOS : Predicate<"!Subtarget->isTargetIOS()">; +def IsMachO : Predicate<"Subtarget->isTargetMachO()">; +def IsNotMachO : Predicate<"!Subtarget->isTargetMachO()">; def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">; def UseNaClTrap : Predicate<"Subtarget->useNaClTrap()">, AssemblerPredicate<"FeatureNaClTrap", "NaCl">; def DontUseNaClTrap : Predicate<"!Subtarget->useNaClTrap()">; // FIXME: Eventually this will be just "hasV6T2Ops". -def UseMovt : Predicate<"Subtarget->useMovt()">; -def DontUseMovt : Predicate<"!Subtarget->useMovt()">; +def UseMovt : Predicate<"Subtarget->useMovt(*MF)">; +def DontUseMovt : Predicate<"!Subtarget->useMovt(*MF)">; def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">; def UseMulOps : Predicate<"Subtarget->useMulOps()">; @@ -276,7 +279,8 @@ def UseMulOps : Predicate<"Subtarget->useMulOps()">; // But only select them if more precision in FP computation is allowed. // Do not use them for Darwin platforms. def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion ==" - " FPOpFusion::Fast) && " + " FPOpFusion::Fast && " + " Subtarget->hasVFP4()) && " "!Subtarget->isTargetDarwin()">; def DontUseFusedMAC : Predicate<"!(TM.Options.AllowFPOpFusion ==" " FPOpFusion::Fast &&" @@ -489,7 +493,7 @@ def neon_vcvt_imm32 : Operand<i32> { // rot_imm: An integer that encodes a rotate amount. Must be 8, 16, or 24. def rot_imm_XFORM: SDNodeXForm<imm, [{ switch (N->getZExtValue()){ - default: assert(0); + default: llvm_unreachable(nullptr); case 0: return CurDAG->getTargetConstant(0, MVT::i32); case 8: return CurDAG->getTargetConstant(1, MVT::i32); case 16: return CurDAG->getTargetConstant(2, MVT::i32); @@ -590,7 +594,7 @@ def so_imm2part : PatLeaf<(imm), [{ /// arm_i32imm - True for +V6T2, or true only if so_imm2part is true. /// def arm_i32imm : PatLeaf<(imm), [{ - if (Subtarget->hasV6T2Ops()) + if (Subtarget->useMovt(*MF)) return true; return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue()); }]>; @@ -987,6 +991,81 @@ def addrmode6oneL32 : Operand<i32>, let EncoderMethod = "getAddrMode6OneLane32AddressOpValue"; } +// Base class for addrmode6 with specific alignment restrictions. 
+class AddrMode6Align : Operand<i32>, + ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{ + let PrintMethod = "printAddrMode6Operand"; + let MIOperandInfo = (ops GPR:$addr, i32imm:$align); + let EncoderMethod = "getAddrMode6AddressOpValue"; + let DecoderMethod = "DecodeAddrMode6Operand"; +} + +// Special version of addrmode6 to handle no allowed alignment encoding for +// VLD/VST instructions and checking the alignment is not specified. +def AddrMode6AlignNoneAsmOperand : AsmOperandClass { + let Name = "AlignedMemoryNone"; + let DiagnosticType = "AlignedMemoryRequiresNone"; +} +def addrmode6alignNone : AddrMode6Align { + // The alignment specifier can only be omitted. + let ParserMatchClass = AddrMode6AlignNoneAsmOperand; +} + +// Special version of addrmode6 to handle 16-bit alignment encoding for +// VLD/VST instructions and checking the alignment value. +def AddrMode6Align16AsmOperand : AsmOperandClass { + let Name = "AlignedMemory16"; + let DiagnosticType = "AlignedMemoryRequires16"; +} +def addrmode6align16 : AddrMode6Align { + // The alignment specifier can only be 16 or omitted. + let ParserMatchClass = AddrMode6Align16AsmOperand; +} + +// Special version of addrmode6 to handle 32-bit alignment encoding for +// VLD/VST instructions and checking the alignment value. +def AddrMode6Align32AsmOperand : AsmOperandClass { + let Name = "AlignedMemory32"; + let DiagnosticType = "AlignedMemoryRequires32"; +} +def addrmode6align32 : AddrMode6Align { + // The alignment specifier can only be 32 or omitted. + let ParserMatchClass = AddrMode6Align32AsmOperand; +} + +// Special version of addrmode6 to handle 64-bit alignment encoding for +// VLD/VST instructions and checking the alignment value. +def AddrMode6Align64AsmOperand : AsmOperandClass { + let Name = "AlignedMemory64"; + let DiagnosticType = "AlignedMemoryRequires64"; +} +def addrmode6align64 : AddrMode6Align { + // The alignment specifier can only be 64 or omitted. + let ParserMatchClass = AddrMode6Align64AsmOperand; +} + +// Special version of addrmode6 to handle 64-bit or 128-bit alignment encoding +// for VLD/VST instructions and checking the alignment value. +def AddrMode6Align64or128AsmOperand : AsmOperandClass { + let Name = "AlignedMemory64or128"; + let DiagnosticType = "AlignedMemoryRequires64or128"; +} +def addrmode6align64or128 : AddrMode6Align { + // The alignment specifier can only be 64, 128 or omitted. + let ParserMatchClass = AddrMode6Align64or128AsmOperand; +} + +// Special version of addrmode6 to handle 64-bit, 128-bit or 256-bit alignment +// encoding for VLD/VST instructions and checking the alignment value. +def AddrMode6Align64or128or256AsmOperand : AsmOperandClass { + let Name = "AlignedMemory64or128or256"; + let DiagnosticType = "AlignedMemoryRequires64or128or256"; +} +def addrmode6align64or128or256 : AddrMode6Align { + // The alignment specifier can only be 64, 128, 256 or omitted. + let ParserMatchClass = AddrMode6Align64or128or256AsmOperand; +} + // Special version of addrmode6 to handle alignment encoding for VLD-dup // instructions, specifically VLD4-dup. def addrmode6dup : Operand<i32>, @@ -999,6 +1078,69 @@ def addrmode6dup : Operand<i32>, let ParserMatchClass = AddrMode6AsmOperand; } +// Base class for addrmode6dup with specific alignment restrictions. 
+class AddrMode6DupAlign : Operand<i32>, + ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{ + let PrintMethod = "printAddrMode6Operand"; + let MIOperandInfo = (ops GPR:$addr, i32imm); + let EncoderMethod = "getAddrMode6DupAddressOpValue"; +} + +// Special version of addrmode6 to handle no allowed alignment encoding for +// VLD-dup instruction and checking the alignment is not specified. +def AddrMode6dupAlignNoneAsmOperand : AsmOperandClass { + let Name = "DupAlignedMemoryNone"; + let DiagnosticType = "DupAlignedMemoryRequiresNone"; +} +def addrmode6dupalignNone : AddrMode6DupAlign { + // The alignment specifier can only be omitted. + let ParserMatchClass = AddrMode6dupAlignNoneAsmOperand; +} + +// Special version of addrmode6 to handle 16-bit alignment encoding for VLD-dup +// instruction and checking the alignment value. +def AddrMode6dupAlign16AsmOperand : AsmOperandClass { + let Name = "DupAlignedMemory16"; + let DiagnosticType = "DupAlignedMemoryRequires16"; +} +def addrmode6dupalign16 : AddrMode6DupAlign { + // The alignment specifier can only be 16 or omitted. + let ParserMatchClass = AddrMode6dupAlign16AsmOperand; +} + +// Special version of addrmode6 to handle 32-bit alignment encoding for VLD-dup +// instruction and checking the alignment value. +def AddrMode6dupAlign32AsmOperand : AsmOperandClass { + let Name = "DupAlignedMemory32"; + let DiagnosticType = "DupAlignedMemoryRequires32"; +} +def addrmode6dupalign32 : AddrMode6DupAlign { + // The alignment specifier can only be 32 or omitted. + let ParserMatchClass = AddrMode6dupAlign32AsmOperand; +} + +// Special version of addrmode6 to handle 64-bit alignment encoding for VLD +// instructions and checking the alignment value. +def AddrMode6dupAlign64AsmOperand : AsmOperandClass { + let Name = "DupAlignedMemory64"; + let DiagnosticType = "DupAlignedMemoryRequires64"; +} +def addrmode6dupalign64 : AddrMode6DupAlign { + // The alignment specifier can only be 64 or omitted. + let ParserMatchClass = AddrMode6dupAlign64AsmOperand; +} + +// Special version of addrmode6 to handle 64-bit or 128-bit alignment encoding +// for VLD instructions and checking the alignment value. +def AddrMode6dupAlign64or128AsmOperand : AsmOperandClass { + let Name = "DupAlignedMemory64or128"; + let DiagnosticType = "DupAlignedMemoryRequires64or128"; +} +def addrmode6dupalign64or128 : AddrMode6DupAlign { + // The alignment specifier can only be 64, 128 or omitted. 
+ let ParserMatchClass = AddrMode6dupAlign64or128AsmOperand; +} + // addrmodepc := pc + reg // def addrmodepc : Operand<i32>, @@ -1685,7 +1827,8 @@ PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary, } def HINT : AI<(outs), (ins imm0_239:$imm), MiscFrm, NoItinerary, - "hint", "\t$imm", []>, Requires<[IsARM, HasV6]> { + "hint", "\t$imm", [(int_arm_hint imm0_239:$imm)]>, + Requires<[IsARM, HasV6]> { bits<8> imm; let Inst{27-8} = 0b00110010000011110000; let Inst{7-0} = imm; @@ -1698,8 +1841,6 @@ def : InstAlias<"wfi$p", (HINT 3, pred:$p)>, Requires<[IsARM, HasV6T2]>; def : InstAlias<"sev$p", (HINT 4, pred:$p)>, Requires<[IsARM, HasV6T2]>; def : InstAlias<"sevl$p", (HINT 5, pred:$p)>, Requires<[IsARM, HasV8]>; -def : Pat<(int_arm_sevl), (HINT 5)>; - def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel", "\t$Rd, $Rn, $Rm", []>, Requires<[IsARM, HasV6]> { bits<4> Rd; @@ -1725,6 +1866,8 @@ def BKPT : AInoP<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary, let Inst{31-28} = 0xe; // AL let Inst{7-4} = 0b0111; } +// default immediate for breakpoint mnemonic +def : InstAlias<"bkpt", (BKPT 0)>, Requires<[IsARM]>; def HLT : AInoP<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary, "hlt", "\t$val", []>, Requires<[IsARM, HasV8]> { @@ -1770,8 +1913,8 @@ let imod = 0, iflags = 0, M = 1 in // Preload signals the memory system of possible future data/instruction access. multiclass APreLoad<bits<1> read, bits<1> data, string opc> { - def i12 : AXI<(outs), (ins addrmode_imm12:$addr), MiscFrm, IIC_Preload, - !strconcat(opc, "\t$addr"), + def i12 : AXIM<(outs), (ins addrmode_imm12:$addr), AddrMode_i12, MiscFrm, + IIC_Preload, !strconcat(opc, "\t$addr"), [(ARMPreload addrmode_imm12:$addr, (i32 read), (i32 data))]>, Sched<[WritePreLd]> { bits<4> Rt; @@ -1824,6 +1967,18 @@ def DBG : AI<(outs), (ins imm0_15:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt", let Inst{3-0} = opt; } +// A8.8.247 UDF - Undefined (Encoding A1) +def UDF : AInoP<(outs), (ins imm0_65535:$imm16), MiscFrm, NoItinerary, + "udf", "\t$imm16", [(int_arm_undefined imm0_65535:$imm16)]> { + bits<16> imm16; + let Inst{31-28} = 0b1110; // AL + let Inst{27-25} = 0b011; + let Inst{24-20} = 0b11111; + let Inst{19-8} = imm16{15-4}; + let Inst{7-4} = 0b1111; + let Inst{3-0} = imm16{3-0}; +} + /* * A5.4 Permanently UNDEFINED instructions. 
* @@ -2272,11 +2427,10 @@ def LDRSB : AI3ld<0b1101, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm, [(set GPR:$Rt, (sextloadi8 addrmode3:$addr))]>; let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { -// Load doubleword -def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rd, GPR:$dst2), - (ins addrmode3:$addr), LdMiscFrm, - IIC_iLoad_d_r, "ldrd", "\t$Rd, $dst2, $addr", - []>, Requires<[IsARM, HasV5TE]>; + // Load doubleword + def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rt, GPR:$Rt2), (ins addrmode3:$addr), + LdMiscFrm, IIC_iLoad_d_r, "ldrd", "\t$Rt, $Rt2, $addr", []>, + Requires<[IsARM, HasV5TE]>; } def LDA : AIldracq<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr), @@ -2441,11 +2595,11 @@ def LDRT_POST_REG : AI2ldstidx<1, 0, 0, (outs GPR:$Rt, GPR:$Rn_wb), let DecoderMethod = "DecodeAddrMode2IdxInstruction"; } -def LDRT_POST_IMM : AI2ldstidx<1, 0, 0, (outs GPR:$Rt, GPR:$Rn_wb), - (ins addr_offset_none:$addr, am2offset_imm:$offset), - IndexModePost, LdFrm, IIC_iLoad_ru, - "ldrt", "\t$Rt, $addr, $offset", - "$addr.base = $Rn_wb", []> { +def LDRT_POST_IMM + : AI2ldstidx<1, 0, 0, (outs GPR:$Rt, GPR:$Rn_wb), + (ins addr_offset_none:$addr, am2offset_imm:$offset), + IndexModePost, LdFrm, IIC_iLoad_ru, + "ldrt", "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb", []> { // {12} isAdd // {11-0} imm12/Rm bits<14> offset; @@ -2477,11 +2631,11 @@ def LDRBT_POST_REG : AI2ldstidx<1, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), let DecoderMethod = "DecodeAddrMode2IdxInstruction"; } -def LDRBT_POST_IMM : AI2ldstidx<1, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), - (ins addr_offset_none:$addr, am2offset_imm:$offset), - IndexModePost, LdFrm, IIC_iLoad_bh_ru, - "ldrbt", "\t$Rt, $addr, $offset", - "$addr.base = $Rn_wb", []> { +def LDRBT_POST_IMM + : AI2ldstidx<1, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), + (ins addr_offset_none:$addr, am2offset_imm:$offset), + IndexModePost, LdFrm, IIC_iLoad_bh_ru, + "ldrbt", "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb", []> { // {12} isAdd // {11-0} imm12/Rm bits<14> offset; @@ -2524,6 +2678,14 @@ defm LDRHT : AI3ldrT<0b1011, "ldrht">; defm LDRSHT : AI3ldrT<0b1111, "ldrsht">; } +def LDRT_POST + : ARMAsmPseudo<"ldrt${q} $Rt, $addr", (ins addr_offset_none:$addr, pred:$q), + (outs GPR:$Rt)>; + +def LDRBT_POST + : ARMAsmPseudo<"ldrbt${q} $Rt, $addr", (ins addr_offset_none:$addr, pred:$q), + (outs GPR:$Rt)>; + // Store // Stores with truncate @@ -2532,12 +2694,12 @@ def STRH : AI3str<0b1011, (outs), (ins GPR:$Rt, addrmode3:$addr), StMiscFrm, [(truncstorei16 GPR:$Rt, addrmode3:$addr)]>; // Store doubleword -let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in -def STRD : AI3str<0b1111, (outs), (ins GPR:$Rt, GPR:$src2, addrmode3:$addr), - StMiscFrm, IIC_iStore_d_r, - "strd", "\t$Rt, $src2, $addr", []>, - Requires<[IsARM, HasV5TE]> { - let Inst{21} = 0; +let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { + def STRD : AI3str<0b1111, (outs), (ins GPR:$Rt, GPR:$Rt2, addrmode3:$addr), + StMiscFrm, IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", []>, + Requires<[IsARM, HasV5TE]> { + let Inst{21} = 0; + } } // Indexed stores @@ -2546,7 +2708,8 @@ multiclass AI2_stridx<bit isByte, string opc, def _PRE_IMM : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb), (ins GPR:$Rt, addrmode_imm12_pre:$addr), IndexModePre, StFrm, iii, - opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { + opc, "\t$Rt, $addr!", + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { bits<17> addr; let Inst{25} = 0; let Inst{23} = addr{12}; // U (add = ('U' == 1)) @@ -2558,7 +2721,8 @@ multiclass 
AI2_stridx<bit isByte, string opc, def _PRE_REG : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb), (ins GPR:$Rt, ldst_so_reg:$addr), IndexModePre, StFrm, iir, - opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { + opc, "\t$Rt, $addr!", + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { bits<17> addr; let Inst{25} = 1; let Inst{23} = addr{12}; // U (add = ('U' == 1)) @@ -2571,7 +2735,7 @@ multiclass AI2_stridx<bit isByte, string opc, (ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset), IndexModePost, StFrm, iir, opc, "\t$Rt, $addr, $offset", - "$addr.base = $Rn_wb", []> { + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { // {12} isAdd // {11-0} imm12/Rm bits<14> offset; @@ -2589,7 +2753,7 @@ multiclass AI2_stridx<bit isByte, string opc, (ins GPR:$Rt, addr_offset_none:$addr, am2offset_imm:$offset), IndexModePost, StFrm, iii, opc, "\t$Rt, $addr, $offset", - "$addr.base = $Rn_wb", []> { + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { // {12} isAdd // {11-0} imm12/Rm bits<14> offset; @@ -2746,11 +2910,11 @@ def STRBT_POST_REG : AI2ldstidx<0, 1, 0, (outs GPR:$Rn_wb), let DecoderMethod = "DecodeAddrMode2IdxInstruction"; } -def STRBT_POST_IMM : AI2ldstidx<0, 1, 0, (outs GPR:$Rn_wb), - (ins GPR:$Rt, addr_offset_none:$addr, am2offset_imm:$offset), - IndexModePost, StFrm, IIC_iStore_bh_ru, - "strbt", "\t$Rt, $addr, $offset", - "$addr.base = $Rn_wb", []> { +def STRBT_POST_IMM + : AI2ldstidx<0, 1, 0, (outs GPR:$Rn_wb), + (ins GPR:$Rt, addr_offset_none:$addr, am2offset_imm:$offset), + IndexModePost, StFrm, IIC_iStore_bh_ru, + "strbt", "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb", []> { // {12} isAdd // {11-0} imm12/Rm bits<14> offset; @@ -2763,6 +2927,10 @@ def STRBT_POST_IMM : AI2ldstidx<0, 1, 0, (outs GPR:$Rn_wb), let DecoderMethod = "DecodeAddrMode2IdxInstruction"; } +def STRBT_POST + : ARMAsmPseudo<"strbt${q} $Rt, $addr", + (ins GPR:$Rt, addr_offset_none:$addr, pred:$q)>; + let mayStore = 1, neverHasSideEffects = 1 in { def STRT_POST_REG : AI2ldstidx<0, 0, 0, (outs GPR:$Rn_wb), (ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset), @@ -2783,11 +2951,11 @@ def STRT_POST_REG : AI2ldstidx<0, 0, 0, (outs GPR:$Rn_wb), let DecoderMethod = "DecodeAddrMode2IdxInstruction"; } -def STRT_POST_IMM : AI2ldstidx<0, 0, 0, (outs GPR:$Rn_wb), - (ins GPR:$Rt, addr_offset_none:$addr, am2offset_imm:$offset), - IndexModePost, StFrm, IIC_iStore_ru, - "strt", "\t$Rt, $addr, $offset", - "$addr.base = $Rn_wb", []> { +def STRT_POST_IMM + : AI2ldstidx<0, 0, 0, (outs GPR:$Rn_wb), + (ins GPR:$Rt, addr_offset_none:$addr, am2offset_imm:$offset), + IndexModePost, StFrm, IIC_iStore_ru, + "strt", "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb", []> { // {12} isAdd // {11-0} imm12/Rm bits<14> offset; @@ -2801,6 +2969,9 @@ def STRT_POST_IMM : AI2ldstidx<0, 0, 0, (outs GPR:$Rn_wb), } } +def STRT_POST + : ARMAsmPseudo<"strt${q} $Rt, $addr", + (ins GPR:$Rt, addr_offset_none:$addr, pred:$q)>; multiclass AI3strT<bits<4> op, string opc> { def i : AI3ldstidxT<op, 0, (outs GPR:$base_wb), @@ -3165,8 +3336,8 @@ def SBFX : I<(outs GPRnopc:$Rd), let Inst{3-0} = Rn; } -def UBFX : I<(outs GPR:$Rd), - (ins GPR:$Rn, imm0_31:$lsb, imm1_32:$width), +def UBFX : I<(outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, imm0_31:$lsb, imm1_32:$width), AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi, "ubfx", "\t$Rd, $Rn, $lsb, $width", "", []>, Requires<[IsARM, HasV6T2]> { @@ -3601,21 +3772,22 @@ def MULv5: ARMPseudoExpand<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, Requires<[IsARM, NoV6, UseMulOps]>; } -def MLA : AsMul1I32<0b0000001, (outs 
GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), +def MLA : AsMul1I32<0b0000001, (outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra), IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra", - [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>, - Requires<[IsARM, HasV6, UseMulOps]> { + [(set GPRnopc:$Rd, (add (mul GPRnopc:$Rn, GPRnopc:$Rm), GPRnopc:$Ra))]>, + Requires<[IsARM, HasV6, UseMulOps]> { bits<4> Ra; let Inst{15-12} = Ra; } let Constraints = "@earlyclobber $Rd" in -def MLAv5: ARMPseudoExpand<(outs GPR:$Rd), - (ins GPR:$Rn, GPR:$Rm, GPR:$Ra, pred:$p, cc_out:$s), - 4, IIC_iMAC32, - [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))], - (MLA GPR:$Rd, GPR:$Rn, GPR:$Rm, GPR:$Ra, pred:$p, cc_out:$s)>, - Requires<[IsARM, NoV6]>; +def MLAv5: ARMPseudoExpand<(outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra, + pred:$p, cc_out:$s), 4, IIC_iMAC32, + [(set GPRnopc:$Rd, (add (mul GPRnopc:$Rn, GPRnopc:$Rm), GPRnopc:$Ra))], + (MLA GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra, pred:$p, cc_out:$s)>, + Requires<[IsARM, NoV6]>; def MLS : AMul1I<0b0000011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), IIC_iMAC32, "mls", "\t$Rd, $Rn, $Rm, $Ra", @@ -3683,7 +3855,8 @@ def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi), let Inst{3-0} = Rn; } -let Constraints = "$RLo = $RdLo,$RHi = $RdHi" in { +let Constraints = + "@earlyclobber $RdLo,@earlyclobber $RdHi,$RLo = $RdLo,$RHi = $RdHi" in { def SMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, pred:$p, cc_out:$s), 4, IIC_iMAC64, [], @@ -3698,14 +3871,6 @@ def UMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), Requires<[IsARM, NoV6]>; } -let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in { -def UMAALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm, pred:$p), - 4, IIC_iMAC64, [], - (UMAAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p)>, - Requires<[IsARM, NoV6]>; -} - } // neverHasSideEffects // Most significant word multiply @@ -3972,6 +4137,11 @@ def REV16 : AMiscA1I<0b01101011, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm), Requires<[IsARM, HasV6]>, Sched<[WriteALU]>; +def : ARMV6Pat<(srl (bswap (extloadi16 addrmode3:$addr)), (i32 16)), + (REV16 (LDRH addrmode3:$addr))>; +def : ARMV6Pat<(truncstorei16 (srl (bswap GPR:$Rn), (i32 16)), addrmode3:$addr), + (STRH (REV16 GPR:$Rn), addrmode3:$addr)>; + let AddedComplexity = 5 in def REVSH : AMiscA1I<0b01101111, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm), IIC_iUNAr, "revsh", "\t$Rd, $Rm", @@ -4276,7 +4446,7 @@ def instsyncb_opt : Operand<i32> { let DecoderMethod = "DecodeInstSyncBarrierOption"; } -// memory barriers protect the atomic sequences +// Memory barriers protect the atomic sequences let hasSideEffects = 1 in { def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, "dmb", "\t$opt", [(int_arm_dmb (i32 imm0_15:$opt))]>, @@ -4285,7 +4455,6 @@ def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, let Inst{31-4} = 0xf57ff05; let Inst{3-0} = opt; } -} def DSB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, "dsb", "\t$opt", [(int_arm_dsb (i32 imm0_15:$opt))]>, @@ -4297,226 +4466,19 @@ def DSB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, // ISB has only full system option def ISB : AInoP<(outs), (ins instsyncb_opt:$opt), MiscFrm, NoItinerary, - "isb", "\t$opt", []>, + "isb", "\t$opt", [(int_arm_isb (i32 imm0_15:$opt))]>, Requires<[IsARM, HasDB]> { bits<4> opt; let Inst{31-4} = 0xf57ff06; let Inst{3-0} = opt; } +} let usesCustomInserter = 1, Defs = [CPSR] in 
{ // Pseudo instruction that combines movs + predicated rsbmi // to implement integer ABS def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>; - -// Atomic pseudo-insts which will be lowered to ldrex/strex loops. -// (64-bit pseudos use a hand-written selection code). - let mayLoad = 1, mayStore = 1 in { - def ATOMIC_LOAD_ADD_I8 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_SUB_I8 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_AND_I8 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_OR_I8 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_XOR_I8 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_NAND_I8 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_MIN_I8 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$val, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_MAX_I8 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$val, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_UMIN_I8 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$val, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_UMAX_I8 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$val, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_SWAP_I8 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$new, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_CMP_SWAP_I8 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$old, GPR:$new, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_ADD_I16 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_SUB_I16 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_AND_I16 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_OR_I16 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_XOR_I16 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_NAND_I16 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_MIN_I16 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$val, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_MAX_I16 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$val, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_UMIN_I16 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$val, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_UMAX_I16 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$val, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_SWAP_I16 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$new, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_CMP_SWAP_I16 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$old, GPR:$new, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_ADD_I32 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_SUB_I32 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, 
GPR:$incr, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_AND_I32 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_OR_I32 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_XOR_I32 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_NAND_I32 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_MIN_I32 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$val, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_MAX_I32 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$val, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_UMIN_I32 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$val, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_UMAX_I32 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$val, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_SWAP_I32 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$new, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_CMP_SWAP_I32 : PseudoInst< - (outs GPR:$dst), - (ins GPR:$ptr, GPR:$old, GPR:$new, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_ADD_I64 : PseudoInst< - (outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_SUB_I64 : PseudoInst< - (outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_AND_I64 : PseudoInst< - (outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_OR_I64 : PseudoInst< - (outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_XOR_I64 : PseudoInst< - (outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_NAND_I64 : PseudoInst< - (outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_MIN_I64 : PseudoInst< - (outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_MAX_I64 : PseudoInst< - (outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_UMIN_I64 : PseudoInst< - (outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_LOAD_UMAX_I64 : PseudoInst< - (outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_SWAP_I64 : PseudoInst< - (outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), - NoItinerary, []>; - def ATOMIC_CMP_SWAP_I64 : PseudoInst< - (outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$cmp1, GPR:$cmp2, - GPR:$set1, GPR:$set2, i32imm:$ordering), - NoItinerary, []>; - } - let mayLoad = 1 in - def ATOMIC_LOAD_I64 : PseudoInst< - (outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, i32imm:$ordering), - NoItinerary, []>; - let mayStore = 1 in - def ATOMIC_STORE_I64 : PseudoInst< - (outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), - NoItinerary, []>; } let usesCustomInserter = 1 in { @@ -4553,6 +4515,33 @@ def strex_4 : PatFrag<(ops node:$val, 
node:$ptr), return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32; }]>; +def ldaex_1 : PatFrag<(ops node:$ptr), (int_arm_ldaex node:$ptr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8; +}]>; + +def ldaex_2 : PatFrag<(ops node:$ptr), (int_arm_ldaex node:$ptr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16; +}]>; + +def ldaex_4 : PatFrag<(ops node:$ptr), (int_arm_ldaex node:$ptr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32; +}]>; + +def stlex_1 : PatFrag<(ops node:$val, node:$ptr), + (int_arm_stlex node:$val, node:$ptr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8; +}]>; + +def stlex_2 : PatFrag<(ops node:$val, node:$ptr), + (int_arm_stlex node:$val, node:$ptr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16; +}]>; + +def stlex_4 : PatFrag<(ops node:$val, node:$ptr), + (int_arm_stlex node:$val, node:$ptr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32; +}]>; + let mayLoad = 1 in { def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr), NoItinerary, "ldrexb", "\t$Rt, $addr", @@ -4570,11 +4559,14 @@ def LDREXD : AIldrex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr), } def LDAEXB : AIldaex<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr), - NoItinerary, "ldaexb", "\t$Rt, $addr", []>; + NoItinerary, "ldaexb", "\t$Rt, $addr", + [(set GPR:$Rt, (ldaex_1 addr_offset_none:$addr))]>; def LDAEXH : AIldaex<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr), - NoItinerary, "ldaexh", "\t$Rt, $addr", []>; + NoItinerary, "ldaexh", "\t$Rt, $addr", + [(set GPR:$Rt, (ldaex_2 addr_offset_none:$addr))]>; def LDAEX : AIldaex<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr), - NoItinerary, "ldaex", "\t$Rt, $addr", []>; + NoItinerary, "ldaex", "\t$Rt, $addr", + [(set GPR:$Rt, (ldaex_4 addr_offset_none:$addr))]>; let hasExtraDefRegAllocReq = 1 in def LDAEXD : AIldaex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr), NoItinerary, "ldaexd", "\t$Rt, $addr", []> { @@ -4585,13 +4577,16 @@ def LDAEXD : AIldaex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr), let mayStore = 1, Constraints = "@earlyclobber $Rd" in { def STREXB: AIstrex<0b10, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), NoItinerary, "strexb", "\t$Rd, $Rt, $addr", - [(set GPR:$Rd, (strex_1 GPR:$Rt, addr_offset_none:$addr))]>; + [(set GPR:$Rd, (strex_1 GPR:$Rt, + addr_offset_none:$addr))]>; def STREXH: AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), NoItinerary, "strexh", "\t$Rd, $Rt, $addr", - [(set GPR:$Rd, (strex_2 GPR:$Rt, addr_offset_none:$addr))]>; + [(set GPR:$Rd, (strex_2 GPR:$Rt, + addr_offset_none:$addr))]>; def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), NoItinerary, "strex", "\t$Rd, $Rt, $addr", - [(set GPR:$Rd, (strex_4 GPR:$Rt, addr_offset_none:$addr))]>; + [(set GPR:$Rd, (strex_4 GPR:$Rt, + addr_offset_none:$addr))]>; let hasExtraSrcRegAllocReq = 1 in def STREXD : AIstrex<0b01, (outs GPR:$Rd), (ins GPRPairOp:$Rt, addr_offset_none:$addr), @@ -4600,13 +4595,16 @@ def STREXD : AIstrex<0b01, (outs GPR:$Rd), } def STLEXB: AIstlex<0b10, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), NoItinerary, "stlexb", "\t$Rd, $Rt, $addr", - []>; + [(set GPR:$Rd, + (stlex_1 GPR:$Rt, addr_offset_none:$addr))]>; def STLEXH: AIstlex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), NoItinerary, "stlexh", "\t$Rd, $Rt, $addr", - []>; + [(set GPR:$Rd, + (stlex_2 GPR:$Rt, addr_offset_none:$addr))]>; def STLEX : 
AIstlex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), NoItinerary, "stlex", "\t$Rd, $Rt, $addr", - []>; + [(set GPR:$Rd, + (stlex_4 GPR:$Rt, addr_offset_none:$addr))]>; let hasExtraSrcRegAllocReq = 1 in def STLEXD : AIstlex<0b01, (outs GPR:$Rd), (ins GPRPairOp:$Rt, addr_offset_none:$addr), @@ -4621,15 +4619,16 @@ def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", let Inst{31-0} = 0b11110101011111111111000000011111; } -def : ARMPat<(and (ldrex_1 addr_offset_none:$addr), 0xff), - (LDREXB addr_offset_none:$addr)>; -def : ARMPat<(and (ldrex_2 addr_offset_none:$addr), 0xffff), - (LDREXH addr_offset_none:$addr)>; def : ARMPat<(strex_1 (and GPR:$Rt, 0xff), addr_offset_none:$addr), (STREXB GPR:$Rt, addr_offset_none:$addr)>; def : ARMPat<(strex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr), (STREXH GPR:$Rt, addr_offset_none:$addr)>; +def : ARMPat<(stlex_1 (and GPR:$Rt, 0xff), addr_offset_none:$addr), + (STLEXB GPR:$Rt, addr_offset_none:$addr)>; +def : ARMPat<(stlex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr), + (STLEXH GPR:$Rt, addr_offset_none:$addr)>; + class acquiring_load<PatFrag base> : PatFrag<(ops node:$ptr), (base node:$ptr), [{ AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering(); @@ -4960,7 +4959,7 @@ def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */, [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, imm:$CRm, imm:$opc2)]>, Requires<[PreV8]>; -def : ARMInstAlias<"mcr2$ $cop, $opc1, $Rt, $CRn, $CRm", +def : ARMInstAlias<"mcr2 $cop, $opc1, $Rt, $CRn, $CRm", (MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, c_imm:$CRm, 0)>; def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */, @@ -4968,7 +4967,7 @@ def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */, (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), []>, Requires<[PreV8]>; -def : ARMInstAlias<"mrc2$ $cop, $opc1, $Rt, $CRn, $CRm", +def : ARMInstAlias<"mrc2 $cop, $opc1, $Rt, $CRn, $CRm", (MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, 0)>; @@ -5097,6 +5096,19 @@ def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary, let Inst{11-0} = a; } +// Dynamic stack allocation yields a _chkstk for Windows targets. These calls +// are needed to probe the stack when allocating more than +// 4K bytes in one go. Touching the stack at 4K increments is necessary to +// ensure that the guard pages used by the OS virtual memory manager are +// allocated in the correct sequence. +// The main point of having a separate instruction is its extra unmodelled +// effects (compared to ordinary calls), such as the stack pointer change. + +def win__chkstk : SDNode<"ARMISD::WIN__CHKSTK", SDTNone, + [SDNPHasChain, SDNPSideEffect]>; +let usesCustomInserter = 1, Uses = [R4], Defs = [R4, SP] in + def WIN__CHKSTK : PseudoInst<(outs), (ins), NoItinerary, [(win__chkstk)]>; + //===----------------------------------------------------------------------===// // TLS Instructions // @@ -5104,9 +5116,11 @@ def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary, // __aeabi_read_tp preserves the registers r1-r3. // This is a pseudo inst so that we can get the encoding right, // complete with fixup for the aeabi_read_tp function. +// TPsoft is valid for ARM mode only; for Thumb mode, a tTPsoft pattern +// is defined in "ARMInstrThumb.td".
let isCall = 1, Defs = [R0, R12, LR, CPSR], Uses = [SP] in { - def TPsoft : PseudoInst<(outs), (ins), IIC_Br, + def TPsoft : ARMPseudoInst<(outs), (ins), 4, IIC_Br, [(set R0, ARMthread_pointer)]>, Sched<[WriteBr]>; } @@ -5184,6 +5198,10 @@ def MOVi32imm : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVix2, [(set GPR:$dst, (arm_i32imm:$src))]>, Requires<[IsARM]>; +def LDRLIT_ga_abs : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iLoad_i, + [(set GPR:$dst, (ARMWrapper tglobaladdr:$src))]>, + Requires<[IsARM, DontUseMovt]>; + // Pseudo instruction that combines movw + movt + add pc (if PIC). // It also makes it possible to rematerialize the instructions. // FIXME: Remove this when we can do generalized remat and when machine licm @@ -5194,10 +5212,17 @@ def MOV_ga_pcrel : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), [(set GPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>, Requires<[IsARM, UseMovt]>; -def MOV_ga_dyn : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), - IIC_iMOVix2, - [(set GPR:$dst, (ARMWrapperDYN tglobaladdr:$addr))]>, - Requires<[IsARM, UseMovt]>; +def LDRLIT_ga_pcrel : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), + IIC_iLoadiALU, + [(set GPR:$dst, + (ARMWrapperPIC tglobaladdr:$addr))]>, + Requires<[IsARM, DontUseMovt]>; + +def LDRLIT_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), + NoItinerary, + [(set GPR:$dst, + (load (ARMWrapperPIC tglobaladdr:$addr)))]>, + Requires<[IsARM, DontUseMovt]>; let AddedComplexity = 10 in def MOV_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), @@ -5207,8 +5232,6 @@ def MOV_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), } // isReMaterializable // ConstantPool, GlobalAddress, and JumpTable -def : ARMPat<(ARMWrapper tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>, - Requires<[IsARM, DontUseMovt]>; def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>; def : ARMPat<(ARMWrapper tglobaladdr :$dst), (MOVi32imm tglobaladdr :$dst)>, Requires<[IsARM, UseMovt]>; @@ -5544,9 +5567,22 @@ def : ARMInstAlias<"neg${s}${p} $Rd, $Rm", def : InstAlias<"nop${p}", (MOVr R0, R0, pred:$p, zero_reg)>, Requires<[IsARM, NoV6]>; -// UMULL/SMULL are available on all arches, but the instruction definitions -// need difference constraints pre-v6. Use these aliases for the assembly -// parsing on pre-v6. +// MUL/UMLAL/SMLAL/UMULL/SMULL are available on all arches, but +// the instruction definitions need different constraints pre-v6. +// Use these aliases for the assembly parsing on pre-v6.
+def : InstAlias<"mul${s}${p} $Rd, $Rn, $Rm", + (MUL GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s)>, + Requires<[IsARM, NoV6]>; +def : InstAlias<"mla${s}${p} $Rd, $Rn, $Rm, $Ra", + (MLA GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra, + pred:$p, cc_out:$s)>, + Requires<[IsARM, NoV6]>; +def : InstAlias<"smlal${s}${p} $RdLo, $RdHi, $Rn, $Rm", + (SMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, + Requires<[IsARM, NoV6]>; +def : InstAlias<"umlal${s}${p} $RdLo, $RdHi, $Rn, $Rm", + (UMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, + Requires<[IsARM, NoV6]>; def : InstAlias<"smull${s}${p} $RdLo, $RdHi, $Rn, $Rm", (SMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, Requires<[IsARM, NoV6]>; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td index 0b05c08ed948..c02bb3b55f5b 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -39,6 +39,49 @@ def nImmVMOVI32 : Operand<i32> { let PrintMethod = "printNEONModImmOperand"; let ParserMatchClass = nImmVMOVI32AsmOperand; } + +def nImmVMOVI16AsmOperandByteReplicate : + AsmOperandClass { + let Name = "NEONi16vmovByteReplicate"; + let PredicateMethod = "isNEONi16ByteReplicate"; + let RenderMethod = "addNEONvmovByteReplicateOperands"; +} +def nImmVMOVI32AsmOperandByteReplicate : + AsmOperandClass { + let Name = "NEONi32vmovByteReplicate"; + let PredicateMethod = "isNEONi32ByteReplicate"; + let RenderMethod = "addNEONvmovByteReplicateOperands"; +} +def nImmVMVNI16AsmOperandByteReplicate : + AsmOperandClass { + let Name = "NEONi16invByteReplicate"; + let PredicateMethod = "isNEONi16ByteReplicate"; + let RenderMethod = "addNEONinvByteReplicateOperands"; +} +def nImmVMVNI32AsmOperandByteReplicate : + AsmOperandClass { + let Name = "NEONi32invByteReplicate"; + let PredicateMethod = "isNEONi32ByteReplicate"; + let RenderMethod = "addNEONinvByteReplicateOperands"; +} + +def nImmVMOVI16ByteReplicate : Operand<i32> { + let PrintMethod = "printNEONModImmOperand"; + let ParserMatchClass = nImmVMOVI16AsmOperandByteReplicate; +} +def nImmVMOVI32ByteReplicate : Operand<i32> { + let PrintMethod = "printNEONModImmOperand"; + let ParserMatchClass = nImmVMOVI32AsmOperandByteReplicate; +} +def nImmVMVNI16ByteReplicate : Operand<i32> { + let PrintMethod = "printNEONModImmOperand"; + let ParserMatchClass = nImmVMVNI16AsmOperandByteReplicate; +} +def nImmVMVNI32ByteReplicate : Operand<i32> { + let PrintMethod = "printNEONModImmOperand"; + let ParserMatchClass = nImmVMVNI32AsmOperandByteReplicate; +} + def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; } def nImmVMOVI32Neg : Operand<i32> { let PrintMethod = "printNEONModImmOperand"; @@ -466,9 +509,6 @@ def SDTARMVSHINS : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>, def NEONvshl : SDNode<"ARMISD::VSHL", SDTARMVSH>; def NEONvshrs : SDNode<"ARMISD::VSHRs", SDTARMVSH>; def NEONvshru : SDNode<"ARMISD::VSHRu", SDTARMVSH>; -def NEONvshlls : SDNode<"ARMISD::VSHLLs", SDTARMVSHX>; -def NEONvshllu : SDNode<"ARMISD::VSHLLu", SDTARMVSHX>; -def NEONvshlli : SDNode<"ARMISD::VSHLLi", SDTARMVSHX>; def NEONvshrn : SDNode<"ARMISD::VSHRN", SDTARMVSHX>; def NEONvrshrs : SDNode<"ARMISD::VRSHRs", SDTARMVSH>; @@ -620,37 +660,37 @@ class VLDQQQQWBPseudo<InstrItinClass itin> let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // VLD1 : Vector Load (multiple single elements) -class VLD1D<bits<4> op7_4, string Dt> +class 
VLD1D<bits<4> op7_4, string Dt, Operand AddrMode> : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd), - (ins addrmode6:$Rn), IIC_VLD1, + (ins AddrMode:$Rn), IIC_VLD1, "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; } -class VLD1Q<bits<4> op7_4, string Dt> +class VLD1Q<bits<4> op7_4, string Dt, Operand AddrMode> : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd), - (ins addrmode6:$Rn), IIC_VLD1x2, + (ins AddrMode:$Rn), IIC_VLD1x2, "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; } -def VLD1d8 : VLD1D<{0,0,0,?}, "8">; -def VLD1d16 : VLD1D<{0,1,0,?}, "16">; -def VLD1d32 : VLD1D<{1,0,0,?}, "32">; -def VLD1d64 : VLD1D<{1,1,0,?}, "64">; +def VLD1d8 : VLD1D<{0,0,0,?}, "8", addrmode6align64>; +def VLD1d16 : VLD1D<{0,1,0,?}, "16", addrmode6align64>; +def VLD1d32 : VLD1D<{1,0,0,?}, "32", addrmode6align64>; +def VLD1d64 : VLD1D<{1,1,0,?}, "64", addrmode6align64>; -def VLD1q8 : VLD1Q<{0,0,?,?}, "8">; -def VLD1q16 : VLD1Q<{0,1,?,?}, "16">; -def VLD1q32 : VLD1Q<{1,0,?,?}, "32">; -def VLD1q64 : VLD1Q<{1,1,?,?}, "64">; +def VLD1q8 : VLD1Q<{0,0,?,?}, "8", addrmode6align64or128>; +def VLD1q16 : VLD1Q<{0,1,?,?}, "16", addrmode6align64or128>; +def VLD1q32 : VLD1Q<{1,0,?,?}, "32", addrmode6align64or128>; +def VLD1q64 : VLD1Q<{1,1,?,?}, "64", addrmode6align64or128>; // ...with address register writeback: -multiclass VLD1DWB<bits<4> op7_4, string Dt> { +multiclass VLD1DWB<bits<4> op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), - (ins addrmode6:$Rn), IIC_VLD1u, + (ins AddrMode:$Rn), IIC_VLD1u, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. @@ -658,16 +698,16 @@ multiclass VLD1DWB<bits<4> op7_4, string Dt> { let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), - (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1u, + (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1u, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; } } -multiclass VLD1QWB<bits<4> op7_4, string Dt> { +multiclass VLD1QWB<bits<4> op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), - (ins addrmode6:$Rn), IIC_VLD1x2u, + (ins AddrMode:$Rn), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
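For context: the addrmode6align* operands threaded through these loads let the assembler reject alignment qualifiers the encoding cannot express (for example, vld1.32 {d0}, [r0:16] is diagnosed, while [r0:64] is accepted). An illustrative sketch, assuming arm_neon.h and a hypothetical function name, of C source whose load may legitimately carry the 64-bit qualifier:

#include <arm_neon.h>

/* vld1_s32 selects VLD1d32; asserting 8-byte alignment permits the
   backend to use the [rN:64]-qualified form of vld1.32. */
int32x2_t load_pair_aligned(const int32_t *p)
{
    const int32_t *ap = (const int32_t *)__builtin_assume_aligned(p, 8);
    return vld1_s32(ap);
}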
@@ -675,7 +715,7 @@ multiclass VLD1QWB<bits<4> op7_4, string Dt> { let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), - (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, + (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; @@ -683,27 +723,27 @@ multiclass VLD1QWB<bits<4> op7_4, string Dt> { } } -defm VLD1d8wb : VLD1DWB<{0,0,0,?}, "8">; -defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16">; -defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32">; -defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64">; -defm VLD1q8wb : VLD1QWB<{0,0,?,?}, "8">; -defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16">; -defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32">; -defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64">; +defm VLD1d8wb : VLD1DWB<{0,0,0,?}, "8", addrmode6align64>; +defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16", addrmode6align64>; +defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32", addrmode6align64>; +defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64", addrmode6align64>; +defm VLD1q8wb : VLD1QWB<{0,0,?,?}, "8", addrmode6align64or128>; +defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16", addrmode6align64or128>; +defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32", addrmode6align64or128>; +defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64", addrmode6align64or128>; // ...with 3 registers -class VLD1D3<bits<4> op7_4, string Dt> +class VLD1D3<bits<4> op7_4, string Dt, Operand AddrMode> : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd), - (ins addrmode6:$Rn), IIC_VLD1x3, "vld1", Dt, + (ins AddrMode:$Rn), IIC_VLD1x3, "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; } -multiclass VLD1D3WB<bits<4> op7_4, string Dt> { +multiclass VLD1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb), - (ins addrmode6:$Rn), IIC_VLD1x2u, + (ins AddrMode:$Rn), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
@@ -711,7 +751,7 @@ multiclass VLD1D3WB<bits<4> op7_4, string Dt> { let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb), - (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, + (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; @@ -719,32 +759,32 @@ multiclass VLD1D3WB<bits<4> op7_4, string Dt> { } } -def VLD1d8T : VLD1D3<{0,0,0,?}, "8">; -def VLD1d16T : VLD1D3<{0,1,0,?}, "16">; -def VLD1d32T : VLD1D3<{1,0,0,?}, "32">; -def VLD1d64T : VLD1D3<{1,1,0,?}, "64">; +def VLD1d8T : VLD1D3<{0,0,0,?}, "8", addrmode6align64>; +def VLD1d16T : VLD1D3<{0,1,0,?}, "16", addrmode6align64>; +def VLD1d32T : VLD1D3<{1,0,0,?}, "32", addrmode6align64>; +def VLD1d64T : VLD1D3<{1,1,0,?}, "64", addrmode6align64>; -defm VLD1d8Twb : VLD1D3WB<{0,0,0,?}, "8">; -defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16">; -defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32">; -defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64">; +defm VLD1d8Twb : VLD1D3WB<{0,0,0,?}, "8", addrmode6align64>; +defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>; +defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>; +defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>; def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>; def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>; def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>; // ...with 4 registers -class VLD1D4<bits<4> op7_4, string Dt> +class VLD1D4<bits<4> op7_4, string Dt, Operand AddrMode> : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd), - (ins addrmode6:$Rn), IIC_VLD1x4, "vld1", Dt, + (ins AddrMode:$Rn), IIC_VLD1x4, "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; } -multiclass VLD1D4WB<bits<4> op7_4, string Dt> { +multiclass VLD1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb), - (ins addrmode6:$Rn), IIC_VLD1x2u, + (ins AddrMode:$Rn), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
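For context: the _fixed/_register variants in these multiclasses model address-register writeback, i.e. post-incremented forms such as vld1.32 {q0}, [r0]!, which arise when streaming sequentially through memory. An illustrative sketch, assuming arm_neon.h and hypothetical names, of a loop where a writeback encoding is typically selected:

#include <arm_neon.h>

/* The pointer bump after each vld1q_s32 is what the writeback
   (post-increment) VLD1 encodings capture in a single instruction. */
int32x4_t sum_blocks(const int32_t *p, int nblocks)
{
    int32x4_t acc = vdupq_n_s32(0);
    for (int i = 0; i < nblocks; ++i, p += 4)
        acc = vaddq_s32(acc, vld1q_s32(p));
    return acc;
}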
@@ -752,7 +792,7 @@ multiclass VLD1D4WB<bits<4> op7_4, string Dt> { let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb), - (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, + (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; @@ -760,15 +800,15 @@ multiclass VLD1D4WB<bits<4> op7_4, string Dt> { } } -def VLD1d8Q : VLD1D4<{0,0,?,?}, "8">; -def VLD1d16Q : VLD1D4<{0,1,?,?}, "16">; -def VLD1d32Q : VLD1D4<{1,0,?,?}, "32">; -def VLD1d64Q : VLD1D4<{1,1,?,?}, "64">; +def VLD1d8Q : VLD1D4<{0,0,?,?}, "8", addrmode6align64or128or256>; +def VLD1d16Q : VLD1D4<{0,1,?,?}, "16", addrmode6align64or128or256>; +def VLD1d32Q : VLD1D4<{1,0,?,?}, "32", addrmode6align64or128or256>; +def VLD1d64Q : VLD1D4<{1,1,?,?}, "64", addrmode6align64or128or256>; -defm VLD1d8Qwb : VLD1D4WB<{0,0,?,?}, "8">; -defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16">; -defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32">; -defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64">; +defm VLD1d8Qwb : VLD1D4WB<{0,0,?,?}, "8", addrmode6align64or128or256>; +defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>; +defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>; +defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>; def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>; def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>; @@ -776,22 +816,28 @@ def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>; // VLD2 : Vector Load (multiple 2-element structures) class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, - InstrItinClass itin> + InstrItinClass itin, Operand AddrMode> : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd), - (ins addrmode6:$Rn), itin, + (ins AddrMode:$Rn), itin, "vld2", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST2Instruction"; } -def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2>; -def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2>; -def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2>; +def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2, + addrmode6align64or128>; +def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2, + addrmode6align64or128>; +def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2, + addrmode6align64or128>; -def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2>; -def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2>; -def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2>; +def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2, + addrmode6align64or128or256>; +def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2, + addrmode6align64or128or256>; +def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2, + addrmode6align64or128or256>; def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>; def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>; @@ -799,9 +845,9 @@ def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>; // ...with address register writeback: multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt, - RegisterOperand VdTy, InstrItinClass itin> { + RegisterOperand VdTy, InstrItinClass itin, Operand AddrMode> { def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), - (ins addrmode6:$Rn), itin, + (ins AddrMode:$Rn), itin, "vld2", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will 
assign to the right encoding bits. @@ -809,7 +855,7 @@ multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt, let DecoderMethod = "DecodeVLDST2Instruction"; } def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), - (ins addrmode6:$Rn, rGPR:$Rm), itin, + (ins AddrMode:$Rn, rGPR:$Rm), itin, "vld2", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; @@ -817,13 +863,19 @@ multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt, } } -defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u>; -defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u>; -defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u>; +defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u, + addrmode6align64or128>; +defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u, + addrmode6align64or128>; +defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u, + addrmode6align64or128>; -defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u>; -defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u>; -defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u>; +defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u, + addrmode6align64or128or256>; +defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u, + addrmode6align64or128or256>; +defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u, + addrmode6align64or128or256>; def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>; def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>; @@ -833,12 +885,18 @@ def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>; def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>; // ...with double-spaced registers -def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2>; -def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2>; -def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2>; -defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u>; -defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u>; -defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u>; +def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2, + addrmode6align64or128>; +def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2, + addrmode6align64or128>; +def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2, + addrmode6align64or128>; +defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u, + addrmode6align64or128>; +defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u, + addrmode6align64or128>; +defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u, + addrmode6align64or128>; // VLD3 : Vector Load (multiple 3-element structures) class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt> @@ -1296,47 +1354,55 @@ def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>; } // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 // VLD1DUP : Vector Load (single element to all lanes) -class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp> +class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp, + Operand AddrMode> : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd), - (ins addrmode6dup:$Rn), + (ins AddrMode:$Rn), 
IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "", [(set VecListOneDAllLanes:$Vd, - (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> { + (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; } -def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>; -def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>; -def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>; +def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8, + addrmode6dupalignNone>; +def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16, + addrmode6dupalign16>; +def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load, + addrmode6dupalign32>; def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), (VLD1DUPd32 addrmode6:$addr)>; -class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp> +class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp, + Operand AddrMode> : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd), - (ins addrmode6dup:$Rn), IIC_VLD1dup, + (ins AddrMode:$Rn), IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "", [(set VecListDPairAllLanes:$Vd, - (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> { + (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; } -def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8>; -def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16>; -def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load>; +def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8, + addrmode6dupalignNone>; +def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16, + addrmode6dupalign16>; +def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load, + addrmode6dupalign32>; def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), (VLD1DUPq32 addrmode6:$addr)>; let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // ...with address register writeback: -multiclass VLD1DUPWB<bits<4> op7_4, string Dt> { +multiclass VLD1DUPWB<bits<4> op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd, GPR:$wb), - (ins addrmode6dup:$Rn), IIC_VLD1dupu, + (ins AddrMode:$Rn), IIC_VLD1dupu, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. @@ -1345,17 +1411,17 @@ multiclass VLD1DUPWB<bits<4> op7_4, string Dt> { } def _register : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd, GPR:$wb), - (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu, + (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; } } -multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> { +multiclass VLD1QDUPWB<bits<4> op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd, GPR:$wb), - (ins addrmode6dup:$Rn), IIC_VLD1dupu, + (ins AddrMode:$Rn), IIC_VLD1dupu, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
@@ -1364,7 +1430,7 @@ multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> { } def _register : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd, GPR:$wb), - (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu, + (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; @@ -1372,38 +1438,47 @@ multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> { } } -defm VLD1DUPd8wb : VLD1DUPWB<{0,0,0,0}, "8">; -defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16">; -defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32">; +defm VLD1DUPd8wb : VLD1DUPWB<{0,0,0,0}, "8", addrmode6dupalignNone>; +defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16", addrmode6dupalign16>; +defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32", addrmode6dupalign32>; -defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8">; -defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16">; -defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32">; +defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8", addrmode6dupalignNone>; +defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16", addrmode6dupalign16>; +defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32", addrmode6dupalign32>; // VLD2DUP : Vector Load (single 2-element structure to all lanes) -class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy> +class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy, Operand AddrMode> : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd), - (ins addrmode6dup:$Rn), IIC_VLD2dup, + (ins AddrMode:$Rn), IIC_VLD2dup, "vld2", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD2DupInstruction"; } -def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes>; -def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes>; -def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes>; +def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes, + addrmode6dupalign16>; +def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes, + addrmode6dupalign32>; +def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes, + addrmode6dupalign64>; +// HACK this one, VLD2DUPd8x2 must be changed at the same time with VLD2b8 or +// "vld2.8 {d0[], d2[]}, [r4:32]" will become "vld2.8 {d0, d2}, [r4:32]". // ...with double-spaced registers -def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes>; -def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>; -def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>; +def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes, + addrmode6dupalign16>; +def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes, + addrmode6dupalign32>; +def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes, + addrmode6dupalign64>; // ...with address register writeback: -multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> { +multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy, + Operand AddrMode> { def _fixed : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd, GPR:$wb), - (ins addrmode6dup:$Rn), IIC_VLD2dupu, + (ins AddrMode:$Rn), IIC_VLD2dupu, "vld2", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
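Each writeback multiclass in this section expands into the same two flavors: "_fixed", which post-increments the base register by the transfer size (the hard-coded Rm = 0b1101), and "_register", which adds a general-purpose register to the base. Roughly, at the assembly level (illustrative inline asm; register and parameter names are arbitrary):

    #include <stdint.h>

    void dup_load_writeback(const uint32_t *p, uint32_t step) {
        /* "_fixed": base advances by the access size. */
        __asm__ volatile("vld1.32 {d0[]}, [%0]!"
                         : "+r"(p) : : "d0", "memory");
        /* "_register": base advances by a general register. */
        __asm__ volatile("vld1.32 {d0[]}, [%0], %1"
                         : "+r"(p) : "r"(step) : "d0", "memory");
    }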
@@ -1412,7 +1487,7 @@ multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> { } def _register : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd, GPR:$wb), - (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD2dupu, + (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD2dupu, "vld2", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; @@ -1420,13 +1495,19 @@ multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> { } } -defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes>; -defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes>; -defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes>; +defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes, + addrmode6dupalign16>; +defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes, + addrmode6dupalign32>; +defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes, + addrmode6dupalign64>; -defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes>; -defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>; -defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>; +defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes, + addrmode6dupalign16>; +defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes, + addrmode6dupalign32>; +defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes, + addrmode6dupalign64>; // VLD3DUP : Vector Load (single 3-element structure to all lanes) class VLD3DUP<bits<4> op7_4, string Dt> @@ -1452,22 +1533,22 @@ def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">; def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">; // ...with address register writeback: -class VLD3DUPWB<bits<4> op7_4, string Dt> +class VLD3DUPWB<bits<4> op7_4, string Dt, Operand AddrMode> : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), - (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD3dupu, + (ins AddrMode:$Rn, am6offset:$Rm), IIC_VLD3dupu, "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = 0; let DecoderMethod = "DecodeVLD3DupInstruction"; } -def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8">; -def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16">; -def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32">; +def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8", addrmode6dupalign64>; +def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16", addrmode6dupalign64>; +def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32", addrmode6dupalign64>; -def VLD3DUPq8_UPD : VLD3DUPWB<{0,0,1,0}, "8">; -def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16">; -def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32">; +def VLD3DUPq8_UPD : VLD3DUPWB<{0,0,1,0}, "8", addrmode6dupalign64>; +def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16", addrmode6dupalign64>; +def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32", addrmode6dupalign64>; def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>; def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>; @@ -1563,35 +1644,35 @@ class VSTQQQQWBPseudo<InstrItinClass itin> "$addr.addr = $wb">; // VST1 : Vector Store (multiple single elements) -class VST1D<bits<4> op7_4, string Dt> - : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$Rn, VecListOneD:$Vd), +class VST1D<bits<4> op7_4, string Dt, Operand AddrMode> + : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins AddrMode:$Rn, VecListOneD:$Vd), IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; } -class VST1Q<bits<4> op7_4, string Dt> - : 
NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListDPair:$Vd), +class VST1Q<bits<4> op7_4, string Dt, Operand AddrMode> + : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins AddrMode:$Rn, VecListDPair:$Vd), IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; } -def VST1d8 : VST1D<{0,0,0,?}, "8">; -def VST1d16 : VST1D<{0,1,0,?}, "16">; -def VST1d32 : VST1D<{1,0,0,?}, "32">; -def VST1d64 : VST1D<{1,1,0,?}, "64">; +def VST1d8 : VST1D<{0,0,0,?}, "8", addrmode6align64>; +def VST1d16 : VST1D<{0,1,0,?}, "16", addrmode6align64>; +def VST1d32 : VST1D<{1,0,0,?}, "32", addrmode6align64>; +def VST1d64 : VST1D<{1,1,0,?}, "64", addrmode6align64>; -def VST1q8 : VST1Q<{0,0,?,?}, "8">; -def VST1q16 : VST1Q<{0,1,?,?}, "16">; -def VST1q32 : VST1Q<{1,0,?,?}, "32">; -def VST1q64 : VST1Q<{1,1,?,?}, "64">; +def VST1q8 : VST1Q<{0,0,?,?}, "8", addrmode6align64or128>; +def VST1q16 : VST1Q<{0,1,?,?}, "16", addrmode6align64or128>; +def VST1q32 : VST1Q<{1,0,?,?}, "32", addrmode6align64or128>; +def VST1q64 : VST1Q<{1,1,?,?}, "64", addrmode6align64or128>; // ...with address register writeback: -multiclass VST1DWB<bits<4> op7_4, string Dt> { +multiclass VST1DWB<bits<4> op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, VecListOneD:$Vd), IIC_VLD1u, + (ins AddrMode:$Rn, VecListOneD:$Vd), IIC_VLD1u, "vst1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. @@ -1599,7 +1680,7 @@ multiclass VST1DWB<bits<4> op7_4, string Dt> { let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, rGPR:$Rm, VecListOneD:$Vd), + (ins AddrMode:$Rn, rGPR:$Rm, VecListOneD:$Vd), IIC_VLD1u, "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { @@ -1607,9 +1688,9 @@ multiclass VST1DWB<bits<4> op7_4, string Dt> { let DecoderMethod = "DecodeVLDST1Instruction"; } } -multiclass VST1QWB<bits<4> op7_4, string Dt> { +multiclass VST1QWB<bits<4> op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, VecListDPair:$Vd), IIC_VLD1x2u, + (ins AddrMode:$Rn, VecListDPair:$Vd), IIC_VLD1x2u, "vst1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
@@ -1617,7 +1698,7 @@ multiclass VST1QWB<bits<4> op7_4, string Dt> { let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, rGPR:$Rm, VecListDPair:$Vd), + (ins AddrMode:$Rn, rGPR:$Rm, VecListDPair:$Vd), IIC_VLD1x2u, "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { @@ -1626,28 +1707,28 @@ multiclass VST1QWB<bits<4> op7_4, string Dt> { } } -defm VST1d8wb : VST1DWB<{0,0,0,?}, "8">; -defm VST1d16wb : VST1DWB<{0,1,0,?}, "16">; -defm VST1d32wb : VST1DWB<{1,0,0,?}, "32">; -defm VST1d64wb : VST1DWB<{1,1,0,?}, "64">; +defm VST1d8wb : VST1DWB<{0,0,0,?}, "8", addrmode6align64>; +defm VST1d16wb : VST1DWB<{0,1,0,?}, "16", addrmode6align64>; +defm VST1d32wb : VST1DWB<{1,0,0,?}, "32", addrmode6align64>; +defm VST1d64wb : VST1DWB<{1,1,0,?}, "64", addrmode6align64>; -defm VST1q8wb : VST1QWB<{0,0,?,?}, "8">; -defm VST1q16wb : VST1QWB<{0,1,?,?}, "16">; -defm VST1q32wb : VST1QWB<{1,0,?,?}, "32">; -defm VST1q64wb : VST1QWB<{1,1,?,?}, "64">; +defm VST1q8wb : VST1QWB<{0,0,?,?}, "8", addrmode6align64or128>; +defm VST1q16wb : VST1QWB<{0,1,?,?}, "16", addrmode6align64or128>; +defm VST1q32wb : VST1QWB<{1,0,?,?}, "32", addrmode6align64or128>; +defm VST1q64wb : VST1QWB<{1,1,?,?}, "64", addrmode6align64or128>; // ...with 3 registers -class VST1D3<bits<4> op7_4, string Dt> +class VST1D3<bits<4> op7_4, string Dt, Operand AddrMode> : NLdSt<0, 0b00, 0b0110, op7_4, (outs), - (ins addrmode6:$Rn, VecListThreeD:$Vd), + (ins AddrMode:$Rn, VecListThreeD:$Vd), IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; } -multiclass VST1D3WB<bits<4> op7_4, string Dt> { +multiclass VST1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u, + (ins AddrMode:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u, "vst1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
@@ -1655,7 +1736,7 @@ multiclass VST1D3WB<bits<4> op7_4, string Dt> { let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, rGPR:$Rm, VecListThreeD:$Vd), + (ins AddrMode:$Rn, rGPR:$Rm, VecListThreeD:$Vd), IIC_VLD1x3u, "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { @@ -1664,33 +1745,33 @@ multiclass VST1D3WB<bits<4> op7_4, string Dt> { } } -def VST1d8T : VST1D3<{0,0,0,?}, "8">; -def VST1d16T : VST1D3<{0,1,0,?}, "16">; -def VST1d32T : VST1D3<{1,0,0,?}, "32">; -def VST1d64T : VST1D3<{1,1,0,?}, "64">; +def VST1d8T : VST1D3<{0,0,0,?}, "8", addrmode6align64>; +def VST1d16T : VST1D3<{0,1,0,?}, "16", addrmode6align64>; +def VST1d32T : VST1D3<{1,0,0,?}, "32", addrmode6align64>; +def VST1d64T : VST1D3<{1,1,0,?}, "64", addrmode6align64>; -defm VST1d8Twb : VST1D3WB<{0,0,0,?}, "8">; -defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16">; -defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32">; -defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64">; +defm VST1d8Twb : VST1D3WB<{0,0,0,?}, "8", addrmode6align64>; +defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16", addrmode6align64>; +defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32", addrmode6align64>; +defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64", addrmode6align64>; def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>; def VST1d64TPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x3u>; def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>; // ...with 4 registers -class VST1D4<bits<4> op7_4, string Dt> +class VST1D4<bits<4> op7_4, string Dt, Operand AddrMode> : NLdSt<0, 0b00, 0b0010, op7_4, (outs), - (ins addrmode6:$Rn, VecListFourD:$Vd), + (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; } -multiclass VST1D4WB<bits<4> op7_4, string Dt> { +multiclass VST1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1x4u, + (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1x4u, "vst1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
@@ -1698,7 +1779,7 @@ multiclass VST1D4WB<bits<4> op7_4, string Dt> { let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd), + (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd), IIC_VLD1x4u, "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { @@ -1707,15 +1788,15 @@ multiclass VST1D4WB<bits<4> op7_4, string Dt> { } } -def VST1d8Q : VST1D4<{0,0,?,?}, "8">; -def VST1d16Q : VST1D4<{0,1,?,?}, "16">; -def VST1d32Q : VST1D4<{1,0,?,?}, "32">; -def VST1d64Q : VST1D4<{1,1,?,?}, "64">; +def VST1d8Q : VST1D4<{0,0,?,?}, "8", addrmode6align64or128or256>; +def VST1d16Q : VST1D4<{0,1,?,?}, "16", addrmode6align64or128or256>; +def VST1d32Q : VST1D4<{1,0,?,?}, "32", addrmode6align64or128or256>; +def VST1d64Q : VST1D4<{1,1,?,?}, "64", addrmode6align64or128or256>; -defm VST1d8Qwb : VST1D4WB<{0,0,?,?}, "8">; -defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16">; -defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32">; -defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64">; +defm VST1d8Qwb : VST1D4WB<{0,0,?,?}, "8", addrmode6align64or128or256>; +defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>; +defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>; +defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>; def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>; def VST1d64QPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x4u>; @@ -1723,21 +1804,27 @@ def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>; // VST2 : Vector Store (multiple 2-element structures) class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, - InstrItinClass itin> - : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, VdTy:$Vd), + InstrItinClass itin, Operand AddrMode> + : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins AddrMode:$Rn, VdTy:$Vd), itin, "vst2", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST2Instruction"; } -def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2>; -def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2>; -def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2>; +def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2, + addrmode6align64or128>; +def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2, + addrmode6align64or128>; +def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2, + addrmode6align64or128>; -def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2>; -def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2>; -def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2>; +def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2, + addrmode6align64or128or256>; +def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2, + addrmode6align64or128or256>; +def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2, + addrmode6align64or128or256>; def VST2q8Pseudo : VSTQQPseudo<IIC_VST2x2>; def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>; @@ -1745,9 +1832,9 @@ def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>; // ...with address register writeback: multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt, - RegisterOperand VdTy> { + RegisterOperand VdTy, Operand AddrMode> { def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, VdTy:$Vd), IIC_VLD1u, + (ins AddrMode:$Rn, VdTy:$Vd), IIC_VLD1u, "vst2", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt 
will assign to the right encoding bits. @@ -1755,16 +1842,16 @@ multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt, let DecoderMethod = "DecodeVLDST2Instruction"; } def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u, + (ins AddrMode:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u, "vst2", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST2Instruction"; } } -multiclass VST2QWB<bits<4> op7_4, string Dt> { +multiclass VST2QWB<bits<4> op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1u, + (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1u, "vst2", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. @@ -1772,7 +1859,7 @@ multiclass VST2QWB<bits<4> op7_4, string Dt> { let DecoderMethod = "DecodeVLDST2Instruction"; } def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd), + (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd), IIC_VLD1u, "vst2", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { @@ -1781,13 +1868,16 @@ multiclass VST2QWB<bits<4> op7_4, string Dt> { } } -defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair>; -defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair>; -defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair>; +defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair, + addrmode6align64or128>; +defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair, + addrmode6align64or128>; +defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair, + addrmode6align64or128>; -defm VST2q8wb : VST2QWB<{0,0,?,?}, "8">; -defm VST2q16wb : VST2QWB<{0,1,?,?}, "16">; -defm VST2q32wb : VST2QWB<{1,0,?,?}, "32">; +defm VST2q8wb : VST2QWB<{0,0,?,?}, "8", addrmode6align64or128or256>; +defm VST2q16wb : VST2QWB<{0,1,?,?}, "16", addrmode6align64or128or256>; +defm VST2q32wb : VST2QWB<{1,0,?,?}, "32", addrmode6align64or128or256>; def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>; def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>; @@ -1797,12 +1887,18 @@ def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>; def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>; // ...with double-spaced registers -def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2>; -def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2>; -def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2>; -defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced>; -defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced>; -defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced>; +def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2, + addrmode6align64or128>; +def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2, + addrmode6align64or128>; +def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2, + addrmode6align64or128>; +defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, + addrmode6align64or128>; +defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, + addrmode6align64or128>; +defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, + addrmode6align64or128>; // VST3 : Vector Store (multiple 3-element structures) class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt> @@ 
-2270,9 +2366,9 @@ def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)), def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr), (VST1q64 addrmode6:$addr, QPR:$value)>; def : Pat<(v2f64 (word_alignedload addrmode6:$addr)), - (VLD1q32 addrmode6:$addr)>; + (VLD1q32 addrmode6:$addr)>, Requires<[IsLE]>; def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr), - (VST1q32 addrmode6:$addr, QPR:$value)>; + (VST1q32 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>; def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)), (VLD1q16 addrmode6:$addr)>, Requires<[IsLE]>; def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr), @@ -2360,14 +2456,14 @@ class N2VDIntnp<bits<2> op17_16, bits<3> op10_8, bit op7, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N2Vnp<0b10, op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm), - itin, OpcodeStr, Dt, ResTy, OpTy, + itin, OpcodeStr, Dt, [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>; class N2VQIntnp<bits<2> op17_16, bits<3> op10_8, bit op7, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N2Vnp<0b10, op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm), - itin, OpcodeStr, Dt, ResTy, OpTy, + itin, OpcodeStr, Dt, [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>; // Similar to NV2VQIntnp with some more encoding bits exposed (crypto). @@ -2375,7 +2471,7 @@ class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6, bit op7, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N2Vnp<op19_18, op17_16, op10_8, op7, op6, (outs QPR:$Vd), (ins QPR:$Vm), - itin, OpcodeStr, Dt, ResTy, OpTy, + itin, OpcodeStr, Dt, [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>; // Same as N2VQIntXnp but with Vd as a src register. @@ -2384,7 +2480,7 @@ class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6, ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N2Vnp<op19_18, op17_16, op10_8, op7, op6, (outs QPR:$Vd), (ins QPR:$src, QPR:$Vm), - itin, OpcodeStr, Dt, ResTy, OpTy, + itin, OpcodeStr, Dt, [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> { let Constraints = "$src = $Vd"; } @@ -2558,7 +2654,6 @@ class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, SDPatternOperator IntOp, bit Commutable> : N3Vnp<op27_23, op21_20, op11_8, op6, op4, (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt, - ResTy, OpTy, IntOp, Commutable, [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>; class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, @@ -2612,7 +2707,6 @@ class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, SDPatternOperator IntOp, bit Commutable> : N3Vnp<op27_23, op21_20, op11_8, op6, op4, (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, Dt, - ResTy, OpTy, IntOp, Commutable, [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>; // Same as N3VQIntnp but with Vd as a src register. 
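Two unrelated cleanups share this range. The word-aligned v2f64 load/store patterns gain Requires<[IsLE]> because loading 32-bit elements and then reinterpreting them as 64-bit lanes is only a no-op on little-endian; big-endian would need a VREV64.32 to restore the word order inside each lane. The N2Vnp/N3Vnp hunks are a signature cleanup, dropping ResTy/OpTy/IntOp/Commutable arguments the instruction classes no longer take. A sketch of the endianness point with integer vectors, assuming arm_neon.h on an AArch32 NEON target:

    #include <arm_neon.h>
    #include <stdint.h>

    uint64x2_t words_as_dwords(const uint32_t *p) {
        uint32x4_t w = vld1q_u32(p);      /* element-wise 32-bit load */
        return vreinterpretq_u64_u32(w);  /* lane-exact only on little-endian */
    }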
@@ -2621,8 +2715,8 @@ class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, string Dt, ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable> : N3Vnp<op27_23, op21_20, op11_8, op6, op4, - (outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, - Dt, ResTy, OpTy, IntOp, Commutable, + (outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm), + f, itin, OpcodeStr, Dt, [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> { let Constraints = "$src = $Vd"; @@ -2942,7 +3036,6 @@ class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, SDPatternOperator IntOp, bit Commutable> : N3Vnp<op27_23, op21_20, op11_8, op6, op4, (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt, - ResTy, OpTy, IntOp, Commutable, [(set QPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>; class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, @@ -3038,22 +3131,23 @@ class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, // Long shift by immediate. class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode> + ValueType ResTy, ValueType OpTy, Operand ImmTy, + SDPatternOperator OpNode> : N2VImm<op24, op23, op11_8, op7, op6, op4, (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm, IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", - [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm), - (i32 imm:$SIMM))))]>; + [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm), ImmTy:$SIMM)))]>; // Narrow shift by immediate. class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode> + ValueType ResTy, ValueType OpTy, Operand ImmTy, + SDPatternOperator OpNode> : N2VImm<op24, op23, op11_8, op7, op6, op4, (outs DPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, itin, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm), - (i32 imm:$SIMM))))]>; + (i32 ImmTy:$SIMM))))]>; // Shift right by immediate and accumulate, // both double- and quad-register. 
@@ -3941,7 +4035,8 @@ multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, // Neon Shift Long operations, // element sizes of 8, 16, 32 bits: multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, - bit op4, string OpcodeStr, string Dt, SDNode OpNode> { + bit op4, string OpcodeStr, string Dt, + SDPatternOperator OpNode> { def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> { let Inst{21-19} = 0b001; // imm6 = 001xxx @@ -3960,7 +4055,7 @@ multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, // element sizes of 16, 32, 64 bits: multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - SDNode OpNode> { + SDPatternOperator OpNode> { def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, OpcodeStr, !strconcat(Dt, "16"), v8i8, v8i16, shr_imm8, OpNode> { @@ -4427,14 +4522,14 @@ defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s", // VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE) def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge", - "f32", v2i32, v2f32, int_arm_neon_vacged, 0>; + "f32", v2i32, v2f32, int_arm_neon_vacge, 0>; def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge", - "f32", v4i32, v4f32, int_arm_neon_vacgeq, 0>; + "f32", v4i32, v4f32, int_arm_neon_vacge, 0>; // VACGT : Vector Absolute Compare Greater Than (aka VCAGT) def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt", - "f32", v2i32, v2f32, int_arm_neon_vacgtd, 0>; + "f32", v2i32, v2f32, int_arm_neon_vacgt, 0>; def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt", - "f32", v4i32, v4f32, int_arm_neon_vacgtq, 0>; + "f32", v4i32, v4f32, int_arm_neon_vacgt, 0>; // VTST : Vector Test Bits defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vtst", "", NEONvtst, 1>; @@ -4946,28 +5041,51 @@ defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu", NEONvshru>; // VSHLL : Vector Shift Left Long -defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>; -defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>; +defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", + PatFrag<(ops node:$LHS, node:$RHS), (NEONvshl (sext node:$LHS), node:$RHS)>>; +defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", + PatFrag<(ops node:$LHS, node:$RHS), (NEONvshl (zext node:$LHS), node:$RHS)>>; // VSHLL : Vector Shift Left Long (with maximum shift count) class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy, - ValueType OpTy, Operand ImmTy, SDNode OpNode> + ValueType OpTy, Operand ImmTy> : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt, - ResTy, OpTy, ImmTy, OpNode> { + ResTy, OpTy, ImmTy, null_frag> { let Inst{21-16} = op21_16; let DecoderMethod = "DecodeVSHLMaxInstruction"; } def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8", - v8i16, v8i8, imm8, NEONvshlli>; + v8i16, v8i8, imm8>; def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16", - v4i32, v4i16, imm16, NEONvshlli>; + v4i32, v4i16, imm16>; def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32", - v2i64, v2i32, imm32, NEONvshlli>; + v2i64, v2i32, imm32>; + +def : Pat<(v8i16 (NEONvshl (zext (v8i8 DPR:$Rn)), (i32 8))), + (VSHLLi8 
DPR:$Rn, 8)>; +def : Pat<(v4i32 (NEONvshl (zext (v4i16 DPR:$Rn)), (i32 16))), + (VSHLLi16 DPR:$Rn, 16)>; +def : Pat<(v2i64 (NEONvshl (zext (v2i32 DPR:$Rn)), (i32 32))), + (VSHLLi32 DPR:$Rn, 32)>; +def : Pat<(v8i16 (NEONvshl (sext (v8i8 DPR:$Rn)), (i32 8))), + (VSHLLi8 DPR:$Rn, 8)>; +def : Pat<(v4i32 (NEONvshl (sext (v4i16 DPR:$Rn)), (i32 16))), + (VSHLLi16 DPR:$Rn, 16)>; +def : Pat<(v2i64 (NEONvshl (sext (v2i32 DPR:$Rn)), (i32 32))), + (VSHLLi32 DPR:$Rn, 32)>; // VSHRN : Vector Shift Right and Narrow defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i", - NEONvshrn>; + PatFrag<(ops node:$Rn, node:$amt), + (trunc (NEONvshrs node:$Rn, node:$amt))>>; + +def : Pat<(v8i8 (trunc (NEONvshru (v8i16 QPR:$Vn), shr_imm8:$amt))), + (VSHRNv8i8 QPR:$Vn, shr_imm8:$amt)>; +def : Pat<(v4i16 (trunc (NEONvshru (v4i32 QPR:$Vn), shr_imm16:$amt))), + (VSHRNv4i16 QPR:$Vn, shr_imm16:$amt)>; +def : Pat<(v2i32 (trunc (NEONvshru (v2i64 QPR:$Vn), shr_imm32:$amt))), + (VSHRNv2i32 QPR:$Vn, shr_imm32:$amt)>; // VRSHL : Vector Rounding Shift defm VRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm, @@ -5077,9 +5195,6 @@ def : Pat<(xor (v4i32 (NEONvshrs QPR:$src, (i32 31))), (v4i32 (add QPR:$src, (NEONvshrs QPR:$src, (i32 31))))), (VABSv4i32 QPR:$src)>; -def : Pat<(v2f32 (int_arm_neon_vabs (v2f32 DPR:$src))), (VABSfd DPR:$src)>; -def : Pat<(v4f32 (int_arm_neon_vabs (v4f32 QPR:$src))), (VABSfq QPR:$src)>; - // VQABS : Vector Saturating Absolute Value defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s", @@ -5226,6 +5341,55 @@ def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd), [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>; } // isReMaterializable +// Add support for bytes replication feature, so it could be GAS compatible. +// E.g. instructions below: +// "vmov.i32 d0, 0xffffffff" +// "vmov.i32 d0, 0xabababab" +// "vmov.i16 d0, 0xabab" +// are incorrect, but we could deal with such cases. +// For last two instructions, for example, it should emit: +// "vmov.i8 d0, 0xab" +def : NEONInstAlias<"vmov${p}.i16 $Vd, $Vm", + (VMOVv8i8 DPR:$Vd, nImmVMOVI16ByteReplicate:$Vm, pred:$p)>; +def : NEONInstAlias<"vmov${p}.i32 $Vd, $Vm", + (VMOVv8i8 DPR:$Vd, nImmVMOVI32ByteReplicate:$Vm, pred:$p)>; +def : NEONInstAlias<"vmov${p}.i16 $Vd, $Vm", + (VMOVv16i8 QPR:$Vd, nImmVMOVI16ByteReplicate:$Vm, pred:$p)>; +def : NEONInstAlias<"vmov${p}.i32 $Vd, $Vm", + (VMOVv16i8 QPR:$Vd, nImmVMOVI32ByteReplicate:$Vm, pred:$p)>; + +// Also add same support for VMVN instructions. So instruction: +// "vmvn.i32 d0, 0xabababab" +// actually means: +// "vmov.i8 d0, 0x54" +def : NEONInstAlias<"vmvn${p}.i16 $Vd, $Vm", + (VMOVv8i8 DPR:$Vd, nImmVMVNI16ByteReplicate:$Vm, pred:$p)>; +def : NEONInstAlias<"vmvn${p}.i32 $Vd, $Vm", + (VMOVv8i8 DPR:$Vd, nImmVMVNI32ByteReplicate:$Vm, pred:$p)>; +def : NEONInstAlias<"vmvn${p}.i16 $Vd, $Vm", + (VMOVv16i8 QPR:$Vd, nImmVMVNI16ByteReplicate:$Vm, pred:$p)>; +def : NEONInstAlias<"vmvn${p}.i32 $Vd, $Vm", + (VMOVv16i8 QPR:$Vd, nImmVMVNI32ByteReplicate:$Vm, pred:$p)>; + +// On some CPUs the two instructions "vmov.i32 dD, #0" and "vmov.i32 qD, #0" +// require zero cycles to execute so they should be used wherever possible for +// setting a register to zero. + +// Even without these pseudo-insts we would probably end up with the correct +// instruction, but we could not mark the general ones with "isAsCheapAsAMove" +// since they are sometimes rather expensive (in general). 
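The VSHLL and VSHRN hunks above retire the dedicated NEONvshlls/NEONvshllu/NEONvshrn selection nodes in favor of PatFrags over generic nodes: a long left shift is matched as NEONvshl applied to a sext/zext, and a narrowing right shift as trunc of NEONvshrs/NEONvshru, with explicit patterns covering the maximum-shift VSHLLi8/i16/i32 encodings and the unsigned VSHRN forms. In intrinsic terms (a sketch, assuming arm_neon.h on an AArch32 NEON target):

    #include <arm_neon.h>

    /* Shift-left-long: widen each lane, then shift.  After this change the
       backend matches "shl (sext/zext x), n" rather than a custom node. */
    int16x8_t widen_then_shift(int8x8_t v)    { return vshll_n_s8(v, 3); }
    /* A shift equal to the element size selects the special VSHLLi8 form. */
    uint16x8_t widen_by_lane_size(uint8x8_t v) { return vshll_n_u8(v, 8); }
    /* Shift-right-narrow: matched as "trunc (vshr x, n)". */
    int8x8_t shift_then_narrow(int16x8_t v)   { return vshrn_n_s16(v, 3); }

The VMOVD0/VMOVQ0 pseudo-instructions defined next put the zero-cost register-zeroing idiom described in the comment above behind the HasZCZ predicate.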
+ +let AddedComplexity = 50, isAsCheapAsAMove = 1, isReMaterializable = 1 in { + def VMOVD0 : ARMPseudoExpand<(outs DPR:$Vd), (ins), 4, IIC_VMOVImm, + [(set DPR:$Vd, (v2i32 NEONimmAllZerosV))], + (VMOVv2i32 DPR:$Vd, 0, (ops 14, zero_reg))>, + Requires<[HasZCZ]>; + def VMOVQ0 : ARMPseudoExpand<(outs QPR:$Vd), (ins), 4, IIC_VMOVImm, + [(set QPR:$Vd, (v4i32 NEONimmAllZerosV))], + (VMOVv4i32 QPR:$Vd, 0, (ops 14, zero_reg))>, + Requires<[HasZCZ]>; +} + // VMOV : Vector Get Lane (move scalar to ARM core register) def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?}, @@ -5490,10 +5654,12 @@ def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)), (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; -def VDUPfdf : PseudoNeonI<(outs DPR:$dst), (ins SPR:$src), IIC_VMOVD, "", - [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>; -def VDUPfqf : PseudoNeonI<(outs QPR:$dst), (ins SPR:$src), IIC_VMOVD, "", - [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>; +def : Pat<(v2f32 (NEONvdup (f32 SPR:$src))), + (v2f32 (VDUPLN32d (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), + SPR:$src, ssub_0), (i32 0)))>; +def : Pat<(v4f32 (NEONvdup (f32 SPR:$src))), + (v4f32 (VDUPLN32q (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), + SPR:$src, ssub_0), (i32 0)))>; // VMOVN : Vector Narrowing Move defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN, @@ -5576,22 +5742,22 @@ def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32", v4f32, v4i32, int_arm_neon_vcvtfxu2fp>; } -def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0", +def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0", (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>; -def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0", +def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0", (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>; -def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0", +def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0", (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>; -def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0", +def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0", (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>; -def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0", +def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0", (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>; -def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0", +def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0", (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>; -def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0", +def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0", (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>; -def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0", +def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0", (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>; @@ -5874,7 +6040,7 @@ defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>; // Cryptography instructions let PostEncoderMethod = "NEONThumb2DataIPostEncoder", - DecoderNamespace = "v8Crypto" in { + DecoderNamespace = "v8Crypto", hasSideEffects = 0 in { class AES<string op, bit op7, bit op6, SDPatternOperator Int> : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary, !strconcat("aes", op), "8", v16i8, v16i8, Int>, @@ -5904,17 +6070,45 @@ def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>; def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>; def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>; -def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, int_arm_neon_sha1h>; +def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, null_frag>; def SHA1SU1 : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>; def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>; -def SHA1C : 
N3SHA3Op<"1c", 0b00100, 0b00, int_arm_neon_sha1c>; -def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, int_arm_neon_sha1m>; -def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, int_arm_neon_sha1p>; +def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, null_frag>; +def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, null_frag>; +def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, null_frag>; def SHA1SU0 : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>; def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>; def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>; def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>; +def : Pat<(i32 (int_arm_neon_sha1h i32:$Rn)), + (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG + (SHA1H (SUBREG_TO_REG (i64 0), + (f32 (COPY_TO_REGCLASS i32:$Rn, SPR)), + ssub_0)), + ssub_0)), GPR)>; + +def : Pat<(v4i32 (int_arm_neon_sha1c v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)), + (SHA1C v4i32:$hash_abcd, + (SUBREG_TO_REG (i64 0), + (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)), + ssub_0), + v4i32:$wk)>; + +def : Pat<(v4i32 (int_arm_neon_sha1m v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)), + (SHA1M v4i32:$hash_abcd, + (SUBREG_TO_REG (i64 0), + (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)), + ssub_0), + v4i32:$wk)>; + +def : Pat<(v4i32 (int_arm_neon_sha1p v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)), + (SHA1P v4i32:$hash_abcd, + (SUBREG_TO_REG (i64 0), + (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)), + ssub_0), + v4i32:$wk)>; + //===----------------------------------------------------------------------===// // NEON instructions for single-precision FP math //===----------------------------------------------------------------------===// @@ -5982,67 +6176,145 @@ def : Pat<(f32 (bitconvert GPR:$a)), //===----------------------------------------------------------------------===// // bit_convert -def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>; -def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>; -def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>; +let Predicates = [IsLE] in { + def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>; + def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>; + def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>; +} def : Pat<(v1i64 (bitconvert (f64 DPR:$src))), (v1i64 DPR:$src)>; -def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>; -def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>; -def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>; -def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>; -def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>; +let Predicates = [IsLE] in { + def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>; + def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>; + def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>; + def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>; + def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>; +} def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>; -def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>; -def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>; -def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>; -def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>; -def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>; -def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>; -def : Pat<(v8i8 
(bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>; -def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>; -def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>; -def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>; +let Predicates = [IsLE] in { + def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>; + def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>; + def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>; + def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>; + def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>; + def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>; + def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>; + def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>; + def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>; + def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>; +} def : Pat<(f64 (bitconvert (v1i64 DPR:$src))), (f64 DPR:$src)>; -def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>; -def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>; -def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>; -def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>; -def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>; -def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>; +let Predicates = [IsLE] in { + def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>; + def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>; + def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>; + def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>; + def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>; + def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>; +} def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>; -def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>; -def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>; +let Predicates = [IsLE] in { + def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>; + def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>; +} -def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>; -def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>; -def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>; +let Predicates = [IsLE] in { + def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>; + def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>; + def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>; +} def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>; -def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>; -def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>; -def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>; -def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>; -def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>; +let Predicates = [IsLE] in { + def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>; + def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>; + def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>; + def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>; + def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>; +} def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>; 
-def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>; -def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>; -def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>; -def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>; -def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>; -def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>; -def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>; -def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>; -def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>; -def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>; -def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>; +let Predicates = [IsLE] in { + def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>; + def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>; + def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>; + def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>; + def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>; + def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>; + def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>; + def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>; + def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>; + def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>; + def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>; +} def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>; -def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>; -def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>; -def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>; +let Predicates = [IsLE] in { + def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>; + def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>; + def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>; +} def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>; -def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>; -def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>; -def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>; -def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>; +let Predicates = [IsLE] in { + def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>; + def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>; + def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>; + def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>; +} + +let Predicates = [IsBE] in { + // 64 bit conversions + def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>; + def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>; + def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>; + def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>; + def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>; + def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>; + def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>; + def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>; + def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>; + def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>; + def : Pat<(v4i16 
(bitconvert (v8i8 DPR:$src))), (VREV16d8 DPR:$src)>; + def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (VREV64d16 DPR:$src)>; + def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>; + def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (VREV64d8 DPR:$src)>; + def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (VREV32d8 DPR:$src)>; + def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (VREV16d8 DPR:$src)>; + def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (VREV64d8 DPR:$src)>; + def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (VREV32d8 DPR:$src)>; + def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>; + def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>; + def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>; + def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>; + def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>; + def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>; + def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>; + def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>; + + // 128 bit conversions + def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>; + def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>; + def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>; + def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>; + def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>; + def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>; + def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>; + def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>; + def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>; + def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>; + def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (VREV16q8 QPR:$src)>; + def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>; + def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>; + def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (VREV64q8 QPR:$src)>; + def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (VREV32q8 QPR:$src)>; + def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (VREV16q8 QPR:$src)>; + def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (VREV64q8 QPR:$src)>; + def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (VREV32q8 QPR:$src)>; + def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>; + def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>; + def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>; + def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>; + def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>; + def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>; + def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>; + def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>; +} // Fold extracting an element out of a v2i32 into a vfp register. def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))), @@ -6051,7 +6323,7 @@ def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))), // Vector lengthening move with load, matching extending loads. // extload, zextload and sextload for a standard lengthening load. 
Example: -// Lengthen_Single<"8", "i16", "8"> = +// Lengthen_Single<"8", "i16", "8"> = // Pat<(v8i16 (extloadvi8 addrmode6:$addr)) // (VMOVLuv8i16 (VLD1d8 addrmode6:$addr, // (f64 (IMPLICIT_DEF)), (i32 0)))>; @@ -6078,7 +6350,7 @@ multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> { // half the lanes available. Example: // Lengthen_HalfSingle<"4", "i16", "8", "i16", "i8"> = // Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)), -// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr, +// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr, // (f64 (IMPLICIT_DEF)), (i32 0))), // dsub_0)>; multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy, @@ -6100,6 +6372,32 @@ multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy, dsub_0)>; } +// The following class definition is basically a copy of the +// Lengthen_HalfSingle definition above, however with an additional parameter +// "RevLanes" to select the correct VREV32dXX instruction. This is to convert +// data loaded by VLD1LN into proper vector format in big endian mode. +multiclass Lengthen_HalfSingle_Big_Endian<string DestLanes, string DestTy, string SrcTy, + string InsnLanes, string InsnTy, string RevLanes> { + def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)), + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy) + (!cast<Instruction>("VREV32d" # RevLanes) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), + dsub_0)>; + def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)), + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy) + (!cast<Instruction>("VREV32d" # RevLanes) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), + dsub_0)>; + def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)), + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy) + (!cast<Instruction>("VREV32d" # RevLanes) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), + dsub_0)>; +} + // extload, zextload and sextload for a lengthening load followed by another // lengthening load, to quadruple the initial length. // @@ -6134,6 +6432,36 @@ multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy, dsub_0))>; } +// The following class definition is basically a copy of the +// Lengthen_Double definition above, however with an additional parameter +// "RevLanes" to select the correct VREV32dXX instruction. This is to convert +// data loaded by VLD1LN into proper vector format in big endian mode. 
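+// For illustration (a sketch, following the commented-example convention used
+// for Lengthen_Single above), the v4i8 -> v4i16 -> v4i32 instantiation defined
+// further below should expand roughly to:
+// Lengthen_Double_Big_Endian<"4", "i32", "i8", "8", "i16", "4", "i32", "8"> =
+//   Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr)),
+//       (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 (VREV32d8
+//           (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+//        dsub_0))>;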
+multiclass Lengthen_Double_Big_Endian<string DestLanes, string DestTy, string SrcTy, + string Insn1Lanes, string Insn1Ty, string Insn2Lanes, + string Insn2Ty, string RevLanes> { + def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)), + (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) + (!cast<Instruction>("VREV32d" # RevLanes) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), + dsub_0))>; + def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)), + (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) + (!cast<Instruction>("VREV32d" # RevLanes) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), + dsub_0))>; + def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)), + (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty) + (!cast<Instruction>("VREV32d" # RevLanes) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), + dsub_0))>; +} + // extload, zextload and sextload for a lengthening load followed by another // lengthening load, to quadruple the initial length, but which ends up only // requiring half the available lanes (a 64-bit outcome instead of a 128-bit). @@ -6171,33 +6499,102 @@ multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy, dsub_0)>; } +// The following class definition is basically a copy of the +// Lengthen_HalfDouble definition above, however with an additional VREV16d8 +// instruction to convert data loaded by VLD1LN into proper vector format +// in big endian mode. 
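+// For illustration, the v2i8 -> v2i16 -> v2i32 instantiation defined further
+// below should expand roughly to (a sketch; the VREV16d8 reorders the two
+// bytes that VLD1LNd16 placed in lane 0 before the extending moves run):
+// Lengthen_HalfDouble_Big_Endian<"2", "i32", "i8", "8", "i16", "4", "i32"> =
+//   Pat<(v2i32 (extloadvi8 addrmode6:$addr)),
+//       (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 (VREV16d8
+//           (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+//        dsub_0)), dsub_0)>;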
+multiclass Lengthen_HalfDouble_Big_Endian<string DestLanes, string DestTy, string SrcTy, + string Insn1Lanes, string Insn1Ty, string Insn2Lanes, + string Insn2Ty> { + def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)), + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) + (!cast<Instruction>("VREV16d8") + (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), + dsub_0)), + dsub_0)>; + def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)), + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) + (!cast<Instruction>("VREV16d8") + (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), + dsub_0)), + dsub_0)>; + def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)), + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty) + (!cast<Instruction>("VREV16d8") + (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), + dsub_0)), + dsub_0)>; +} + defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16 defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32 defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64 -defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16 -defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32 +let Predicates = [IsLE] in { + defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16 + defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32 + + // Double lengthening - v4i8 -> v4i16 -> v4i32 + defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">; + // v2i8 -> v2i16 -> v2i32 + defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">; + // v2i16 -> v2i32 -> v2i64 + defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">; +} + +let Predicates = [IsBE] in { + defm : Lengthen_HalfSingle_Big_Endian<"4", "i16", "i8", "8", "i16", "8">; // v4i8 -> v4i16 + defm : Lengthen_HalfSingle_Big_Endian<"2", "i32", "i16", "4", "i32", "16">; // v2i16 -> v2i32 -// Double lengthening - v4i8 -> v4i16 -> v4i32 -defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">; -// v2i8 -> v2i16 -> v2i32 -defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">; -// v2i16 -> v2i32 -> v2i64 -defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">; + // Double lengthening - v4i8 -> v4i16 -> v4i32 + defm : Lengthen_Double_Big_Endian<"4", "i32", "i8", "8", "i16", "4", "i32", "8">; + // v2i8 -> v2i16 -> v2i32 + defm : Lengthen_HalfDouble_Big_Endian<"2", "i32", "i8", "8", "i16", "4", "i32">; + // v2i16 -> v2i32 -> v2i64 + defm : Lengthen_Double_Big_Endian<"2", "i64", "i16", "4", "i32", "2", "i64", "16">; +} // Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64 -def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)), - (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 - (VLD1LNd16 addrmode6:$addr, - (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; -def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)), - (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 - (VLD1LNd16 addrmode6:$addr, - (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; -def : Pat<(v2i64 (sextloadvi8 
addrmode6:$addr)), - (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16 - (VLD1LNd16 addrmode6:$addr, - (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; +let Predicates = [IsLE] in { + def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)), + (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 + (VLD1LNd16 addrmode6:$addr, + (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; + def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)), + (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 + (VLD1LNd16 addrmode6:$addr, + (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; + def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)), + (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16 + (VLD1LNd16 addrmode6:$addr, + (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; +} +// The following patterns are basically a copy of the patterns above, +// however with an additional VREV16d instruction to convert data +// loaded by VLD1LN into proper vector format in big endian mode. +let Predicates = [IsBE] in { + def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)), + (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 + (!cast<Instruction>("VREV16d8") + (VLD1LNd16 addrmode6:$addr, + (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>; + def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)), + (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 + (!cast<Instruction>("VREV16d8") + (VLD1LNd16 addrmode6:$addr, + (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>; + def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)), + (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16 + (!cast<Instruction>("VREV16d8") + (VLD1LNd16 addrmode6:$addr, + (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>; +} //===----------------------------------------------------------------------===// // Assembler aliases @@ -6242,379 +6639,442 @@ defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", // VLD1 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. 
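// Note on the operand classes used below: replacing plain addrmode6 with the
// alignment-specific variants (addrmode6alignNone, addrmode6align16,
// addrmode6align32, ...) lets the assembler diagnose an alignment qualifier
// that the encoding cannot express. Assuming the usual NEON rules for
// single-lane VLD1 (no alignment for byte lanes, :16 for halfword lanes,
// :32 for word lanes):
//   vld1.32 {d16[1]}, [r0:32]   @ accepted  (addrmode6align32)
//   vld1.8  {d16[1]}, [r0:16]   @ rejected  (addrmode6alignNone)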
def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr", - (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr", - (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, + pred:$p)>; def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr", - (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VLD1LNdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!", - (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VLD1LNdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!", - (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, + pred:$p)>; def VLD1LNdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!", - (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VLD1LNdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm", - (ins VecListOneDByteIndexed:$list, addrmode6:$addr, + (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; def VLD1LNdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm", - (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, + (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, rGPR:$Rm, pred:$p)>; def VLD1LNdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm", - (ins VecListOneDWordIndexed:$list, addrmode6:$addr, + (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, rGPR:$Rm, pred:$p)>; // VST1 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. 
def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr", - (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr", - (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, + pred:$p)>; def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr", - (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VST1LNdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!", - (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VST1LNdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!", - (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, + pred:$p)>; def VST1LNdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!", - (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VST1LNdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm", - (ins VecListOneDByteIndexed:$list, addrmode6:$addr, + (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; def VST1LNdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm", - (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, + (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, rGPR:$Rm, pred:$p)>; def VST1LNdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm", - (ins VecListOneDWordIndexed:$list, addrmode6:$addr, + (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, rGPR:$Rm, pred:$p)>; // VLD2 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. 
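// For the two-element (VLD2/VST2) single-lane forms the permitted alignment
// is twice the element size, which presumably is why the operands below are
// addrmode6align16 for ".8", addrmode6align32 for ".16" and addrmode6align64
// for ".32", e.g.:
//   vld2.16 {d16[1], d17[1]}, [r0:32]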
def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr", - (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, + pred:$p)>; def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr", - (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr", - (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, pred:$p)>; def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr", - (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr", - (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VLD2LNdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!", - (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, + pred:$p)>; def VLD2LNdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!", - (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VLD2LNdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!", - (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VLD2LNqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!", - (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VLD2LNqWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!", - (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VLD2LNdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm", - (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, + (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, rGPR:$Rm, pred:$p)>; def VLD2LNdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm", - (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, + (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, rGPR:$Rm, pred:$p)>; def VLD2LNdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm", - (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, + (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VLD2LNqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm", - (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, + (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, rGPR:$Rm, pred:$p)>; def VLD2LNqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm", - (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, + (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; // VST2 single-lane pseudo-instructions. 
These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr", - (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, + pred:$p)>; def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr", - (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr", - (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr", - (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr", - (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VST2LNdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!", - (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, + pred:$p)>; def VST2LNdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!", - (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VST2LNdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!", - (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VST2LNqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!", - (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VST2LNqWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!", - (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VST2LNdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm", - (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, + (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, rGPR:$Rm, pred:$p)>; def VST2LNdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm", - (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, + (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, rGPR:$Rm, pred:$p)>; def VST2LNdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm", - (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, + (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VST2LNqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm", - (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, + (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, rGPR:$Rm, pred:$p)>; def VST2LNqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm", - (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, + (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; // VLD3 all-lanes 
pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", - (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; def VLD3DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", - (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; def VLD3DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", - (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", - (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; def VLD3DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", - (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; def VLD3DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", - (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; def VLD3DUPdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", - (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; def VLD3DUPdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", - (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; def VLD3DUPdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", - (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; def VLD3DUPqWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", - (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; def VLD3DUPqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", - (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; def VLD3DUPqWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", - (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; def VLD3DUPdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", - (ins VecListThreeDAllLanes:$list, addrmode6:$addr, + (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, rGPR:$Rm, pred:$p)>; def VLD3DUPdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", - (ins VecListThreeDAllLanes:$list, addrmode6:$addr, + (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, rGPR:$Rm, pred:$p)>; def VLD3DUPdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", - (ins VecListThreeDAllLanes:$list, addrmode6:$addr, + (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, 
rGPR:$Rm, pred:$p)>; def VLD3DUPqWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", - (ins VecListThreeQAllLanes:$list, addrmode6:$addr, + (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, rGPR:$Rm, pred:$p)>; def VLD3DUPqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", - (ins VecListThreeQAllLanes:$list, addrmode6:$addr, + (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, rGPR:$Rm, pred:$p)>; def VLD3DUPqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", - (ins VecListThreeQAllLanes:$list, addrmode6:$addr, + (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, rGPR:$Rm, pred:$p)>; // VLD3 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", - (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", - (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", - (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", - (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", - (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VLD3LNdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", - (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VLD3LNdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", - (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VLD3LNdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", - (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VLD3LNqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", - (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VLD3LNqWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", - (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VLD3LNdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", - (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, + (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; def VLD3LNdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", - (ins 
VecListThreeDHWordIndexed:$list, addrmode6:$addr, - rGPR:$Rm, pred:$p)>; + (ins VecListThreeDHWordIndexed:$list, + addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; def VLD3LNdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", - (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, + (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; def VLD3LNqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", - (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, - rGPR:$Rm, pred:$p)>; + (ins VecListThreeQHWordIndexed:$list, + addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; def VLD3LNqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", - (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, + (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; // VLD3 multiple structure pseudo-instructions. These need special handling for // the vector operands that the normal instructions don't yet model. // FIXME: Remove these when the register classes and instructions are updated. def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", - (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", - (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", - (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", - (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", - (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", - (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; def VLD3dWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", - (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; def VLD3dWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", - (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; def VLD3dWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", - (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; def VLD3qWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", - (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; def VLD3qWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", - (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; def VLD3qWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", - (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQ:$list, addrmode6align64:$addr, 
pred:$p)>; def VLD3dWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", - (ins VecListThreeD:$list, addrmode6:$addr, + (ins VecListThreeD:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VLD3dWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", - (ins VecListThreeD:$list, addrmode6:$addr, + (ins VecListThreeD:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VLD3dWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", - (ins VecListThreeD:$list, addrmode6:$addr, + (ins VecListThreeD:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VLD3qWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", - (ins VecListThreeQ:$list, addrmode6:$addr, + (ins VecListThreeQ:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VLD3qWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", - (ins VecListThreeQ:$list, addrmode6:$addr, + (ins VecListThreeQ:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VLD3qWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", - (ins VecListThreeQ:$list, addrmode6:$addr, + (ins VecListThreeQ:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; // VST3 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", - (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", - (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", - (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", - (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", - (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VST3LNdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!", - (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VST3LNdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", - (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VST3LNdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", - (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VST3LNqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", - (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VST3LNqWB_fixed_Asm_32 : 
NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", - (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VST3LNdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", - (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, + (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; def VST3LNdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", - (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, - rGPR:$Rm, pred:$p)>; + (ins VecListThreeDHWordIndexed:$list, + addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; def VST3LNdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", - (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, + (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; def VST3LNqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", - (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, - rGPR:$Rm, pred:$p)>; + (ins VecListThreeQHWordIndexed:$list, + addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; def VST3LNqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", - (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, + (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; @@ -6622,168 +7082,190 @@ def VST3LNqWB_register_Asm_32 : // the vector operands that the normal instructions don't yet model. // FIXME: Remove these when the register classes and instructions are updated. def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", - (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", - (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", - (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", - (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", - (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", - (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; def VST3dWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!", - (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; def VST3dWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", - (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; def VST3dWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", - (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; def VST3qWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, 
$addr!", - (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; def VST3qWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", - (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; def VST3qWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", - (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; def VST3dWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", - (ins VecListThreeD:$list, addrmode6:$addr, + (ins VecListThreeD:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VST3dWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", - (ins VecListThreeD:$list, addrmode6:$addr, + (ins VecListThreeD:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VST3dWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", - (ins VecListThreeD:$list, addrmode6:$addr, + (ins VecListThreeD:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VST3qWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", - (ins VecListThreeQ:$list, addrmode6:$addr, + (ins VecListThreeQ:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VST3qWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", - (ins VecListThreeQ:$list, addrmode6:$addr, + (ins VecListThreeQ:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VST3qWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", - (ins VecListThreeQ:$list, addrmode6:$addr, + (ins VecListThreeQ:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; // VLD4 all-lanes pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. 
def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", - (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr, + pred:$p)>; def VLD4DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", - (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr, + pred:$p)>; def VLD4DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", - (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr, + pred:$p)>; def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", - (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr, + pred:$p)>; def VLD4DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", - (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr, + pred:$p)>; def VLD4DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", - (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr, + pred:$p)>; def VLD4DUPdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", - (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr, + pred:$p)>; def VLD4DUPdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", - (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr, + pred:$p)>; def VLD4DUPdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", - (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr, + pred:$p)>; def VLD4DUPqWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", - (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr, + pred:$p)>; def VLD4DUPqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", - (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr, + pred:$p)>; def VLD4DUPqWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", - (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr, + pred:$p)>; def VLD4DUPdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", - (ins VecListFourDAllLanes:$list, addrmode6:$addr, + (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr, rGPR:$Rm, pred:$p)>; def VLD4DUPdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", - (ins VecListFourDAllLanes:$list, addrmode6:$addr, + (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr, rGPR:$Rm, pred:$p)>; def VLD4DUPdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", - (ins VecListFourDAllLanes:$list, addrmode6:$addr, - rGPR:$Rm, pred:$p)>; + (ins VecListFourDAllLanes:$list, + addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>; def VLD4DUPqWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", - (ins 
VecListFourQAllLanes:$list, addrmode6:$addr, + (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr, rGPR:$Rm, pred:$p)>; def VLD4DUPqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", - (ins VecListFourQAllLanes:$list, addrmode6:$addr, + (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr, rGPR:$Rm, pred:$p)>; def VLD4DUPqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", - (ins VecListFourQAllLanes:$list, addrmode6:$addr, - rGPR:$Rm, pred:$p)>; + (ins VecListFourQAllLanes:$list, + addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>; // VLD4 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", - (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", - (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", - (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr, + pred:$p)>; def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", - (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", - (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr, + pred:$p)>; def VLD4LNdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", - (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VLD4LNdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", - (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VLD4LNdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", - (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr, + pred:$p)>; def VLD4LNqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", - (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VLD4LNqWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", - (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr, + pred:$p)>; def VLD4LNdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", - (ins VecListFourDByteIndexed:$list, addrmode6:$addr, + (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, rGPR:$Rm, pred:$p)>; def VLD4LNdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", - (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, + (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def 
VLD4LNdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", - (ins VecListFourDWordIndexed:$list, addrmode6:$addr, - rGPR:$Rm, pred:$p)>; + (ins VecListFourDWordIndexed:$list, + addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>; def VLD4LNqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", - (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, + (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VLD4LNqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", - (ins VecListFourQWordIndexed:$list, addrmode6:$addr, - rGPR:$Rm, pred:$p)>; + (ins VecListFourQWordIndexed:$list, + addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>; @@ -6791,168 +7273,202 @@ def VLD4LNqWB_register_Asm_32 : // the vector operands that the normal instructions don't yet model. // FIXME: Remove these when the register classes and instructions are updated. def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", - (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", - (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", - (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", - (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", - (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", - (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VLD4dWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", - (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VLD4dWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", - (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VLD4dWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", - (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VLD4qWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", - (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VLD4qWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", - (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VLD4qWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", - (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VLD4dWB_register_Asm_8 : 
NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", - (ins VecListFourD:$list, addrmode6:$addr, + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, rGPR:$Rm, pred:$p)>; def VLD4dWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", - (ins VecListFourD:$list, addrmode6:$addr, + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, rGPR:$Rm, pred:$p)>; def VLD4dWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", - (ins VecListFourD:$list, addrmode6:$addr, + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, rGPR:$Rm, pred:$p)>; def VLD4qWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", - (ins VecListFourQ:$list, addrmode6:$addr, + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, rGPR:$Rm, pred:$p)>; def VLD4qWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", - (ins VecListFourQ:$list, addrmode6:$addr, + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, rGPR:$Rm, pred:$p)>; def VLD4qWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", - (ins VecListFourQ:$list, addrmode6:$addr, + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, rGPR:$Rm, pred:$p)>; // VST4 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr", - (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", - (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", - (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr, + pred:$p)>; def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", - (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", - (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr, + pred:$p)>; def VST4LNdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!", - (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VST4LNdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", - (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VST4LNdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", - (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr, + pred:$p)>; def VST4LNqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", - (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VST4LNqWB_fixed_Asm_32 : 
NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", - (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr, + pred:$p)>; def VST4LNdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm", - (ins VecListFourDByteIndexed:$list, addrmode6:$addr, + (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, rGPR:$Rm, pred:$p)>; def VST4LNdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", - (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, + (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VST4LNdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", - (ins VecListFourDWordIndexed:$list, addrmode6:$addr, - rGPR:$Rm, pred:$p)>; + (ins VecListFourDWordIndexed:$list, + addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>; def VST4LNqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", - (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, + (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VST4LNqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", - (ins VecListFourQWordIndexed:$list, addrmode6:$addr, - rGPR:$Rm, pred:$p)>; + (ins VecListFourQWordIndexed:$list, + addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>; // VST4 multiple structure pseudo-instructions. These need special handling for // the vector operands that the normal instructions don't yet model. // FIXME: Remove these when the register classes and instructions are updated. def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr", - (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", - (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", - (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr", - (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", - (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", - (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VST4dWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!", - (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VST4dWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", - (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VST4dWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", - (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourD:$list, 
addrmode6align64or128or256:$addr, + pred:$p)>; def VST4qWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!", - (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VST4qWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", - (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VST4qWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", - (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VST4dWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm", - (ins VecListFourD:$list, addrmode6:$addr, + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, rGPR:$Rm, pred:$p)>; def VST4dWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", - (ins VecListFourD:$list, addrmode6:$addr, + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, rGPR:$Rm, pred:$p)>; def VST4dWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", - (ins VecListFourD:$list, addrmode6:$addr, + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, rGPR:$Rm, pred:$p)>; def VST4qWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm", - (ins VecListFourQ:$list, addrmode6:$addr, + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, rGPR:$Rm, pred:$p)>; def VST4qWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", - (ins VecListFourQ:$list, addrmode6:$addr, + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, rGPR:$Rm, pred:$p)>; def VST4qWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", - (ins VecListFourQ:$list, addrmode6:$addr, + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, rGPR:$Rm, pred:$p)>; // VMOV/VMVN takes an optional datatype suffix diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td index af5ef537b536..e17f73af03ec 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td @@ -269,7 +269,8 @@ class T1SystemEncoding<bits<8> opc> let Inst{7-0} = opc; } -def tHINT : T1pI<(outs), (ins imm0_15:$imm), NoItinerary, "hint", "\t$imm", []>, +def tHINT : T1pI<(outs), (ins imm0_15:$imm), NoItinerary, "hint", "\t$imm", + [(int_arm_hint imm0_15:$imm)]>, T1SystemEncoding<0x00>, Requires<[IsThumb, HasV6M]> { bits<4> imm; @@ -288,7 +289,6 @@ def : tHintAlias<"sev$p", (tHINT 4, pred:$p)>; // A8.6.157 def : tInstAlias<"sevl$p", (tHINT 5, pred:$p)> { let Predicates = [IsThumb2, HasV8]; } -def : T2Pat<(int_arm_sevl), (tHINT 5)>; // The imm operand $val can be used by a debugger to store more information // about the breakpoint. 
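// A minimal sketch (hypothetical assembly) of what the int_arm_hint pattern
// on tHINT buys: the generic hint intrinsic now selects straight to the
// 16-bit encoding, which is why the sevl-only T2Pat above could go:
//
//   hint #4   @ assembles/disassembles as "sev"  (encoding 0xbf40)
//   hint #5   @ assembles/disassembles as "sevl" (encoding 0xbf50, v8 only)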
@@ -300,6 +300,8 @@ def tBKPT : T1I<(outs), (ins imm0_255:$val), NoItinerary, "bkpt\t$val", bits<8> val; let Inst{7-0} = val; } +// default immediate for breakpoint mnemonic +def : InstAlias<"bkpt", (tBKPT 0)>, Requires<[IsThumb]>; def tHLT : T1I<(outs), (ins imm0_63:$val), NoItinerary, "hlt\t$val", []>, T1Encoding<0b101110>, Requires<[IsThumb, HasV8]> { @@ -543,15 +545,15 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { (tBX GPR:$dst, (ops 14, zero_reg))>, Requires<[IsThumb]>, Sched<[WriteBr]>; } - // tTAILJMPd: IOS version uses a Thumb2 branch (no Thumb1 tail calls - // on IOS), so it's in ARMInstrThumb2.td. - // Non-IOS version: + // tTAILJMPd: MachO version uses a Thumb2 branch (no Thumb1 tail calls + // on MachO), so it's in ARMInstrThumb2.td. + // Non-MachO version: let Uses = [SP] in { def tTAILJMPdND : tPseudoExpand<(outs), (ins t_brtarget:$dst, pred:$p), 4, IIC_Br, [], (tB t_brtarget:$dst, pred:$p)>, - Requires<[IsThumb, IsNotIOS]>, Sched<[WriteBr]>; + Requires<[IsThumb, IsNotMachO]>, Sched<[WriteBr]>; } } @@ -1191,6 +1193,15 @@ def tTST : // A8.6.230 [(ARMcmpZ (and_su tGPR:$Rn, tGPR:$Rm), 0)]>, Sched<[WriteALU]>; +// A8.8.247 UDF - Undefined (Encoding T1) +def tUDF : TI<(outs), (ins imm0_255:$imm8), IIC_Br, "udf\t$imm8", + [(int_arm_undefined imm0_255:$imm8)]>, Encoding16 { + bits<8> imm8; + let Inst{15-12} = 0b1101; + let Inst{11-8} = 0b1110; + let Inst{7-0} = imm8; +} + // Zero-extend byte def tUXTB : // A8.6.262 T1pIMiscEncode<{0,0,1,0,1,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm), @@ -1306,10 +1317,35 @@ def : T1Pat<(addc tGPR:$lhs, imm8_255_neg:$rhs), def : T1Pat<(subc tGPR:$lhs, tGPR:$rhs), (tSUBrr tGPR:$lhs, tGPR:$rhs)>; -// ConstantPool, GlobalAddress -def : T1Pat<(ARMWrapper tglobaladdr :$dst), (tLEApcrel tglobaladdr :$dst)>; +// Bswap 16 with load/store +def : T1Pat<(srl (bswap (extloadi16 t_addrmode_rrs2:$addr)), (i32 16)), + (tREV16 (tLDRHr t_addrmode_rrs2:$addr))>; +def : T1Pat<(srl (bswap (extloadi16 t_addrmode_is2:$addr)), (i32 16)), + (tREV16 (tLDRHi t_addrmode_is2:$addr))>; +def : T1Pat<(truncstorei16 (srl (bswap tGPR:$Rn), (i32 16)), + t_addrmode_rrs2:$addr), + (tSTRHr (tREV16 tGPR:$Rn), t_addrmode_rrs2:$addr)>; +def : T1Pat<(truncstorei16 (srl (bswap tGPR:$Rn), (i32 16)), + t_addrmode_is2:$addr), + (tSTRHi(tREV16 tGPR:$Rn), t_addrmode_is2:$addr)>; + +// ConstantPool def : T1Pat<(ARMWrapper tconstpool :$dst), (tLEApcrel tconstpool :$dst)>; +// GlobalAddress +def tLDRLIT_ga_pcrel : PseudoInst<(outs tGPR:$dst), (ins i32imm:$addr), + IIC_iLoadiALU, + [(set tGPR:$dst, + (ARMWrapperPIC tglobaladdr:$addr))]>, + Requires<[IsThumb, DontUseMovt]>; + +def tLDRLIT_ga_abs : PseudoInst<(outs tGPR:$dst), (ins i32imm:$src), + IIC_iLoad_i, + [(set tGPR:$dst, + (ARMWrapper tglobaladdr:$src))]>, + Requires<[IsThumb, DontUseMovt]>; + + // JumpTable def : T1Pat<(ARMWrapperJT tjumptable:$dst, imm:$id), (tLEApcrelJT tjumptable:$dst, imm:$id)>; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td index 48acffd3a64e..85e93516807b 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -1445,7 +1445,7 @@ defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si, // Store doubleword let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs), - (ins GPR:$Rt, GPR:$Rt2, t2addrmode_imm8s4:$addr), + (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr), IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>; // 
Indexed stores @@ -1676,7 +1676,7 @@ defm t2PLI : T2Ipl<0, 1, "pli">, Requires<[IsThumb2,HasV7]>; // pci variant is very similar to i12, but supports negative offsets // from the PC. Only PLD and PLI have pci variants (not PLDW) class T2Iplpci<bits<1> inst, string opc> : T2Iso<(outs), (ins t2ldrlabel:$addr), - IIC_Preload, opc, "\t$addr", + IIC_Preload, opc, "\t$addr", [(ARMPreload (ARMWrapper tconstpool:$addr), (i32 0), (i32 inst))]>, Sched<[WritePreLd]> { let Inst{31-25} = 0b1111100; @@ -1918,7 +1918,7 @@ def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins imm0_65535_expr:$imm), IIC_iMOVi, let DecoderMethod = "DecodeT2MOVTWInstruction"; } -def : t2InstAlias<"mov${p} $Rd, $imm", +def : t2InstAlias<"mov${p} $Rd, $imm", (t2MOVi16 rGPR:$Rd, imm256_65535_expr:$imm, pred:$p)>; def t2MOVi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd), @@ -2407,6 +2407,19 @@ def t2UBFX: T2TwoRegBitFI< let Inst{15} = 0; } +// A8.8.247 UDF - Undefined (Encoding T2) +def t2UDF : T2XI<(outs), (ins imm0_65535:$imm16), IIC_Br, "udf.w\t$imm16", + [(int_arm_undefined imm0_65535:$imm16)]> { + bits<16> imm16; + let Inst{31-29} = 0b111; + let Inst{28-27} = 0b10; + let Inst{26-20} = 0b1111111; + let Inst{19-16} = imm16{15-12}; + let Inst{15} = 0b1; + let Inst{14-12} = 0b010; + let Inst{11-0} = imm16{11-0}; +} + // A8.6.18 BFI - Bitfield insert (Encoding T1) let Constraints = "$src = $Rd" in { def t2BFI : T2TwoRegBitFI<(outs rGPR:$Rd), @@ -3196,27 +3209,28 @@ def t2MOVCCi32imm let hasSideEffects = 1 in { def t2DMB : T2I<(outs), (ins memb_opt:$opt), NoItinerary, "dmb", "\t$opt", [(int_arm_dmb (i32 imm0_15:$opt))]>, - Requires<[HasDB]> { + Requires<[IsThumb, HasDB]> { bits<4> opt; let Inst{31-4} = 0xf3bf8f5; let Inst{3-0} = opt; } -} def t2DSB : T2I<(outs), (ins memb_opt:$opt), NoItinerary, "dsb", "\t$opt", [(int_arm_dsb (i32 imm0_15:$opt))]>, - Requires<[HasDB]> { + Requires<[IsThumb, HasDB]> { bits<4> opt; let Inst{31-4} = 0xf3bf8f4; let Inst{3-0} = opt; } def t2ISB : T2I<(outs), (ins instsyncb_opt:$opt), NoItinerary, - "isb", "\t$opt", []>, Requires<[HasDB]> { + "isb", "\t$opt", [(int_arm_isb (i32 imm0_15:$opt))]>, + Requires<[IsThumb, HasDB]> { bits<4> opt; let Inst{31-4} = 0xf3bf8f6; let Inst{3-0} = opt; } +} class T2I_ldrex<bits<4> opcod, dag oops, dag iops, AddrMode am, int sz, InstrItinClass itin, string opc, string asm, string cstr, @@ -3284,15 +3298,18 @@ def t2LDREXD : T2I_ldrex<0b0111, (outs rGPR:$Rt, rGPR:$Rt2), def t2LDAEXB : T2I_ldrex<0b1100, (outs rGPR:$Rt), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "ldaexb", "\t$Rt, $addr", "", - []>, Requires<[IsThumb, HasV8]>; + [(set rGPR:$Rt, (ldaex_1 addr_offset_none:$addr))]>, + Requires<[IsThumb, HasV8]>; def t2LDAEXH : T2I_ldrex<0b1101, (outs rGPR:$Rt), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "ldaexh", "\t$Rt, $addr", "", - []>, Requires<[IsThumb, HasV8]>; + [(set rGPR:$Rt, (ldaex_2 addr_offset_none:$addr))]>, + Requires<[IsThumb, HasV8]>; def t2LDAEX : Thumb2I<(outs rGPR:$Rt), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "ldaex", "\t$Rt, $addr", "", - []>, Requires<[IsThumb, HasV8]> { + [(set rGPR:$Rt, (ldaex_4 addr_offset_none:$addr))]>, + Requires<[IsThumb, HasV8]> { bits<4> Rt; bits<4> addr; let Inst{31-27} = 0b11101; @@ -3320,21 +3337,21 @@ def t2STREXB : T2I_strex<0b0100, (outs rGPR:$Rd), (ins rGPR:$Rt, addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "strexb", "\t$Rd, $Rt, $addr", "", - [(set rGPR:$Rd, (strex_1 rGPR:$Rt, - addr_offset_none:$addr))]>; + [(set rGPR:$Rd, + (strex_1 rGPR:$Rt, addr_offset_none:$addr))]>; def 
t2STREXH : T2I_strex<0b0101, (outs rGPR:$Rd), (ins rGPR:$Rt, addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "strexh", "\t$Rd, $Rt, $addr", "", - [(set rGPR:$Rd, (strex_2 rGPR:$Rt, - addr_offset_none:$addr))]>; + [(set rGPR:$Rd, + (strex_2 rGPR:$Rt, addr_offset_none:$addr))]>; def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_imm0_1020s4:$addr), AddrModeNone, 4, NoItinerary, "strex", "\t$Rd, $Rt, $addr", "", - [(set rGPR:$Rd, (strex_4 rGPR:$Rt, - t2addrmode_imm0_1020s4:$addr))]> { + [(set rGPR:$Rd, + (strex_4 rGPR:$Rt, t2addrmode_imm0_1020s4:$addr))]> { bits<4> Rd; bits<4> Rt; bits<12> addr; @@ -3358,19 +3375,25 @@ def t2STLEXB : T2I_strex<0b1100, (outs rGPR:$Rd), (ins rGPR:$Rt, addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "stlexb", "\t$Rd, $Rt, $addr", "", - []>, Requires<[IsThumb, HasV8]>; + [(set rGPR:$Rd, + (stlex_1 rGPR:$Rt, addr_offset_none:$addr))]>, + Requires<[IsThumb, HasV8]>; def t2STLEXH : T2I_strex<0b1101, (outs rGPR:$Rd), (ins rGPR:$Rt, addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "stlexh", "\t$Rd, $Rt, $addr", "", - []>, Requires<[IsThumb, HasV8]>; + [(set rGPR:$Rd, + (stlex_2 rGPR:$Rt, addr_offset_none:$addr))]>, + Requires<[IsThumb, HasV8]>; def t2STLEX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "stlex", "\t$Rd, $Rt, $addr", "", - []>, Requires<[IsThumb, HasV8]> { + [(set rGPR:$Rd, + (stlex_4 rGPR:$Rt, addr_offset_none:$addr))]>, + Requires<[IsThumb, HasV8]> { bits<4> Rd; bits<4> Rt; bits<4> addr; @@ -3412,6 +3435,15 @@ def : T2Pat<(strex_1 (and GPR:$Rt, 0xff), addr_offset_none:$addr), def : T2Pat<(strex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr), (t2STREXH GPR:$Rt, addr_offset_none:$addr)>; +def : T2Pat<(and (ldaex_1 addr_offset_none:$addr), 0xff), + (t2LDAEXB addr_offset_none:$addr)>; +def : T2Pat<(and (ldaex_2 addr_offset_none:$addr), 0xffff), + (t2LDAEXH addr_offset_none:$addr)>; +def : T2Pat<(stlex_1 (and GPR:$Rt, 0xff), addr_offset_none:$addr), + (t2STLEXB GPR:$Rt, addr_offset_none:$addr)>; +def : T2Pat<(stlex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr), + (t2STLEXH GPR:$Rt, addr_offset_none:$addr)>; + //===----------------------------------------------------------------------===// // SJLJ Exception handling intrinsics // eh_sjlj_setjmp() is an instruction sequence to store the return @@ -3477,8 +3509,8 @@ def t2B : T2I<(outs), (ins uncondbrtarget:$target), IIC_Br, let Inst{25-16} = target{20-11}; let Inst{10-0} = target{10-0}; let DecoderMethod = "DecodeT2BInstruction"; - let AsmMatchConverter = "cvtThumbBranches"; -} + let AsmMatchConverter = "cvtThumbBranches"; +} let isNotDuplicable = 1, isIndirectBranch = 1 in { def t2BR_JT : t2PseudoInst<(outs), @@ -3549,7 +3581,7 @@ def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br, let AsmMatchConverter = "cvtThumbBranches"; } -// Tail calls. The IOS version of thumb tail calls uses a t2 branch, so +// Tail calls. The MachO version of thumb tail calls uses a t2 branch, so // it goes here. let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { // IOS version. 
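// A rough sketch (hypothetical callee) of the expansion described above: on
// MachO targets a Thumb tail call is emitted as a plain Thumb2 unconditional
// branch in place of the call:
//
//   b.w _callee   @ tTAILJMPd pseudo expands to t2B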
@@ -3558,7 +3590,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { (ins uncondbrtarget:$dst, pred:$p), 4, IIC_Br, [], (t2B uncondbrtarget:$dst, pred:$p)>, - Requires<[IsThumb2, IsIOS]>, Sched<[WriteBr]>; + Requires<[IsThumb2, IsMachO]>, Sched<[WriteBr]>; } // IT block @@ -3653,7 +3685,8 @@ def : t2InstAlias<"cps.w $mode", (t2CPS1p imm0_31:$mode), 0>; // A6.3.4 Branches and miscellaneous control // Table A6-14 Change Processor State, and hint instructions -def t2HINT : T2I<(outs), (ins imm0_239:$imm), NoItinerary, "hint", ".w\t$imm",[]> { +def t2HINT : T2I<(outs), (ins imm0_239:$imm), NoItinerary, "hint", ".w\t$imm", + [(int_arm_hint imm0_239:$imm)]> { bits<8> imm; let Inst{31-3} = 0b11110011101011111000000000000; let Inst{7-0} = imm; @@ -3680,7 +3713,7 @@ def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", []> { // Secure Monitor Call is a system instruction. // Option = Inst{19-16} -def t2SMC : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt", +def t2SMC : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt", []>, Requires<[IsThumb2, HasTrustZone]> { let Inst{31-27} = 0b11110; let Inst{26-20} = 0b1111111; @@ -3781,7 +3814,7 @@ def t2SUBS_PC_LR : T2I <(outs), (ins imm0_255:$imm), NoItinerary, let isReMaterializable = 1, isMoveImm = 1 in def t2MOVi32imm : PseudoInst<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVix2, [(set rGPR:$dst, (i32 imm:$src))]>, - Requires<[IsThumb, HasV6T2]>; + Requires<[IsThumb, UseMovt]>; // Pseudo instruction that combines movw + movt + add pc (if pic). // It also makes it possible to rematerialize the instructions. @@ -3793,15 +3826,9 @@ def t2MOV_ga_pcrel : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr), [(set rGPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>, Requires<[IsThumb2, UseMovt]>; -def t2MOV_ga_dyn : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr), - IIC_iMOVix2, - [(set rGPR:$dst, (ARMWrapperDYN tglobaladdr:$addr))]>, - Requires<[IsThumb2, UseMovt]>; } // ConstantPool, GlobalAddress, and JumpTable -def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2LEApcrel tglobaladdr :$dst)>, - Requires<[IsThumb2, DontUseMovt]>; def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>; def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2MOVi32imm tglobaladdr :$dst)>, Requires<[IsThumb2, UseMovt]>; @@ -4266,7 +4293,7 @@ def : t2InstAlias<"sbc${s}${p} $Rd, $Rn, $ShiftedRm", // Aliases for ADD without the ".w" optional width specifier. 
def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm", - (t2ADDri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, + (t2ADDri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; def : t2InstAlias<"add${p} $Rd, $Rn, $imm", (t2ADDri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095:$imm, pred:$p)>; @@ -4371,7 +4398,7 @@ def : t2InstAlias<"ldrsh${p} $Rt, $addr", (t2LDRSHs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>; def : t2InstAlias<"ldr${p} $Rt, $addr", - (t2LDRpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>; + (t2LDRpci GPRnopc:$Rt, t2ldrlabel:$addr, pred:$p)>; def : t2InstAlias<"ldrb${p} $Rt, $addr", (t2LDRBpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>; def : t2InstAlias<"ldrh${p} $Rt, $addr", diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td index a8cdc5ca0637..55a6efcb4c04 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td @@ -200,13 +200,34 @@ let mayLoad = 1, hasExtraDefRegAllocReq = 1 in defm VLDM : vfp_ldst_mult<"vldm", 1, IIC_fpLoad_m, IIC_fpLoad_mu>; let mayStore = 1, hasExtraSrcRegAllocReq = 1 in -defm VSTM : vfp_ldst_mult<"vstm", 0, IIC_fpLoad_m, IIC_fpLoad_mu>; +defm VSTM : vfp_ldst_mult<"vstm", 0, IIC_fpStore_m, IIC_fpStore_mu>; } // neverHasSideEffects def : MnemonicAlias<"vldm", "vldmia">; def : MnemonicAlias<"vstm", "vstmia">; +// FLDM/FSTM - Load / Store multiple single / double precision registers for +// pre-ARMv6 cores. +// These instructions are deprecated! +def : VFP2MnemonicAlias<"fldmias", "vldmia">; +def : VFP2MnemonicAlias<"fldmdbs", "vldmdb">; +def : VFP2MnemonicAlias<"fldmeas", "vldmdb">; +def : VFP2MnemonicAlias<"fldmfds", "vldmia">; +def : VFP2MnemonicAlias<"fldmiad", "vldmia">; +def : VFP2MnemonicAlias<"fldmdbd", "vldmdb">; +def : VFP2MnemonicAlias<"fldmead", "vldmdb">; +def : VFP2MnemonicAlias<"fldmfdd", "vldmia">; + +def : VFP2MnemonicAlias<"fstmias", "vstmia">; +def : VFP2MnemonicAlias<"fstmdbs", "vstmdb">; +def : VFP2MnemonicAlias<"fstmeas", "vstmia">; +def : VFP2MnemonicAlias<"fstmfds", "vstmdb">; +def : VFP2MnemonicAlias<"fstmiad", "vstmia">; +def : VFP2MnemonicAlias<"fstmdbd", "vstmdb">; +def : VFP2MnemonicAlias<"fstmead", "vstmia">; +def : VFP2MnemonicAlias<"fstmfdd", "vstmdb">; + def : InstAlias<"vpush${p} $r", (VSTMDDB_UPD SP, pred:$p, dpr_reglist:$r)>, Requires<[HasVFP2]>; def : InstAlias<"vpush${p} $r", (VSTMSDB_UPD SP, pred:$p, spr_reglist:$r)>, @@ -247,7 +268,7 @@ multiclass vfp_ldstx_mult<string asm, bit L_bit> { AXXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops), IndexModeUpd, !strconcat(asm, "dbx${p}\t$Rn!, $regs"), "$Rn = $wb", []> { let Inst{24-23} = 0b10; // Decrement Before - let Inst{21} = 1; + let Inst{21} = 1; // Writeback let Inst{20} = L_bit; } } @@ -255,6 +276,12 @@ multiclass vfp_ldstx_mult<string asm, bit L_bit> { defm FLDM : vfp_ldstx_mult<"fldm", 1>; defm FSTM : vfp_ldstx_mult<"fstm", 0>; +def : VFP2MnemonicAlias<"fldmeax", "fldmdbx">; +def : VFP2MnemonicAlias<"fldmfdx", "fldmiax">; + +def : VFP2MnemonicAlias<"fstmeax", "fstmiax">; +def : VFP2MnemonicAlias<"fstmfdx", "fstmdbx">; + //===----------------------------------------------------------------------===// // FP Binary Operations. 
// @@ -524,12 +551,6 @@ def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>; -def : Pat<(f32_to_f16 SPR:$a), - (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>; - -def : Pat<(f16_to_f32 GPR:$a), - (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>; - def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>; @@ -592,6 +613,19 @@ def VCVTTDH : ADuI<0b11101, 0b11, 0b0011, 0b11, 0, let Inst{5} = Dm{4}; } +def : Pat<(fp_to_f16 SPR:$a), + (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>; + +def : Pat<(fp_to_f16 (f64 DPR:$a)), + (i32 (COPY_TO_REGCLASS (VCVTBDH DPR:$a), GPR))>; + +def : Pat<(f16_to_fp GPR:$a), + (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>; + +def : Pat<(f64 (f16_to_fp GPR:$a)), + (VCVTBHD (COPY_TO_REGCLASS GPR:$a, SPR))>; + + multiclass vcvt_inst<string opc, bits<2> rm> { let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in { def SS : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0, @@ -1639,7 +1673,7 @@ def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm), //===----------------------------------------------------------------------===// // Assembler aliases. // -// A few mnemnoic aliases for pre-unifixed syntax. We don't guarantee to +// A few mnemonic aliases for pre-unified syntax. We don't guarantee to // support them all, but supporting at least some of the basics is // good to be friendly. def : VFP2MnemonicAlias<"flds", "vldr">; @@ -1735,3 +1769,14 @@ def : VFP2InstAlias<"vmov${p}.f64 $Dn, $Rt, $Rt2", // VMOVD does. def : VFP2InstAlias<"vmov${p} $Sd, $Sm", (VMOVS SPR:$Sd, SPR:$Sm, pred:$p)>; + +// FCONSTD/FCONSTS alias for vmov.f64/vmov.f32 +// These aliases provide added functionality over vmov.f instructions by +// allowing users to write assembly containing encoded floating point constants +// (e.g. #0x70 vs #1.0). Without these aliases there is no way for the +// assembler to accept encoded fp constants (but the equivalent fp-literal is +// accepted directly by vmovf).
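// For instance (a sketch): with the aliases below, the encoded-immediate and
// fp-literal spellings assemble to the same instruction, imm8 0x70 being the
// VFP3 encoding of 1.0:
//
//   fconsts  s0, #0x70   @ alias form, encoded immediate
//   vmov.f32 s0, #1.0    @ UAL form, fp literal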
+def : VFP3InstAlias<"fconstd${p} $Dd, $val", + (FCONSTD DPR:$Dd, vfp_f64imm:$val, pred:$p)>; +def : VFP3InstAlias<"fconsts${p} $Sd, $val", + (FCONSTS SPR:$Sd, vfp_f32imm:$val, pred:$p)>; diff --git a/contrib/llvm/lib/Target/ARM/ARMJITInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMJITInfo.cpp index 351a290e2aa0..6d1114d51aab 100644 --- a/contrib/llvm/lib/Target/ARM/ARMJITInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMJITInfo.cpp @@ -11,12 +11,11 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "jit" #include "ARMJITInfo.h" -#include "ARM.h" #include "ARMConstantPoolValue.h" +#include "ARMMachineFunctionInfo.h" #include "ARMRelocations.h" -#include "ARMSubtarget.h" +#include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/CodeGen/JITCodeEmitter.h" #include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" @@ -26,6 +25,8 @@ #include <cstdlib> using namespace llvm; +#define DEBUG_TYPE "jit" + void ARMJITInfo::replaceMachineCodeForFunction(void *Old, void *New) { report_fatal_error("ARMJITInfo::replaceMachineCodeForFunction"); } @@ -320,13 +321,13 @@ void ARMJITInfo::relocate(void *Function, MachineRelocation *MR, break; } case ARM::reloc_arm_movw: { - ResultPtr = ResultPtr & 0xFFFF; + ResultPtr = ResultPtr & 0xFFFF; *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF; *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16; break; } case ARM::reloc_arm_movt: { - ResultPtr = (ResultPtr >> 16) & 0xFFFF; + ResultPtr = (ResultPtr >> 16) & 0xFFFF; *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF; *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16; break; @@ -334,3 +335,10 @@ void ARMJITInfo::relocate(void *Function, MachineRelocation *MR, } } } + +void ARMJITInfo::Initialize(const MachineFunction &MF, bool isPIC) { + const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + ConstPoolId2AddrMap.resize(AFI->getNumPICLabels()); + JumpTableId2AddrMap.resize(AFI->getNumJumpTables()); + IsPIC = isPIC; +} diff --git a/contrib/llvm/lib/Target/ARM/ARMJITInfo.h b/contrib/llvm/lib/Target/ARM/ARMJITInfo.h index 23a6a9b512f4..27e2a2013404 100644 --- a/contrib/llvm/lib/Target/ARM/ARMJITInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMJITInfo.h @@ -14,7 +14,6 @@ #ifndef ARMJITINFO_H #define ARMJITINFO_H -#include "ARMMachineFunctionInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineConstantPool.h" @@ -53,45 +52,45 @@ namespace llvm { /// overwriting OLD with a branch to NEW. This is used for self-modifying /// code. /// - virtual void replaceMachineCodeForFunction(void *Old, void *New); + void replaceMachineCodeForFunction(void *Old, void *New) override; /// emitGlobalValueIndirectSym - Use the specified JITCodeEmitter object /// to emit an indirect symbol which contains the address of the specified /// ptr. - virtual void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr, - JITCodeEmitter &JCE); + void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr, + JITCodeEmitter &JCE) override; // getStubLayout - Returns the size and alignment of the largest call stub // on ARM. - virtual StubLayout getStubLayout(); + StubLayout getStubLayout() override; /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a /// small native function that simply calls the function at the specified /// address. 
- virtual void *emitFunctionStub(const Function* F, void *Fn, - JITCodeEmitter &JCE); + void *emitFunctionStub(const Function* F, void *Fn, + JITCodeEmitter &JCE) override; /// getLazyResolverFunction - Expose the lazy resolver to the JIT. - virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn); + LazyResolverFn getLazyResolverFunction(JITCompilerFn) override; /// relocate - Before the JIT can run a block of code that has been emitted, /// it must rewrite the code to contain the actual addresses of any /// referenced global symbols. - virtual void relocate(void *Function, MachineRelocation *MR, - unsigned NumRelocs, unsigned char* GOTBase); + void relocate(void *Function, MachineRelocation *MR, + unsigned NumRelocs, unsigned char* GOTBase) override; /// hasCustomConstantPool - Allows a target to specify that constant /// pool address resolution is handled by the target. - virtual bool hasCustomConstantPool() const { return true; } + bool hasCustomConstantPool() const override { return true; } /// hasCustomJumpTables - Allows a target to specify that jumptables /// are emitted by the target. - virtual bool hasCustomJumpTables() const { return true; } + bool hasCustomJumpTables() const override { return true; } /// allocateSeparateGVMemory - If true, globals should be placed in /// separately allocated heap memory rather than in the same /// code memory allocated by JITCodeEmitter. - virtual bool allocateSeparateGVMemory() const { + bool allocateSeparateGVMemory() const override { #ifdef __APPLE__ return true; #else @@ -103,12 +102,7 @@ namespace llvm { /// Resize constant pool ids to CONSTPOOL_ENTRY addresses map; resize /// jump table ids to jump table bases map; remember if codegen relocation /// model is PIC. - void Initialize(const MachineFunction &MF, bool isPIC) { - const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - ConstPoolId2AddrMap.resize(AFI->getNumPICLabels()); - JumpTableId2AddrMap.resize(AFI->getNumJumpTables()); - IsPIC = isPIC; - } + void Initialize(const MachineFunction &MF, bool isPIC); /// getConstantPoolEntryAddr - The ARM target puts all constant /// pool entries into constant islands. 
This returns the address of the diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 61596d54b692..a03bcdbddd79 100644 --- a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -12,12 +12,14 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "arm-ldst-opt" #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" +#include "ARMISelLowering.h" #include "ARMMachineFunctionInfo.h" +#include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "Thumb1RegisterInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -36,12 +38,13 @@ #include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "arm-ldst-opt" + STATISTIC(NumLDMGened , "Number of ldm instructions generated"); STATISTIC(NumSTMGened , "Number of stm instructions generated"); STATISTIC(NumVLDMGened, "Number of vldm instructions generated"); @@ -65,13 +68,14 @@ namespace { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; const ARMSubtarget *STI; + const TargetLowering *TL; ARMFunctionInfo *AFI; RegScavenger *RS; - bool isThumb2; + bool isThumb1, isThumb2; - virtual bool runOnMachineFunction(MachineFunction &Fn); + bool runOnMachineFunction(MachineFunction &Fn) override; - virtual const char *getPassName() const { + const char *getPassName() const override { return "ARM load / store optimization pass"; } @@ -93,7 +97,10 @@ namespace { void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps, unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd); - + void UpdateBaseRegUses(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc dl, unsigned Base, unsigned WordOffset, + ARMCC::CondCodes Pred, unsigned PredReg); bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, int Offset, unsigned Base, bool BaseKill, int Opcode, ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, @@ -119,7 +126,6 @@ namespace { ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, MemOpQueue &MemOps, SmallVectorImpl<MachineBasicBlock::iterator> &Merges); - void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps); bool FixInvalidRegPairOp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI); @@ -159,6 +165,21 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { case ARM_AM::db: return ARM::STMDB; case ARM_AM::ib: return ARM::STMIB; } + case ARM::tLDRi: + // tLDMIA is writeback-only - unless the base register is in the input + // reglist. + ++NumLDMGened; + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::tLDMIA; + } + case ARM::tSTRi: + // There is no non-writeback tSTMIA either. 
+ ++NumSTMGened; + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::tSTMIA_UPD; + } case ARM::t2LDRi8: case ARM::t2LDRi12: ++NumLDMGened; @@ -217,6 +238,9 @@ AMSubMode getLoadStoreMultipleSubMode(int Opcode) { case ARM::LDMIA_UPD: case ARM::STMIA: case ARM::STMIA_UPD: + case ARM::tLDMIA: + case ARM::tLDMIA_UPD: + case ARM::tSTMIA_UPD: case ARM::t2LDMIA_RET: case ARM::t2LDMIA: case ARM::t2LDMIA_UPD: @@ -263,12 +287,20 @@ AMSubMode getLoadStoreMultipleSubMode(int Opcode) { } // end namespace ARM_AM } // end namespace llvm +static bool isT1i32Load(unsigned Opc) { + return Opc == ARM::tLDRi; +} + static bool isT2i32Load(unsigned Opc) { return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8; } static bool isi32Load(unsigned Opc) { - return Opc == ARM::LDRi12 || isT2i32Load(Opc); + return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ; +} + +static bool isT1i32Store(unsigned Opc) { + return Opc == ARM::tSTRi; } static bool isT2i32Store(unsigned Opc) { @@ -276,7 +308,102 @@ static bool isT2i32Store(unsigned Opc) { } static bool isi32Store(unsigned Opc) { - return Opc == ARM::STRi12 || isT2i32Store(Opc); + return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc); +} + +static unsigned getImmScale(unsigned Opc) { + switch (Opc) { + default: llvm_unreachable("Unhandled opcode!"); + case ARM::tLDRi: + case ARM::tSTRi: + return 1; + case ARM::tLDRHi: + case ARM::tSTRHi: + return 2; + case ARM::tLDRBi: + case ARM::tSTRBi: + return 4; + } +} + +/// Update future uses of the base register with the offset introduced +/// due to writeback. This function only works on Thumb1. +void +ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc dl, unsigned Base, + unsigned WordOffset, + ARMCC::CondCodes Pred, unsigned PredReg) { + assert(isThumb1 && "Can only update base register uses for Thumb1!"); + + // Start updating any instructions with immediate offsets. Insert a sub before + // the first non-updateable instruction (if any). + for (; MBBI != MBB.end(); ++MBBI) { + if (MBBI->readsRegister(Base)) { + unsigned Opc = MBBI->getOpcode(); + int Offset; + bool InsertSub = false; + + if (Opc == ARM::tLDRi || Opc == ARM::tSTRi || + Opc == ARM::tLDRHi || Opc == ARM::tSTRHi || + Opc == ARM::tLDRBi || Opc == ARM::tSTRBi) { + // Loads and stores with immediate offsets can be updated, but only if + // the new offset isn't negative. + // The MachineOperand containing the offset immediate is the last one + // before predicates. + MachineOperand &MO = + MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3); + // The offsets are scaled by 1, 2 or 4 depending on the Opcode + Offset = MO.getImm() - WordOffset * getImmScale(Opc); + if (Offset >= 0) + MO.setImm(Offset); + else + InsertSub = true; + + } else if (Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) { + // SUB/ADD using this register. Merge it with the update. + // If the merged offset is too large, insert a new sub instead. + MachineOperand &MO = + MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3); + Offset = (Opc == ARM::tSUBi8) ? + MO.getImm() + WordOffset * 4 : + MO.getImm() - WordOffset * 4 ; + if (TL->isLegalAddImmediate(Offset)) { + MO.setImm(Offset); + // The base register has now been reset, so exit early. + return; + } else { + InsertSub = true; + } + + } else { + // Can't update the instruction. + InsertSub = true; + } + + if (InsertSub) { + // An instruction above couldn't be updated, so insert a sub. 
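+      // A sketch on hypothetical Thumb1 input: once "ldr r1, [r0]" and
+      // "ldr r2, [r0, #4]" have been merged into "ldm r0!, {r1, r2}", a later
+      // "ldr r3, [r0, #8]" is rebased to "ldr r3, [r0]"; this compensating
+      // sub is only reached when an offset would go negative or an add/sub
+      // cannot absorb the adjustment.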
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base)) + .addReg(Base, getKillRegState(true)).addImm(WordOffset * 4) + .addImm(Pred).addReg(PredReg); + return; + } + } + + if (MBBI->killsRegister(Base)) + // Register got killed. Stop updating. + return; + } + + // The end of the block was reached. This means register liveness escapes the + // block, and it's necessary to insert a sub before the last instruction. + if (MBB.succ_size() > 0) + // But only insert the SUB if there is actually a successor block. + // FIXME: Check more carefully if register is live at this point, e.g. by + // also examining the successor block's register liveness information. + AddDefaultT1CC(BuildMI(MBB, --MBBI, dl, TII->get(ARM::tSUBi8), Base)) + .addReg(Base, getKillRegState(true)).addImm(WordOffset * 4) + .addImm(Pred).addReg(PredReg); } /// MergeOps - Create and insert a LDM or STM with Base as base register and @@ -296,18 +423,19 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, return false; ARM_AM::AMSubMode Mode = ARM_AM::ia; - // VFP and Thumb2 do not support IB or DA modes. + // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA. bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); - bool haveIBAndDA = isNotVFP && !isThumb2; - if (Offset == 4 && haveIBAndDA) + bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1; + + if (Offset == 4 && haveIBAndDA) { Mode = ARM_AM::ib; - else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) + } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) { Mode = ARM_AM::da; - else if (Offset == -4 * (int)NumRegs && isNotVFP) + } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) { // VLDM/VSTM do not support DB mode without also updating the base reg. Mode = ARM_AM::db; - else if (Offset != 0) { - // Check if this is a supported opcode before we insert instructions to + } else if (Offset != 0) { + // Check if this is a supported opcode before inserting instructions to // calculate a new base register. if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false; @@ -318,41 +446,98 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, return false; unsigned NewBase; - if (isi32Load(Opcode)) + if (isi32Load(Opcode)) { // If it is a load, then just use one of the destination register to // use as the new base. NewBase = Regs[NumRegs-1].first; - else { + } else { // Use the scratch register to use as a new base. NewBase = Scratch; if (NewBase == 0) return false; } - int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri; + + int BaseOpc = + isThumb2 ? ARM::t2ADDri : + isThumb1 ? ARM::tADDi8 : ARM::ADDri; + if (Offset < 0) { - BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri; + BaseOpc = + isThumb2 ? ARM::t2SUBri : + isThumb1 ? ARM::tSUBi8 : ARM::SUBri; Offset = - Offset; } - int ImmedOffset = isThumb2 - ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset); - if (ImmedOffset == -1) - // FIXME: Try t2ADDri12 or t2SUBri12? - return false; // Probably not worth it then. - - BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) - .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) - .addImm(Pred).addReg(PredReg).addReg(0); + + if (!TL->isLegalAddImmediate(Offset)) + // FIXME: Try add with register operand? + return false; // Probably not worth it then. + + if (isThumb1) { + if (Base != NewBase) { + // Need to insert a MOV to the new base first. + // FIXME: If the immediate fits in 3 bits, use ADD instead. 
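+      // A sketch of the sequence built here (hypothetical registers):
+      //   mov  r4, r0    @ copy the old base into the scratch/new base
+      //   adds r4, #16   @ tADDi8/tSUBi8 then adjust it in place, since they
+      //                  @ are two-address and only add to their destination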
+ BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase) + .addReg(Base, getKillRegState(BaseKill)) + .addImm(Pred).addReg(PredReg); + } + AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)) + .addReg(NewBase, getKillRegState(true)).addImm(Offset) + .addImm(Pred).addReg(PredReg); + } else { + BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) + .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) + .addImm(Pred).addReg(PredReg).addReg(0); + } + Base = NewBase; - BaseKill = true; // New base is always killed right its use. + BaseKill = true; // New base is always killed straight away. } bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD); + + // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with + // base register writeback. Opcode = getLoadStoreMultipleOpcode(Opcode, Mode); if (!Opcode) return false; - MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)) - .addReg(Base, getKillRegState(BaseKill)) - .addImm(Pred).addReg(PredReg); + + bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback. + + // Exception: If the base register is in the input reglist, Thumb1 LDM is + // non-writeback. Check for this. + if (Opcode == ARM::tLDMIA && isThumb1) + for (unsigned I = 0; I < NumRegs; ++I) + if (Base == Regs[I].first) { + Writeback = false; + break; + } + + MachineInstrBuilder MIB; + + if (Writeback) { + if (Opcode == ARM::tLDMIA) + // Update tLDMIA with writeback if necessary. + Opcode = ARM::tLDMIA_UPD; + + MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)); + + // Thumb1: we might need to set base writeback when building the MI. + MIB.addReg(Base, getDefRegState(true)) + .addReg(Base, getKillRegState(BaseKill)); + + // The base isn't dead after a merged instruction with writeback. Update + // future uses of the base with the added offset (if possible), or reset + // the base register as necessary. + if (!BaseKill) + UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg); + } else { + // No writeback, simply build the MachineInstr. + MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)); + MIB.addReg(Base, getKillRegState(BaseKill)); + } + + MIB.addImm(Pred).addReg(PredReg); + for (unsigned i = 0; i != NumRegs; ++i) MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef) | getKillRegState(Regs[i].second)); @@ -484,7 +669,7 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, return; // Merge succeeded, update records. - Merges.push_back(prior(Loc)); + Merges.push_back(std::prev(Loc)); // In gathering loads together, we may have moved the imp-def of a register // past one of its uses. This is OK, since we know better than the rest of @@ -492,7 +677,7 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, // affected uses. 
for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(), E = UsesOfImpDefs.end(); - I != E; ++I) + I != E; ++I) (*I)->setIsUndef(); for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { @@ -589,7 +774,6 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1; MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset, Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges); - return; } static bool definesCPSR(MachineInstr *MI) { @@ -616,6 +800,7 @@ static bool isMatchingDecrement(MachineInstr *MI, unsigned Base, bool CheckCPSRDef = false; switch (MI->getOpcode()) { default: return false; + case ARM::tSUBi8: case ARM::t2SUBri: case ARM::SUBri: CheckCPSRDef = true; @@ -628,10 +813,11 @@ static bool isMatchingDecrement(MachineInstr *MI, unsigned Base, if (Bytes == 0 || (Limit && Bytes >= Limit)) return false; - unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME + unsigned Scale = (MI->getOpcode() == ARM::tSUBspi || + MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME if (!(MI->getOperand(0).getReg() == Base && MI->getOperand(1).getReg() == Base && - (MI->getOperand(2).getImm()*Scale) == Bytes && + (MI->getOperand(2).getImm() * Scale) == Bytes && getInstrPredicate(MI, MyPredReg) == Pred && MyPredReg == PredReg)) return false; @@ -649,6 +835,7 @@ static bool isMatchingIncrement(MachineInstr *MI, unsigned Base, bool CheckCPSRDef = false; switch (MI->getOpcode()) { default: return false; + case ARM::tADDi8: case ARM::t2ADDri: case ARM::ADDri: CheckCPSRDef = true; @@ -661,10 +848,11 @@ static bool isMatchingIncrement(MachineInstr *MI, unsigned Base, // Make sure the offset fits in 8 bits. return false; - unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME + unsigned Scale = (MI->getOpcode() == ARM::tADDspi || + MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME if (!(MI->getOperand(0).getReg() == Base && MI->getOperand(1).getReg() == Base && - (MI->getOperand(2).getImm()*Scale) == Bytes && + (MI->getOperand(2).getImm() * Scale) == Bytes && getInstrPredicate(MI, MyPredReg) == Pred && MyPredReg == PredReg)) return false; @@ -677,6 +865,8 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { default: return 0; case ARM::LDRi12: case ARM::STRi12: + case ARM::tLDRi: + case ARM::tSTRi: case ARM::t2LDRi8: case ARM::t2LDRi12: case ARM::t2STRi8: @@ -695,6 +885,9 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { case ARM::STMDA: case ARM::STMDB: case ARM::STMIB: + case ARM::tLDMIA: + case ARM::tLDMIA_UPD: + case ARM::tSTMIA_UPD: case ARM::t2LDMIA: case ARM::t2LDMDB: case ARM::t2STMIA: @@ -791,6 +984,9 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool &Advance, MachineBasicBlock::iterator &I) { + // Thumb1 is already using updating loads/stores. + if (isThumb1) return false; + MachineInstr *MI = MBBI; unsigned Base = MI->getOperand(0).getReg(); bool BaseKill = MI->getOperand(0).isKill(); @@ -812,7 +1008,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, // Try merging with the previous instruction. 
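  // A sketch of the fold attempted here (hypothetical input): a matching
  //   sub r0, r0, #16
  //   ldmia r0, {r1-r4}
  // pair collapses into the single writeback form "ldmdb r0!, {r1-r4}".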
MachineBasicBlock::iterator BeginMBBI = MBB.begin(); if (MBBI != BeginMBBI) { - MachineBasicBlock::iterator PrevMBBI = prior(MBBI); + MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI); while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) --PrevMBBI; if (Mode == ARM_AM::ia && @@ -831,7 +1027,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, // Try merging with the next instruction. MachineBasicBlock::iterator EndMBBI = MBB.end(); if (!DoMerge && MBBI != EndMBBI) { - MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI); + MachineBasicBlock::iterator NextMBBI = std::next(MBBI); while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) ++NextMBBI; if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) && @@ -927,6 +1123,10 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, const TargetInstrInfo *TII, bool &Advance, MachineBasicBlock::iterator &I) { + // Thumb1 doesn't have updating LDR/STR. + // FIXME: Use LDM/STM with single register instead. + if (isThumb1) return false; + MachineInstr *MI = MBBI; unsigned Base = MI->getOperand(1).getReg(); bool BaseKill = MI->getOperand(1).isKill(); @@ -959,7 +1159,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, // Try merging with the previous instruction. MachineBasicBlock::iterator BeginMBBI = MBB.begin(); if (MBBI != BeginMBBI) { - MachineBasicBlock::iterator PrevMBBI = prior(MBBI); + MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI); while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) --PrevMBBI; if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) { @@ -978,7 +1178,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, // Try merging with the next instruction. MachineBasicBlock::iterator EndMBBI = MBB.end(); if (!DoMerge && MBBI != EndMBBI) { - MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI); + MachineBasicBlock::iterator NextMBBI = std::next(MBBI); while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) ++NextMBBI; if (!isAM5 && @@ -1002,7 +1202,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, return false; if (isAM5) { - // VLDM[SD}_UPD, VSTM[SD]_UPD + // VLDM[SD]_UPD, VSTM[SD]_UPD // (There are no base-updating versions of VLDR/VSTR instructions, but the // updating load/store-multiple instructions can be used with only one // register.) @@ -1100,6 +1300,8 @@ static bool isMemoryOp(const MachineInstr *MI) { return MI->getOperand(1).isReg(); case ARM::LDRi12: case ARM::STRi12: + case ARM::tLDRi: + case ARM::tSTRi: case ARM::t2LDRi8: case ARM::t2LDRi12: case ARM::t2STRi8: @@ -1122,7 +1324,7 @@ void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) { } if (Loc != MBB.begin()) - RS->forward(prior(Loc)); + RS->forward(std::prev(Loc)); } static int getMemoryOpOffset(const MachineInstr *MI) { @@ -1137,6 +1339,10 @@ static int getMemoryOpOffset(const MachineInstr *MI) { Opcode == ARM::LDRi12 || Opcode == ARM::STRi12) return OffField; + // Thumb1 immediate offsets are scaled by 4 + if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi) + return OffField * 4; + int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4; if (isAM3) { @@ -1232,7 +1438,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, getKillRegState(OddDeadKill) | getUndefRegState(OddUndef)); ++NumSTRD2STM; } - NewBBI = llvm::prior(MBBI); + NewBBI = std::prev(MBBI); } else { // Split into two instructions. 
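    // (Sketch with hypothetical registers: "ldrd r1, r2, [r0]" has an odd
    // first register, which the ARM encoding cannot express, so it becomes
    // "ldr r1, [r0]" followed by "ldr r2, [r0, #4]".)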
unsigned NewOpc = (isLd) @@ -1254,7 +1460,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, OddReg, OddDeadKill, false, BaseReg, false, BaseUndef, false, OffUndef, Pred, PredReg, TII, isT2); - NewBBI = llvm::prior(MBBI); + NewBBI = std::prev(MBBI); InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, EvenReg, EvenDeadKill, false, BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, @@ -1274,7 +1480,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, EvenReg, EvenDeadKill, EvenUndef, BaseReg, false, BaseUndef, false, OffUndef, Pred, PredReg, TII, isT2); - NewBBI = llvm::prior(MBBI); + NewBBI = std::prev(MBBI); InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2, OddReg, OddDeadKill, OddUndef, BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, @@ -1408,18 +1614,22 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { if (MBBI == E) // Reach the end of the block, try merging the memory instructions. TryMerge = true; - } else + } else { TryMerge = true; + } if (TryMerge) { if (NumMemOps > 1) { // Try to find a free register to use as a new base in case it's needed. // First advance to the instruction just before the start of the chain. AdvanceRS(MBB, MemOps); + // Find a scratch register. - unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass); + unsigned Scratch = + RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass); + // Process the load / store instructions. - RS->forward(prior(MBBI)); + RS->forward(std::prev(MBBI)); // Merge ops. Merges.clear(); @@ -1441,13 +1651,13 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { ++NumMerges; // RS may be pointing to an instruction that's deleted. - RS->skipTo(prior(MBBI)); + RS->skipTo(std::prev(MBBI)); } else if (NumMemOps == 1) { // Try folding preceding/trailing base inc/dec into the single // load/store. if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) { ++NumMerges; - RS->forward(prior(MBBI)); + RS->forward(std::prev(MBBI)); } } @@ -1483,6 +1693,8 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { /// => /// ldmfd sp!, {..., pc} bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { + // Thumb1 LDM doesn't allow high registers. 
+ if (isThumb1) return false; if (MBB.empty()) return false; MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); @@ -1490,7 +1702,7 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { (MBBI->getOpcode() == ARM::BX_RET || MBBI->getOpcode() == ARM::tBX_RET || MBBI->getOpcode() == ARM::MOVPCLR)) { - MachineInstr *PrevMI = prior(MBBI); + MachineInstr *PrevMI = std::prev(MBBI); unsigned Opcode = PrevMI->getOpcode(); if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD || Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD || @@ -1513,12 +1725,20 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { const TargetMachine &TM = Fn.getTarget(); + TL = TM.getTargetLowering(); AFI = Fn.getInfo<ARMFunctionInfo>(); TII = TM.getInstrInfo(); TRI = TM.getRegisterInfo(); STI = &TM.getSubtarget<ARMSubtarget>(); RS = new RegScavenger(); isThumb2 = AFI->isThumb2Function(); + isThumb1 = AFI->isThumbFunction() && !isThumb2; + + // FIXME: Temporarily disabling for Thumb-1 due to miscompiles + if (isThumb1) { + delete RS; + return false; + } bool Modified = false; for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; @@ -1550,9 +1770,9 @@ namespace { MachineRegisterInfo *MRI; MachineFunction *MF; - virtual bool runOnMachineFunction(MachineFunction &Fn); + bool runOnMachineFunction(MachineFunction &Fn) override; - virtual const char *getPassName() const { + const char *getPassName() const override { return "ARM pre- register allocation load / store optimization pass"; } @@ -1666,11 +1886,11 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD unsigned Scale = 1; unsigned Opcode = Op0->getOpcode(); - if (Opcode == ARM::LDRi12) + if (Opcode == ARM::LDRi12) { NewOpc = ARM::LDRD; - else if (Opcode == ARM::STRi12) + } else if (Opcode == ARM::STRi12) { NewOpc = ARM::STRD; - else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) { + } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) { NewOpc = ARM::t2LDRDi8; Scale = 4; isT2 = true; @@ -1678,8 +1898,9 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, NewOpc = ARM::t2STRDi8; Scale = 4; isT2 = true; - } else + } else { return false; + } // Make sure the base address satisfies i64 ld / st alignment requirement. // At the moment, we ignore the memoryoperand's value. @@ -1724,17 +1945,6 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, return true; } -namespace { - struct OffsetCompare { - bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const { - int LOffset = getMemoryOpOffset(LHS); - int ROffset = getMemoryOpOffset(RHS); - assert(LHS == RHS || LOffset != ROffset); - return LOffset > ROffset; - } - }; -} - bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, SmallVectorImpl<MachineInstr *> &Ops, unsigned Base, bool isLd, @@ -1742,7 +1952,13 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, bool RetVal = false; // Sort by offset (in reverse order). - std::sort(Ops.begin(), Ops.end(), OffsetCompare()); + std::sort(Ops.begin(), Ops.end(), + [](const MachineInstr *LHS, const MachineInstr *RHS) { + int LOffset = getMemoryOpOffset(LHS); + int ROffset = getMemoryOpOffset(RHS); + assert(LHS == RHS || LOffset != ROffset); + return LOffset > ROffset; + }); // The loads / stores of the same base are in order. 
Scan them from first to // last and check for the following: @@ -1751,8 +1967,8 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, while (Ops.size() > 1) { unsigned FirstLoc = ~0U; unsigned LastLoc = 0; - MachineInstr *FirstOp = 0; - MachineInstr *LastOp = 0; + MachineInstr *FirstOp = nullptr; + MachineInstr *LastOp = nullptr; int LastOffset = 0; unsigned LastOpcode = 0; unsigned LastBytes = 0; diff --git a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp index e12c9c61ab14..023f5f8e37a0 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp @@ -14,25 +14,27 @@ #include "ARM.h" #include "ARMAsmPrinter.h" +#include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMMCExpr.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/Mangler.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" -#include "llvm/Target/Mangler.h" using namespace llvm; MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol) { const MCExpr *Expr; - switch (MO.getTargetFlags()) { + unsigned Option = MO.getTargetFlags() & ARMII::MO_OPTION_MASK; + switch (Option) { default: { Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, OutContext); - switch (MO.getTargetFlags()) { + switch (Option) { default: llvm_unreachable("Unknown target flag on symbol operand"); - case 0: + case ARMII::MO_NO_FLAG: break; case ARMII::MO_LO16: Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, @@ -49,7 +51,7 @@ MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO, } case ARMII::MO_PLT: - Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_ARM_PLT, + Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_PLT, OutContext); break; } @@ -81,9 +83,11 @@ bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO, MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( MO.getMBB()->getSymbol(), OutContext)); break; - case MachineOperand::MO_GlobalAddress: - MCOp = GetSymbolRef(MO, getSymbol(MO.getGlobal())); + case MachineOperand::MO_GlobalAddress: { + MCOp = GetSymbolRef(MO, + GetARMGVSymbol(MO.getGlobal(), MO.getTargetFlags())); break; + } case MachineOperand::MO_ExternalSymbol: MCOp = GetSymbolRef(MO, GetExternalSymbolSymbol(MO.getSymbolName())); diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp index af445e2f35aa..892b269fc181 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp @@ -12,3 +12,13 @@ using namespace llvm; void ARMFunctionInfo::anchor() { } + +ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF) + : isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()), + hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()), + StByValParamsPadding(0), ArgRegsSaveSize(0), HasStackFrame(false), + RestoreSPFromFP(false), LRSpilledForFarJump(false), + FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), + GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), JumpTableUId(0), + PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false), + GlobalBaseReg(0) {} diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h index 010edf33cea4..d3fabc3ebb04 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h +++ 
b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -19,6 +19,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/DenseMap.h" namespace llvm { @@ -38,7 +39,7 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// StByValParamsPadding - For parameter that is split between /// GPRs and memory; while recovering GPRs part, when - /// StackAlignment == 8, and GPRs-part-size mod 8 != 0, + /// StackAlignment > 4, and GPRs-part-size mod StackAlignment != 0, /// we need to insert gap before parameter start address. It allows to /// "attach" GPR-part to the part that was passed via stack. unsigned StByValParamsPadding; @@ -114,6 +115,14 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// relocation models. unsigned GlobalBaseReg; + /// ArgumentStackSize - amount of bytes on stack consumed by the arguments + /// being passed on the stack + unsigned ArgumentStackSize; + + /// CoalescedWeights - mapping of basic blocks to the rolling counter of + /// coalesced weights. + DenseMap<const MachineBasicBlock*, unsigned> CoalescedWeights; + public: ARMFunctionInfo() : isThumb(false), @@ -126,16 +135,7 @@ public: JumpTableUId(0), PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {} - explicit ARMFunctionInfo(MachineFunction &MF) : - isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()), - hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()), - StByValParamsPadding(0), - ArgRegsSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false), - LRSpilledForFarJump(false), - FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), - GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), - JumpTableUId(0), PICLabelUId(0), - VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {} + explicit ARMFunctionInfo(MachineFunction &MF); bool isThumbFunction() const { return isThumb; } bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; } @@ -182,6 +182,9 @@ public: void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; } void setDPRCalleeSavedAreaSize(unsigned s) { DPRCSSize = s; } + unsigned getArgumentStackSize() const { return ArgumentStackSize; } + void setArgumentStackSize(unsigned size) { ArgumentStackSize = size; } + unsigned createJumpTableUId() { return JumpTableUId++; } @@ -213,7 +216,7 @@ public: void recordCPEClone(unsigned CPIdx, unsigned CPCloneIdx) { if (!CPEClones.insert(std::make_pair(CPCloneIdx, CPIdx)).second) - assert(0 && "Duplicate entries!"); + llvm_unreachable("Duplicate entries!"); } unsigned getOriginalCPIdx(unsigned CloneIdx) const { @@ -223,6 +226,15 @@ public: else return -1U; } + + DenseMap<const MachineBasicBlock*, unsigned>::iterator getCoalescedWeight( + MachineBasicBlock* MBB) { + auto It = CoalescedWeights.find(MBB); + if (It == CoalescedWeights.end()) { + It = CoalescedWeights.insert(std::make_pair(MBB, 0)).first; + } + return It; + } }; } // End llvm namespace diff --git a/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp b/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp new file mode 100644 index 000000000000..2a4925572ae8 --- /dev/null +++ b/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp @@ -0,0 +1,101 @@ +//===-- ARMOptimizeBarriersPass - two DMBs without a memory access in between, +//removed one -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===------------------------------------------------------------------------------------------===// + +#include "ARM.h" +#include "ARMMachineFunctionInfo.h" +#include "ARMInstrInfo.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +using namespace llvm; + +#define DEBUG_TYPE "double barriers" + +STATISTIC(NumDMBsRemoved, "Number of DMBs removed"); + +namespace { +class ARMOptimizeBarriersPass : public MachineFunctionPass { +public: + static char ID; + ARMOptimizeBarriersPass() : MachineFunctionPass(ID) {} + + bool runOnMachineFunction(MachineFunction &Fn) override; + + const char *getPassName() const override { + return "optimise barriers pass"; + } + +private: +}; +char ARMOptimizeBarriersPass::ID = 0; +} + +// Returns whether the instruction can safely move past a DMB instruction. +// The current implementation allows this iff MI does not have any possible +// memory access. +static bool CanMovePastDMB(const MachineInstr *MI) { + return !(MI->mayLoad() || + MI->mayStore() || + MI->hasUnmodeledSideEffects() || + MI->isCall() || + MI->isReturn()); +} + +bool ARMOptimizeBarriersPass::runOnMachineFunction(MachineFunction &MF) { + // Vector to store the DMBs we will remove after the first iteration. + std::vector<MachineInstr *> ToRemove; + // DMBType is the Imm value of the first operand. It determines whether it's a + // dmb ish, dmb sy, dmb osh, etc. + int64_t DMBType = -1; + + // Find a DMB. If we can move it until the next DMB, tag the second one for + // removal. + for (auto &MBB : MF) { + // Will be true when we have seen a DMB, and not seen any instruction since + // then that cannot move past a DMB. + bool IsRemovableNextDMB = false; + for (auto &MI : MBB) { + if (MI.getOpcode() == ARM::DMB) { + if (IsRemovableNextDMB) { + // If the Imm of this DMB is the same as that of the last DMB, we can + // tag this second DMB for removal. + if (MI.getOperand(0).getImm() == DMBType) { + ToRemove.push_back(&MI); + } else { + // If it has a different DMBType, we cannot remove it, but will scan + // for the next DMB, recording this DMB's type as the last seen DMB type. + DMBType = MI.getOperand(0).getImm(); + } + } else { + // After we see a DMB, the next one is removable. + IsRemovableNextDMB = true; + DMBType = MI.getOperand(0).getImm(); + } + } else if (!CanMovePastDMB(&MI)) { + // If we find an instruction unable to move past a DMB, the next DMB is + // not removable. + IsRemovableNextDMB = false; + } + } + } + // Remove the tagged DMBs. + for (auto MI : ToRemove) { + MI->eraseFromParent(); + ++NumDMBsRemoved; + } + + return NumDMBsRemoved > 0; +} + +/// createARMOptimizeBarriersPass - Returns an instance of the remove double +/// barriers pass. 
+FunctionPass *llvm::createARMOptimizeBarriersPass() { + return new ARMOptimizeBarriersPass(); +} diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp index a7880364d89a..80b4b4822ce8 100644 --- a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp @@ -12,8 +12,6 @@ //===----------------------------------------------------------------------===// #include "ARMRegisterInfo.h" -#include "ARM.h" -#include "ARMBaseInstrInfo.h" using namespace llvm; void ARMRegisterInfo::anchor() { } diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h index fb1537cf9425..3e6af3f04698 100644 --- a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h @@ -14,9 +14,7 @@ #ifndef ARMREGISTERINFO_H #define ARMREGISTERINFO_H -#include "ARM.h" #include "ARMBaseRegisterInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" namespace llvm { diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td index d0457618ef6f..b290e7f6679c 100644 --- a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td +++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td @@ -116,13 +116,13 @@ def D15 : ARMReg<15, "d15", [S30, S31]>, DwarfRegNum<[271]>; } // VFP3 defines 16 additional double registers -def D16 : ARMFReg<16, "d16">, DwarfRegNum<[272]>; +def D16 : ARMFReg<16, "d16">, DwarfRegNum<[272]>; def D17 : ARMFReg<17, "d17">, DwarfRegNum<[273]>; def D18 : ARMFReg<18, "d18">, DwarfRegNum<[274]>; def D19 : ARMFReg<19, "d19">, DwarfRegNum<[275]>; def D20 : ARMFReg<20, "d20">, DwarfRegNum<[276]>; def D21 : ARMFReg<21, "d21">, DwarfRegNum<[277]>; -def D22 : ARMFReg<22, "d22">, DwarfRegNum<[278]>; +def D22 : ARMFReg<22, "d22">, DwarfRegNum<[278]>; def D23 : ARMFReg<23, "d23">, DwarfRegNum<[279]>; def D24 : ARMFReg<24, "d24">, DwarfRegNum<[280]>; def D25 : ARMFReg<25, "d25">, DwarfRegNum<[281]>; @@ -158,11 +158,11 @@ def Q15 : ARMReg<15, "q15", [D30, D31]>; // Current Program Status Register. // We model fpscr with two registers: FPSCR models the control bits and will be // reserved. FPSCR_NZCV models the flag bits and will be unreserved. APSR_NZCV -// models the APSR when it's accessed by some special instructions. In such cases +// models the APSR when it's accessed by some special instructions. In such cases // it has the same encoding as PC. def CPSR : ARMReg<0, "cpsr">; def APSR : ARMReg<1, "apsr">; -def APSR_NZCV : ARMReg<15, "apsr_nzcv">; +def APSR_NZCV : ARMReg<15, "apsr_nzcv">; def SPSR : ARMReg<2, "spsr">; def FPSCR : ARMReg<3, "fpscr">; def FPSCR_NZCV : ARMReg<3, "fpscr_nzcv"> { @@ -214,7 +214,7 @@ def GPRnopc : RegisterClass<"ARM", [i32], 32, (sub GPR, PC)> { } // GPRs without the PC but with APSR. Some instructions allow accessing the -// APSR, while actually encoding PC in the register field. This is usefull +// APSR, while actually encoding PC in the register field. This is useful // for assembly and disassembly only. 
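As an illustration of the redundancy the new ARMOptimizeBarriersPass targets, consider two back-to-back sequentially consistent fences (a hypothetical source example, not part of this commit). On ARM each fence lowers to an identical dmb, with no memory access in between, so the pass can tag the second barrier for removal:

    #include <atomic>

    // Both fences lower to "dmb ish" on ARMv7; nothing between them is a
    // memory access, so the second DMB matches the pattern removed above.
    void double_fence() {
      std::atomic_thread_fence(std::memory_order_seq_cst);
      std::atomic_thread_fence(std::memory_order_seq_cst);
    }

Whether both fences actually survive to instruction selection depends on the IR passes that run earlier; the sketch only shows the shape of code that can produce adjacent DMBs of the same type.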
def GPRwithAPSR : RegisterClass<"ARM", [i32], 32, (add (sub GPR, PC), APSR_NZCV)> { let AltOrders = [(add LR, GPRnopc), (trunc GPRnopc, 8)]; diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td b/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td index 603e775d351d..9a1d22275646 100644 --- a/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td +++ b/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td @@ -1894,16 +1894,26 @@ def CortexA9Model : SchedMachineModel { let MispredictPenalty = 8; // Based on estimate of pipeline depth. let Itineraries = CortexA9Itineraries; + + // FIXME: Many vector operations were never given an itinerary. We + // haven't mapped these to the new model either. + let CompleteModel = 0; } //===----------------------------------------------------------------------===// // Define each kind of processor resource and number available. +// +// The AGU unit has BufferSize=1 so that the latency between operations +// that use it are considered to stall other operations. +// +// The FP unit has BufferSize=0 so that it is a hard dispatch +// hazard. No instruction may be dispatched while the unit is reserved. let SchedModel = CortexA9Model in { def A9UnitALU : ProcResource<2>; def A9UnitMul : ProcResource<1> { let Super = A9UnitALU; } -def A9UnitAGU : ProcResource<1>; +def A9UnitAGU : ProcResource<1> { let BufferSize = 1; } def A9UnitLS : ProcResource<1>; def A9UnitFP : ProcResource<1> { let BufferSize = 0; } def A9UnitB : ProcResource<1>; @@ -2217,7 +2227,7 @@ def A9WriteLMfp : SchedWriteVariant<[ SchedVar<A9PostRA, [A9WriteLMfpPostRA]>]>; //===----------------------------------------------------------------------===// -// Resources for other (non LDM/VLDM) Variants. +// Resources for other (non-LDM/VLDM) Variants. // These mov immediate writers are unconditionally expanded with // additive latency. @@ -2397,6 +2407,7 @@ def :ItinRW<[A9WriteV3, A9Read2], [IIC_VSUBiD, IIC_VSUBiQ, IIC_VCNTiD]>; // ... // VHADD/VRHADD/VQADD/VTST/VADH/VRADH def :ItinRW<[A9WriteV4, A9Read2, A9Read2], [IIC_VBINi4D, IIC_VBINi4Q]>; + // VSBH/VRSBH/VHSUB/VQSUB/VABD/VCEQ/VCGE/VCGT/VMAX/VMIN/VPMAX/VPMIN/VABDL def :ItinRW<[A9WriteV4, A9Read2], [IIC_VSUBi4D, IIC_VSUBi4Q]>; // VQNEG/VQABS @@ -2431,7 +2442,7 @@ def :ItinRW<[A9WriteV3], [IIC_VSHLiD, IIC_VSHLiQ]>; def :ItinRW<[A9WriteV4], [IIC_VSHLi4D, IIC_VSHLi4Q]>; // NEON permute -def :ItinRW<[A9WriteV2], [IIC_VPERMD, IIC_VPERMQ, IIC_VEXTD]>; +def :ItinRW<[A9WriteV2, A9WriteV2], [IIC_VPERMD, IIC_VPERMQ, IIC_VEXTD]>; def :ItinRW<[A9WriteV3, A9WriteV4, ReadDefault, A9Read2], [IIC_VPERMQ3, IIC_VEXTQ]>; def :ItinRW<[A9WriteV3, A9Read2], [IIC_VTB1]>; diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td b/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td index 8d7dbc246095..b03d5ff44c6e 100644 --- a/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td +++ b/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td @@ -1721,7 +1721,7 @@ let SchedModel = SwiftModel in { SchedVar<SwiftLMAddr3Pred, [SwiftWriteLM9Cy, SwiftWriteLM10Cy, SwiftWriteLM13CyNo, SwiftWriteP01OneCycle, SwiftVLDMPerm3]>, - // Load of a Q register (not neccessarily true). We should not be mapping to + // Load of a Q register (not necessarily true). We should not be mapping to // 4 S registers, either. SchedVar<SwiftLMAddr4Pred, [SwiftWriteLM4Cy, SwiftWriteLM4CyNo, SwiftWriteLM4CyNo, SwiftWriteLM4CyNo]>, @@ -1858,7 +1858,7 @@ let SchedModel = SwiftModel in { // Assume 5 D registers. SchedVar<SwiftLMAddr10Pred, [SwiftWriteSTM6]>, SchedVar<SwiftLMAddr11Pred, [SwiftWriteSTM12]>, - // Asume three Q registers. 
+ // Assume three Q registers. SchedVar<SwiftLMAddr12Pred, [SwiftWriteSTM4]>, SchedVar<SwiftLMAddr13Pred, [SwiftWriteSTM14]>, // Assume 7 D registers. diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleV6.td b/contrib/llvm/lib/Target/ARM/ARMScheduleV6.td index 0ace9bc1796d..57d0bfb65049 100644 --- a/contrib/llvm/lib/Target/ARM/ARMScheduleV6.td +++ b/contrib/llvm/lib/Target/ARM/ARMScheduleV6.td @@ -93,7 +93,7 @@ def ARMV6Itineraries : ProcessorItineraries< InstrItinData<IIC_iMAC32 , [InstrStage<2, [V6_Pipe]>], [5, 1, 1, 2]>, InstrItinData<IIC_iMUL64 , [InstrStage<3, [V6_Pipe]>], [6, 1, 1]>, InstrItinData<IIC_iMAC64 , [InstrStage<3, [V6_Pipe]>], [6, 1, 1, 2]>, - + // Integer load pipeline // // Immediate offset @@ -181,7 +181,7 @@ def ARMV6Itineraries : ProcessorItineraries< // // Store multiple + update InstrItinData<IIC_iStore_mu , [InstrStage<3, [V6_Pipe]>], [2]>, - + // Branch // // no delay slots, so the latency of a branch is unimportant diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp index 93add6ee33cf..3dcc0df349d2 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -11,16 +11,15 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "arm-selectiondag-info" #include "ARMTargetMachine.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/IR/DerivedTypes.h" using namespace llvm; -ARMSelectionDAGInfo::ARMSelectionDAGInfo(const TargetMachine &TM) - : TargetSelectionDAGInfo(TM), - Subtarget(&TM.getSubtarget<ARMSubtarget>()) { -} +#define DEBUG_TYPE "arm-selectiondag-info" + +ARMSelectionDAGInfo::ARMSelectionDAGInfo(const DataLayout &DL) + : TargetSelectionDAGInfo(&DL) {} ARMSelectionDAGInfo::~ARMSelectionDAGInfo() { } @@ -33,6 +32,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { + const ARMSubtarget &Subtarget = DAG.getTarget().getSubtarget<ARMSubtarget>(); // Do repeated 4-byte loads and stores. To be improved. // This requires 4-byte alignment. if ((Align & 3) != 0) @@ -43,7 +43,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, if (!ConstantSize) return SDValue(); uint64_t SizeVal = ConstantSize->getZExtValue(); - if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold()) + if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold()) return SDValue(); unsigned BytesLeft = SizeVal & 3; @@ -52,9 +52,10 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, EVT VT = MVT::i32; unsigned VTSize = 4; unsigned i = 0; - const unsigned MAX_LOADS_IN_LDM = 6; - SDValue TFOps[MAX_LOADS_IN_LDM]; - SDValue Loads[MAX_LOADS_IN_LDM]; + // Emit a maximum of 4 loads in Thumb1 since we have fewer registers + const unsigned MAX_LOADS_IN_LDM = Subtarget.isThumb1Only() ? 
4 : 6; + SDValue TFOps[6]; + SDValue Loads[6]; uint64_t SrcOff = 0, DstOff = 0; // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the @@ -71,7 +72,8 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, TFOps[i] = Loads[i].getValue(1); SrcOff += VTSize; } - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + makeArrayRef(TFOps, i)); for (i = 0; i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { @@ -82,7 +84,8 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, isVolatile, false, 0); DstOff += VTSize; } - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + makeArrayRef(TFOps, i)); EmittedNumMemOps += i; } @@ -112,7 +115,8 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SrcOff += VTSize; BytesLeft -= VTSize; } - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + makeArrayRef(TFOps, i)); i = 0; BytesLeft = BytesLeftSave; @@ -133,7 +137,8 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, DstOff += VTSize; BytesLeft -= VTSize; } - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + makeArrayRef(TFOps, i)); } // Adjust parameters for memset, EABI uses format (ptr, size, value), @@ -145,8 +150,10 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, MachinePointerInfo DstPtrInfo) const { - // Use default for non AAPCS (or Darwin) subtargets - if (!Subtarget->isAAPCS_ABI() || Subtarget->isTargetDarwin()) + const ARMSubtarget &Subtarget = DAG.getTarget().getSubtarget<ARMSubtarget>(); + // Use default for non-AAPCS (or MachO) subtargets + if (!Subtarget.isAAPCS_ABI() || Subtarget.isTargetMachO() || + Subtarget.isTargetWindows()) return SDValue(); const ARMTargetLowering &TLI = @@ -179,22 +186,14 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, Args.push_back(Entry); // Emit __eabi_memset call - TargetLowering::CallLoweringInfo CLI(Chain, - Type::getVoidTy(*DAG.getContext()), // return type - false, // return sign ext - false, // return zero ext - false, // is var arg - false, // is in regs - 0, // number of fixed arguments - TLI.getLibcallCallingConv(RTLIB::MEMSET), // call conv - false, // is tail call - false, // does not return - false, // is return val used - DAG.getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET), - TLI.getPointerTy()), // callee - Args, DAG, dl); - std::pair<SDValue,SDValue> CallResult = - TLI.LowerCallTo(CLI); - + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(Chain) + .setCallee(TLI.getLibcallCallingConv(RTLIB::MEMSET), + Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET), + TLI.getPointerTy()), std::move(Args), 0) + .setDiscardResult(); + + std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI); return CallResult.second; } diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h index 56c9375855dd..13769dc8efe0 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h @@ -36,31 +36,25 @@ namespace ARM_AM { } // end namespace ARM_AM class ARMSelectionDAGInfo : public 
TargetSelectionDAGInfo { - /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can - /// make the right decision when generating code for different targets. - const ARMSubtarget *Subtarget; - public: - explicit ARMSelectionDAGInfo(const TargetMachine &TM); + explicit ARMSelectionDAGInfo(const DataLayout &DL); ~ARMSelectionDAGInfo(); - virtual SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, - MachinePointerInfo SrcPtrInfo) const; + MachinePointerInfo SrcPtrInfo) const override; // Adjust parameters for memset, see RTABI section 4.3.4 - virtual SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Op1, SDValue Op2, SDValue Op3, unsigned Align, bool isVolatile, - MachinePointerInfo DstPtrInfo) const; + MachinePointerInfo DstPtrInfo) const override; }; } diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp index a11629852ad7..c1b4562f4118 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -12,21 +12,33 @@ //===----------------------------------------------------------------------===// #include "ARMSubtarget.h" -#include "ARMBaseInstrInfo.h" -#include "ARMBaseRegisterInfo.h" +#include "ARMFrameLowering.h" +#include "ARMISelLowering.h" +#include "ARMInstrInfo.h" +#include "ARMJITInfo.h" +#include "ARMSelectionDAGInfo.h" +#include "ARMSubtarget.h" +#include "ARMMachineFunctionInfo.h" +#include "Thumb1FrameLowering.h" +#include "Thumb1InstrInfo.h" +#include "Thumb2InstrInfo.h" #include "llvm/IR/Attributes.h" -#include "llvm/IR/GlobalValue.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "arm-subtarget" #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR #include "ARMGenSubtargetInfo.inc" -using namespace llvm; - static cl::opt<bool> ReserveR9("arm-reserve-r9", cl::Hidden, cl::desc("Reserve R9, making it unavailable as GPR")); @@ -74,20 +86,89 @@ IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), "Allow IT blocks based on ARMv7"), clEnumValEnd)); -ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, const TargetOptions &Options) - : ARMGenSubtargetInfo(TT, CPU, FS) - , ARMProcFamily(Others) - , ARMProcClass(None) - , stackAlignment(4) - , CPUString(CPU) - , TargetTriple(TT) - , Options(Options) - , TargetABI(ARM_ABI_APCS) { +static std::string computeDataLayout(ARMSubtarget &ST) { + std::string Ret = ""; + + if (ST.isLittle()) + // Little endian. + Ret += "e"; + else + // Big endian. + Ret += "E"; + + Ret += DataLayout::getManglingComponent(ST.getTargetTriple()); + + // Pointers are 32 bits and aligned to 32 bits. + Ret += "-p:32:32"; + + // On thumb, i16, i8 and i1 have natural alignment requirements, but we try to + // align to 32. + if (ST.isThumb()) + Ret += "-i1:8:32-i8:8:32-i16:16:32"; + + // ABIs other than APCS have 64 bit integers with natural alignment. + if (!ST.isAPCS_ABI()) + Ret += "-i64:64"; + + // We have 64-bit floats. The APCS ABI requires them to be aligned to 32 + // bits, others to 64 bits. We always try to align to 64 bits. 
+ if (ST.isAPCS_ABI()) + Ret += "-f64:32:64"; + + // We have 128 and 64 bit vectors. The APCS ABI aligns them to 32 bits, others + // to 64. We always try to give them natural alignment. + if (ST.isAPCS_ABI()) + Ret += "-v64:32:64-v128:32:128"; + else + Ret += "-v128:64:128"; + + // On thumb and APCS, only try to align aggregates to 32 bits (the default is + // 64 bits). + if (ST.isThumb() || ST.isAPCS_ABI()) + Ret += "-a:0:32"; + + // Integer registers are 32 bits. + Ret += "-n32"; + + // The stack is 128 bit aligned on NaCl, 64 bit aligned on AAPCS and 32 bit + // aligned everywhere else. + if (ST.isTargetNaCl()) + Ret += "-S128"; + else if (ST.isAAPCS_ABI()) + Ret += "-S64"; + else + Ret += "-S32"; + + return Ret; +} + +/// initializeSubtargetDependencies - Initializes using a CPU and feature string +/// so that we can use initializer lists for subtarget initialization. +ARMSubtarget &ARMSubtarget::initializeSubtargetDependencies(StringRef CPU, + StringRef FS) { initializeEnvironment(); resetSubtargetFeatures(CPU, FS); + return *this; } +ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS, TargetMachine &TM, + bool IsLittle, const TargetOptions &Options) + : ARMGenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others), + ARMProcClass(None), stackAlignment(4), CPUString(CPU), IsLittle(IsLittle), + TargetTriple(TT), Options(Options), TargetABI(ARM_ABI_UNKNOWN), + DL(computeDataLayout(initializeSubtargetDependencies(CPU, FS))), + TSInfo(DL), JITInfo(), + InstrInfo(isThumb1Only() + ? (ARMBaseInstrInfo *)new Thumb1InstrInfo(*this) + : !isThumb() + ? (ARMBaseInstrInfo *)new ARMInstrInfo(*this) + : (ARMBaseInstrInfo *)new Thumb2InstrInfo(*this)), + TLInfo(TM), + FrameLowering(!isThumb1Only() + ? new ARMFrameLowering(*this) + : (ARMFrameLowering *)new Thumb1FrameLowering(*this)) {} + void ARMSubtarget::initializeEnvironment() { HasV4TOps = false; HasV5TOps = false; @@ -110,7 +191,6 @@ void ARMSubtarget::initializeEnvironment() { InThumbMode = false; HasThumb2 = false; NoARM = false; - PostRAScheduler = false; IsR9Reserved = ReserveR9; UseMovt = false; SupportsTailCall = false; @@ -131,6 +211,7 @@ void ARMSubtarget::initializeEnvironment() { HasTrustZone = false; HasCrypto = false; HasCRC = false; + HasZeroCycleZeroing = false; AllowsUnalignedMem = false; Thumb2DSP = false; UseNaClTrap = false; @@ -175,10 +256,9 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { } ParseSubtargetFeatures(CPUString, ArchFS); - // Thumb2 implies at least V6T2. FIXME: Fix tests to explicitly specify a - // ARM version or CPU and then remove this. - if (!HasV6T2Ops && hasThumb2()) - HasV4TOps = HasV5TOps = HasV5TEOps = HasV6Ops = HasV6MOps = HasV6T2Ops = true; + // FIXME: This used to enable V6T2 support implicitly for Thumb2 mode. + // Assert this for now to make the change obvious. + assert(hasV6T2Ops() || !hasThumb2()); // Keep a pointer to static instruction cost data for the specified CPU. SchedModel = getSchedModelForCPU(CPUString); @@ -186,34 +266,54 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { // Initialize scheduling itinerary for the specified CPU. InstrItins = getInstrItineraryForCPU(CPUString); - if ((TargetTriple.getTriple().find("eabi") != std::string::npos) || - (isTargetIOS() && isMClass())) - // FIXME: We might want to separate AAPCS and EABI. Some systems, e.g. - // Darwin-EABI conforms to AACPS but not the rest of EABI. 
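Tracing computeDataLayout for one concrete configuration makes the resulting string easier to check. The sketch below assumes a little-endian, ARM-mode (not Thumb), AAPCS, ELF target; it simply mirrors the branches taken above for that case and is not code from this commit:

    #include <iostream>
    #include <string>

    int main() {
      std::string DL = "e";    // little endian
      DL += "-m:e";            // ELF mangling component
      DL += "-p:32:32";        // 32-bit pointers, 32-bit aligned
      DL += "-i64:64";         // non-APCS: naturally aligned i64
      DL += "-v128:64:128";    // non-APCS: 128-bit vectors aligned to 64 bits
      DL += "-n32";            // 32-bit integer registers
      DL += "-S64";            // AAPCS: 64-bit stack alignment
      std::cout << DL << '\n'; // e-m:e-p:32:32-i64:64-v128:64:128-n32-S64
    }

The Thumb-only integer entries, the APCS float and vector overrides, and the "-a:0:32" aggregate entry are all skipped for this configuration, which is why they do not appear in the printed string.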
+ if (TargetABI == ARM_ABI_UNKNOWN) { + switch (TargetTriple.getEnvironment()) { + case Triple::Android: + case Triple::EABI: + case Triple::EABIHF: + case Triple::GNUEABI: + case Triple::GNUEABIHF: + TargetABI = ARM_ABI_AAPCS; + break; + default: + if ((isTargetIOS() && isMClass()) || + (TargetTriple.isOSBinFormatMachO() && + TargetTriple.getOS() == Triple::UnknownOS)) + TargetABI = ARM_ABI_AAPCS; + else + TargetABI = ARM_ABI_APCS; + break; + } + } + + // FIXME: this is invalid for WindowsCE + if (isTargetWindows()) { TargetABI = ARM_ABI_AAPCS; + NoARM = true; + } if (isAAPCS_ABI()) stackAlignment = 8; + if (isTargetNaCl()) + stackAlignment = 16; UseMovt = hasV6T2Ops() && ArmUseMOVT; - if (!isTargetIOS()) { - IsR9Reserved = ReserveR9; - } else { + if (isTargetMachO()) { IsR9Reserved = ReserveR9 | !HasV6Ops; - SupportsTailCall = !getTargetTriple().isOSVersionLT(5, 0); + SupportsTailCall = !isTargetIOS() || !getTargetTriple().isOSVersionLT(5, 0); + } else { + IsR9Reserved = ReserveR9; + SupportsTailCall = !isThumb1Only(); } - if (!isThumb() || hasThumb2()) - PostRAScheduler = true; - switch (Align) { case DefaultAlign: // Assume pre-ARMv6 doesn't support unaligned accesses. // // ARMv6 may or may not support unaligned accesses depending on the // SCTLR.U bit, which is architecture-specific. We assume ARMv6 - // Darwin targets support unaligned accesses, and others don't. + // Darwin and NetBSD targets support unaligned accesses, and others don't. // // ARMv7 always has SCTLR.U set to 1, but it has a new SCTLR.A bit // which raises an alignment fault on unaligned accesses. Linux @@ -222,9 +322,15 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { // Linux targets support unaligned accesses. The same goes for NaCl. // // The above behavior is consistent with GCC. - AllowsUnalignedMem = ( - (hasV7Ops() && (isTargetLinux() || isTargetNaCl())) || - (hasV6Ops() && isTargetDarwin())); + AllowsUnalignedMem = + (hasV7Ops() && (isTargetLinux() || isTargetNaCl() || + isTargetNetBSD())) || + (hasV6Ops() && (isTargetMachO() || isTargetNetBSD())); + // The one exception is cortex-m0, which despite being v6, does not + // support unaligned accesses. Rather than make the above boolean + // expression even more obtuse, just override the value here. + if (isThumb1Only() && isMClass()) + AllowsUnalignedMem = false; break; case StrictAlign: AllowsUnalignedMem = false; @@ -266,7 +372,7 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV, if (GV->isDeclaration() && !GV->isMaterializable()) isDecl = true; - if (!isTargetDarwin()) { + if (!isTargetMachO()) { // Extra load is needed for all externally visible. if (GV->hasLocalLinkage() || GV->hasHiddenVisibility()) return false; @@ -315,10 +421,20 @@ bool ARMSubtarget::hasSinCos() const { !getTargetTriple().isOSVersionLT(7, 0); } -bool ARMSubtarget::enablePostRAScheduler( - CodeGenOpt::Level OptLevel, - TargetSubtargetInfo::AntiDepBreakMode& Mode, - RegClassVector& CriticalPathRCs) const { - Mode = TargetSubtargetInfo::ANTIDEP_NONE; - return PostRAScheduler && OptLevel >= CodeGenOpt::Default; +// This overrides the PostRAScheduler bit in the SchedModel for any CPU. 
+bool ARMSubtarget::enablePostMachineScheduler() const { + return (!isThumb() || hasThumb2()); +} + +bool ARMSubtarget::enableAtomicExpandLoadLinked() const { + return hasAnyDataBarrier() && !isThumb1Only(); +} + +bool ARMSubtarget::useMovt(const MachineFunction &MF) const { + // NOTE Windows on ARM needs to use mov.w/mov.t pairs to materialise 32-bit + // immediates as it is inherently position independent, and may be out of + // range otherwise. + return UseMovt && (isTargetWindows() || + !MF.getFunction()->getAttributes().hasAttribute( + AttributeSet::FunctionIndex, Attribute::MinSize)); } diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h index 5276901bbb92..f8283b08d485 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h @@ -14,8 +14,20 @@ #ifndef ARMSUBTARGET_H #define ARMSUBTARGET_H + +#include "ARMFrameLowering.h" +#include "ARMISelLowering.h" +#include "ARMInstrInfo.h" +#include "ARMJITInfo.h" +#include "ARMSelectionDAGInfo.h" +#include "ARMSubtarget.h" +#include "Thumb1FrameLowering.h" +#include "Thumb1InstrInfo.h" +#include "Thumb2InstrInfo.h" +#include "ARMJITInfo.h" #include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm/ADT/Triple.h" +#include "llvm/IR/DataLayout.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <string> @@ -31,7 +43,8 @@ class TargetOptions; class ARMSubtarget : public ARMGenSubtargetInfo { protected: enum ARMProcFamilyEnum { - Others, CortexA5, CortexA8, CortexA9, CortexA15, CortexR5, Swift, CortexA53, CortexA57 + Others, CortexA5, CortexA7, CortexA8, CortexA9, CortexA12, CortexA15, + CortexR5, Swift, CortexA53, CortexA57, Krait }; enum ARMProcClassEnum { None, AClass, RClass, MClass @@ -92,9 +105,6 @@ protected: /// NoARM - True if subtarget does not support ARM mode execution. bool NoARM; - /// PostRAScheduler - True if using post-register-allocation scheduler. - bool PostRAScheduler; - /// IsR9Reserved - True if R9 is a not available as general purpose register. bool IsR9Reserved; @@ -172,6 +182,10 @@ protected: /// HasCRC - if true, processor supports CRC instructions bool HasCRC; + /// If true, the instructions "vmov.i32 d0, #0" and "vmov.i32 q0, #0" are + /// particularly effective at zeroing a VFP register. + bool HasZeroCycleZeroing; + /// AllowsUnalignedMem - If true, the subtarget allows unaligned memory /// accesses for some types. For details, see /// ARMTargetLowering::allowsUnalignedMemoryAccesses(). @@ -198,6 +212,9 @@ protected: /// CPUString - String name of used CPU. std::string CPUString; + /// IsLittle - The target is Little Endian + bool IsLittle; + /// TargetTriple - What processor and OS we're targeting. Triple TargetTriple; @@ -212,6 +229,7 @@ protected: public: enum { + ARM_ABI_UNKNOWN, ARM_ABI_APCS, ARM_ABI_AAPCS // ARM EABI } TargetABI; @@ -220,22 +238,45 @@ protected: /// of the specified triple. /// ARMSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, const TargetOptions &Options); + const std::string &FS, TargetMachine &TM, bool IsLittle, + const TargetOptions &Options); /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size /// that still makes it profitable to inline the call. unsigned getMaxInlineSizeThreshold() const { - // FIXME: For now, we don't lower memcpy's to loads / stores for Thumb1. - // Change this once Thumb1 ldmia / stmia support is added. - return isThumb1Only() ? 
0 : 64; + return 64; } /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. void ParseSubtargetFeatures(StringRef CPU, StringRef FS); /// \brief Reset the features for the ARM target. - virtual void resetSubtargetFeatures(const MachineFunction *MF); + void resetSubtargetFeatures(const MachineFunction *MF) override; + + /// initializeSubtargetDependencies - Initializes using a CPU and feature string + /// so that we can use initializer lists for subtarget initialization. + ARMSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS); + + const DataLayout *getDataLayout() const { return &DL; } + const ARMSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; } + ARMJITInfo *getJITInfo() { return &JITInfo; } + const ARMBaseInstrInfo *getInstrInfo() const { return InstrInfo.get(); } + const ARMTargetLowering *getTargetLowering() const { return &TLInfo; } + const ARMFrameLowering *getFrameLowering() const { return FrameLowering.get(); } + const ARMBaseRegisterInfo *getRegisterInfo() const { + return &InstrInfo->getRegisterInfo(); + } + private: + const DataLayout DL; + ARMSelectionDAGInfo TSInfo; + ARMJITInfo JITInfo; + // Either Thumb1InstrInfo or Thumb2InstrInfo. + std::unique_ptr<ARMBaseInstrInfo> InstrInfo; + ARMTargetLowering TLInfo; + // Either Thumb1FrameLowering or ARMFrameLowering. + std::unique_ptr<ARMFrameLowering> FrameLowering; + void initializeEnvironment(); void resetSubtargetFeatures(StringRef CPU, StringRef FS); public: @@ -251,13 +292,15 @@ public: bool hasV8Ops() const { return HasV8Ops; } bool isCortexA5() const { return ARMProcFamily == CortexA5; } + bool isCortexA7() const { return ARMProcFamily == CortexA7; } bool isCortexA8() const { return ARMProcFamily == CortexA8; } bool isCortexA9() const { return ARMProcFamily == CortexA9; } bool isCortexA15() const { return ARMProcFamily == CortexA15; } bool isSwift() const { return ARMProcFamily == Swift; } bool isCortexM3() const { return CPUString == "cortex-m3"; } - bool isLikeA9() const { return isCortexA9() || isCortexA15(); } + bool isLikeA9() const { return isCortexA9() || isCortexA15() || isKrait(); } bool isCortexR5() const { return ARMProcFamily == CortexR5; } + bool isKrait() const { return ARMProcFamily == Krait; } bool hasARMOps() const { return !NoARM; } @@ -286,6 +329,7 @@ public: bool isFPOnlySP() const { return FPOnlySP; } bool hasPerfMon() const { return HasPerfMon; } bool hasTrustZone() const { return HasTrustZone; } + bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; } bool prefers32BitThumb() const { return Pref32BitThumb; } bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; } bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; } @@ -299,22 +343,59 @@ public: const Triple &getTargetTriple() const { return TargetTriple; } - bool isTargetIOS() const { return TargetTriple.isiOS(); } bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } - bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); } + bool isTargetIOS() const { return TargetTriple.isiOS(); } bool isTargetLinux() const { return TargetTriple.isOSLinux(); } - bool isTargetELF() const { return !isTargetDarwin(); } + bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); } + bool isTargetNetBSD() const { return TargetTriple.getOS() == Triple::NetBSD; } + bool isTargetWindows() const { return TargetTriple.isOSWindows(); } + + bool isTargetCOFF() const { return 
TargetTriple.isOSBinFormatCOFF(); } + bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } + bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); } + // ARM EABI is the bare-metal EABI described in ARM ABI documents and // can be accessed via -target arm-none-eabi. This is NOT GNUEABI. // FIXME: Add a flag for bare-metal for that target and set Triple::EABI // even for GNUEABI, so we can make a distinction here and still conform to // the EABI on GNU (and Android) mode. This requires change in Clang, too. + // FIXME: The Darwin exception is temporary, while we move users to + // "*-*-*-macho" triples as quickly as possible. bool isTargetAEABI() const { - return TargetTriple.getEnvironment() == Triple::EABI; + return (TargetTriple.getEnvironment() == Triple::EABI || + TargetTriple.getEnvironment() == Triple::EABIHF) && + !isTargetDarwin() && !isTargetWindows(); } - bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; } - bool isAAPCS_ABI() const { return TargetABI == ARM_ABI_AAPCS; } + // ARM Targets that support EHABI exception handling standard + // Darwin uses SjLj. Other targets might need more checks. + bool isTargetEHABICompatible() const { + return (TargetTriple.getEnvironment() == Triple::EABI || + TargetTriple.getEnvironment() == Triple::GNUEABI || + TargetTriple.getEnvironment() == Triple::EABIHF || + TargetTriple.getEnvironment() == Triple::GNUEABIHF || + TargetTriple.getEnvironment() == Triple::Android) && + !isTargetDarwin() && !isTargetWindows(); + } + + bool isTargetHardFloat() const { + // FIXME: this is invalid for WindowsCE + return TargetTriple.getEnvironment() == Triple::GNUEABIHF || + TargetTriple.getEnvironment() == Triple::EABIHF || + isTargetWindows(); + } + bool isTargetAndroid() const { + return TargetTriple.getEnvironment() == Triple::Android; + } + + bool isAPCS_ABI() const { + assert(TargetABI != ARM_ABI_UNKNOWN); + return TargetABI == ARM_ABI_APCS; + } + bool isAAPCS_ABI() const { + assert(TargetABI != ARM_ABI_UNKNOWN); + return TargetABI == ARM_ABI_AAPCS; + } bool isThumb() const { return InThumbMode; } bool isThumb1Only() const { return InThumbMode && !HasThumb2; } @@ -326,7 +407,8 @@ public: bool isR9Reserved() const { return IsR9Reserved; } - bool useMovt() const { return UseMovt && hasV6T2Ops(); } + bool useMovt(const MachineFunction &MF) const; + bool supportsTailCall() const { return SupportsTailCall; } bool allowsUnalignedMem() const { return AllowsUnalignedMem; } @@ -335,16 +417,19 @@ public: const std::string & getCPUString() const { return CPUString; } + bool isLittle() const { return IsLittle; } + unsigned getMispredictionPenalty() const; - + /// This function returns true if the target has sincos() routine in its /// compiler runtime or math libraries. bool hasSinCos() const; - /// enablePostRAScheduler - True at 'More' optimization. - bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, - TargetSubtargetInfo::AntiDepBreakMode& Mode, - RegClassVector& CriticalPathRCs) const; + /// True for some subtargets at > -O0. + bool enablePostMachineScheduler() const override; + + // enableAtomicExpandLoadLinked - True if we need to expand our atomics. + bool enableAtomicExpandLoadLinked() const override; /// getInstrItins - Return the instruction itineraies based on subtarget /// selection. @@ -358,6 +443,7 @@ public: /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect /// symbol. 
bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const; + }; } // End llvm namespace diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp index c2bf78877875..d85194b75ecb 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -10,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#include "ARMTargetMachine.h" #include "ARM.h" +#include "ARMTargetMachine.h" #include "ARMFrameLowering.h" #include "llvm/CodeGen/Passes.h" #include "llvm/MC/MCAsmInfo.h" @@ -24,19 +24,22 @@ using namespace llvm; static cl::opt<bool> -EnableGlobalMerge("global-merge", cl::Hidden, - cl::desc("Enable global merge pass"), - cl::init(true)); - -static cl::opt<bool> DisableA15SDOptimization("disable-a15-sd-optimization", cl::Hidden, cl::desc("Inhibit optimization of S->D register accesses on A15"), cl::init(false)); +static cl::opt<bool> +EnableAtomicTidy("arm-atomic-cfg-tidy", cl::Hidden, + cl::desc("Run SimplifyCFG after expanding atomic operations" + " to make use of cmpxchg flow-based information"), + cl::init(true)); + extern "C" void LLVMInitializeARMTarget() { // Register the target. - RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget); - RegisterTargetMachine<ThumbTargetMachine> Y(TheThumbTarget); + RegisterTargetMachine<ARMLETargetMachine> X(TheARMLETarget); + RegisterTargetMachine<ARMBETargetMachine> Y(TheARMBETarget); + RegisterTargetMachine<ThumbLETargetMachine> A(TheThumbLETarget); + RegisterTargetMachine<ThumbBETargetMachine> B(TheThumbBETarget); } @@ -46,14 +49,14 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) - : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS, Options), - JITInfo(), - InstrItins(Subtarget.getInstrItineraryData()) { - // Default to soft float ABI + CodeGenOpt::Level OL, bool isLittle) + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + Subtarget(TT, CPU, FS, *this, isLittle, Options) { + + // Default to triple-appropriate float ABI if (Options.FloatABIType == FloatABI::Default) - this->Options.FloatABIType = FloatABI::Soft; + this->Options.FloatABIType = + Subtarget.isTargetHardFloat() ? FloatABI::Hard : FloatABI::Soft; } void ARMBaseTargetMachine::addAnalysisPasses(PassManagerBase &PM) { @@ -67,60 +70,65 @@ void ARMBaseTargetMachine::addAnalysisPasses(PassManagerBase &PM) { void ARMTargetMachine::anchor() { } -ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, +ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, StringRef CPU, + StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) - : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - InstrInfo(Subtarget), - DL(Subtarget.isAPCS_ABI() ? - std::string("e-p:32:32-f64:32:64-i64:32:64-" - "v128:32:128-v64:32:64-n32-S32") : - Subtarget.isAAPCS_ABI() ? 
- std::string("e-p:32:32-f64:64:64-i64:64:64-" - "v128:64:128-v64:64:64-n32-S64") : - std::string("e-p:32:32-f64:64:64-i64:64:64-" - "v128:64:128-v64:64:64-n32-S32")), - TLInfo(*this), - TSInfo(*this), - FrameLowering(Subtarget) { + CodeGenOpt::Level OL, bool isLittle) + : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, isLittle) { initAsmInfo(); if (!Subtarget.hasARMOps()) report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not " "support ARM mode execution!"); } +void ARMLETargetMachine::anchor() { } + +ARMLETargetMachine::ARMLETargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} + +void ARMBETargetMachine::anchor() { } + +ARMBETargetMachine::ARMBETargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} + void ThumbTargetMachine::anchor() { } ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) - : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - InstrInfo(Subtarget.hasThumb2() - ? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget)) - : ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))), - DL(Subtarget.isAPCS_ABI() ? - std::string("e-p:32:32-f64:32:64-i64:32:64-" - "i16:16:32-i8:8:32-i1:8:32-" - "v128:32:128-v64:32:64-a:0:32-n32-S32") : - Subtarget.isAAPCS_ABI() ? - std::string("e-p:32:32-f64:64:64-i64:64:64-" - "i16:16:32-i8:8:32-i1:8:32-" - "v128:64:128-v64:64:64-a:0:32-n32-S64") : - std::string("e-p:32:32-f64:64:64-i64:64:64-" - "i16:16:32-i8:8:32-i1:8:32-" - "v128:64:128-v64:64:64-a:0:32-n32-S32")), - TLInfo(*this), - TSInfo(*this), - FrameLowering(Subtarget.hasThumb2() - ? new ARMFrameLowering(Subtarget) - : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) { + CodeGenOpt::Level OL, bool isLittle) + : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, + isLittle) { initAsmInfo(); } +void ThumbLETargetMachine::anchor() { } + +ThumbLETargetMachine::ThumbLETargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} + +void ThumbBETargetMachine::anchor() { } + +ThumbBETargetMachine::ThumbBETargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} + namespace { /// ARM Code Generator Pass Configuration Options. 
class ARMPassConfig : public TargetPassConfig { @@ -136,11 +144,12 @@ public: return *getARMTargetMachine().getSubtargetImpl(); } - virtual bool addPreISel(); - virtual bool addInstSelector(); - virtual bool addPreRegAlloc(); - virtual bool addPreSched2(); - virtual bool addPreEmitPass(); + void addIRPasses() override; + bool addPreISel() override; + bool addInstSelector() override; + bool addPreRegAlloc() override; + bool addPreSched2() override; + bool addPreEmitPass() override; }; } // namespace @@ -148,8 +157,22 @@ TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) { return new ARMPassConfig(this, PM); } +void ARMPassConfig::addIRPasses() { + addPass(createAtomicExpandLoadLinkedPass(TM)); + + // Cmpxchg instructions are often used with a subsequent comparison to + // determine whether it succeeded. We can exploit existing control-flow in + // ldrex/strex loops to simplify this, but it needs tidying up. + const ARMSubtarget *Subtarget = &getARMSubtarget(); + if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) + if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy) + addPass(createCFGSimplificationPass()); + + TargetPassConfig::addIRPasses(); +} + bool ARMPassConfig::addPreISel() { - if (TM->getOptLevel() != CodeGenOpt::None && EnableGlobalMerge) + if (TM->getOptLevel() != CodeGenOpt::None) addPass(createGlobalMergePass(TM)); return false; @@ -166,8 +189,7 @@ bool ARMPassConfig::addInstSelector() { } bool ARMPassConfig::addPreRegAlloc() { - // FIXME: temporarily disabling load / store optimization pass for Thumb1. - if (getOptLevel() != CodeGenOpt::None && !getARMSubtarget().isThumb1Only()) + if (getOptLevel() != CodeGenOpt::None) addPass(createARMLoadStoreOptimizationPass(true)); if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA9()) addPass(createMLxExpansionPass()); @@ -181,12 +203,10 @@ bool ARMPassConfig::addPreRegAlloc() { } bool ARMPassConfig::addPreSched2() { - // FIXME: temporarily disabling load / store optimization pass for Thumb1. 
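The cmpxchg comment in addIRPasses refers to source patterns like the following (a hypothetical example, not from this commit): atomic expansion rewrites the compare-and-exchange into an ldrex/strex loop, and the extra SimplifyCFG run can fold the caller's success check into the control flow that loop already has:

    #include <atomic>

    // The CAS result immediately feeds a branch in the caller; after
    // expansion to an ldrex/strex loop, that branch can reuse the loop's
    // own success/failure control flow instead of re-testing a flag.
    bool try_increment(std::atomic<int> &Counter, int Expected) {
      return Counter.compare_exchange_strong(Expected, Expected + 1);
    }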
if (getOptLevel() != CodeGenOpt::None) { - if (!getARMSubtarget().isThumb1Only()) { - addPass(createARMLoadStoreOptimizationPass()); - printAndVerify("After ARM load / store optimizer"); - } + addPass(createARMLoadStoreOptimizationPass()); + printAndVerify("After ARM load / store optimizer"); + if (getARMSubtarget().hasNEON()) addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass)); } @@ -219,6 +239,7 @@ bool ARMPassConfig::addPreEmitPass() { addPass(&UnpackMachineBundlesID); } + addPass(createARMOptimizeBarriersPass()); addPass(createARMConstantIslandPass()); return true; diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h index d4caf5ca6e19..b72b1df4af83 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h +++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h @@ -14,18 +14,9 @@ #ifndef ARMTARGETMACHINE_H #define ARMTARGETMACHINE_H -#include "ARMFrameLowering.h" -#include "ARMISelLowering.h" #include "ARMInstrInfo.h" -#include "ARMJITInfo.h" -#include "ARMSelectionDAGInfo.h" #include "ARMSubtarget.h" -#include "Thumb1FrameLowering.h" -#include "Thumb1InstrInfo.h" -#include "Thumb2InstrInfo.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/IR/DataLayout.h" -#include "llvm/MC/MCStreamer.h" #include "llvm/Target/TargetMachine.h" namespace llvm { @@ -33,68 +24,76 @@ namespace llvm { class ARMBaseTargetMachine : public LLVMTargetMachine { protected: ARMSubtarget Subtarget; -private: - ARMJITInfo JITInfo; - InstrItineraryData InstrItins; - public: ARMBaseTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL); + CodeGenOpt::Level OL, + bool isLittle); - virtual ARMJITInfo *getJITInfo() { return &JITInfo; } - virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; } - virtual const ARMTargetLowering *getTargetLowering() const { - // Implemented by derived classes - llvm_unreachable("getTargetLowering not implemented"); + const ARMSubtarget *getSubtargetImpl() const override { return &Subtarget; } + const ARMBaseRegisterInfo *getRegisterInfo() const override { + return getSubtargetImpl()->getRegisterInfo(); + } + const ARMTargetLowering *getTargetLowering() const override { + return getSubtargetImpl()->getTargetLowering(); + } + const ARMSelectionDAGInfo *getSelectionDAGInfo() const override { + return getSubtargetImpl()->getSelectionDAGInfo(); + } + const ARMBaseInstrInfo *getInstrInfo() const override { + return getSubtargetImpl()->getInstrInfo(); } - virtual const InstrItineraryData *getInstrItineraryData() const { - return &InstrItins; + const ARMFrameLowering *getFrameLowering() const override { + return getSubtargetImpl()->getFrameLowering(); } + const InstrItineraryData *getInstrItineraryData() const override { + return &getSubtargetImpl()->getInstrItineraryData(); + } + const DataLayout *getDataLayout() const override { + return getSubtargetImpl()->getDataLayout(); + } + ARMJITInfo *getJITInfo() override { return Subtarget.getJITInfo(); } /// \brief Register ARM analysis passes with a pass manager. 
- virtual void addAnalysisPasses(PassManagerBase &PM); + void addAnalysisPasses(PassManagerBase &PM) override; // Pass Pipeline Configuration - virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); + TargetPassConfig *createPassConfig(PassManagerBase &PM) override; - virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &MCE); + bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &MCE) override; }; /// ARMTargetMachine - ARM target machine. /// class ARMTargetMachine : public ARMBaseTargetMachine { virtual void anchor(); - ARMInstrInfo InstrInfo; - const DataLayout DL; // Calculates type size & alignment - ARMTargetLowering TLInfo; - ARMSelectionDAGInfo TSInfo; - ARMFrameLowering FrameLowering; public: - ARMTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL); - - virtual const ARMRegisterInfo *getRegisterInfo() const { - return &InstrInfo.getRegisterInfo(); - } + ARMTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, + CodeModel::Model CM, CodeGenOpt::Level OL, bool isLittle); +}; - virtual const ARMTargetLowering *getTargetLowering() const { - return &TLInfo; - } +/// ARMLETargetMachine - ARM little endian target machine. +/// +class ARMLETargetMachine : public ARMTargetMachine { + void anchor() override; +public: + ARMLETargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); +}; - virtual const ARMSelectionDAGInfo* getSelectionDAGInfo() const { - return &TSInfo; - } - virtual const ARMFrameLowering *getFrameLowering() const { - return &FrameLowering; - } - virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; } - virtual const DataLayout *getDataLayout() const { return &DL; } +/// ARMBETargetMachine - ARM big endian target machine. +/// +class ARMBETargetMachine : public ARMTargetMachine { + void anchor() override; +public: + ARMBETargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, + CodeModel::Model CM, CodeGenOpt::Level OL); }; /// ThumbTargetMachine - Thumb target machine. @@ -103,42 +102,32 @@ class ARMTargetMachine : public ARMBaseTargetMachine { /// class ThumbTargetMachine : public ARMBaseTargetMachine { virtual void anchor(); - // Either Thumb1InstrInfo or Thumb2InstrInfo. - OwningPtr<ARMBaseInstrInfo> InstrInfo; - const DataLayout DL; // Calculates type size & alignment - ARMTargetLowering TLInfo; - ARMSelectionDAGInfo TSInfo; - // Either Thumb1FrameLowering or ARMFrameLowering. 
- OwningPtr<ARMFrameLowering> FrameLowering; public: - ThumbTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL); - - /// returns either Thumb1RegisterInfo or Thumb2RegisterInfo - virtual const ARMBaseRegisterInfo *getRegisterInfo() const { - return &InstrInfo->getRegisterInfo(); - } - - virtual const ARMTargetLowering *getTargetLowering() const { - return &TLInfo; - } + ThumbTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, + CodeModel::Model CM, CodeGenOpt::Level OL, bool isLittle); +}; - virtual const ARMSelectionDAGInfo *getSelectionDAGInfo() const { - return &TSInfo; - } +/// ThumbLETargetMachine - Thumb little endian target machine. +/// +class ThumbLETargetMachine : public ThumbTargetMachine { + void anchor() override; +public: + ThumbLETargetMachine(const Target &T, StringRef TT, StringRef CPU, + StringRef FS, const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); +}; - /// returns either Thumb1InstrInfo or Thumb2InstrInfo - virtual const ARMBaseInstrInfo *getInstrInfo() const { - return InstrInfo.get(); - } - /// returns either Thumb1FrameLowering or ARMFrameLowering - virtual const ARMFrameLowering *getFrameLowering() const { - return FrameLowering.get(); - } - virtual const DataLayout *getDataLayout() const { return &DL; } +/// ThumbBETargetMachine - Thumb big endian target machine. +/// +class ThumbBETargetMachine : public ThumbTargetMachine { + void anchor() override; +public: + ThumbBETargetMachine(const Target &T, StringRef TT, StringRef CPU, + StringRef FS, const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp index 7ec71b20c64d..48238bfcf696 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp @@ -10,13 +10,14 @@ #include "ARMTargetObjectFile.h" #include "ARMSubtarget.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/IR/Mangler.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ELF.h" -#include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetLowering.h" using namespace llvm; using namespace dwarf; @@ -31,7 +32,7 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, InitializeELF(isAAPCS_ABI); if (isAAPCS_ABI) { - LSDASection = NULL; + LSDASection = nullptr; } AttributesSection = @@ -41,13 +42,22 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, SectionKind::getMetadata()); } -const MCExpr *ARMElfTargetObjectFile:: -getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, unsigned Encoding, - MCStreamer &Streamer) const { +const MCExpr *ARMElfTargetObjectFile::getTTypeGlobalReference( + const GlobalValue *GV, unsigned Encoding, Mangler &Mang, + const TargetMachine &TM, MachineModuleInfo *MMI, + MCStreamer &Streamer) const { + if (TM.getMCAsmInfo()->getExceptionHandlingType() != ExceptionHandling::ARM) + return TargetLoweringObjectFileELF::getTTypeGlobalReference( + GV, Encoding, Mang, TM, MMI, Streamer); + assert(Encoding == DW_EH_PE_absptr && "Can handle absptr encoding 
only"); - return MCSymbolRefExpr::Create(getSymbol(*Mang, GV), - MCSymbolRefExpr::VK_ARM_TARGET2, + return MCSymbolRefExpr::Create(TM.getSymbol(GV, Mang), + MCSymbolRefExpr::VK_ARM_TARGET2, getContext()); +} + +const MCExpr *ARMElfTargetObjectFile:: +getDebugThreadLocalSymbol(const MCSymbol *Sym) const { + return MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_ARM_TLSLDO, getContext()); } diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h index 7f60727e5305..c926421848e5 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h +++ b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h @@ -23,19 +23,19 @@ protected: public: ARMElfTargetObjectFile() : TargetLoweringObjectFileELF(), - AttributesSection(NULL) + AttributesSection(nullptr) {} - virtual void Initialize(MCContext &Ctx, const TargetMachine &TM); + void Initialize(MCContext &Ctx, const TargetMachine &TM) override; const MCExpr * - getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, unsigned Encoding, - MCStreamer &Streamer) const; - - virtual const MCSection *getAttributesSection() const { - return AttributesSection; - } + getTTypeGlobalReference(const GlobalValue *GV, unsigned Encoding, + Mangler &Mang, const TargetMachine &TM, + MachineModuleInfo *MMI, + MCStreamer &Streamer) const override; + + /// \brief Describe a TLS variable address within debug info. + const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const override; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 6bbb38facc24..a2ace629baa1 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -14,17 +14,18 @@ /// //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "armtti" #include "ARM.h" #include "ARMTargetMachine.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Support/Debug.h" -#include "llvm/Target/TargetLowering.h" #include "llvm/Target/CostTable.h" +#include "llvm/Target/TargetLowering.h" using namespace llvm; +#define DEBUG_TYPE "armtti" + // Declare the pass initialization routine locally as target-specific passes -// don't havve a target-wide initialization entry point, and so we rely on the +// don't have a target-wide initialization entry point, and so we rely on the // pass constructor initialization. 
namespace llvm { void initializeARMTTIPass(PassRegistry &); @@ -32,7 +33,7 @@ void initializeARMTTIPass(PassRegistry &); namespace { -class ARMTTI : public ImmutablePass, public TargetTransformInfo { +class ARMTTI final : public ImmutablePass, public TargetTransformInfo { const ARMBaseTargetMachine *TM; const ARMSubtarget *ST; const ARMTargetLowering *TLI; @@ -42,7 +43,7 @@ class ARMTTI : public ImmutablePass, public TargetTransformInfo { unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; public: - ARMTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) { + ARMTTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) { llvm_unreachable("This pass cannot be directly constructed"); } @@ -52,15 +53,11 @@ public: initializeARMTTIPass(*PassRegistry::getPassRegistry()); } - virtual void initializePass() { + void initializePass() override { pushTTIStack(this); } - virtual void finalizePass() { - popTTIStack(); - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { TargetTransformInfo::getAnalysisUsage(AU); } @@ -68,7 +65,7 @@ public: static char ID; /// Provide necessary pointer adjustments for the two base classes. - virtual void *getAdjustedAnalysisPointer(const void *ID) { + void *getAdjustedAnalysisPointer(const void *ID) override { if (ID == &TargetTransformInfo::ID) return (TargetTransformInfo*)this; return this; @@ -76,8 +73,8 @@ public: /// \name Scalar TTI Implementations /// @{ - - virtual unsigned getIntImmCost(const APInt &Imm, Type *Ty) const; + using TargetTransformInfo::getIntImmCost; + unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override; /// @} @@ -85,7 +82,7 @@ public: /// \name Vector TTI Implementations /// @{ - unsigned getNumberOfRegisters(bool Vector) const { + unsigned getNumberOfRegisters(bool Vector) const override { if (Vector) { if (ST->hasNEON()) return 16; @@ -94,10 +91,10 @@ public: if (ST->isThumb1Only()) return 8; - return 16; + return 13; } - unsigned getRegisterBitWidth(bool Vector) const { + unsigned getRegisterBitWidth(bool Vector) const override { if (Vector) { if (ST->hasNEON()) return 128; @@ -107,7 +104,7 @@ public: return 32; } - unsigned getMaximumUnrollFactor() const { + unsigned getMaximumUnrollFactor() const override { // These are out of order CPUs: if (ST->isCortexA15() || ST->isSwift()) return 2; @@ -115,23 +112,27 @@ public: } unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, - int Index, Type *SubTp) const; + int Index, Type *SubTp) const override; unsigned getCastInstrCost(unsigned Opcode, Type *Dst, - Type *Src) const; + Type *Src) const override; - unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) const; + unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) const override; - unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const; + unsigned getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) const override; - unsigned getAddressComputationCost(Type *Val, bool IsComplex) const; + unsigned getAddressComputationCost(Type *Val, + bool IsComplex) const override; - unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, - OperandValueKind Op1Info = OK_AnyValue, - OperandValueKind Op2Info = OK_AnyValue) const; + unsigned + getArithmeticInstrCost(unsigned Opcode, Type *Ty, + OperandValueKind Op1Info = OK_AnyValue, + OperandValueKind Op2Info = OK_AnyValue) const override; unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned 
AddressSpace) const; + unsigned AddressSpace) const override; /// @} }; @@ -162,25 +163,25 @@ unsigned ARMTTI::getIntImmCost(const APInt &Imm, Type *Ty) const { (ARM_AM::getSOImmVal(~ZImmVal) != -1)) return 1; return ST->hasV6T2Ops() ? 2 : 3; - } else if (ST->isThumb2()) { + } + if (ST->isThumb2()) { if ((SImmVal >= 0 && SImmVal < 65536) || (ARM_AM::getT2SOImmVal(ZImmVal) != -1) || (ARM_AM::getT2SOImmVal(~ZImmVal) != -1)) return 1; return ST->hasV6T2Ops() ? 2 : 3; - } else /*Thumb1*/ { - if (SImmVal >= 0 && SImmVal < 256) - return 1; - if ((~ZImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal)) - return 2; - // Load from constantpool. - return 3; } - return 2; + // Thumb1. + if (SImmVal >= 0 && SImmVal < 256) + return 1; + if ((~ZImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal)) + return 2; + // Load from constantpool. + return 3; } unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, - Type *Src) const { + Type *Src) const { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -442,34 +443,62 @@ unsigned ARMTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const { unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) const { - // We only handle costs of reverse shuffles for now. - if (Kind != SK_Reverse) + // We only handle costs of reverse and alternate shuffles for now. + if (Kind != SK_Reverse && Kind != SK_Alternate) return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); - static const CostTblEntry<MVT::SimpleValueType> NEONShuffleTbl[] = { - // Reverse shuffle cost one instruction if we are shuffling within a double - // word (vrev) or two if we shuffle a quad word (vrev, vext). - { ISD::VECTOR_SHUFFLE, MVT::v2i32, 1 }, - { ISD::VECTOR_SHUFFLE, MVT::v2f32, 1 }, - { ISD::VECTOR_SHUFFLE, MVT::v2i64, 1 }, - { ISD::VECTOR_SHUFFLE, MVT::v2f64, 1 }, - - { ISD::VECTOR_SHUFFLE, MVT::v4i32, 2 }, - { ISD::VECTOR_SHUFFLE, MVT::v4f32, 2 }, - { ISD::VECTOR_SHUFFLE, MVT::v8i16, 2 }, - { ISD::VECTOR_SHUFFLE, MVT::v16i8, 2 } - }; + if (Kind == SK_Reverse) { + static const CostTblEntry<MVT::SimpleValueType> NEONShuffleTbl[] = { + // Reverse shuffle cost one instruction if we are shuffling within a + // double word (vrev) or two if we shuffle a quad word (vrev, vext). + {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp); + {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v8i16, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}}; - int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); - if (Idx == -1) - return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp); + + int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); + if (Idx == -1) + return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); - return LT.first * NEONShuffleTbl[Idx].Cost; + return LT.first * NEONShuffleTbl[Idx].Cost; + } + if (Kind == SK_Alternate) { + static const CostTblEntry<MVT::SimpleValueType> NEONAltShuffleTbl[] = { + // Alt shuffle cost table for ARM. Cost is the number of instructions + // required to create the shuffled vector. 
+ + {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1}, + + {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v4i16, 2}, + + {ISD::VECTOR_SHUFFLE, MVT::v8i16, 16}, + + {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}}; + + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp); + int Idx = + CostTableLookup(NEONAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); + if (Idx == -1) + return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); + return LT.first * NEONAltShuffleTbl[Idx].Cost; + } + return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); } -unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Op1Info, +unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, + OperandValueKind Op1Info, OperandValueKind Op2Info) const { int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode); @@ -533,7 +562,7 @@ unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueK // creates a sequence of shift, and, or instructions to construct values. // These sequences are recognized by the ISel and have zero-cost. Not so for // the vectorized code. Because we have support for v2i64 but not i64 those - // sequences look particularily beneficial to vectorize. + // sequences look particularly beneficial to vectorize. // To work around this we increase the cost of v2i64 operations to make them // seem less beneficial. if (LT.second == MVT::v2i64 && diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index e3f9e0dc609a..b62706c45fbf 100644 --- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -7,33 +7,40 @@ // //===----------------------------------------------------------------------===// -#include "ARMBuildAttrs.h" #include "ARMFPUName.h" #include "ARMFeatures.h" -#include "llvm/MC/MCTargetAsmParser.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "MCTargetDesc/ARMArchName.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMMCExpr.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler.h" #include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCTargetAsmParser.h" +#include "llvm/Support/ARMBuildAttributes.h" +#include "llvm/Support/ARMEHABI.h" +#include "llvm/Support/COFF.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ELF.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SourceMgr.h" @@ -48,31 +55,90 @@ class ARMOperand; enum VectorLaneTy { NoLanes, AllLanes, IndexedLane }; +class UnwindContext { + MCAsmParser &Parser; + + typedef 
SmallVector<SMLoc, 4> Locs; + + Locs FnStartLocs; + Locs CantUnwindLocs; + Locs PersonalityLocs; + Locs PersonalityIndexLocs; + Locs HandlerDataLocs; + int FPReg; + +public: + UnwindContext(MCAsmParser &P) : Parser(P), FPReg(ARM::SP) {} + + bool hasFnStart() const { return !FnStartLocs.empty(); } + bool cantUnwind() const { return !CantUnwindLocs.empty(); } + bool hasHandlerData() const { return !HandlerDataLocs.empty(); } + bool hasPersonality() const { + return !(PersonalityLocs.empty() && PersonalityIndexLocs.empty()); + } + + void recordFnStart(SMLoc L) { FnStartLocs.push_back(L); } + void recordCantUnwind(SMLoc L) { CantUnwindLocs.push_back(L); } + void recordPersonality(SMLoc L) { PersonalityLocs.push_back(L); } + void recordHandlerData(SMLoc L) { HandlerDataLocs.push_back(L); } + void recordPersonalityIndex(SMLoc L) { PersonalityIndexLocs.push_back(L); } + + void saveFPReg(int Reg) { FPReg = Reg; } + int getFPReg() const { return FPReg; } + + void emitFnStartLocNotes() const { + for (Locs::const_iterator FI = FnStartLocs.begin(), FE = FnStartLocs.end(); + FI != FE; ++FI) + Parser.Note(*FI, ".fnstart was specified here"); + } + void emitCantUnwindLocNotes() const { + for (Locs::const_iterator UI = CantUnwindLocs.begin(), + UE = CantUnwindLocs.end(); UI != UE; ++UI) + Parser.Note(*UI, ".cantunwind was specified here"); + } + void emitHandlerDataLocNotes() const { + for (Locs::const_iterator HI = HandlerDataLocs.begin(), + HE = HandlerDataLocs.end(); HI != HE; ++HI) + Parser.Note(*HI, ".handlerdata was specified here"); + } + void emitPersonalityLocNotes() const { + for (Locs::const_iterator PI = PersonalityLocs.begin(), + PE = PersonalityLocs.end(), + PII = PersonalityIndexLocs.begin(), + PIE = PersonalityIndexLocs.end(); + PI != PE || PII != PIE;) { + if (PI != PE && (PII == PIE || PI->getPointer() < PII->getPointer())) + Parser.Note(*PI++, ".personality was specified here"); + else if (PII != PIE && (PI == PE || PII->getPointer() < PI->getPointer())) + Parser.Note(*PII++, ".personalityindex was specified here"); + else + llvm_unreachable(".personality and .personalityindex cannot be " + "at the same location"); + } + } + + void reset() { + FnStartLocs = Locs(); + CantUnwindLocs = Locs(); + PersonalityLocs = Locs(); + HandlerDataLocs = Locs(); + PersonalityIndexLocs = Locs(); + FPReg = ARM::SP; + } +}; + class ARMAsmParser : public MCTargetAsmParser { MCSubtargetInfo &STI; MCAsmParser &Parser; const MCInstrInfo &MII; const MCRegisterInfo *MRI; + UnwindContext UC; ARMTargetStreamer &getTargetStreamer() { - MCTargetStreamer &TS = getParser().getStreamer().getTargetStreamer(); + MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); return static_cast<ARMTargetStreamer &>(TS); } - // Unwind directives state - SMLoc FnStartLoc; - SMLoc CantUnwindLoc; - SMLoc PersonalityLoc; - SMLoc HandlerDataLoc; - int FPReg; - void resetUnwindDirectiveParserState() { - FnStartLoc = SMLoc(); - CantUnwindLoc = SMLoc(); - PersonalityLoc = SMLoc(); - HandlerDataLoc = SMLoc(); - FPReg = -1; - } - // Map of register aliases registers via the .req directive. 
StringMap<unsigned> RegisterReqs; @@ -111,6 +177,9 @@ class ARMAsmParser : public MCTargetAsmParser { MCAsmParser &getParser() const { return Parser; } MCAsmLexer &getLexer() const { return Parser.getLexer(); } + void Note(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges = None) { + return Parser.Note(L, Msg, Ranges); + } bool Warning(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges = None) { return Parser.Warning(L, Msg, Ranges); @@ -121,15 +190,15 @@ class ARMAsmParser : public MCTargetAsmParser { } int tryParseRegister(); - bool tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &); - int tryParseShiftRegister(SmallVectorImpl<MCParsedAsmOperand*> &); - bool parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &); - bool parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &); - bool parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &, StringRef Mnemonic); + bool tryParseRegisterWithWriteBack(OperandVector &); + int tryParseShiftRegister(OperandVector &); + bool parseRegisterList(OperandVector &); + bool parseMemory(OperandVector &); + bool parseOperand(OperandVector &, StringRef Mnemonic); bool parsePrefix(ARMMCExpr::VariantKind &RefKind); bool parseMemRegOffsetShift(ARM_AM::ShiftOpc &ShiftType, unsigned &ShiftAmount); - bool parseDirectiveWord(unsigned Size, SMLoc L); + bool parseLiteralValues(unsigned Size, SMLoc L); bool parseDirectiveThumb(SMLoc L); bool parseDirectiveARM(SMLoc L); bool parseDirectiveThumbFunc(SMLoc L); @@ -149,6 +218,17 @@ class ARMAsmParser : public MCTargetAsmParser { bool parseDirectiveSetFP(SMLoc L); bool parseDirectivePad(SMLoc L); bool parseDirectiveRegSave(SMLoc L, bool IsVector); + bool parseDirectiveInst(SMLoc L, char Suffix = '\0'); + bool parseDirectiveLtorg(SMLoc L); + bool parseDirectiveEven(SMLoc L); + bool parseDirectivePersonalityIndex(SMLoc L); + bool parseDirectiveUnwindRaw(SMLoc L); + bool parseDirectiveTLSDescSeq(SMLoc L); + bool parseDirectiveMovSP(SMLoc L); + bool parseDirectiveObjectArch(SMLoc L); + bool parseDirectiveArchExtension(SMLoc L); + bool parseDirectiveAlign(SMLoc L); + bool parseDirectiveThumbSet(SMLoc L); StringRef splitMnemonic(StringRef Mnemonic, unsigned &PredicationCode, bool &CarrySetting, unsigned &ProcessorIMod, @@ -202,54 +282,42 @@ class ARMAsmParser : public MCTargetAsmParser { /// } - OperandMatchResultTy parseITCondCode(SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy parseCoprocNumOperand( - SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy parseCoprocRegOperand( - SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy parseCoprocOptionOperand( - SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy parseMemBarrierOptOperand( - SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy parseInstSyncBarrierOptOperand( - SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy parseProcIFlagsOperand( - SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy parseMSRMaskOperand( - SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &O, - StringRef Op, int Low, int High); - OperandMatchResultTy parsePKHLSLImm(SmallVectorImpl<MCParsedAsmOperand*> &O) { + OperandMatchResultTy parseITCondCode(OperandVector &); + OperandMatchResultTy parseCoprocNumOperand(OperandVector &); + OperandMatchResultTy parseCoprocRegOperand(OperandVector &); + OperandMatchResultTy parseCoprocOptionOperand(OperandVector &); + OperandMatchResultTy parseMemBarrierOptOperand(OperandVector &); + OperandMatchResultTy 
parseInstSyncBarrierOptOperand(OperandVector &); + OperandMatchResultTy parseProcIFlagsOperand(OperandVector &); + OperandMatchResultTy parseMSRMaskOperand(OperandVector &); + OperandMatchResultTy parsePKHImm(OperandVector &O, StringRef Op, int Low, + int High); + OperandMatchResultTy parsePKHLSLImm(OperandVector &O) { return parsePKHImm(O, "lsl", 0, 31); } - OperandMatchResultTy parsePKHASRImm(SmallVectorImpl<MCParsedAsmOperand*> &O) { + OperandMatchResultTy parsePKHASRImm(OperandVector &O) { return parsePKHImm(O, "asr", 1, 32); } - OperandMatchResultTy parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy parseRotImm(SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy parseBitfield(SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy parseFPImm(SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy parseVectorList(SmallVectorImpl<MCParsedAsmOperand*>&); + OperandMatchResultTy parseSetEndImm(OperandVector &); + OperandMatchResultTy parseShifterImm(OperandVector &); + OperandMatchResultTy parseRotImm(OperandVector &); + OperandMatchResultTy parseBitfield(OperandVector &); + OperandMatchResultTy parsePostIdxReg(OperandVector &); + OperandMatchResultTy parseAM3Offset(OperandVector &); + OperandMatchResultTy parseFPImm(OperandVector &); + OperandMatchResultTy parseVectorList(OperandVector &); OperandMatchResultTy parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc); // Asm Match Converter Methods - void cvtThumbMultiply(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &); - void cvtThumbBranches(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &); - - bool validateInstruction(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Ops); - bool processInstruction(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Ops); - bool shouldOmitCCOutOperand(StringRef Mnemonic, - SmallVectorImpl<MCParsedAsmOperand*> &Operands); - bool shouldOmitPredicateOperand(StringRef Mnemonic, - SmallVectorImpl<MCParsedAsmOperand*> &Operands); + void cvtThumbMultiply(MCInst &Inst, const OperandVector &); + void cvtThumbBranches(MCInst &Inst, const OperandVector &); + + bool validateInstruction(MCInst &Inst, const OperandVector &Ops); + bool processInstruction(MCInst &Inst, const OperandVector &Ops); + bool shouldOmitCCOutOperand(StringRef Mnemonic, OperandVector &Operands); + bool shouldOmitPredicateOperand(StringRef Mnemonic, OperandVector &Operands); + public: enum ARMMatchResultTy { Match_RequiresITBlock = FIRST_TARGET_MATCH_RESULT_TY, @@ -262,8 +330,9 @@ public: }; ARMAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser, - const MCInstrInfo &MII) - : MCTargetAsmParser(), STI(_STI), Parser(_Parser), MII(MII), FPReg(-1) { + const MCInstrInfo &MII, + const MCTargetOptions &Options) + : MCTargetAsmParser(), STI(_STI), Parser(_Parser), MII(MII), UC(_Parser) { MCAsmParserExtension::Initialize(_Parser); // Cache the MCRegisterInfo. 
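The UnwindContext introduced above replaces the old one-shot SMLoc fields: each unwind directive records every location at which it appears, so a later diagnostic can attach a note per earlier occurrence. A minimal sketch of the intended flow for one directive handler, assuming a parseDirectiveFnStart shaped like the one in this file (illustrative, not the literal hunk):

    bool ARMAsmParser::parseDirectiveFnStart(SMLoc L) {
      if (UC.hasFnStart()) {
        // Complain about the duplicate .fnstart, then point at each earlier one.
        Error(L, ".fnstart starts before the end of previous one");
        UC.emitFnStartLocNotes();
        return false;
      }
      getTargetStreamer().emitFnStart();
      UC.recordFnStart(L); // remembered so a future error can cite this spot
      return false;
    }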
@@ -279,21 +348,20 @@ public: } // Implementation of the MCTargetAsmParser interface: - bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); + bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, - SMLoc NameLoc, - SmallVectorImpl<MCParsedAsmOperand*> &Operands); - bool ParseDirective(AsmToken DirectiveID); + SMLoc NameLoc, OperandVector &Operands) override; + bool ParseDirective(AsmToken DirectiveID) override; - unsigned validateTargetOperandClass(MCParsedAsmOperand *Op, unsigned Kind); - unsigned checkTargetMatchPredicate(MCInst &Inst); + unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, + unsigned Kind) override; + unsigned checkTargetMatchPredicate(MCInst &Inst) override; bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, - SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCStreamer &Out, unsigned &ErrorInfo, - bool MatchingInlineAsm); - void onLabelParsed(MCSymbol *Symbol); - + OperandVector &Operands, MCStreamer &Out, + unsigned &ErrorInfo, + bool MatchingInlineAsm) override; + void onLabelParsed(MCSymbol *Symbol) override; }; } // end anonymous namespace @@ -332,7 +400,7 @@ class ARMOperand : public MCParsedAsmOperand { k_Token } Kind; - SMLoc StartLoc, EndLoc; + SMLoc StartLoc, EndLoc, AlignmentLoc; SmallVector<unsigned, 8> Registers; struct CCOp { @@ -463,8 +531,8 @@ class ARMOperand : public MCParsedAsmOperand { struct BitfieldOp Bitfield; }; - ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} public: + ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} ARMOperand(const ARMOperand &o) : MCParsedAsmOperand() { Kind = o.Kind; StartLoc = o.StartLoc; @@ -542,13 +610,19 @@ public: } /// getStartLoc - Get the location of the first token of this operand. - SMLoc getStartLoc() const { return StartLoc; } + SMLoc getStartLoc() const override { return StartLoc; } /// getEndLoc - Get the location of the last token of this operand. - SMLoc getEndLoc() const { return EndLoc; } + SMLoc getEndLoc() const override { return EndLoc; } /// getLocRange - Get the range between the first and last token of this /// operand. SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); } + /// getAlignmentLoc - Get the location of the Alignment token of this operand. 
+ SMLoc getAlignmentLoc() const { + assert(Kind == k_Memory && "Invalid access!"); + return AlignmentLoc; + } + ARMCC::CondCodes getCondCode() const { assert(Kind == k_CondCode && "Invalid access!"); return CC.Val; @@ -564,7 +638,7 @@ public: return StringRef(Tok.Data, Tok.Length); } - unsigned getReg() const { + unsigned getReg() const override { assert((Kind == k_Register || Kind == k_CCOut) && "Invalid access!"); return Reg.RegNum; } @@ -612,7 +686,7 @@ public: bool isCCOut() const { return Kind == k_CCOut; } bool isITMask() const { return Kind == k_ITCondMask; } bool isITCondCode() const { return Kind == k_CondCode; } - bool isImm() const { return Kind == k_Immediate; } + bool isImm() const override { return Kind == k_Immediate; } // checks whether this operand is an unsigned offset which fits is a field // of specified width and scaled by a specific number of bits template<unsigned width, unsigned scale> @@ -988,14 +1062,14 @@ public: int64_t Value = CE->getValue(); return Value == 1 || Value == 0; } - bool isReg() const { return Kind == k_Register; } + bool isReg() const override { return Kind == k_Register; } bool isRegList() const { return Kind == k_RegisterList; } bool isDPRRegList() const { return Kind == k_DPRRegisterList; } bool isSPRRegList() const { return Kind == k_SPRRegisterList; } - bool isToken() const { return Kind == k_Token; } + bool isToken() const override { return Kind == k_Token; } bool isMemBarrierOpt() const { return Kind == k_MemBarrierOpt; } bool isInstSyncBarrierOpt() const { return Kind == k_InstSyncBarrierOpt; } - bool isMem() const { return Kind == k_Memory; } + bool isMem() const override { return Kind == k_Memory; } bool isShifterImm() const { return Kind == k_ShifterImmediate; } bool isRegShiftedReg() const { return Kind == k_ShiftedRegister; } bool isRegShiftedImm() const { return Kind == k_ShiftedImmediate; } @@ -1005,12 +1079,12 @@ public: bool isPostIdxReg() const { return Kind == k_PostIndexRegister && PostIdxReg.ShiftTy ==ARM_AM::no_shift; } - bool isMemNoOffset(bool alignOK = false) const { + bool isMemNoOffset(bool alignOK = false, unsigned Alignment = 0) const { if (!isMem()) return false; // No offset of any kind. - return Memory.OffsetRegNum == 0 && Memory.OffsetImm == 0 && - (alignOK || Memory.Alignment == 0); + return Memory.OffsetRegNum == 0 && Memory.OffsetImm == nullptr && + (alignOK || Memory.Alignment == Alignment); } bool isMemPCRelImm12() const { if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) @@ -1026,6 +1100,65 @@ public: bool isAlignedMemory() const { return isMemNoOffset(true); } + bool isAlignedMemoryNone() const { + return isMemNoOffset(false, 0); + } + bool isDupAlignedMemoryNone() const { + return isMemNoOffset(false, 0); + } + bool isAlignedMemory16() const { + if (isMemNoOffset(false, 2)) // alignment in bytes for 16-bits is 2. + return true; + return isMemNoOffset(false, 0); + } + bool isDupAlignedMemory16() const { + if (isMemNoOffset(false, 2)) // alignment in bytes for 16-bits is 2. + return true; + return isMemNoOffset(false, 0); + } + bool isAlignedMemory32() const { + if (isMemNoOffset(false, 4)) // alignment in bytes for 32-bits is 4. + return true; + return isMemNoOffset(false, 0); + } + bool isDupAlignedMemory32() const { + if (isMemNoOffset(false, 4)) // alignment in bytes for 32-bits is 4. + return true; + return isMemNoOffset(false, 0); + } + bool isAlignedMemory64() const { + if (isMemNoOffset(false, 8)) // alignment in bytes for 64-bits is 8. 
+ return true; + return isMemNoOffset(false, 0); + } + bool isDupAlignedMemory64() const { + if (isMemNoOffset(false, 8)) // alignment in bytes for 64-bits is 8. + return true; + return isMemNoOffset(false, 0); + } + bool isAlignedMemory64or128() const { + if (isMemNoOffset(false, 8)) // alignment in bytes for 64-bits is 8. + return true; + if (isMemNoOffset(false, 16)) // alignment in bytes for 128-bits is 16. + return true; + return isMemNoOffset(false, 0); + } + bool isDupAlignedMemory64or128() const { + if (isMemNoOffset(false, 8)) // alignment in bytes for 64-bits is 8. + return true; + if (isMemNoOffset(false, 16)) // alignment in bytes for 128-bits is 16. + return true; + return isMemNoOffset(false, 0); + } + bool isAlignedMemory64or128or256() const { + if (isMemNoOffset(false, 8)) // alignment in bytes for 64-bits is 8. + return true; + if (isMemNoOffset(false, 16)) // alignment in bytes for 128-bits is 16. + return true; + if (isMemNoOffset(false, 32)) // alignment in bytes for 256-bits is 32. + return true; + return isMemNoOffset(false, 0); + } bool isAddrMode2() const { if (!isMem() || Memory.Alignment != 0) return false; // Check for register offset. @@ -1282,6 +1415,7 @@ public: } bool isVecListDPairSpaced() const { + if (Kind != k_VectorList) return false; if (isSingleSpacedVectorList()) return false; return (ARMMCRegisterClasses[ARM::DPairSpcRegClassID] .contains(VectorList.RegNum)); @@ -1460,7 +1594,10 @@ public: } bool isNEONi16splat() const { - if (!isImm()) return false; + if (isNEONByteReplicate(2)) + return false; // Leave that for bytes replication and forbid by default. + if (!isImm()) + return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); // Must be a constant. if (!CE) return false; @@ -1470,7 +1607,10 @@ public: } bool isNEONi32splat() const { - if (!isImm()) return false; + if (isNEONByteReplicate(4)) + return false; // Leave that for bytes replication and forbid by default. + if (!isImm()) + return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); // Must be a constant. if (!CE) return false; @@ -1482,11 +1622,36 @@ public: (Value >= 0x01000000 && Value <= 0xff000000); } + bool isNEONByteReplicate(unsigned NumBytes) const { + if (!isImm()) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + // Must be a constant. + if (!CE) + return false; + int64_t Value = CE->getValue(); + if (!Value) + return false; // Don't bother with zero. + + unsigned char B = Value & 0xff; + for (unsigned i = 1; i < NumBytes; ++i) { + Value >>= 8; + if ((Value & 0xff) != B) + return false; + } + return true; + } + bool isNEONi16ByteReplicate() const { return isNEONByteReplicate(2); } + bool isNEONi32ByteReplicate() const { return isNEONByteReplicate(4); } bool isNEONi32vmov() const { - if (!isImm()) return false; + if (isNEONByteReplicate(4)) + return false; // Let it to be classified as byte-replicate case. + if (!isImm()) + return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); // Must be a constant. - if (!CE) return false; + if (!CE) + return false; int64_t Value = CE->getValue(); // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X, // for VMOV/VMVN only, 00Xf or 0Xff are also accepted. @@ -1527,7 +1692,7 @@ public: void addExpr(MCInst &Inst, const MCExpr *Expr) const { // Add as immediates when possible. Null MCExpr = 0. 
- if (Expr == 0) + if (!Expr) Inst.addOperand(MCOperand::CreateImm(0)); else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) Inst.addOperand(MCOperand::CreateImm(CE->getValue())); @@ -1580,7 +1745,7 @@ public: void addRegShiftedRegOperands(MCInst &Inst, unsigned N) const { assert(N == 3 && "Invalid number of operands!"); assert(isRegShiftedReg() && - "addRegShiftedRegOperands() on non RegShiftedReg!"); + "addRegShiftedRegOperands() on non-RegShiftedReg!"); Inst.addOperand(MCOperand::CreateReg(RegShiftedReg.SrcReg)); Inst.addOperand(MCOperand::CreateReg(RegShiftedReg.ShiftReg)); Inst.addOperand(MCOperand::CreateImm( @@ -1590,7 +1755,7 @@ public: void addRegShiftedImmOperands(MCInst &Inst, unsigned N) const { assert(N == 2 && "Invalid number of operands!"); assert(isRegShiftedImm() && - "addRegShiftedImmOperands() on non RegShiftedImm!"); + "addRegShiftedImmOperands() on non-RegShiftedImm!"); Inst.addOperand(MCOperand::CreateReg(RegShiftedImm.SrcReg)); // Shift of #32 is encoded as 0 where permitted unsigned Imm = (RegShiftedImm.ShiftImm == 32 ? 0 : RegShiftedImm.ShiftImm); @@ -1841,6 +2006,50 @@ public: Inst.addOperand(MCOperand::CreateImm(Memory.Alignment)); } + void addDupAlignedMemoryNoneOperands(MCInst &Inst, unsigned N) const { + addAlignedMemoryOperands(Inst, N); + } + + void addAlignedMemoryNoneOperands(MCInst &Inst, unsigned N) const { + addAlignedMemoryOperands(Inst, N); + } + + void addAlignedMemory16Operands(MCInst &Inst, unsigned N) const { + addAlignedMemoryOperands(Inst, N); + } + + void addDupAlignedMemory16Operands(MCInst &Inst, unsigned N) const { + addAlignedMemoryOperands(Inst, N); + } + + void addAlignedMemory32Operands(MCInst &Inst, unsigned N) const { + addAlignedMemoryOperands(Inst, N); + } + + void addDupAlignedMemory32Operands(MCInst &Inst, unsigned N) const { + addAlignedMemoryOperands(Inst, N); + } + + void addAlignedMemory64Operands(MCInst &Inst, unsigned N) const { + addAlignedMemoryOperands(Inst, N); + } + + void addDupAlignedMemory64Operands(MCInst &Inst, unsigned N) const { + addAlignedMemoryOperands(Inst, N); + } + + void addAlignedMemory64or128Operands(MCInst &Inst, unsigned N) const { + addAlignedMemoryOperands(Inst, N); + } + + void addDupAlignedMemory64or128Operands(MCInst &Inst, unsigned N) const { + addAlignedMemoryOperands(Inst, N); + } + + void addAlignedMemory64or128or256Operands(MCInst &Inst, unsigned N) const { + addAlignedMemoryOperands(Inst, N); + } + void addAddrMode2Operands(MCInst &Inst, unsigned N) const { assert(N == 3 && "Invalid number of operands!"); int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0; @@ -2190,6 +2399,19 @@ public: Inst.addOperand(MCOperand::CreateImm(Value)); } + void addNEONinvByteReplicateOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The immediate encodes the type of constant as well as the value. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + unsigned Value = CE->getValue(); + assert((Inst.getOpcode() == ARM::VMOVv8i8 || + Inst.getOpcode() == ARM::VMOVv16i8) && + "All vmvn instructions that wants to replicate non-zero byte " + "always must be replaced with VMOVv8i8 or VMOVv16i8."); + unsigned B = ((~Value) & 0xff); + B |= 0xe00; // cmode = 0b1110 + Inst.addOperand(MCOperand::CreateImm(B)); + } void addNEONi32vmovOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The immediate encodes the type of constant as well as the value. 
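To make the byte-replication predicate concrete, here is a small worked example (a standalone sketch, not code from this hunk): a 32-bit immediate whose four bytes are identical satisfies isNEONByteReplicate(4), and the operand adders above then fold that byte into the cmode = 0b1110 form carried by VMOVv8i8/VMOVv16i8.

    int64_t Value = 0x42424242;         // e.g. vmov.i32 d0, #0x42424242
    unsigned char B = Value & 0xff;     // B == 0x42
    bool Replicated = Value != 0;
    for (unsigned i = 1; i < 4; ++i) {  // same scan as isNEONByteReplicate(4)
      Value >>= 8;
      if ((Value & 0xff) != B)
        Replicated = false;
    }
    // Replicated is true, so the parser selects the byte-wide VMOV form and
    // encodes the immediate as B | 0xe00 == 0xe42 (cmode = 0b1110).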
@@ -2204,6 +2426,19 @@ public: Inst.addOperand(MCOperand::CreateImm(Value)); } + void addNEONvmovByteReplicateOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The immediate encodes the type of constant as well as the value. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + unsigned Value = CE->getValue(); + assert((Inst.getOpcode() == ARM::VMOVv8i8 || + Inst.getOpcode() == ARM::VMOVv16i8) && + "All instructions that wants to replicate non-zero byte " + "always must be replaced with VMOVv8i8 or VMOVv16i8."); + unsigned B = Value & 0xff; + B |= 0xe00; // cmode = 0b1110 + Inst.addOperand(MCOperand::CreateImm(B)); + } void addNEONi32vmovNegOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The immediate encodes the type of constant as well as the value. @@ -2230,58 +2465,60 @@ public: Inst.addOperand(MCOperand::CreateImm(Imm | 0x1e00)); } - virtual void print(raw_ostream &OS) const; + void print(raw_ostream &OS) const override; - static ARMOperand *CreateITMask(unsigned Mask, SMLoc S) { - ARMOperand *Op = new ARMOperand(k_ITCondMask); + static std::unique_ptr<ARMOperand> CreateITMask(unsigned Mask, SMLoc S) { + auto Op = make_unique<ARMOperand>(k_ITCondMask); Op->ITMask.Mask = Mask; Op->StartLoc = S; Op->EndLoc = S; return Op; } - static ARMOperand *CreateCondCode(ARMCC::CondCodes CC, SMLoc S) { - ARMOperand *Op = new ARMOperand(k_CondCode); + static std::unique_ptr<ARMOperand> CreateCondCode(ARMCC::CondCodes CC, + SMLoc S) { + auto Op = make_unique<ARMOperand>(k_CondCode); Op->CC.Val = CC; Op->StartLoc = S; Op->EndLoc = S; return Op; } - static ARMOperand *CreateCoprocNum(unsigned CopVal, SMLoc S) { - ARMOperand *Op = new ARMOperand(k_CoprocNum); + static std::unique_ptr<ARMOperand> CreateCoprocNum(unsigned CopVal, SMLoc S) { + auto Op = make_unique<ARMOperand>(k_CoprocNum); Op->Cop.Val = CopVal; Op->StartLoc = S; Op->EndLoc = S; return Op; } - static ARMOperand *CreateCoprocReg(unsigned CopVal, SMLoc S) { - ARMOperand *Op = new ARMOperand(k_CoprocReg); + static std::unique_ptr<ARMOperand> CreateCoprocReg(unsigned CopVal, SMLoc S) { + auto Op = make_unique<ARMOperand>(k_CoprocReg); Op->Cop.Val = CopVal; Op->StartLoc = S; Op->EndLoc = S; return Op; } - static ARMOperand *CreateCoprocOption(unsigned Val, SMLoc S, SMLoc E) { - ARMOperand *Op = new ARMOperand(k_CoprocOption); + static std::unique_ptr<ARMOperand> CreateCoprocOption(unsigned Val, SMLoc S, + SMLoc E) { + auto Op = make_unique<ARMOperand>(k_CoprocOption); Op->Cop.Val = Val; Op->StartLoc = S; Op->EndLoc = E; return Op; } - static ARMOperand *CreateCCOut(unsigned RegNum, SMLoc S) { - ARMOperand *Op = new ARMOperand(k_CCOut); + static std::unique_ptr<ARMOperand> CreateCCOut(unsigned RegNum, SMLoc S) { + auto Op = make_unique<ARMOperand>(k_CCOut); Op->Reg.RegNum = RegNum; Op->StartLoc = S; Op->EndLoc = S; return Op; } - static ARMOperand *CreateToken(StringRef Str, SMLoc S) { - ARMOperand *Op = new ARMOperand(k_Token); + static std::unique_ptr<ARMOperand> CreateToken(StringRef Str, SMLoc S) { + auto Op = make_unique<ARMOperand>(k_Token); Op->Tok.Data = Str.data(); Op->Tok.Length = Str.size(); Op->StartLoc = S; @@ -2289,20 +2526,20 @@ public: return Op; } - static ARMOperand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) { - ARMOperand *Op = new ARMOperand(k_Register); + static std::unique_ptr<ARMOperand> CreateReg(unsigned RegNum, SMLoc S, + SMLoc E) { + auto Op = make_unique<ARMOperand>(k_Register); Op->Reg.RegNum = RegNum; 
Op->StartLoc = S; Op->EndLoc = E; return Op; } - static ARMOperand *CreateShiftedRegister(ARM_AM::ShiftOpc ShTy, - unsigned SrcReg, - unsigned ShiftReg, - unsigned ShiftImm, - SMLoc S, SMLoc E) { - ARMOperand *Op = new ARMOperand(k_ShiftedRegister); + static std::unique_ptr<ARMOperand> + CreateShiftedRegister(ARM_AM::ShiftOpc ShTy, unsigned SrcReg, + unsigned ShiftReg, unsigned ShiftImm, SMLoc S, + SMLoc E) { + auto Op = make_unique<ARMOperand>(k_ShiftedRegister); Op->RegShiftedReg.ShiftTy = ShTy; Op->RegShiftedReg.SrcReg = SrcReg; Op->RegShiftedReg.ShiftReg = ShiftReg; @@ -2312,11 +2549,10 @@ public: return Op; } - static ARMOperand *CreateShiftedImmediate(ARM_AM::ShiftOpc ShTy, - unsigned SrcReg, - unsigned ShiftImm, - SMLoc S, SMLoc E) { - ARMOperand *Op = new ARMOperand(k_ShiftedImmediate); + static std::unique_ptr<ARMOperand> + CreateShiftedImmediate(ARM_AM::ShiftOpc ShTy, unsigned SrcReg, + unsigned ShiftImm, SMLoc S, SMLoc E) { + auto Op = make_unique<ARMOperand>(k_ShiftedImmediate); Op->RegShiftedImm.ShiftTy = ShTy; Op->RegShiftedImm.SrcReg = SrcReg; Op->RegShiftedImm.ShiftImm = ShiftImm; @@ -2325,9 +2561,9 @@ public: return Op; } - static ARMOperand *CreateShifterImm(bool isASR, unsigned Imm, - SMLoc S, SMLoc E) { - ARMOperand *Op = new ARMOperand(k_ShifterImmediate); + static std::unique_ptr<ARMOperand> CreateShifterImm(bool isASR, unsigned Imm, + SMLoc S, SMLoc E) { + auto Op = make_unique<ARMOperand>(k_ShifterImmediate); Op->ShifterImm.isASR = isASR; Op->ShifterImm.Imm = Imm; Op->StartLoc = S; @@ -2335,17 +2571,18 @@ public: return Op; } - static ARMOperand *CreateRotImm(unsigned Imm, SMLoc S, SMLoc E) { - ARMOperand *Op = new ARMOperand(k_RotateImmediate); + static std::unique_ptr<ARMOperand> CreateRotImm(unsigned Imm, SMLoc S, + SMLoc E) { + auto Op = make_unique<ARMOperand>(k_RotateImmediate); Op->RotImm.Imm = Imm; Op->StartLoc = S; Op->EndLoc = E; return Op; } - static ARMOperand *CreateBitfield(unsigned LSB, unsigned Width, - SMLoc S, SMLoc E) { - ARMOperand *Op = new ARMOperand(k_BitfieldDescriptor); + static std::unique_ptr<ARMOperand> + CreateBitfield(unsigned LSB, unsigned Width, SMLoc S, SMLoc E) { + auto Op = make_unique<ARMOperand>(k_BitfieldDescriptor); Op->Bitfield.LSB = LSB; Op->Bitfield.Width = Width; Op->StartLoc = S; @@ -2353,8 +2590,8 @@ public: return Op; } - static ARMOperand * - CreateRegList(SmallVectorImpl<std::pair<unsigned, unsigned> > &Regs, + static std::unique_ptr<ARMOperand> + CreateRegList(SmallVectorImpl<std::pair<unsigned, unsigned>> &Regs, SMLoc StartLoc, SMLoc EndLoc) { assert (Regs.size() > 0 && "RegList contains no registers?"); KindTy Kind = k_RegisterList; @@ -2368,7 +2605,7 @@ public: // Sort based on the register encoding values. 
array_pod_sort(Regs.begin(), Regs.end()); - ARMOperand *Op = new ARMOperand(Kind); + auto Op = make_unique<ARMOperand>(Kind); for (SmallVectorImpl<std::pair<unsigned, unsigned> >::const_iterator I = Regs.begin(), E = Regs.end(); I != E; ++I) Op->Registers.push_back(I->second); @@ -2377,9 +2614,11 @@ public: return Op; } - static ARMOperand *CreateVectorList(unsigned RegNum, unsigned Count, - bool isDoubleSpaced, SMLoc S, SMLoc E) { - ARMOperand *Op = new ARMOperand(k_VectorList); + static std::unique_ptr<ARMOperand> CreateVectorList(unsigned RegNum, + unsigned Count, + bool isDoubleSpaced, + SMLoc S, SMLoc E) { + auto Op = make_unique<ARMOperand>(k_VectorList); Op->VectorList.RegNum = RegNum; Op->VectorList.Count = Count; Op->VectorList.isDoubleSpaced = isDoubleSpaced; @@ -2388,10 +2627,10 @@ public: return Op; } - static ARMOperand *CreateVectorListAllLanes(unsigned RegNum, unsigned Count, - bool isDoubleSpaced, - SMLoc S, SMLoc E) { - ARMOperand *Op = new ARMOperand(k_VectorListAllLanes); + static std::unique_ptr<ARMOperand> + CreateVectorListAllLanes(unsigned RegNum, unsigned Count, bool isDoubleSpaced, + SMLoc S, SMLoc E) { + auto Op = make_unique<ARMOperand>(k_VectorListAllLanes); Op->VectorList.RegNum = RegNum; Op->VectorList.Count = Count; Op->VectorList.isDoubleSpaced = isDoubleSpaced; @@ -2400,11 +2639,10 @@ public: return Op; } - static ARMOperand *CreateVectorListIndexed(unsigned RegNum, unsigned Count, - unsigned Index, - bool isDoubleSpaced, - SMLoc S, SMLoc E) { - ARMOperand *Op = new ARMOperand(k_VectorListIndexed); + static std::unique_ptr<ARMOperand> + CreateVectorListIndexed(unsigned RegNum, unsigned Count, unsigned Index, + bool isDoubleSpaced, SMLoc S, SMLoc E) { + auto Op = make_unique<ARMOperand>(k_VectorListIndexed); Op->VectorList.RegNum = RegNum; Op->VectorList.Count = Count; Op->VectorList.LaneIndex = Index; @@ -2414,32 +2652,30 @@ public: return Op; } - static ARMOperand *CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E, - MCContext &Ctx) { - ARMOperand *Op = new ARMOperand(k_VectorIndex); + static std::unique_ptr<ARMOperand> + CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E, MCContext &Ctx) { + auto Op = make_unique<ARMOperand>(k_VectorIndex); Op->VectorIndex.Val = Idx; Op->StartLoc = S; Op->EndLoc = E; return Op; } - static ARMOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) { - ARMOperand *Op = new ARMOperand(k_Immediate); + static std::unique_ptr<ARMOperand> CreateImm(const MCExpr *Val, SMLoc S, + SMLoc E) { + auto Op = make_unique<ARMOperand>(k_Immediate); Op->Imm.Val = Val; Op->StartLoc = S; Op->EndLoc = E; return Op; } - static ARMOperand *CreateMem(unsigned BaseRegNum, - const MCConstantExpr *OffsetImm, - unsigned OffsetRegNum, - ARM_AM::ShiftOpc ShiftType, - unsigned ShiftImm, - unsigned Alignment, - bool isNegative, - SMLoc S, SMLoc E) { - ARMOperand *Op = new ARMOperand(k_Memory); + static std::unique_ptr<ARMOperand> + CreateMem(unsigned BaseRegNum, const MCConstantExpr *OffsetImm, + unsigned OffsetRegNum, ARM_AM::ShiftOpc ShiftType, + unsigned ShiftImm, unsigned Alignment, bool isNegative, SMLoc S, + SMLoc E, SMLoc AlignmentLoc = SMLoc()) { + auto Op = make_unique<ARMOperand>(k_Memory); Op->Memory.BaseRegNum = BaseRegNum; Op->Memory.OffsetImm = OffsetImm; Op->Memory.OffsetRegNum = OffsetRegNum; @@ -2449,14 +2685,14 @@ public: Op->Memory.isNegative = isNegative; Op->StartLoc = S; Op->EndLoc = E; + Op->AlignmentLoc = AlignmentLoc; return Op; } - static ARMOperand *CreatePostIdxReg(unsigned RegNum, bool isAdd, - ARM_AM::ShiftOpc ShiftTy, - 
unsigned ShiftImm, - SMLoc S, SMLoc E) { - ARMOperand *Op = new ARMOperand(k_PostIndexRegister); + static std::unique_ptr<ARMOperand> + CreatePostIdxReg(unsigned RegNum, bool isAdd, ARM_AM::ShiftOpc ShiftTy, + unsigned ShiftImm, SMLoc S, SMLoc E) { + auto Op = make_unique<ARMOperand>(k_PostIndexRegister); Op->PostIdxReg.RegNum = RegNum; Op->PostIdxReg.isAdd = isAdd; Op->PostIdxReg.ShiftTy = ShiftTy; @@ -2466,33 +2702,35 @@ public: return Op; } - static ARMOperand *CreateMemBarrierOpt(ARM_MB::MemBOpt Opt, SMLoc S) { - ARMOperand *Op = new ARMOperand(k_MemBarrierOpt); + static std::unique_ptr<ARMOperand> CreateMemBarrierOpt(ARM_MB::MemBOpt Opt, + SMLoc S) { + auto Op = make_unique<ARMOperand>(k_MemBarrierOpt); Op->MBOpt.Val = Opt; Op->StartLoc = S; Op->EndLoc = S; return Op; } - static ARMOperand *CreateInstSyncBarrierOpt(ARM_ISB::InstSyncBOpt Opt, - SMLoc S) { - ARMOperand *Op = new ARMOperand(k_InstSyncBarrierOpt); + static std::unique_ptr<ARMOperand> + CreateInstSyncBarrierOpt(ARM_ISB::InstSyncBOpt Opt, SMLoc S) { + auto Op = make_unique<ARMOperand>(k_InstSyncBarrierOpt); Op->ISBOpt.Val = Opt; Op->StartLoc = S; Op->EndLoc = S; return Op; } - static ARMOperand *CreateProcIFlags(ARM_PROC::IFlags IFlags, SMLoc S) { - ARMOperand *Op = new ARMOperand(k_ProcIFlags); + static std::unique_ptr<ARMOperand> CreateProcIFlags(ARM_PROC::IFlags IFlags, + SMLoc S) { + auto Op = make_unique<ARMOperand>(k_ProcIFlags); Op->IFlags.Val = IFlags; Op->StartLoc = S; Op->EndLoc = S; return Op; } - static ARMOperand *CreateMSRMask(unsigned MMask, SMLoc S) { - ARMOperand *Op = new ARMOperand(k_MSRMask); + static std::unique_ptr<ARMOperand> CreateMSRMask(unsigned MMask, SMLoc S) { + auto Op = make_unique<ARMOperand>(k_MSRMask); Op->MMask.Val = MMask; Op->StartLoc = S; Op->EndLoc = S; @@ -2696,11 +2934,11 @@ int ARMAsmParser::tryParseRegister() { // occurs, return -1. An irrecoverable error is one where tokens have been // consumed in the process of trying to parse the shifter (i.e., when it is // indeed a shifter operand, but malformed). -int ARMAsmParser::tryParseShiftRegister( - SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +int ARMAsmParser::tryParseShiftRegister(OperandVector &Operands) { SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); - assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); + if (Tok.isNot(AsmToken::Identifier)) + return -1; std::string lowerCase = Tok.getString().lower(); ARM_AM::ShiftOpc ShiftTy = StringSwitch<ARM_AM::ShiftOpc>(lowerCase) @@ -2720,7 +2958,8 @@ int ARMAsmParser::tryParseShiftRegister( // The source register for the shift has already been added to the // operand list, so we need to pop it off and combine it into the shifted // register operand instead. - OwningPtr<ARMOperand> PrevOp((ARMOperand*)Operands.pop_back_val()); + std::unique_ptr<ARMOperand> PrevOp( + (ARMOperand *)Operands.pop_back_val().release()); if (!PrevOp->isReg()) return Error(PrevOp->getStartLoc(), "shift must be of a register"); int SrcReg = PrevOp->getReg(); @@ -2739,7 +2978,7 @@ int ARMAsmParser::tryParseShiftRegister( Parser.getTok().is(AsmToken::Dollar)) { Parser.Lex(); // Eat hash. 
SMLoc ImmLoc = Parser.getTok().getLoc(); - const MCExpr *ShiftExpr = 0; + const MCExpr *ShiftExpr = nullptr; if (getParser().parseExpression(ShiftExpr, EndLoc)) { Error(ImmLoc, "invalid immediate shift value"); return -1; @@ -2769,12 +3008,12 @@ int ARMAsmParser::tryParseShiftRegister( EndLoc = Parser.getTok().getEndLoc(); ShiftReg = tryParseRegister(); if (ShiftReg == -1) { - Error (L, "expected immediate or register in shift operand"); + Error(L, "expected immediate or register in shift operand"); return -1; } } else { - Error (Parser.getTok().getLoc(), - "expected immediate or register in shift operand"); + Error(Parser.getTok().getLoc(), + "expected immediate or register in shift operand"); return -1; } } @@ -2797,8 +3036,7 @@ int ARMAsmParser::tryParseShiftRegister( /// /// TODO this is likely to change to allow different register types and/or to /// parse for a specific register type. -bool ARMAsmParser:: -tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +bool ARMAsmParser::tryParseRegisterWithWriteBack(OperandVector &Operands) { const AsmToken &RegTok = Parser.getTok(); int RegNo = tryParseRegister(); if (RegNo == -1) @@ -2844,17 +3082,25 @@ tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { } /// MatchCoprocessorOperandName - Try to parse a coprocessor-related -/// instruction with a symbolic operand name. Example: "p1", "p7", "c3", -/// "c5", ... +/// instruction with a symbolic operand name. +/// We accept "crN" syntax for GAS compatibility. +/// <operand-name> ::= <prefix><number> +/// If CoprocOp is 'c', then: +/// <prefix> ::= c | cr +/// If CoprocOp is 'p', then: +/// <prefix> ::= p +/// <number> ::= integer in range [0, 15] static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) { // Use the same layout as the tablegen'erated register name matcher. Ugly, // but efficient. + if (Name.size() < 2 || Name[0] != CoprocOp) + return -1; + Name = (Name[1] == 'r') ? Name.drop_front(2) : Name.drop_front(); + switch (Name.size()) { default: return -1; - case 2: - if (Name[0] != CoprocOp) - return -1; - switch (Name[1]) { + case 1: + switch (Name[0]) { default: return -1; case '0': return 0; case '1': return 1; @@ -2867,10 +3113,10 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) { case '8': return 8; case '9': return 9; } - case 3: - if (Name[0] != CoprocOp || Name[1] != '1') + case 2: + if (Name[0] != '1') return -1; - switch (Name[2]) { + switch (Name[1]) { default: return -1; // p10 and p11 are invalid for coproc instructions (reserved for FP/NEON) case '0': return CoprocOp == 'p'? -1: 10; @@ -2884,8 +3130,8 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) { } /// parseITCondCode - Try to parse a condition code for an IT instruction. -ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -parseITCondCode(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +ARMAsmParser::OperandMatchResultTy +ARMAsmParser::parseITCondCode(OperandVector &Operands) { SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); if (!Tok.is(AsmToken::Identifier)) @@ -2921,8 +3167,8 @@ parseITCondCode(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { /// parseCoprocNumOperand - Try to parse a coprocessor number operand. The /// token must be an Identifier when called, and if it is a coprocessor /// number, the token is eaten and the operand is added to the operand list.
-ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -parseCoprocNumOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +ARMAsmParser::OperandMatchResultTy +ARMAsmParser::parseCoprocNumOperand(OperandVector &Operands) { SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) @@ -2940,8 +3186,8 @@ parseCoprocNumOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { /// parseCoprocRegOperand - Try to parse a coprocessor register operand. The /// token must be an Identifier when called, and if it is a coprocessor /// number, the token is eaten and the operand is added to the operand list. -ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -parseCoprocRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +ARMAsmParser::OperandMatchResultTy +ARMAsmParser::parseCoprocRegOperand(OperandVector &Operands) { SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) @@ -2958,8 +3204,8 @@ parseCoprocRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { /// parseCoprocOptionOperand - Try to parse a coprocessor option operand. /// coproc_option : '{' imm0_255 '}' -ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -parseCoprocOptionOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +ARMAsmParser::OperandMatchResultTy +ARMAsmParser::parseCoprocOptionOperand(OperandVector &Operands) { SMLoc S = Parser.getTok().getLoc(); // If this isn't a '{', this isn't a coprocessor immediate operand. @@ -3036,8 +3282,7 @@ static unsigned getDRegFromQReg(unsigned QReg) { } /// Parse a register list. -bool ARMAsmParser:: -parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +bool ARMAsmParser::parseRegisterList(OperandVector &Operands) { assert(Parser.getTok().is(AsmToken::LCurly) && "Token is not a Left Curly Brace"); SMLoc S = Parser.getTok().getLoc(); @@ -3218,8 +3463,8 @@ parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc) { } // parse a vector register list -ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +ARMAsmParser::OperandMatchResultTy +ARMAsmParser::parseVectorList(OperandVector &Operands) { VectorLaneTy LaneKind; unsigned LaneIndex; SMLoc S = Parser.getTok().getLoc(); @@ -3469,8 +3714,8 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { } /// parseMemBarrierOptOperand - Try to parse DSB/DMB data barrier options. -ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +ARMAsmParser::OperandMatchResultTy +ARMAsmParser::parseMemBarrierOptOperand(OperandVector &Operands) { SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); unsigned Opt; @@ -3518,7 +3763,7 @@ parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Error(Loc, "illegal expression"); return MatchOperand_ParseFail; } - + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(MemBarrierID); if (!CE) { Error(Loc, "constant expression expected"); @@ -3540,8 +3785,8 @@ parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { } /// parseInstSyncBarrierOptOperand - Try to parse ISB inst sync barrier options.
-ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -parseInstSyncBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +ARMAsmParser::OperandMatchResultTy +ARMAsmParser::parseInstSyncBarrierOptOperand(OperandVector &Operands) { SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); unsigned Opt; @@ -3591,8 +3836,8 @@ parseInstSyncBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { /// parseProcIFlagsOperand - Try to parse iflags from CPS instruction. -ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -parseProcIFlagsOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +ARMAsmParser::OperandMatchResultTy +ARMAsmParser::parseProcIFlagsOperand(OperandVector &Operands) { SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); if (!Tok.is(AsmToken::Identifier)) @@ -3625,8 +3870,8 @@ parseProcIFlagsOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { } /// parseMSRMaskOperand - Try to parse mask flags from MSR instruction. -ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +ARMAsmParser::OperandMatchResultTy +ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) { SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); if (!Tok.is(AsmToken::Identifier)) @@ -3753,9 +3998,9 @@ parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_Success; } -ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op, - int Low, int High) { +ARMAsmParser::OperandMatchResultTy +ARMAsmParser::parsePKHImm(OperandVector &Operands, StringRef Op, int Low, + int High) { const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) { Error(Parser.getTok().getLoc(), Op + " operand expected."); @@ -3801,8 +4046,8 @@ parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op, return MatchOperand_Success; } -ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +ARMAsmParser::OperandMatchResultTy +ARMAsmParser::parseSetEndImm(OperandVector &Operands) { const AsmToken &Tok = Parser.getTok(); SMLoc S = Tok.getLoc(); if (Tok.isNot(AsmToken::Identifier)) { @@ -3830,8 +4075,8 @@ parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { /// lsl #n 'n' in [0,31] /// asr #n 'n' in [1,32] /// n == 32 encoded as n == 0. -ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +ARMAsmParser::OperandMatchResultTy +ARMAsmParser::parseShifterImm(OperandVector &Operands) { const AsmToken &Tok = Parser.getTok(); SMLoc S = Tok.getLoc(); if (Tok.isNot(AsmToken::Identifier)) { @@ -3900,8 +4145,8 @@ parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { /// parseRotImm - Parse the shifter immediate operand for SXTB/UXTB family /// of instructions. 
Legal values are: /// ror #n 'n' in {0, 8, 16, 24} -ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +ARMAsmParser::OperandMatchResultTy +ARMAsmParser::parseRotImm(OperandVector &Operands) { const AsmToken &Tok = Parser.getTok(); SMLoc S = Tok.getLoc(); if (Tok.isNot(AsmToken::Identifier)) @@ -3946,8 +4191,8 @@ parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_Success; } -ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +ARMAsmParser::OperandMatchResultTy +ARMAsmParser::parseBitfield(OperandVector &Operands) { SMLoc S = Parser.getTok().getLoc(); // The bitfield descriptor is really two operands, the LSB and the width. if (Parser.getTok().isNot(AsmToken::Hash) && @@ -4014,8 +4259,8 @@ parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_Success; } -ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +ARMAsmParser::OperandMatchResultTy +ARMAsmParser::parsePostIdxReg(OperandVector &Operands) { // Check for a post-index addressing register operand. Specifically: // postidx_reg := '+' register {, shift} // | '-' register {, shift} @@ -4063,8 +4308,8 @@ parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_Success; } -ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +ARMAsmParser::OperandMatchResultTy +ARMAsmParser::parseAM3Offset(OperandVector &Operands) { // Check for a post-index addressing register operand. Specifically: // am3offset := '+' register // | '-' register @@ -4117,7 +4362,7 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { isAdd = false; haveEaten = true; } - + Tok = Parser.getTok(); int Reg = tryParseRegister(); if (Reg == -1) { @@ -4136,26 +4381,24 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { /// Convert parsed operands to MCInst. Needed here because this instruction /// only has two register operands, but multiplication is commutative so /// assemblers should accept both "mul rD, rN, rD" and "mul rD, rD, rN". -void ARMAsmParser:: -cvtThumbMultiply(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1); - ((ARMOperand*)Operands[1])->addCCOutOperands(Inst, 1); +void ARMAsmParser::cvtThumbMultiply(MCInst &Inst, + const OperandVector &Operands) { + ((ARMOperand &)*Operands[3]).addRegOperands(Inst, 1); + ((ARMOperand &)*Operands[1]).addCCOutOperands(Inst, 1); // If we have a three-operand form, make sure to set Rn to be the operand // that isn't the same as Rd. 
unsigned RegOp = 4; if (Operands.size() == 6 && - ((ARMOperand*)Operands[4])->getReg() == - ((ARMOperand*)Operands[3])->getReg()) + ((ARMOperand &)*Operands[4]).getReg() == + ((ARMOperand &)*Operands[3]).getReg()) RegOp = 5; - ((ARMOperand*)Operands[RegOp])->addRegOperands(Inst, 1); + ((ARMOperand &)*Operands[RegOp]).addRegOperands(Inst, 1); Inst.addOperand(Inst.getOperand(0)); - ((ARMOperand*)Operands[2])->addCondCodeOperands(Inst, 2); + ((ARMOperand &)*Operands[2]).addCondCodeOperands(Inst, 2); } -void ARMAsmParser:: -cvtThumbBranches(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +void ARMAsmParser::cvtThumbBranches(MCInst &Inst, + const OperandVector &Operands) { int CondOp = -1, ImmOp = -1; switch(Inst.getOpcode()) { case ARM::tB: @@ -4178,7 +4421,7 @@ cvtThumbBranches(MCInst &Inst, } else { // outside IT blocks we can only have unconditional branches with AL // condition code or conditional branches with non-AL condition code - unsigned Cond = static_cast<ARMOperand*>(Operands[CondOp])->getCondCode(); + unsigned Cond = static_cast<ARMOperand &>(*Operands[CondOp]).getCondCode(); switch(Inst.getOpcode()) { case ARM::tB: case ARM::tBcc: @@ -4190,32 +4433,31 @@ cvtThumbBranches(MCInst &Inst, break; } } - + // now decide on encoding size based on branch target range switch(Inst.getOpcode()) { // classify tB as either t2B or t1B based on range of immediate operand case ARM::tB: { - ARMOperand* op = static_cast<ARMOperand*>(Operands[ImmOp]); - if(!op->isSignedOffset<11, 1>() && isThumbTwo()) + ARMOperand &op = static_cast<ARMOperand &>(*Operands[ImmOp]); + if (!op.isSignedOffset<11, 1>() && isThumbTwo()) Inst.setOpcode(ARM::t2B); break; } // classify tBcc as either t2Bcc or t1Bcc based on range of immediate operand case ARM::tBcc: { - ARMOperand* op = static_cast<ARMOperand*>(Operands[ImmOp]); - if(!op->isSignedOffset<8, 1>() && isThumbTwo()) + ARMOperand &op = static_cast<ARMOperand &>(*Operands[ImmOp]); + if (!op.isSignedOffset<8, 1>() && isThumbTwo()) Inst.setOpcode(ARM::t2Bcc); break; } } - ((ARMOperand*)Operands[ImmOp])->addImmOperands(Inst, 1); - ((ARMOperand*)Operands[CondOp])->addCondCodeOperands(Inst, 2); + ((ARMOperand &)*Operands[ImmOp]).addImmOperands(Inst, 1); + ((ARMOperand &)*Operands[CondOp]).addCondCodeOperands(Inst, 2); } /// Parse an ARM memory expression, return false if successful else return true /// or an error. The first token must be a '[' when called. -bool ARMAsmParser:: -parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +bool ARMAsmParser::parseMemory(OperandVector &Operands) { SMLoc S, E; assert(Parser.getTok().is(AsmToken::LBrac) && "Token is not a Left Bracket"); @@ -4237,8 +4479,9 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { E = Tok.getEndLoc(); Parser.Lex(); // Eat right bracket token. - Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, 0, ARM_AM::no_shift, - 0, 0, false, S, E)); + Operands.push_back(ARMOperand::CreateMem(BaseRegNum, nullptr, 0, + ARM_AM::no_shift, 0, 0, false, + S, E)); // If there's a pre-indexing writeback marker, '!', just add it as a token // operand. It's rather odd, but syntactically valid. @@ -4260,6 +4503,7 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (Parser.getTok().is(AsmToken::Colon)) { Parser.Lex(); // Eat the ':'. 
E = Parser.getTok().getLoc(); + SMLoc AlignmentLoc = Tok.getLoc(); const MCExpr *Expr; if (getParser().parseExpression(Expr)) @@ -4292,9 +4536,9 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Don't worry about range checking the value here. That's handled by // the is*() predicates. - Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, 0, + Operands.push_back(ARMOperand::CreateMem(BaseRegNum, nullptr, 0, ARM_AM::no_shift, 0, Align, - false, S, E)); + false, S, E, AlignmentLoc)); // If there's a pre-indexing writeback marker, '!', just add it as a token // operand. @@ -4385,7 +4629,7 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { E = Parser.getTok().getEndLoc(); Parser.Lex(); // Eat right bracket token. - Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, OffsetRegNum, + Operands.push_back(ARMOperand::CreateMem(BaseRegNum, nullptr, OffsetRegNum, ShiftType, ShiftImm, 0, isNegative, S, E)); @@ -4463,8 +4707,8 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St, } /// parseFPImm - A floating point immediate expression operand. -ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +ARMAsmParser::OperandMatchResultTy +ARMAsmParser::parseFPImm(OperandVector &Operands) { // Anything that can accept a floating point constant as an operand // needs to go through here, as the regular parseExpression is // integer only. @@ -4490,9 +4734,13 @@ parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // integer constant. Make sure we don't try to parse an FPImm // for these: // vmov.i{8|16|32|64} <dreg|qreg>, #imm - ARMOperand *TyOp = static_cast<ARMOperand*>(Operands[2]); - if (!TyOp->isToken() || (TyOp->getToken() != ".f32" && - TyOp->getToken() != ".f64")) + ARMOperand &TyOp = static_cast<ARMOperand &>(*Operands[2]); + bool isVmovf = TyOp.isToken() && + (TyOp.getToken() == ".f32" || TyOp.getToken() == ".f64"); + ARMOperand &Mnemonic = static_cast<ARMOperand &>(*Operands[0]); + bool isFconst = Mnemonic.isToken() && (Mnemonic.getToken() == "fconstd" || + Mnemonic.getToken() == "fconsts"); + if (!(isVmovf || isFconst)) return MatchOperand_NoMatch; Parser.Lex(); // Eat '#' or '$'. @@ -4505,7 +4753,7 @@ parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { } const AsmToken &Tok = Parser.getTok(); SMLoc Loc = Tok.getLoc(); - if (Tok.is(AsmToken::Real)) { + if (Tok.is(AsmToken::Real) && isVmovf) { APFloat RealVal(APFloat::IEEEsingle, Tok.getString()); uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue(); // If we had a '-' in front, toggle the sign bit. @@ -4518,15 +4766,16 @@ parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { } // Also handle plain integers. Instructions which allow floating point // immediates also allow a raw encoded 8-bit value. - if (Tok.is(AsmToken::Integer)) { + if (Tok.is(AsmToken::Integer) && isFconst) { int64_t Val = Tok.getIntVal(); Parser.Lex(); // Eat the token. 
if (Val > 255 || Val < 0) { Error(Loc, "encoded floating point value out of range"); return MatchOperand_ParseFail; } - double RealVal = ARM_AM::getFPImmFloat(Val); - Val = APFloat(APFloat::IEEEdouble, RealVal).bitcastToAPInt().getZExtValue(); + float RealVal = ARM_AM::getFPImmFloat(Val); + Val = APFloat(RealVal).bitcastToAPInt().getZExtValue(); + Operands.push_back(ARMOperand::CreateImm( MCConstantExpr::Create(Val, getContext()), S, Parser.getTok().getLoc())); @@ -4539,8 +4788,7 @@ parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { /// Parse a arm instruction operand. For now this parses the operand regardless /// of the mnemonic. -bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, - StringRef Mnemonic) { +bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { SMLoc S, E; // Check if the current operand has a custom associated parser, if so, try to @@ -4623,7 +4871,7 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, Operands.push_back(ARMOperand::CreateImm(ImmVal, S, E)); // There can be a trailing '!' on operands that we want as a separate - // '!' Token operand. Handle that here. For example, the compatibilty + // '!' Token operand. Handle that here. For example, the compatibility // alias for 'srsdb sp!, #imm' is 'srsdb #imm!'. if (Parser.getTok().is(AsmToken::Exclaim)) { Operands.push_back(ARMOperand::CreateToken(Parser.getTok().getString(), @@ -4653,6 +4901,20 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, Operands.push_back(ARMOperand::CreateImm(ExprVal, S, E)); return false; } + case AsmToken::Equal: { + if (Mnemonic != "ldr") // only parse for ldr pseudo (e.g. ldr r0, =val) + return Error(Parser.getTok().getLoc(), "unexpected token in operand"); + + Parser.Lex(); // Eat '=' + const MCExpr *SubExprVal; + if (getParser().parseExpression(SubExprVal)) + return true; + E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + + const MCExpr *CPLoc = getTargetStreamer().addConstantPoolEntry(SubExprVal); + Operands.push_back(ARMOperand::CreateImm(CPLoc, S, E)); + return false; + } } } @@ -4661,6 +4923,10 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) { RefKind = ARMMCExpr::VK_ARM_None; + // consume an optional '#' (GNU compatibility) + if (getLexer().is(AsmToken::Hash)) + Parser.Lex(); + // :lower16: and :upper16: modifiers assert(getLexer().is(AsmToken::Colon) && "expected a :"); Parser.Lex(); // Eat ':' @@ -4763,7 +5029,7 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, Mnemonic == "fmrs" || Mnemonic == "fsqrts" || Mnemonic == "fsubs" || Mnemonic == "fsts" || Mnemonic == "fcpys" || Mnemonic == "fdivs" || Mnemonic == "fmuls" || Mnemonic == "fcmps" || Mnemonic == "fcmpzs" || - Mnemonic == "vfms" || Mnemonic == "vfnms" || + Mnemonic == "vfms" || Mnemonic == "vfnms" || Mnemonic == "fconsts" || (Mnemonic == "movs" && isThumb()))) { Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 1); CarrySetting = true; @@ -4817,8 +5083,9 @@ getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst, if (Mnemonic == "bkpt" || Mnemonic == "cbnz" || Mnemonic == "setend" || Mnemonic == "cps" || Mnemonic == "it" || Mnemonic == "cbz" || - Mnemonic == "trap" || Mnemonic == "hlt" || Mnemonic.startswith("crc32") || - Mnemonic.startswith("cps") || Mnemonic.startswith("vsel") || + Mnemonic == "trap" || Mnemonic == "hlt" || Mnemonic == "udf" || + 
Mnemonic.startswith("crc32") || Mnemonic.startswith("cps") || + Mnemonic.startswith("vsel") || Mnemonic == "vmaxnm" || Mnemonic == "vminnm" || Mnemonic == "vcvta" || Mnemonic == "vcvtn" || Mnemonic == "vcvtp" || Mnemonic == "vcvtm" || Mnemonic == "vrinta" || Mnemonic == "vrintn" || Mnemonic == "vrintp" || @@ -4847,7 +5114,7 @@ getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst, } bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, - SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + OperandVector &Operands) { // FIXME: This is all horribly hacky. We really need a better way to deal // with optional operands like this in the matcher table. @@ -4860,17 +5127,17 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, // conditionally adding the cc_out in the first place because we need // to check the type of the parsed immediate operand. if (Mnemonic == "mov" && Operands.size() > 4 && !isThumb() && - !static_cast<ARMOperand*>(Operands[4])->isARMSOImm() && - static_cast<ARMOperand*>(Operands[4])->isImm0_65535Expr() && - static_cast<ARMOperand*>(Operands[1])->getReg() == 0) + !static_cast<ARMOperand &>(*Operands[4]).isARMSOImm() && + static_cast<ARMOperand &>(*Operands[4]).isImm0_65535Expr() && + static_cast<ARMOperand &>(*Operands[1]).getReg() == 0) return true; // Register-register 'add' for thumb does not have a cc_out operand // when there are only two register operands. if (isThumb() && Mnemonic == "add" && Operands.size() == 5 && - static_cast<ARMOperand*>(Operands[3])->isReg() && - static_cast<ARMOperand*>(Operands[4])->isReg() && - static_cast<ARMOperand*>(Operands[1])->getReg() == 0) + static_cast<ARMOperand &>(*Operands[3]).isReg() && + static_cast<ARMOperand &>(*Operands[4]).isReg() && + static_cast<ARMOperand &>(*Operands[1]).getReg() == 0) return true; // Register-register 'add' for thumb does not have a cc_out operand // when it's an ADD Rdm, SP, {Rdm|#imm0_255} instruction. We do @@ -4878,13 +5145,12 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, // that can handle a different range and has a cc_out operand. if (((isThumb() && Mnemonic == "add") || (isThumbTwo() && Mnemonic == "sub")) && - Operands.size() == 6 && - static_cast<ARMOperand*>(Operands[3])->isReg() && - static_cast<ARMOperand*>(Operands[4])->isReg() && - static_cast<ARMOperand*>(Operands[4])->getReg() == ARM::SP && - static_cast<ARMOperand*>(Operands[1])->getReg() == 0 && - ((Mnemonic == "add" &&static_cast<ARMOperand*>(Operands[5])->isReg()) || - static_cast<ARMOperand*>(Operands[5])->isImm0_1020s4())) + Operands.size() == 6 && static_cast<ARMOperand &>(*Operands[3]).isReg() && + static_cast<ARMOperand &>(*Operands[4]).isReg() && + static_cast<ARMOperand &>(*Operands[4]).getReg() == ARM::SP && + static_cast<ARMOperand &>(*Operands[1]).getReg() == 0 && + ((Mnemonic == "add" && static_cast<ARMOperand &>(*Operands[5]).isReg()) || + static_cast<ARMOperand &>(*Operands[5]).isImm0_1020s4())) return true; // For Thumb2, add/sub immediate does not have a cc_out operand for the // imm0_4095 variant. That's the least-preferred variant when @@ -4892,23 +5158,22 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, // should remove the cc_out operand, we have to explicitly check that // it's not one of the other variants. Ugh. 
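Illustrative inputs for the cc_out special cases above and for the imm0_4095 check that follows (a sketch in GNU unified syntax, assuming an ARMv7 target):

    .syntax unified
    .arm
    mov   r0, #0x1234      @ not an SO-immediate: becomes MOVW, no cc_out
    .thumb
    add   r1, r2           @ two-register Thumb add, no cc_out operand
    add   r3, sp, #1020    @ ADD Rdm, SP, #imm0_1020s4 variant
    addw  r0, r1, #4095    @ t2ADDri12: the imm0_4095 encoding never has cc_out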
if (isThumbTwo() && (Mnemonic == "add" || Mnemonic == "sub") && - Operands.size() == 6 && - static_cast<ARMOperand*>(Operands[3])->isReg() && - static_cast<ARMOperand*>(Operands[4])->isReg() && - static_cast<ARMOperand*>(Operands[5])->isImm()) { + Operands.size() == 6 && static_cast<ARMOperand &>(*Operands[3]).isReg() && + static_cast<ARMOperand &>(*Operands[4]).isReg() && + static_cast<ARMOperand &>(*Operands[5]).isImm()) { // Nest conditions rather than one big 'if' statement for readability. // // If both registers are low, we're in an IT block, and the immediate is // in range, we should use encoding T1 instead, which has a cc_out. if (inITBlock() && - isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) && - isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) && - static_cast<ARMOperand*>(Operands[5])->isImm0_7()) + isARMLowRegister(static_cast<ARMOperand &>(*Operands[3]).getReg()) && + isARMLowRegister(static_cast<ARMOperand &>(*Operands[4]).getReg()) && + static_cast<ARMOperand &>(*Operands[5]).isImm0_7()) return false; // Check against T3. If the second register is the PC, this is an // alternate form of ADR, which uses encoding T4, so check for that too. - if (static_cast<ARMOperand*>(Operands[4])->getReg() != ARM::PC && - static_cast<ARMOperand*>(Operands[5])->isT2SOImm()) + if (static_cast<ARMOperand &>(*Operands[4]).getReg() != ARM::PC && + static_cast<ARMOperand &>(*Operands[5]).isT2SOImm()) return false; // Otherwise, we use encoding T4, which does not have a cc_out @@ -4920,35 +5185,34 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, // if we have a "mul" mnemonic in Thumb mode, check if we'll be able to // use the 16-bit encoding or not. if (isThumbTwo() && Mnemonic == "mul" && Operands.size() == 6 && - static_cast<ARMOperand*>(Operands[1])->getReg() == 0 && - static_cast<ARMOperand*>(Operands[3])->isReg() && - static_cast<ARMOperand*>(Operands[4])->isReg() && - static_cast<ARMOperand*>(Operands[5])->isReg() && + static_cast<ARMOperand &>(*Operands[1]).getReg() == 0 && + static_cast<ARMOperand &>(*Operands[3]).isReg() && + static_cast<ARMOperand &>(*Operands[4]).isReg() && + static_cast<ARMOperand &>(*Operands[5]).isReg() && // If the registers aren't low regs, the destination reg isn't the // same as one of the source regs, or the cc_out operand is zero // outside of an IT block, we have to use the 32-bit encoding, so // remove the cc_out operand. - (!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) || - !isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) || - !isARMLowRegister(static_cast<ARMOperand*>(Operands[5])->getReg()) || - !inITBlock() || - (static_cast<ARMOperand*>(Operands[3])->getReg() != - static_cast<ARMOperand*>(Operands[5])->getReg() && - static_cast<ARMOperand*>(Operands[3])->getReg() != - static_cast<ARMOperand*>(Operands[4])->getReg()))) + (!isARMLowRegister(static_cast<ARMOperand &>(*Operands[3]).getReg()) || + !isARMLowRegister(static_cast<ARMOperand &>(*Operands[4]).getReg()) || + !isARMLowRegister(static_cast<ARMOperand &>(*Operands[5]).getReg()) || + !inITBlock() || (static_cast<ARMOperand &>(*Operands[3]).getReg() != + static_cast<ARMOperand &>(*Operands[5]).getReg() && + static_cast<ARMOperand &>(*Operands[3]).getReg() != + static_cast<ARMOperand &>(*Operands[4]).getReg()))) return true; // Also check the 'mul' syntax variant that doesn't specify an explicit // destination register. 
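For example (illustrative Thumb2 fragment):

    it    eq
    addeq r0, r0, #5    @ low regs, small imm, inside IT block: encoding T1, keep cc_out
    add   r0, pc, #4    @ second register is PC: ADR alternate (encoding T4), drop cc_out
    it    eq
    muleq r0, r1        @ two-operand mul checked next: 16-bit form only inside an IT block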
if (isThumbTwo() && Mnemonic == "mul" && Operands.size() == 5 && - static_cast<ARMOperand*>(Operands[1])->getReg() == 0 && - static_cast<ARMOperand*>(Operands[3])->isReg() && - static_cast<ARMOperand*>(Operands[4])->isReg() && + static_cast<ARMOperand &>(*Operands[1]).getReg() == 0 && + static_cast<ARMOperand &>(*Operands[3]).isReg() && + static_cast<ARMOperand &>(*Operands[4]).isReg() && // If the registers aren't low regs or the cc_out operand is zero // outside of an IT block, we have to use the 32-bit encoding, so // remove the cc_out operand. - (!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) || - !isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) || + (!isARMLowRegister(static_cast<ARMOperand &>(*Operands[3]).getReg()) || + !isARMLowRegister(static_cast<ARMOperand &>(*Operands[4]).getReg()) || !inITBlock())) return true; @@ -4961,32 +5225,32 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, // anyway. if (isThumb() && (Mnemonic == "add" || Mnemonic == "sub") && (Operands.size() == 5 || Operands.size() == 6) && - static_cast<ARMOperand*>(Operands[3])->isReg() && - static_cast<ARMOperand*>(Operands[3])->getReg() == ARM::SP && - static_cast<ARMOperand*>(Operands[1])->getReg() == 0 && - (static_cast<ARMOperand*>(Operands[4])->isImm() || + static_cast<ARMOperand &>(*Operands[3]).isReg() && + static_cast<ARMOperand &>(*Operands[3]).getReg() == ARM::SP && + static_cast<ARMOperand &>(*Operands[1]).getReg() == 0 && + (static_cast<ARMOperand &>(*Operands[4]).isImm() || (Operands.size() == 6 && - static_cast<ARMOperand*>(Operands[5])->isImm()))) + static_cast<ARMOperand &>(*Operands[5]).isImm()))) return true; return false; } -bool ARMAsmParser::shouldOmitPredicateOperand( - StringRef Mnemonic, SmallVectorImpl<MCParsedAsmOperand *> &Operands) { +bool ARMAsmParser::shouldOmitPredicateOperand(StringRef Mnemonic, + OperandVector &Operands) { // VRINT{Z, R, X} have a predicate operand in VFP, but not in NEON unsigned RegIdx = 3; if ((Mnemonic == "vrintz" || Mnemonic == "vrintx" || Mnemonic == "vrintr") && - static_cast<ARMOperand *>(Operands[2])->getToken() == ".f32") { - if (static_cast<ARMOperand *>(Operands[3])->isToken() && - static_cast<ARMOperand *>(Operands[3])->getToken() == ".f32") + static_cast<ARMOperand &>(*Operands[2]).getToken() == ".f32") { + if (static_cast<ARMOperand &>(*Operands[3]).isToken() && + static_cast<ARMOperand &>(*Operands[3]).getToken() == ".f32") RegIdx = 4; - if (static_cast<ARMOperand *>(Operands[RegIdx])->isReg() && - (ARMMCRegisterClasses[ARM::DPRRegClassID] - .contains(static_cast<ARMOperand *>(Operands[RegIdx])->getReg()) || - ARMMCRegisterClasses[ARM::QPRRegClassID] - .contains(static_cast<ARMOperand *>(Operands[RegIdx])->getReg()))) + if (static_cast<ARMOperand &>(*Operands[RegIdx]).isReg() && + (ARMMCRegisterClasses[ARM::DPRRegClassID].contains( + static_cast<ARMOperand &>(*Operands[RegIdx]).getReg()) || + ARMMCRegisterClasses[ARM::QPRRegClassID].contains( + static_cast<ARMOperand &>(*Operands[RegIdx]).getReg()))) return true; } return false; @@ -5009,12 +5273,39 @@ static bool doesIgnoreDataTypeSuffix(StringRef Mnemonic, StringRef DT) { } static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features, unsigned VariantID); + +static bool RequiresVFPRegListValidation(StringRef Inst, + bool &AcceptSinglePrecisionOnly, + bool &AcceptDoublePrecisionOnly) { + if (Inst.size() < 7) + return false; + + if (Inst.startswith("fldm") || Inst.startswith("fstm")) { + StringRef AddressingMode = Inst.substr(4, 2); + 
if (AddressingMode == "ia" || AddressingMode == "db" || + AddressingMode == "ea" || AddressingMode == "fd") { + AcceptSinglePrecisionOnly = Inst[6] == 's'; + AcceptDoublePrecisionOnly = Inst[6] == 'd' || Inst[6] == 'x'; + return true; + } + } + + return false; +} + /// Parse an arm instruction mnemonic followed by its operands. bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, - SMLoc NameLoc, - SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + SMLoc NameLoc, OperandVector &Operands) { + // FIXME: Can this be done via tablegen in some fashion? + bool RequireVFPRegisterListCheck; + bool AcceptSinglePrecisionOnly; + bool AcceptDoublePrecisionOnly; + RequireVFPRegisterListCheck = + RequiresVFPRegListValidation(Name, AcceptSinglePrecisionOnly, + AcceptDoublePrecisionOnly); + // Apply mnemonic aliases before doing anything else, as the destination - // mnemnonic may include suffices and we want to handle them normally. + // mnemonic may include suffices and we want to handle them normally. // The generic tblgen'erated code does this later, at the start of // MatchInstructionImpl(), but that's too late for aliases that include // any sort of suffix. @@ -5141,6 +5432,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // For for ARM mode generate an error if the .n qualifier is used. if (ExtraToken == ".n" && !isThumb()) { SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Start); + Parser.eatToEndOfStatement(); return Error(Loc, "instruction with .n (narrow) qualifier not allowed in " "arm mode"); } @@ -5181,6 +5473,16 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, Parser.Lex(); // Consume the EndOfStatement + if (RequireVFPRegisterListCheck) { + ARMOperand &Op = static_cast<ARMOperand &>(*Operands.back()); + if (AcceptSinglePrecisionOnly && !Op.isSPRRegList()) + return Error(Op.getStartLoc(), + "VFP/Neon single precision register expected"); + if (AcceptDoublePrecisionOnly && !Op.isDPRRegList()) + return Error(Op.getStartLoc(), + "VFP/Neon double precision register expected"); + } + // Some instructions, mostly Thumb, have forms for the same mnemonic that // do and don't have a cc_out optional-def operand. With some spot-checks // of the operand list, we can figure out which variant we're trying to @@ -5188,20 +5490,14 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // try to remove a cc_out operand that was explicitly set on the the // mnemonic, of course (CarrySetting == true). Reason number #317 the // table driven matcher doesn't fit well with the ARM instruction set. - if (!CarrySetting && shouldOmitCCOutOperand(Mnemonic, Operands)) { - ARMOperand *Op = static_cast<ARMOperand*>(Operands[1]); + if (!CarrySetting && shouldOmitCCOutOperand(Mnemonic, Operands)) Operands.erase(Operands.begin() + 1); - delete Op; - } // Some instructions have the same mnemonic, but don't always // have a predicate. Distinguish them here and delete the // predicate if needed. - if (shouldOmitPredicateOperand(Mnemonic, Operands)) { - ARMOperand *Op = static_cast<ARMOperand*>(Operands[1]); + if (shouldOmitPredicateOperand(Mnemonic, Operands)) Operands.erase(Operands.begin() + 1); - delete Op; - } // ARM mode 'blx' need special handling, as the register operand version // is predicable, but the label operand version is not. 
So, we can't rely @@ -5209,11 +5505,8 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // a k_CondCode operand in the list. If we're trying to match the label // version, remove the k_CondCode operand here. if (!isThumb() && Mnemonic == "blx" && Operands.size() == 3 && - static_cast<ARMOperand*>(Operands[2])->isImm()) { - ARMOperand *Op = static_cast<ARMOperand*>(Operands[1]); + static_cast<ARMOperand &>(*Operands[2]).isImm()) Operands.erase(Operands.begin() + 1); - delete Op; - } // Adjust operands of ldrexd/strexd to MCK_GPRPair. // ldrexd/strexd require even/odd GPR pair. To enforce this constraint, @@ -5226,32 +5519,50 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, Mnemonic == "stlexd")) { bool isLoad = (Mnemonic == "ldrexd" || Mnemonic == "ldaexd"); unsigned Idx = isLoad ? 2 : 3; - ARMOperand* Op1 = static_cast<ARMOperand*>(Operands[Idx]); - ARMOperand* Op2 = static_cast<ARMOperand*>(Operands[Idx+1]); + ARMOperand &Op1 = static_cast<ARMOperand &>(*Operands[Idx]); + ARMOperand &Op2 = static_cast<ARMOperand &>(*Operands[Idx + 1]); const MCRegisterClass& MRC = MRI->getRegClass(ARM::GPRRegClassID); // Adjust only if Op1 and Op2 are GPRs. - if (Op1->isReg() && Op2->isReg() && MRC.contains(Op1->getReg()) && - MRC.contains(Op2->getReg())) { - unsigned Reg1 = Op1->getReg(); - unsigned Reg2 = Op2->getReg(); + if (Op1.isReg() && Op2.isReg() && MRC.contains(Op1.getReg()) && + MRC.contains(Op2.getReg())) { + unsigned Reg1 = Op1.getReg(); + unsigned Reg2 = Op2.getReg(); unsigned Rt = MRI->getEncodingValue(Reg1); unsigned Rt2 = MRI->getEncodingValue(Reg2); // Rt2 must be Rt + 1 and Rt must be even. if (Rt + 1 != Rt2 || (Rt & 1)) { - Error(Op2->getStartLoc(), isLoad ? - "destination operands must be sequential" : - "source operands must be sequential"); + Error(Op2.getStartLoc(), isLoad + ? "destination operands must be sequential" + : "source operands must be sequential"); return true; } unsigned NewReg = MRI->getMatchingSuperReg(Reg1, ARM::gsub_0, &(MRI->getRegClass(ARM::GPRPairRegClassID))); - Operands.erase(Operands.begin() + Idx, Operands.begin() + Idx + 2); - Operands.insert(Operands.begin() + Idx, ARMOperand::CreateReg( - NewReg, Op1->getStartLoc(), Op2->getEndLoc())); - delete Op1; - delete Op2; + Operands[Idx] = + ARMOperand::CreateReg(NewReg, Op1.getStartLoc(), Op2.getEndLoc()); + Operands.erase(Operands.begin() + Idx + 1); + } + } + + // GNU Assembler extension (compatibility) + if ((Mnemonic == "ldrd" || Mnemonic == "strd")) { + ARMOperand &Op2 = static_cast<ARMOperand &>(*Operands[2]); + ARMOperand &Op3 = static_cast<ARMOperand &>(*Operands[3]); + if (Op3.isMem()) { + assert(Op2.isReg() && "expected register argument"); + + unsigned SuperReg = MRI->getMatchingSuperReg( + Op2.getReg(), ARM::gsub_0, &MRI->getRegClass(ARM::GPRPairRegClassID)); + + assert(SuperReg && "expected register pair"); + + unsigned PairedReg = MRI->getSubReg(SuperReg, ARM::gsub_1); + + Operands.insert( + Operands.begin() + 3, + ARMOperand::CreateReg(PairedReg, Op2.getStartLoc(), Op2.getEndLoc())); } } @@ -5261,19 +5572,13 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // so the Mnemonic is the original name "subs" and delete the predicate // operand so it will match the table entry. 
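Illustrative forms for the two rewrites above and for the 'subs pc, lr' case handled next (GNU syntax, ARM mode):

    ldrexd r0, r1, [r2]   @ pair must be sequential with even Rt, as checked above
    ldrd   r0, r1, [r2]   @ canonical ldrd spelling
    ldrd   r0, [r2]       @ GNU two-operand form: the paired r1 is inserted automatically
    subs   pc, lr, #4     @ exception-return form, matched via the original "subs" entry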
if (isThumbTwo() && Mnemonic == "sub" && Operands.size() == 6 && - static_cast<ARMOperand*>(Operands[3])->isReg() && - static_cast<ARMOperand*>(Operands[3])->getReg() == ARM::PC && - static_cast<ARMOperand*>(Operands[4])->isReg() && - static_cast<ARMOperand*>(Operands[4])->getReg() == ARM::LR && - static_cast<ARMOperand*>(Operands[5])->isImm()) { - ARMOperand *Op0 = static_cast<ARMOperand*>(Operands[0]); - Operands.erase(Operands.begin()); - delete Op0; - Operands.insert(Operands.begin(), ARMOperand::CreateToken(Name, NameLoc)); - - ARMOperand *Op1 = static_cast<ARMOperand*>(Operands[1]); + static_cast<ARMOperand &>(*Operands[3]).isReg() && + static_cast<ARMOperand &>(*Operands[3]).getReg() == ARM::PC && + static_cast<ARMOperand &>(*Operands[4]).isReg() && + static_cast<ARMOperand &>(*Operands[4]).getReg() == ARM::LR && + static_cast<ARMOperand &>(*Operands[5]).isImm()) { + Operands.front() = ARMOperand::CreateToken(Name, NameLoc); Operands.erase(Operands.begin() + 1); - delete Op1; } return false; } @@ -5319,9 +5624,8 @@ static bool instIsBreakpoint(const MCInst &Inst) { } // FIXME: We would really like to be able to tablegen'erate this. -bool ARMAsmParser:: -validateInstruction(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +bool ARMAsmParser::validateInstruction(MCInst &Inst, + const OperandVector &Operands) { const MCInstrDesc &MCID = MII.get(Inst.getOpcode()); SMLoc Loc = Operands[0]->getStartLoc(); @@ -5344,7 +5648,7 @@ validateInstruction(MCInst &Inst, // Find the condition code Operand to get its SMLoc information. SMLoc CondLoc; for (unsigned I = 1; I < Operands.size(); ++I) - if (static_cast<ARMOperand*>(Operands[I])->isCondCode()) + if (static_cast<ARMOperand &>(*Operands[I]).isCondCode()) CondLoc = Operands[I]->getStartLoc(); return Error(CondLoc, "incorrect condition in IT block; got '" + StringRef(ARMCondCodeToString(ARMCC::CondCodes(Cond))) + @@ -5444,8 +5748,8 @@ validateInstruction(MCInst &Inst, // in the register list. unsigned Rn = Inst.getOperand(0).getReg(); bool HasWritebackToken = - (static_cast<ARMOperand*>(Operands[3])->isToken() && - static_cast<ARMOperand*>(Operands[3])->getToken() == "!"); + (static_cast<ARMOperand &>(*Operands[3]).isToken() && + static_cast<ARMOperand &>(*Operands[3]).getToken() == "!"); bool ListContainsBase; if (checkLowRegisterList(Inst, 3, Rn, 0, ListContainsBase) && !isThumbTwo()) return Error(Operands[3 + HasWritebackToken]->getStartLoc(), @@ -5496,7 +5800,6 @@ validateInstruction(MCInst &Inst, case ARM::sysSTMIB_UPD: return Error(Operands[2]->getStartLoc(), "system STM cannot have writeback register"); - break; case ARM::tMUL: { // The second source operand must be the same register as the destination // operand. @@ -5506,11 +5809,10 @@ validateInstruction(MCInst &Inst, // this first statement is always true for the new Inst. Essentially, the // destination is unconditionally copied into the second source operand // without checking to see if it matches what we actually parsed. 
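Concretely (Thumb, illustrative):

    muls  r0, r1, r0    @ accepted: destination matches a source
    muls  r0, r1, r2    @ rejected: "destination register must match source register"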
- if (Operands.size() == 6 &&
- (((ARMOperand*)Operands[3])->getReg() !=
- ((ARMOperand*)Operands[5])->getReg()) &&
- (((ARMOperand*)Operands[3])->getReg() !=
- ((ARMOperand*)Operands[4])->getReg())) {
+ if (Operands.size() == 6 && (((ARMOperand &)*Operands[3]).getReg() !=
+ ((ARMOperand &)*Operands[5]).getReg()) &&
+ (((ARMOperand &)*Operands[3]).getReg() !=
+ ((ARMOperand &)*Operands[4]).getReg())) {
 return Error(Operands[3]->getStartLoc(),
 "destination register must match source register");
 }
@@ -5563,26 +5865,50 @@ validateInstruction(MCInst &Inst,
 }
 // Final range checking for Thumb unconditional branch instructions.
 case ARM::tB:
- if (!(static_cast<ARMOperand*>(Operands[2]))->isSignedOffset<11, 1>())
+ if (!(static_cast<ARMOperand &>(*Operands[2])).isSignedOffset<11, 1>())
 return Error(Operands[2]->getStartLoc(), "branch target out of range");
 break;
 case ARM::t2B: {
 int op = (Operands[2]->isImm()) ? 2 : 3;
- if (!(static_cast<ARMOperand*>(Operands[op]))->isSignedOffset<24, 1>())
+ if (!static_cast<ARMOperand &>(*Operands[op]).isSignedOffset<24, 1>())
 return Error(Operands[op]->getStartLoc(), "branch target out of range");
 break;
 }
 // Final range checking for Thumb conditional branch instructions.
 case ARM::tBcc:
- if (!(static_cast<ARMOperand*>(Operands[2]))->isSignedOffset<8, 1>())
+ if (!static_cast<ARMOperand &>(*Operands[2]).isSignedOffset<8, 1>())
 return Error(Operands[2]->getStartLoc(), "branch target out of range");
 break;
 case ARM::t2Bcc: {
 int Op = (Operands[2]->isImm()) ? 2 : 3;
- if (!(static_cast<ARMOperand*>(Operands[Op]))->isSignedOffset<20, 1>())
+ if (!static_cast<ARMOperand &>(*Operands[Op]).isSignedOffset<20, 1>())
 return Error(Operands[Op]->getStartLoc(), "branch target out of range");
 break;
 }
+ case ARM::MOVi16:
+ case ARM::t2MOVi16:
+ case ARM::t2MOVTi16:
+ {
+ // We want to avoid misleadingly allowing something like "mov r0, <symbol>"
+ // especially when we turn it into a movw and the expression <symbol> does
+ // not have a :lower16: or :upper16: as part of the expression. We don't
+ // want the behavior of silently truncating, which can be unexpected and
+ // lead to bugs that are difficult to find since this is an easy mistake
+ // to make.
+ int i = (Operands[3]->isImm()) ?
3 : 4; + ARMOperand &Op = static_cast<ARMOperand &>(*Operands[i]); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op.getImm()); + if (CE) break; + const MCExpr *E = dyn_cast<MCExpr>(Op.getImm()); + if (!E) break; + const ARMMCExpr *ARM16Expr = dyn_cast<ARMMCExpr>(E); + if (!ARM16Expr || (ARM16Expr->getKind() != ARMMCExpr::VK_ARM_HI16 && + ARM16Expr->getKind() != ARMMCExpr::VK_ARM_LO16)) + return Error( + Op.getStartLoc(), + "immediate expression for mov requires :lower16: or :upper16"); + break; + } } return false; @@ -5733,7 +6059,7 @@ static unsigned getRealVLDOpcode(unsigned Opc, unsigned &Spacing) { case ARM::VLD3DUPdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3DUPd16_UPD; case ARM::VLD3DUPdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD3DUPd32_UPD; case ARM::VLD3DUPqWB_fixed_Asm_8: Spacing = 1; return ARM::VLD3DUPq8_UPD; - case ARM::VLD3DUPqWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3DUPq16_UPD; + case ARM::VLD3DUPqWB_fixed_Asm_16: Spacing = 2; return ARM::VLD3DUPq16_UPD; case ARM::VLD3DUPqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD3DUPq32_UPD; case ARM::VLD3DUPdWB_register_Asm_8: Spacing = 1; return ARM::VLD3DUPd8_UPD; case ARM::VLD3DUPdWB_register_Asm_16: Spacing = 1; return ARM::VLD3DUPd16_UPD; @@ -5789,7 +6115,7 @@ static unsigned getRealVLDOpcode(unsigned Opc, unsigned &Spacing) { case ARM::VLD4LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD4LNd8_UPD; case ARM::VLD4LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4LNd16_UPD; case ARM::VLD4LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD4LNd32_UPD; - case ARM::VLD4LNqWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4LNq16_UPD; + case ARM::VLD4LNqWB_fixed_Asm_16: Spacing = 2; return ARM::VLD4LNq16_UPD; case ARM::VLD4LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD4LNq32_UPD; case ARM::VLD4LNdWB_register_Asm_8: Spacing = 1; return ARM::VLD4LNd8_UPD; case ARM::VLD4LNdWB_register_Asm_16: Spacing = 1; return ARM::VLD4LNd16_UPD; @@ -5844,10 +6170,45 @@ static unsigned getRealVLDOpcode(unsigned Opc, unsigned &Spacing) { } } -bool ARMAsmParser:: -processInstruction(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +bool ARMAsmParser::processInstruction(MCInst &Inst, + const OperandVector &Operands) { switch (Inst.getOpcode()) { + // Alias for alternate form of 'ldr{,b}t Rt, [Rn], #imm' instruction. + case ARM::LDRT_POST: + case ARM::LDRBT_POST: { + const unsigned Opcode = + (Inst.getOpcode() == ARM::LDRT_POST) ? ARM::LDRT_POST_IMM + : ARM::LDRBT_POST_IMM; + MCInst TmpInst; + TmpInst.setOpcode(Opcode); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + TmpInst.addOperand(MCOperand::CreateImm(0)); + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(Inst.getOperand(3)); + Inst = TmpInst; + return true; + } + // Alias for alternate form of 'str{,b}t Rt, [Rn], #imm' instruction. + case ARM::STRT_POST: + case ARM::STRBT_POST: { + const unsigned Opcode = + (Inst.getOpcode() == ARM::STRT_POST) ? ARM::STRT_POST_IMM + : ARM::STRBT_POST_IMM; + MCInst TmpInst; + TmpInst.setOpcode(Opcode); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + TmpInst.addOperand(MCOperand::CreateImm(0)); + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(Inst.getOperand(3)); + Inst = TmpInst; + return true; + } // Alias for alternate form of 'ADR Rd, #imm' instruction. 
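Illustrative inputs for the checks and aliases above (GNU syntax; 'sym' is a placeholder symbol):

    movw  r0, :lower16:sym   @ accepted
    movt  r0, :upper16:sym   @ accepted
    movw  r0, sym            @ rejected by the MOVi16 check above
    ldrt  r1, [r2], #4       @ rewritten to LDRT_POST_IMM as shown above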
case ARM::ADDri: { if (Inst.getOperand(1).getReg() != ARM::PC || @@ -5867,8 +6228,8 @@ processInstruction(MCInst &Inst, // Select the narrow version if the immediate will fit. if (Inst.getOperand(1).getImm() > 0 && Inst.getOperand(1).getImm() <= 0xff && - !(static_cast<ARMOperand*>(Operands[2])->isToken() && - static_cast<ARMOperand*>(Operands[2])->getToken() == ".w")) + !(static_cast<ARMOperand &>(*Operands[2]).isToken() && + static_cast<ARMOperand &>(*Operands[2]).getToken() == ".w")) Inst.setOpcode(ARM::tLDRpci); else Inst.setOpcode(ARM::t2LDRpci); @@ -6958,8 +7319,8 @@ processInstruction(MCInst &Inst, if (isARMLowRegister(Inst.getOperand(0).getReg()) && Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() && Inst.getOperand(5).getReg() == (inITBlock() ? 0 : ARM::CPSR) && - !(static_cast<ARMOperand*>(Operands[3])->isToken() && - static_cast<ARMOperand*>(Operands[3])->getToken() == ".w")) { + !(static_cast<ARMOperand &>(*Operands[3]).isToken() && + static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w")) { unsigned NewOpc; switch (Inst.getOpcode()) { default: llvm_unreachable("unexpected opcode"); @@ -7162,7 +7523,7 @@ processInstruction(MCInst &Inst, case ARM::LDMIA_UPD: // If this is a load of a single register via a 'pop', then we should use // a post-indexed LDR instruction instead, per the ARM ARM. - if (static_cast<ARMOperand*>(Operands[0])->getToken() == "pop" && + if (static_cast<ARMOperand &>(*Operands[0]).getToken() == "pop" && Inst.getNumOperands() == 5) { MCInst TmpInst; TmpInst.setOpcode(ARM::LDR_POST_IMM); @@ -7180,7 +7541,7 @@ processInstruction(MCInst &Inst, case ARM::STMDB_UPD: // If this is a store of a single register via a 'push', then we should use // a pre-indexed STR instruction instead, per the ARM ARM. - if (static_cast<ARMOperand*>(Operands[0])->getToken() == "push" && + if (static_cast<ARMOperand &>(*Operands[0]).getToken() == "push" && Inst.getNumOperands() == 5) { MCInst TmpInst; TmpInst.setOpcode(ARM::STR_PRE_IMM); @@ -7196,7 +7557,7 @@ processInstruction(MCInst &Inst, case ARM::t2ADDri12: // If the immediate fits for encoding T3 (t2ADDri) and the generic "add" // mnemonic was used (not "addw"), encoding T3 is preferred. - if (static_cast<ARMOperand*>(Operands[0])->getToken() != "add" || + if (static_cast<ARMOperand &>(*Operands[0]).getToken() != "add" || ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1) break; Inst.setOpcode(ARM::t2ADDri); @@ -7205,7 +7566,7 @@ processInstruction(MCInst &Inst, case ARM::t2SUBri12: // If the immediate fits for encoding T3 (t2SUBri) and the generic "sub" // mnemonic was used (not "subw"), encoding T3 is preferred. - if (static_cast<ARMOperand*>(Operands[0])->getToken() != "sub" || + if (static_cast<ARMOperand &>(*Operands[0]).getToken() != "sub" || ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1) break; Inst.setOpcode(ARM::t2SUBri); @@ -7241,9 +7602,9 @@ processInstruction(MCInst &Inst, !isARMLowRegister(Inst.getOperand(0).getReg()) || (unsigned)Inst.getOperand(2).getImm() > 255 || ((!inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR) || - (inITBlock() && Inst.getOperand(5).getReg() != 0)) || - (static_cast<ARMOperand*>(Operands[3])->isToken() && - static_cast<ARMOperand*>(Operands[3])->getToken() == ".w")) + (inITBlock() && Inst.getOperand(5).getReg() != 0)) || + (static_cast<ARMOperand &>(*Operands[3]).isToken() && + static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w")) break; MCInst TmpInst; TmpInst.setOpcode(Inst.getOpcode() == ARM::t2ADDri ? 
@@ -7264,8 +7625,8 @@ processInstruction(MCInst &Inst, // 'as' behaviour. Make sure the wide encoding wasn't explicit. if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() || Inst.getOperand(5).getReg() != 0 || - (static_cast<ARMOperand*>(Operands[3])->isToken() && - static_cast<ARMOperand*>(Operands[3])->getToken() == ".w")) + (static_cast<ARMOperand &>(*Operands[3]).isToken() && + static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w")) break; MCInst TmpInst; TmpInst.setOpcode(ARM::tADDhirr); @@ -7322,8 +7683,8 @@ processInstruction(MCInst &Inst, // an error in validateInstruction(). unsigned Rn = Inst.getOperand(0).getReg(); bool hasWritebackToken = - (static_cast<ARMOperand*>(Operands[3])->isToken() && - static_cast<ARMOperand*>(Operands[3])->getToken() == "!"); + (static_cast<ARMOperand &>(*Operands[3]).isToken() && + static_cast<ARMOperand &>(*Operands[3]).getToken() == "!"); bool listContainsBase; if (checkLowRegisterList(Inst, 3, Rn, 0, listContainsBase) || (!listContainsBase && !hasWritebackToken) || @@ -7385,10 +7746,10 @@ processInstruction(MCInst &Inst, if (isARMLowRegister(Inst.getOperand(0).getReg()) && (unsigned)Inst.getOperand(1).getImm() <= 255 && ((!inITBlock() && Inst.getOperand(2).getImm() == ARMCC::AL && - Inst.getOperand(4).getReg() == ARM::CPSR) || - (inITBlock() && Inst.getOperand(4).getReg() == 0)) && - (!static_cast<ARMOperand*>(Operands[2])->isToken() || - static_cast<ARMOperand*>(Operands[2])->getToken() != ".w")) { + Inst.getOperand(4).getReg() == ARM::CPSR) || + (inITBlock() && Inst.getOperand(4).getReg() == 0)) && + (!static_cast<ARMOperand &>(*Operands[2]).isToken() || + static_cast<ARMOperand &>(*Operands[2]).getToken() != ".w")) { // The operands aren't in the same order for tMOVi8... MCInst TmpInst; TmpInst.setOpcode(ARM::tMOVi8); @@ -7409,8 +7770,8 @@ processInstruction(MCInst &Inst, isARMLowRegister(Inst.getOperand(1).getReg()) && Inst.getOperand(2).getImm() == ARMCC::AL && Inst.getOperand(4).getReg() == ARM::CPSR && - (!static_cast<ARMOperand*>(Operands[2])->isToken() || - static_cast<ARMOperand*>(Operands[2])->getToken() != ".w")) { + (!static_cast<ARMOperand &>(*Operands[2]).isToken() || + static_cast<ARMOperand &>(*Operands[2]).getToken() != ".w")) { // The operands aren't the same for tMOV[S]r... (no cc_out) MCInst TmpInst; TmpInst.setOpcode(Inst.getOperand(4).getReg() ? 
ARM::tMOVSr : ARM::tMOVr); @@ -7432,8 +7793,8 @@ processInstruction(MCInst &Inst, if (isARMLowRegister(Inst.getOperand(0).getReg()) && isARMLowRegister(Inst.getOperand(1).getReg()) && Inst.getOperand(2).getImm() == 0 && - (!static_cast<ARMOperand*>(Operands[2])->isToken() || - static_cast<ARMOperand*>(Operands[2])->getToken() != ".w")) { + (!static_cast<ARMOperand &>(*Operands[2]).isToken() || + static_cast<ARMOperand &>(*Operands[2]).getToken() != ".w")) { unsigned NewOpc; switch (Inst.getOpcode()) { default: llvm_unreachable("Illegal opcode!"); @@ -7545,9 +7906,10 @@ processInstruction(MCInst &Inst, isARMLowRegister(Inst.getOperand(2).getReg())) && Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() && ((!inITBlock() && Inst.getOperand(5).getReg() == ARM::CPSR) || - (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) && - (!static_cast<ARMOperand*>(Operands[3])->isToken() || - !static_cast<ARMOperand*>(Operands[3])->getToken().equals_lower(".w"))) { + (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) && + (!static_cast<ARMOperand &>(*Operands[3]).isToken() || + !static_cast<ARMOperand &>(*Operands[3]).getToken().equals_lower( + ".w"))) { unsigned NewOpc; switch (Inst.getOpcode()) { default: llvm_unreachable("unexpected opcode"); @@ -7584,9 +7946,10 @@ processInstruction(MCInst &Inst, (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() || Inst.getOperand(0).getReg() == Inst.getOperand(2).getReg()) && ((!inITBlock() && Inst.getOperand(5).getReg() == ARM::CPSR) || - (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) && - (!static_cast<ARMOperand*>(Operands[3])->isToken() || - !static_cast<ARMOperand*>(Operands[3])->getToken().equals_lower(".w"))) { + (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) && + (!static_cast<ARMOperand &>(*Operands[3]).isToken() || + !static_cast<ARMOperand &>(*Operands[3]).getToken().equals_lower( + ".w"))) { unsigned NewOpc; switch (Inst.getOpcode()) { default: llvm_unreachable("unexpected opcode"); @@ -7659,12 +8022,17 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) { return Match_Success; } +namespace llvm { +template <> inline bool IsCPSRDead<MCInst>(MCInst *Instr) { + return true; // In an assembly source, no need to second-guess +} +} + static const char *getSubtargetFeatureName(unsigned Val); -bool ARMAsmParser:: -MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, - SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCStreamer &Out, unsigned &ErrorInfo, - bool MatchingInlineAsm) { +bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, + MCStreamer &Out, unsigned &ErrorInfo, + bool MatchingInlineAsm) { MCInst Inst; unsigned MatchResult; @@ -7694,7 +8062,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, // Only after the instruction is fully processed, we can validate it if (wasInITBlock && hasV8Ops() && isThumb() && - !isV8EligibleForIT(&Inst, 2)) { + !isV8EligibleForIT(&Inst)) { Warning(IDLoc, "deprecated instruction in IT block"); } } @@ -7710,7 +8078,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return false; Inst.setLoc(IDLoc); - Out.EmitInstruction(Inst); + Out.EmitInstruction(Inst, STI); return false; case Match_MissingFeature: { assert(ErrorInfo && "Unknown missing feature!"); @@ -7733,7 +8101,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, if (ErrorInfo >= Operands.size()) return Error(IDLoc, "too few operands for instruction"); - ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc(); + 
ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc(); if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; } @@ -7741,7 +8109,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, } case Match_MnemonicFail: return Error(IDLoc, "invalid instruction", - ((ARMOperand*)Operands[0])->getLocRange()); + ((ARMOperand &)*Operands[0]).getLocRange()); case Match_RequiresNotITBlock: return Error(IDLoc, "flag setting instruction only valid outside IT block"); case Match_RequiresITBlock: @@ -7751,15 +8119,51 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, case Match_RequiresThumb2: return Error(IDLoc, "instruction variant requires Thumb2"); case Match_ImmRange0_15: { - SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc(); + SMLoc ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc(); if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; return Error(ErrorLoc, "immediate operand must be in the range [0,15]"); } case Match_ImmRange0_239: { - SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc(); + SMLoc ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc(); if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; return Error(ErrorLoc, "immediate operand must be in the range [0,239]"); } + case Match_AlignedMemoryRequiresNone: + case Match_DupAlignedMemoryRequiresNone: + case Match_AlignedMemoryRequires16: + case Match_DupAlignedMemoryRequires16: + case Match_AlignedMemoryRequires32: + case Match_DupAlignedMemoryRequires32: + case Match_AlignedMemoryRequires64: + case Match_DupAlignedMemoryRequires64: + case Match_AlignedMemoryRequires64or128: + case Match_DupAlignedMemoryRequires64or128: + case Match_AlignedMemoryRequires64or128or256: + { + SMLoc ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getAlignmentLoc(); + if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; + switch (MatchResult) { + default: + llvm_unreachable("Missing Match_Aligned type"); + case Match_AlignedMemoryRequiresNone: + case Match_DupAlignedMemoryRequiresNone: + return Error(ErrorLoc, "alignment must be omitted"); + case Match_AlignedMemoryRequires16: + case Match_DupAlignedMemoryRequires16: + return Error(ErrorLoc, "alignment must be 16 or omitted"); + case Match_AlignedMemoryRequires32: + case Match_DupAlignedMemoryRequires32: + return Error(ErrorLoc, "alignment must be 32 or omitted"); + case Match_AlignedMemoryRequires64: + case Match_DupAlignedMemoryRequires64: + return Error(ErrorLoc, "alignment must be 64 or omitted"); + case Match_AlignedMemoryRequires64or128: + case Match_DupAlignedMemoryRequires64or128: + return Error(ErrorLoc, "alignment must be 64, 128 or omitted"); + case Match_AlignedMemoryRequires64or128or256: + return Error(ErrorLoc, "alignment must be 64, 128, 256 or omitted"); + } + } } llvm_unreachable("Implement any new match types added!"); @@ -7767,9 +8171,15 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, /// parseDirective parses the arm specific directives bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { + const MCObjectFileInfo::Environment Format = + getContext().getObjectFileInfo()->getObjectFileType(); + bool IsMachO = Format == MCObjectFileInfo::IsMachO; + StringRef IDVal = DirectiveID.getIdentifier(); if (IDVal == ".word") - return parseDirectiveWord(4, DirectiveID.getLoc()); + return parseLiteralValues(4, DirectiveID.getLoc()); + else if (IDVal == ".short" || IDVal == ".hword") + return parseLiteralValues(2, DirectiveID.getLoc()); else if (IDVal == ".thumb") return parseDirectiveThumb(DirectiveID.getLoc()); else if (IDVal == ".arm") @@ -7782,16 +8192,6 @@ bool 
ARMAsmParser::ParseDirective(AsmToken DirectiveID) { return parseDirectiveSyntax(DirectiveID.getLoc()); else if (IDVal == ".unreq") return parseDirectiveUnreq(DirectiveID.getLoc()); - else if (IDVal == ".arch") - return parseDirectiveArch(DirectiveID.getLoc()); - else if (IDVal == ".eabi_attribute") - return parseDirectiveEabiAttr(DirectiveID.getLoc()); - else if (IDVal == ".cpu") - return parseDirectiveCPU(DirectiveID.getLoc()); - else if (IDVal == ".fpu") - return parseDirectiveFPU(DirectiveID.getLoc()); - else if (IDVal == ".fnstart") - return parseDirectiveFnStart(DirectiveID.getLoc()); else if (IDVal == ".fnend") return parseDirectiveFnEnd(DirectiveID.getLoc()); else if (IDVal == ".cantunwind") @@ -7808,17 +8208,61 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { return parseDirectiveRegSave(DirectiveID.getLoc(), false); else if (IDVal == ".vsave") return parseDirectiveRegSave(DirectiveID.getLoc(), true); + else if (IDVal == ".ltorg" || IDVal == ".pool") + return parseDirectiveLtorg(DirectiveID.getLoc()); + else if (IDVal == ".even") + return parseDirectiveEven(DirectiveID.getLoc()); + else if (IDVal == ".personalityindex") + return parseDirectivePersonalityIndex(DirectiveID.getLoc()); + else if (IDVal == ".unwind_raw") + return parseDirectiveUnwindRaw(DirectiveID.getLoc()); + else if (IDVal == ".movsp") + return parseDirectiveMovSP(DirectiveID.getLoc()); + else if (IDVal == ".arch_extension") + return parseDirectiveArchExtension(DirectiveID.getLoc()); + else if (IDVal == ".align") + return parseDirectiveAlign(DirectiveID.getLoc()); + else if (IDVal == ".thumb_set") + return parseDirectiveThumbSet(DirectiveID.getLoc()); + + if (!IsMachO) { + if (IDVal == ".arch") + return parseDirectiveArch(DirectiveID.getLoc()); + else if (IDVal == ".cpu") + return parseDirectiveCPU(DirectiveID.getLoc()); + else if (IDVal == ".eabi_attribute") + return parseDirectiveEabiAttr(DirectiveID.getLoc()); + else if (IDVal == ".fpu") + return parseDirectiveFPU(DirectiveID.getLoc()); + else if (IDVal == ".fnstart") + return parseDirectiveFnStart(DirectiveID.getLoc()); + else if (IDVal == ".inst") + return parseDirectiveInst(DirectiveID.getLoc()); + else if (IDVal == ".inst.n") + return parseDirectiveInst(DirectiveID.getLoc(), 'n'); + else if (IDVal == ".inst.w") + return parseDirectiveInst(DirectiveID.getLoc(), 'w'); + else if (IDVal == ".object_arch") + return parseDirectiveObjectArch(DirectiveID.getLoc()); + else if (IDVal == ".tlsdescseq") + return parseDirectiveTLSDescSeq(DirectiveID.getLoc()); + } + return true; } -/// parseDirectiveWord -/// ::= .word [ expression (, expression)* ] -bool ARMAsmParser::parseDirectiveWord(unsigned Size, SMLoc L) { +/// parseLiteralValues +/// ::= .hword expression [, expression]* +/// ::= .short expression [, expression]* +/// ::= .word expression [, expression]* +bool ARMAsmParser::parseLiteralValues(unsigned Size, SMLoc L) { if (getLexer().isNot(AsmToken::EndOfStatement)) { for (;;) { const MCExpr *Value; - if (getParser().parseExpression(Value)) - return true; + if (getParser().parseExpression(Value)) { + Parser.eatToEndOfStatement(); + return false; + } getParser().getStreamer().EmitValue(Value, Size); @@ -7826,8 +8270,10 @@ bool ARMAsmParser::parseDirectiveWord(unsigned Size, SMLoc L) { break; // FIXME: Improve diagnostic. 
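Taken together with the '=' operand handling earlier in the patch, these directives enable GNU-style sources such as (illustrative):

    ldr    r0, =0x12345678   @ '=' pseudo: loads the value via a constant-pool entry
    .ltorg                   @ flush the literal pool here (alias: .pool)
    .short 0x1234            @ handled by parseLiteralValues(2, ...)
    .hword 0x5678            @ same handler as .short
    .word  0xdeadbeef        @ handled by parseLiteralValues(4, ...)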
- if (getLexer().isNot(AsmToken::Comma)) - return Error(L, "unexpected token in directive"); + if (getLexer().isNot(AsmToken::Comma)) { + Error(L, "unexpected token in directive"); + return false; + } Parser.Lex(); } } @@ -7839,15 +8285,20 @@ bool ARMAsmParser::parseDirectiveWord(unsigned Size, SMLoc L) { /// parseDirectiveThumb /// ::= .thumb bool ARMAsmParser::parseDirectiveThumb(SMLoc L) { - if (getLexer().isNot(AsmToken::EndOfStatement)) - return Error(L, "unexpected token in directive"); + if (getLexer().isNot(AsmToken::EndOfStatement)) { + Error(L, "unexpected token in directive"); + return false; + } Parser.Lex(); - if (!hasThumb()) - return Error(L, "target does not support Thumb mode"); + if (!hasThumb()) { + Error(L, "target does not support Thumb mode"); + return false; + } if (!isThumb()) SwitchMode(); + getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16); return false; } @@ -7855,15 +8306,20 @@ bool ARMAsmParser::parseDirectiveThumb(SMLoc L) { /// parseDirectiveARM /// ::= .arm bool ARMAsmParser::parseDirectiveARM(SMLoc L) { - if (getLexer().isNot(AsmToken::EndOfStatement)) - return Error(L, "unexpected token in directive"); + if (getLexer().isNot(AsmToken::EndOfStatement)) { + Error(L, "unexpected token in directive"); + return false; + } Parser.Lex(); - if (!hasARM()) - return Error(L, "target does not support ARM mode"); + if (!hasARM()) { + Error(L, "target does not support ARM mode"); + return false; + } if (isThumb()) SwitchMode(); + getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32); return false; } @@ -7886,8 +8342,11 @@ bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) { if (isMachO) { const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::EndOfStatement)) { - if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String)) - return Error(L, "unexpected token in .thumb_func directive"); + if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String)) { + Error(L, "unexpected token in .thumb_func directive"); + return false; + } + MCSymbol *Func = getParser().getContext().GetOrCreateSymbol(Tok.getIdentifier()); getParser().getStreamer().EmitThumbFunc(Func); @@ -7896,11 +8355,12 @@ bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) { } } - if (getLexer().isNot(AsmToken::EndOfStatement)) - return Error(L, "unexpected token in directive"); + if (getLexer().isNot(AsmToken::EndOfStatement)) { + Error(L, "unexpected token in directive"); + return false; + } NextSymbolIsThumb = true; - return false; } @@ -7908,18 +8368,26 @@ bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) { /// ::= .syntax unified | divided bool ARMAsmParser::parseDirectiveSyntax(SMLoc L) { const AsmToken &Tok = Parser.getTok(); - if (Tok.isNot(AsmToken::Identifier)) - return Error(L, "unexpected token in .syntax directive"); + if (Tok.isNot(AsmToken::Identifier)) { + Error(L, "unexpected token in .syntax directive"); + return false; + } + StringRef Mode = Tok.getString(); - if (Mode == "unified" || Mode == "UNIFIED") + if (Mode == "unified" || Mode == "UNIFIED") { Parser.Lex(); - else if (Mode == "divided" || Mode == "DIVIDED") - return Error(L, "'.syntax divided' arm asssembly not supported"); - else - return Error(L, "unrecognized syntax mode in .syntax directive"); + } else if (Mode == "divided" || Mode == "DIVIDED") { + Error(L, "'.syntax divided' arm asssembly not supported"); + return false; + } else { + Error(L, "unrecognized syntax mode in .syntax directive"); + return false; + } - if (getLexer().isNot(AsmToken::EndOfStatement)) - return 
Error(Parser.getTok().getLoc(), "unexpected token in directive"); + if (getLexer().isNot(AsmToken::EndOfStatement)) { + Error(Parser.getTok().getLoc(), "unexpected token in directive"); + return false; + } Parser.Lex(); // TODO tell the MC streamer the mode @@ -7931,30 +8399,37 @@ bool ARMAsmParser::parseDirectiveSyntax(SMLoc L) { /// ::= .code 16 | 32 bool ARMAsmParser::parseDirectiveCode(SMLoc L) { const AsmToken &Tok = Parser.getTok(); - if (Tok.isNot(AsmToken::Integer)) - return Error(L, "unexpected token in .code directive"); + if (Tok.isNot(AsmToken::Integer)) { + Error(L, "unexpected token in .code directive"); + return false; + } int64_t Val = Parser.getTok().getIntVal(); - if (Val == 16) - Parser.Lex(); - else if (Val == 32) - Parser.Lex(); - else - return Error(L, "invalid operand to .code directive"); + if (Val != 16 && Val != 32) { + Error(L, "invalid operand to .code directive"); + return false; + } + Parser.Lex(); - if (getLexer().isNot(AsmToken::EndOfStatement)) - return Error(Parser.getTok().getLoc(), "unexpected token in directive"); + if (getLexer().isNot(AsmToken::EndOfStatement)) { + Error(Parser.getTok().getLoc(), "unexpected token in directive"); + return false; + } Parser.Lex(); if (Val == 16) { - if (!hasThumb()) - return Error(L, "target does not support Thumb mode"); + if (!hasThumb()) { + Error(L, "target does not support Thumb mode"); + return false; + } if (!isThumb()) SwitchMode(); getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16); } else { - if (!hasARM()) - return Error(L, "target does not support ARM mode"); + if (!hasARM()) { + Error(L, "target does not support ARM mode"); + return false; + } if (isThumb()) SwitchMode(); @@ -7972,21 +8447,23 @@ bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) { SMLoc SRegLoc, ERegLoc; if (ParseRegister(Reg, SRegLoc, ERegLoc)) { Parser.eatToEndOfStatement(); - return Error(SRegLoc, "register name expected"); + Error(SRegLoc, "register name expected"); + return false; } // Shouldn't be anything else. if (Parser.getTok().isNot(AsmToken::EndOfStatement)) { Parser.eatToEndOfStatement(); - return Error(Parser.getTok().getLoc(), - "unexpected input in .req directive."); + Error(Parser.getTok().getLoc(), "unexpected input in .req directive."); + return false; } Parser.Lex(); // Consume the EndOfStatement - if (RegisterReqs.GetOrCreateValue(Name, Reg).getValue() != Reg) - return Error(SRegLoc, "redefinition of '" + Name + - "' does not match original."); + if (RegisterReqs.GetOrCreateValue(Name, Reg).getValue() != Reg) { + Error(SRegLoc, "redefinition of '" + Name + "' does not match original."); + return false; + } return false; } @@ -7996,9 +8473,10 @@ bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) { bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) { if (Parser.getTok().isNot(AsmToken::Identifier)) { Parser.eatToEndOfStatement(); - return Error(L, "unexpected input in .unreq directive."); + Error(L, "unexpected input in .unreq directive."); + return false; } - RegisterReqs.erase(Parser.getTok().getIdentifier()); + RegisterReqs.erase(Parser.getTok().getIdentifier().lower()); Parser.Lex(); // Eat the identifier. 
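Usage of the two directives, for reference:

    acc .req r4     @ define a register alias
    mov acc, #0
    .unreq acc      @ remove it; the name is lowercased first, per the fix above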
return false; } @@ -8006,28 +8484,128 @@ bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) { /// parseDirectiveArch /// ::= .arch token bool ARMAsmParser::parseDirectiveArch(SMLoc L) { - return true; + StringRef Arch = getParser().parseStringToEndOfStatement().trim(); + + unsigned ID = StringSwitch<unsigned>(Arch) +#define ARM_ARCH_NAME(NAME, ID, DEFAULT_CPU_NAME, DEFAULT_CPU_ARCH) \ + .Case(NAME, ARM::ID) +#define ARM_ARCH_ALIAS(NAME, ID) \ + .Case(NAME, ARM::ID) +#include "MCTargetDesc/ARMArchName.def" + .Default(ARM::INVALID_ARCH); + + if (ID == ARM::INVALID_ARCH) { + Error(L, "Unknown arch name"); + return false; + } + + getTargetStreamer().emitArch(ID); + return false; } /// parseDirectiveEabiAttr -/// ::= .eabi_attribute int, int +/// ::= .eabi_attribute int, int [, "str"] +/// ::= .eabi_attribute Tag_name, int [, "str"] bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) { - if (Parser.getTok().isNot(AsmToken::Integer)) - return Error(L, "integer expected"); - int64_t Tag = Parser.getTok().getIntVal(); - Parser.Lex(); // eat tag integer + int64_t Tag; + SMLoc TagLoc; + TagLoc = Parser.getTok().getLoc(); + if (Parser.getTok().is(AsmToken::Identifier)) { + StringRef Name = Parser.getTok().getIdentifier(); + Tag = ARMBuildAttrs::AttrTypeFromString(Name); + if (Tag == -1) { + Error(TagLoc, "attribute name not recognised: " + Name); + Parser.eatToEndOfStatement(); + return false; + } + Parser.Lex(); + } else { + const MCExpr *AttrExpr; + + TagLoc = Parser.getTok().getLoc(); + if (Parser.parseExpression(AttrExpr)) { + Parser.eatToEndOfStatement(); + return false; + } + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(AttrExpr); + if (!CE) { + Error(TagLoc, "expected numeric constant"); + Parser.eatToEndOfStatement(); + return false; + } - if (Parser.getTok().isNot(AsmToken::Comma)) - return Error(L, "comma expected"); + Tag = CE->getValue(); + } + + if (Parser.getTok().isNot(AsmToken::Comma)) { + Error(Parser.getTok().getLoc(), "comma expected"); + Parser.eatToEndOfStatement(); + return false; + } Parser.Lex(); // skip comma - L = Parser.getTok().getLoc(); - if (Parser.getTok().isNot(AsmToken::Integer)) - return Error(L, "integer expected"); - int64_t Value = Parser.getTok().getIntVal(); - Parser.Lex(); // eat value integer + StringRef StringValue = ""; + bool IsStringValue = false; + + int64_t IntegerValue = 0; + bool IsIntegerValue = false; + + if (Tag == ARMBuildAttrs::CPU_raw_name || Tag == ARMBuildAttrs::CPU_name) + IsStringValue = true; + else if (Tag == ARMBuildAttrs::compatibility) { + IsStringValue = true; + IsIntegerValue = true; + } else if (Tag < 32 || Tag % 2 == 0) + IsIntegerValue = true; + else if (Tag % 2 == 1) + IsStringValue = true; + else + llvm_unreachable("invalid tag type"); + + if (IsIntegerValue) { + const MCExpr *ValueExpr; + SMLoc ValueExprLoc = Parser.getTok().getLoc(); + if (Parser.parseExpression(ValueExpr)) { + Parser.eatToEndOfStatement(); + return false; + } + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ValueExpr); + if (!CE) { + Error(ValueExprLoc, "expected numeric constant"); + Parser.eatToEndOfStatement(); + return false; + } - getTargetStreamer().emitAttribute(Tag, Value); + IntegerValue = CE->getValue(); + } + + if (Tag == ARMBuildAttrs::compatibility) { + if (Parser.getTok().isNot(AsmToken::Comma)) + IsStringValue = false; + else + Parser.Lex(); + } + + if (IsStringValue) { + if (Parser.getTok().isNot(AsmToken::String)) { + Error(Parser.getTok().getLoc(), "bad string constant"); + Parser.eatToEndOfStatement(); + return false; + } + + 
StringValue = Parser.getTok().getStringContents(); + Parser.Lex(); + } + + if (IsIntegerValue && IsStringValue) { + assert(Tag == ARMBuildAttrs::compatibility); + getTargetStreamer().emitIntTextAttribute(Tag, IntegerValue, StringValue); + } else if (IsIntegerValue) + getTargetStreamer().emitAttribute(Tag, IntegerValue); + else if (IsStringValue) + getTargetStreamer().emitTextAttribute(Tag, StringValue); return false; } @@ -8049,8 +8627,10 @@ bool ARMAsmParser::parseDirectiveFPU(SMLoc L) { #include "ARMFPUName.def" .Default(ARM::INVALID_FPU); - if (ID == ARM::INVALID_FPU) - return Error(L, "Unknown FPU name"); + if (ID == ARM::INVALID_FPU) { + Error(L, "Unknown FPU name"); + return false; + } getTargetStreamer().emitFPU(ID); return false; @@ -8059,14 +8639,18 @@ bool ARMAsmParser::parseDirectiveFPU(SMLoc L) { /// parseDirectiveFnStart /// ::= .fnstart bool ARMAsmParser::parseDirectiveFnStart(SMLoc L) { - if (FnStartLoc.isValid()) { + if (UC.hasFnStart()) { Error(L, ".fnstart starts before the end of previous one"); - Error(FnStartLoc, "previous .fnstart starts here"); - return true; + UC.emitFnStartLocNotes(); + return false; } - FnStartLoc = L; + // Reset the unwind directives parser state + UC.reset(); + getTargetStreamer().emitFnStart(); + + UC.recordFnStart(L); return false; } @@ -8074,31 +8658,37 @@ bool ARMAsmParser::parseDirectiveFnStart(SMLoc L) { /// ::= .fnend bool ARMAsmParser::parseDirectiveFnEnd(SMLoc L) { // Check the ordering of unwind directives - if (!FnStartLoc.isValid()) - return Error(L, ".fnstart must precede .fnend directive"); + if (!UC.hasFnStart()) { + Error(L, ".fnstart must precede .fnend directive"); + return false; + } // Reset the unwind directives parser state - resetUnwindDirectiveParserState(); getTargetStreamer().emitFnEnd(); + + UC.reset(); return false; } /// parseDirectiveCantUnwind /// ::= .cantunwind bool ARMAsmParser::parseDirectiveCantUnwind(SMLoc L) { + UC.recordCantUnwind(L); + // Check the ordering of unwind directives - CantUnwindLoc = L; - if (!FnStartLoc.isValid()) - return Error(L, ".fnstart must precede .cantunwind directive"); - if (HandlerDataLoc.isValid()) { + if (!UC.hasFnStart()) { + Error(L, ".fnstart must precede .cantunwind directive"); + return false; + } + if (UC.hasHandlerData()) { Error(L, ".cantunwind can't be used with .handlerdata directive"); - Error(HandlerDataLoc, ".handlerdata was specified here"); - return true; + UC.emitHandlerDataLocNotes(); + return false; } - if (PersonalityLoc.isValid()) { + if (UC.hasPersonality()) { Error(L, ".cantunwind can't be used with .personality directive"); - Error(PersonalityLoc, ".personality was specified here"); - return true; + UC.emitPersonalityLocNotes(); + return false; } getTargetStreamer().emitCantUnwind(); @@ -8108,25 +8698,37 @@ bool ARMAsmParser::parseDirectiveCantUnwind(SMLoc L) { /// parseDirectivePersonality /// ::= .personality name bool ARMAsmParser::parseDirectivePersonality(SMLoc L) { + bool HasExistingPersonality = UC.hasPersonality(); + + UC.recordPersonality(L); + // Check the ordering of unwind directives - PersonalityLoc = L; - if (!FnStartLoc.isValid()) - return Error(L, ".fnstart must precede .personality directive"); - if (CantUnwindLoc.isValid()) { + if (!UC.hasFnStart()) { + Error(L, ".fnstart must precede .personality directive"); + return false; + } + if (UC.cantUnwind()) { Error(L, ".personality can't be used with .cantunwind directive"); - Error(CantUnwindLoc, ".cantunwind was specified here"); - return true; + UC.emitCantUnwindLocNotes(); + return false; } 
- if (HandlerDataLoc.isValid()) { + if (UC.hasHandlerData()) { Error(L, ".personality must precede .handlerdata directive"); - Error(HandlerDataLoc, ".handlerdata was specified here"); - return true; + UC.emitHandlerDataLocNotes(); + return false; + } + if (HasExistingPersonality) { + Parser.eatToEndOfStatement(); + Error(L, "multiple personality directives"); + UC.emitPersonalityLocNotes(); + return false; } // Parse the name of the personality routine if (Parser.getTok().isNot(AsmToken::Identifier)) { Parser.eatToEndOfStatement(); - return Error(L, "unexpected input in .personality directive."); + Error(L, "unexpected input in .personality directive."); + return false; } StringRef Name(Parser.getTok().getIdentifier()); Parser.Lex(); @@ -8139,14 +8741,17 @@ bool ARMAsmParser::parseDirectivePersonality(SMLoc L) { /// parseDirectiveHandlerData /// ::= .handlerdata bool ARMAsmParser::parseDirectiveHandlerData(SMLoc L) { + UC.recordHandlerData(L); + // Check the ordering of unwind directives - HandlerDataLoc = L; - if (!FnStartLoc.isValid()) - return Error(L, ".fnstart must precede .personality directive"); - if (CantUnwindLoc.isValid()) { + if (!UC.hasFnStart()) { + Error(L, ".fnstart must precede .personality directive"); + return false; + } + if (UC.cantUnwind()) { Error(L, ".handlerdata can't be used with .cantunwind directive"); - Error(CantUnwindLoc, ".cantunwind was specified here"); - return true; + UC.emitCantUnwindLocNotes(); + return false; } getTargetStreamer().emitHandlerData(); @@ -8157,34 +8762,45 @@ bool ARMAsmParser::parseDirectiveHandlerData(SMLoc L) { /// ::= .setfp fpreg, spreg [, offset] bool ARMAsmParser::parseDirectiveSetFP(SMLoc L) { // Check the ordering of unwind directives - if (!FnStartLoc.isValid()) - return Error(L, ".fnstart must precede .setfp directive"); - if (HandlerDataLoc.isValid()) - return Error(L, ".setfp must precede .handlerdata directive"); + if (!UC.hasFnStart()) { + Error(L, ".fnstart must precede .setfp directive"); + return false; + } + if (UC.hasHandlerData()) { + Error(L, ".setfp must precede .handlerdata directive"); + return false; + } // Parse fpreg - SMLoc NewFPRegLoc = Parser.getTok().getLoc(); - int NewFPReg = tryParseRegister(); - if (NewFPReg == -1) - return Error(NewFPRegLoc, "frame pointer register expected"); + SMLoc FPRegLoc = Parser.getTok().getLoc(); + int FPReg = tryParseRegister(); + if (FPReg == -1) { + Error(FPRegLoc, "frame pointer register expected"); + return false; + } // Consume comma - if (!Parser.getTok().is(AsmToken::Comma)) - return Error(Parser.getTok().getLoc(), "comma expected"); + if (Parser.getTok().isNot(AsmToken::Comma)) { + Error(Parser.getTok().getLoc(), "comma expected"); + return false; + } Parser.Lex(); // skip comma // Parse spreg - SMLoc NewSPRegLoc = Parser.getTok().getLoc(); - int NewSPReg = tryParseRegister(); - if (NewSPReg == -1) - return Error(NewSPRegLoc, "stack pointer register expected"); + SMLoc SPRegLoc = Parser.getTok().getLoc(); + int SPReg = tryParseRegister(); + if (SPReg == -1) { + Error(SPRegLoc, "stack pointer register expected"); + return false; + } - if (NewSPReg != ARM::SP && NewSPReg != FPReg) - return Error(NewSPRegLoc, - "register should be either $sp or the latest fp register"); + if (SPReg != ARM::SP && SPReg != UC.getFPReg()) { + Error(SPRegLoc, "register should be either $sp or the latest fp register"); + return false; + } // Update the frame pointer register - FPReg = NewFPReg; + UC.saveFPReg(FPReg); // Parse offset int64_t Offset = 0; @@ -8193,24 +8809,29 @@ bool 
ARMAsmParser::parseDirectiveSetFP(SMLoc L) { if (Parser.getTok().isNot(AsmToken::Hash) && Parser.getTok().isNot(AsmToken::Dollar)) { - return Error(Parser.getTok().getLoc(), "'#' expected"); + Error(Parser.getTok().getLoc(), "'#' expected"); + return false; } Parser.Lex(); // skip hash token. const MCExpr *OffsetExpr; SMLoc ExLoc = Parser.getTok().getLoc(); SMLoc EndLoc; - if (getParser().parseExpression(OffsetExpr, EndLoc)) - return Error(ExLoc, "malformed setfp offset"); + if (getParser().parseExpression(OffsetExpr, EndLoc)) { + Error(ExLoc, "malformed setfp offset"); + return false; + } const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(OffsetExpr); - if (!CE) - return Error(ExLoc, "setfp offset must be an immediate"); + if (!CE) { + Error(ExLoc, "setfp offset must be an immediate"); + return false; + } Offset = CE->getValue(); } - getTargetStreamer().emitSetFP(static_cast<unsigned>(NewFPReg), - static_cast<unsigned>(NewSPReg), Offset); + getTargetStreamer().emitSetFP(static_cast<unsigned>(FPReg), + static_cast<unsigned>(SPReg), Offset); return false; } @@ -8218,26 +8839,35 @@ bool ARMAsmParser::parseDirectiveSetFP(SMLoc L) { /// ::= .pad offset bool ARMAsmParser::parseDirectivePad(SMLoc L) { // Check the ordering of unwind directives - if (!FnStartLoc.isValid()) - return Error(L, ".fnstart must precede .pad directive"); - if (HandlerDataLoc.isValid()) - return Error(L, ".pad must precede .handlerdata directive"); + if (!UC.hasFnStart()) { + Error(L, ".fnstart must precede .pad directive"); + return false; + } + if (UC.hasHandlerData()) { + Error(L, ".pad must precede .handlerdata directive"); + return false; + } // Parse the offset if (Parser.getTok().isNot(AsmToken::Hash) && Parser.getTok().isNot(AsmToken::Dollar)) { - return Error(Parser.getTok().getLoc(), "'#' expected"); + Error(Parser.getTok().getLoc(), "'#' expected"); + return false; } Parser.Lex(); // skip hash token. const MCExpr *OffsetExpr; SMLoc ExLoc = Parser.getTok().getLoc(); SMLoc EndLoc; - if (getParser().parseExpression(OffsetExpr, EndLoc)) - return Error(ExLoc, "malformed pad offset"); + if (getParser().parseExpression(OffsetExpr, EndLoc)) { + Error(ExLoc, "malformed pad offset"); + return false; + } const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(OffsetExpr); - if (!CE) - return Error(ExLoc, "pad offset must be an immediate"); + if (!CE) { + Error(ExLoc, "pad offset must be an immediate"); + return false; + } getTargetStreamer().emitPad(CE->getValue()); return false; @@ -8248,37 +8878,470 @@ bool ARMAsmParser::parseDirectivePad(SMLoc L) { /// ::= .vsave { registers } bool ARMAsmParser::parseDirectiveRegSave(SMLoc L, bool IsVector) { // Check the ordering of unwind directives - if (!FnStartLoc.isValid()) - return Error(L, ".fnstart must precede .save or .vsave directives"); - if (HandlerDataLoc.isValid()) - return Error(L, ".save or .vsave must precede .handlerdata directive"); + if (!UC.hasFnStart()) { + Error(L, ".fnstart must precede .save or .vsave directives"); + return false; + } + if (UC.hasHandlerData()) { + Error(L, ".save or .vsave must precede .handlerdata directive"); + return false; + } // RAII object to make sure parsed operands are deleted. 
- struct CleanupObject { - SmallVector<MCParsedAsmOperand *, 1> Operands; - ~CleanupObject() { - for (unsigned I = 0, E = Operands.size(); I != E; ++I) - delete Operands[I]; - } - } CO; + SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Operands; // Parse the register list - if (parseRegisterList(CO.Operands)) + if (parseRegisterList(Operands)) + return false; + ARMOperand &Op = (ARMOperand &)*Operands[0]; + if (!IsVector && !Op.isRegList()) { + Error(L, ".save expects GPR registers"); + return false; + } + if (IsVector && !Op.isDPRRegList()) { + Error(L, ".vsave expects DPR registers"); + return false; + } + + getTargetStreamer().emitRegSave(Op.getRegList(), IsVector); + return false; +} + +/// parseDirectiveInst +/// ::= .inst opcode [, ...] +/// ::= .inst.n opcode [, ...] +/// ::= .inst.w opcode [, ...] +bool ARMAsmParser::parseDirectiveInst(SMLoc Loc, char Suffix) { + int Width; + + if (isThumb()) { + switch (Suffix) { + case 'n': + Width = 2; + break; + case 'w': + Width = 4; + break; + default: + Parser.eatToEndOfStatement(); + Error(Loc, "cannot determine Thumb instruction size, " + "use inst.n/inst.w instead"); + return false; + } + } else { + if (Suffix) { + Parser.eatToEndOfStatement(); + Error(Loc, "width suffixes are invalid in ARM mode"); + return false; + } + Width = 4; + } + + if (getLexer().is(AsmToken::EndOfStatement)) { + Parser.eatToEndOfStatement(); + Error(Loc, "expected expression following directive"); + return false; + } + + for (;;) { + const MCExpr *Expr; + + if (getParser().parseExpression(Expr)) { + Error(Loc, "expected expression"); + return false; + } + + const MCConstantExpr *Value = dyn_cast_or_null<MCConstantExpr>(Expr); + if (!Value) { + Error(Loc, "expected constant expression"); + return false; + } + + switch (Width) { + case 2: + if (Value->getValue() > 0xffff) { + Error(Loc, "inst.n operand is too big, use inst.w instead"); + return false; + } + break; + case 4: + if (Value->getValue() > 0xffffffff) { + Error(Loc, + StringRef(Suffix ? 
"inst.w" : "inst") + " operand is too big"); + return false; + } + break; + default: + llvm_unreachable("only supported widths are 2 and 4"); + } + + getTargetStreamer().emitInst(Value->getValue(), Suffix); + + if (getLexer().is(AsmToken::EndOfStatement)) + break; + + if (getLexer().isNot(AsmToken::Comma)) { + Error(Loc, "unexpected token in directive"); + return false; + } + + Parser.Lex(); + } + + Parser.Lex(); + return false; +} + +/// parseDirectiveLtorg +/// ::= .ltorg | .pool +bool ARMAsmParser::parseDirectiveLtorg(SMLoc L) { + getTargetStreamer().emitCurrentConstantPool(); + return false; +} + +bool ARMAsmParser::parseDirectiveEven(SMLoc L) { + const MCSection *Section = getStreamer().getCurrentSection().first; + + if (getLexer().isNot(AsmToken::EndOfStatement)) { + TokError("unexpected token in directive"); + return false; + } + + if (!Section) { + getStreamer().InitSections(); + Section = getStreamer().getCurrentSection().first; + } + + assert(Section && "must have section to emit alignment"); + if (Section->UseCodeAlign()) + getStreamer().EmitCodeAlignment(2); + else + getStreamer().EmitValueToAlignment(2); + + return false; +} + +/// parseDirectivePersonalityIndex +/// ::= .personalityindex index +bool ARMAsmParser::parseDirectivePersonalityIndex(SMLoc L) { + bool HasExistingPersonality = UC.hasPersonality(); + + UC.recordPersonalityIndex(L); + + if (!UC.hasFnStart()) { + Parser.eatToEndOfStatement(); + Error(L, ".fnstart must precede .personalityindex directive"); + return false; + } + if (UC.cantUnwind()) { + Parser.eatToEndOfStatement(); + Error(L, ".personalityindex cannot be used with .cantunwind"); + UC.emitCantUnwindLocNotes(); + return false; + } + if (UC.hasHandlerData()) { + Parser.eatToEndOfStatement(); + Error(L, ".personalityindex must precede .handlerdata directive"); + UC.emitHandlerDataLocNotes(); + return false; + } + if (HasExistingPersonality) { + Parser.eatToEndOfStatement(); + Error(L, "multiple personality directives"); + UC.emitPersonalityLocNotes(); + return false; + } + + const MCExpr *IndexExpression; + SMLoc IndexLoc = Parser.getTok().getLoc(); + if (Parser.parseExpression(IndexExpression)) { + Parser.eatToEndOfStatement(); + return false; + } + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(IndexExpression); + if (!CE) { + Parser.eatToEndOfStatement(); + Error(IndexLoc, "index must be a constant number"); + return false; + } + if (CE->getValue() < 0 || + CE->getValue() >= ARM::EHABI::NUM_PERSONALITY_INDEX) { + Parser.eatToEndOfStatement(); + Error(IndexLoc, "personality routine index should be in range [0-3]"); + return false; + } + + getTargetStreamer().emitPersonalityIndex(CE->getValue()); + return false; +} + +/// parseDirectiveUnwindRaw +/// ::= .unwind_raw offset, opcode [, opcode...] 
+bool ARMAsmParser::parseDirectiveUnwindRaw(SMLoc L) { + if (!UC.hasFnStart()) { + Parser.eatToEndOfStatement(); + Error(L, ".fnstart must precede .unwind_raw directives"); + return false; + } + + int64_t StackOffset; + + const MCExpr *OffsetExpr; + SMLoc OffsetLoc = getLexer().getLoc(); + if (getLexer().is(AsmToken::EndOfStatement) || + getParser().parseExpression(OffsetExpr)) { + Error(OffsetLoc, "expected expression"); + Parser.eatToEndOfStatement(); + return false; + } + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(OffsetExpr); + if (!CE) { + Error(OffsetLoc, "offset must be a constant"); + Parser.eatToEndOfStatement(); + return false; + } + + StackOffset = CE->getValue(); + + if (getLexer().isNot(AsmToken::Comma)) { + Error(getLexer().getLoc(), "expected comma"); + Parser.eatToEndOfStatement(); + return false; + } + Parser.Lex(); + + SmallVector<uint8_t, 16> Opcodes; + for (;;) { + const MCExpr *OE; + + SMLoc OpcodeLoc = getLexer().getLoc(); + if (getLexer().is(AsmToken::EndOfStatement) || Parser.parseExpression(OE)) { + Error(OpcodeLoc, "expected opcode expression"); + Parser.eatToEndOfStatement(); + return false; + } + + const MCConstantExpr *OC = dyn_cast<MCConstantExpr>(OE); + if (!OC) { + Error(OpcodeLoc, "opcode value must be a constant"); + Parser.eatToEndOfStatement(); + return false; + } + + const int64_t Opcode = OC->getValue(); + if (Opcode & ~0xff) { + Error(OpcodeLoc, "invalid opcode"); + Parser.eatToEndOfStatement(); + return false; + } + + Opcodes.push_back(uint8_t(Opcode)); + + if (getLexer().is(AsmToken::EndOfStatement)) + break; + + if (getLexer().isNot(AsmToken::Comma)) { + Error(getLexer().getLoc(), "unexpected token in directive"); + Parser.eatToEndOfStatement(); + return false; + } + + Parser.Lex(); + } + + getTargetStreamer().emitUnwindRaw(StackOffset, Opcodes); + + Parser.Lex(); + return false; +} + +/// parseDirectiveTLSDescSeq +/// ::= .tlsdescseq tls-variable +bool ARMAsmParser::parseDirectiveTLSDescSeq(SMLoc L) { + if (getLexer().isNot(AsmToken::Identifier)) { + TokError("expected variable after '.tlsdescseq' directive"); + Parser.eatToEndOfStatement(); + return false; + } + + const MCSymbolRefExpr *SRE = + MCSymbolRefExpr::Create(Parser.getTok().getIdentifier(), + MCSymbolRefExpr::VK_ARM_TLSDESCSEQ, getContext()); + Lex(); + + if (getLexer().isNot(AsmToken::EndOfStatement)) { + Error(Parser.getTok().getLoc(), "unexpected token"); + Parser.eatToEndOfStatement(); + return false; + } + + getTargetStreamer().AnnotateTLSDescriptorSequence(SRE); + return false; +} + +/// parseDirectiveMovSP +/// ::= .movsp reg [, #offset] +bool ARMAsmParser::parseDirectiveMovSP(SMLoc L) { + if (!UC.hasFnStart()) { + Parser.eatToEndOfStatement(); + Error(L, ".fnstart must precede .movsp directives"); + return false; + } + if (UC.getFPReg() != ARM::SP) { + Parser.eatToEndOfStatement(); + Error(L, "unexpected .movsp directive"); + return false; + } + + SMLoc SPRegLoc = Parser.getTok().getLoc(); + int SPReg = tryParseRegister(); + if (SPReg == -1) { + Parser.eatToEndOfStatement(); + Error(SPRegLoc, "register expected"); + return false; + } + + if (SPReg == ARM::SP || SPReg == ARM::PC) { + Parser.eatToEndOfStatement(); + Error(SPRegLoc, "sp and pc are not permitted in .movsp directive"); + return false; + } + + int64_t Offset = 0; + if (Parser.getTok().is(AsmToken::Comma)) { + Parser.Lex(); + + if (Parser.getTok().isNot(AsmToken::Hash)) { + Error(Parser.getTok().getLoc(), "expected #constant"); + Parser.eatToEndOfStatement(); + return false; + } + Parser.Lex(); + + const 
MCExpr *OffsetExpr; + SMLoc OffsetLoc = Parser.getTok().getLoc(); + if (Parser.parseExpression(OffsetExpr)) { + Parser.eatToEndOfStatement(); + Error(OffsetLoc, "malformed offset expression"); + return false; + } + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(OffsetExpr); + if (!CE) { + Parser.eatToEndOfStatement(); + Error(OffsetLoc, "offset must be an immediate constant"); + return false; + } + + Offset = CE->getValue(); + } + + getTargetStreamer().emitMovSP(SPReg, Offset); + UC.saveFPReg(SPReg); + + return false; +} + +/// parseDirectiveObjectArch +/// ::= .object_arch name +bool ARMAsmParser::parseDirectiveObjectArch(SMLoc L) { + if (getLexer().isNot(AsmToken::Identifier)) { + Error(getLexer().getLoc(), "unexpected token"); + Parser.eatToEndOfStatement(); + return false; + } + + StringRef Arch = Parser.getTok().getString(); + SMLoc ArchLoc = Parser.getTok().getLoc(); + getLexer().Lex(); + + unsigned ID = StringSwitch<unsigned>(Arch) +#define ARM_ARCH_NAME(NAME, ID, DEFAULT_CPU_NAME, DEFAULT_CPU_ARCH) \ + .Case(NAME, ARM::ID) +#define ARM_ARCH_ALIAS(NAME, ID) \ + .Case(NAME, ARM::ID) +#include "MCTargetDesc/ARMArchName.def" +#undef ARM_ARCH_NAME +#undef ARM_ARCH_ALIAS + .Default(ARM::INVALID_ARCH); + + if (ID == ARM::INVALID_ARCH) { + Error(ArchLoc, "unknown architecture '" + Arch + "'"); + Parser.eatToEndOfStatement(); + return false; + } + + getTargetStreamer().emitObjectArch(ID); + + if (getLexer().isNot(AsmToken::EndOfStatement)) { + Error(getLexer().getLoc(), "unexpected token"); + Parser.eatToEndOfStatement(); + } + + return false; +} + +/// parseDirectiveAlign +/// ::= .align +bool ARMAsmParser::parseDirectiveAlign(SMLoc L) { + // NOTE: if this is not the end of the statement, fall back to the target + // agnostic handling for this directive which will correctly handle this. + if (getLexer().isNot(AsmToken::EndOfStatement)) return true; - ARMOperand *Op = (ARMOperand*)CO.Operands[0]; - if (!IsVector && !Op->isRegList()) - return Error(L, ".save expects GPR registers"); - if (IsVector && !Op->isDPRRegList()) - return Error(L, ".vsave expects DPR registers"); - getTargetStreamer().emitRegSave(Op->getRegList(), IsVector); + // '.align' is target specifically handled to mean 2**2 byte alignment. + if (getStreamer().getCurrentSection().first->UseCodeAlign()) + getStreamer().EmitCodeAlignment(4, 0); + else + getStreamer().EmitValueToAlignment(4, 0, 1, 0); + + return false; +} + +/// parseDirectiveThumbSet +/// ::= .thumb_set name, value +bool ARMAsmParser::parseDirectiveThumbSet(SMLoc L) { + StringRef Name; + if (Parser.parseIdentifier(Name)) { + TokError("expected identifier after '.thumb_set'"); + Parser.eatToEndOfStatement(); + return false; + } + + if (getLexer().isNot(AsmToken::Comma)) { + TokError("expected comma after name '" + Name + "'"); + Parser.eatToEndOfStatement(); + return false; + } + Lex(); + + const MCExpr *Value; + if (Parser.parseExpression(Value)) { + TokError("missing expression"); + Parser.eatToEndOfStatement(); + return false; + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) { + TokError("unexpected token"); + Parser.eatToEndOfStatement(); + return false; + } + Lex(); + + MCSymbol *Alias = getContext().GetOrCreateSymbol(Name); + getTargetStreamer().emitThumbSet(Alias, Value); return false; } /// Force static initialization. 
extern "C" void LLVMInitializeARMAsmParser() { - RegisterMCAsmParser<ARMAsmParser> X(TheARMTarget); - RegisterMCAsmParser<ARMAsmParser> Y(TheThumbTarget); + RegisterMCAsmParser<ARMAsmParser> X(TheARMLETarget); + RegisterMCAsmParser<ARMAsmParser> Y(TheARMBETarget); + RegisterMCAsmParser<ARMAsmParser> A(TheThumbLETarget); + RegisterMCAsmParser<ARMAsmParser> B(TheThumbBETarget); } #define GET_REGISTER_MATCHER @@ -8286,20 +9349,113 @@ extern "C" void LLVMInitializeARMAsmParser() { #define GET_MATCHER_IMPLEMENTATION #include "ARMGenAsmMatcher.inc" +static const struct ExtMapEntry { + const char *Extension; + const unsigned ArchCheck; + const uint64_t Features; +} Extensions[] = { + { "crc", Feature_HasV8, ARM::FeatureCRC }, + { "crypto", Feature_HasV8, + ARM::FeatureCrypto | ARM::FeatureNEON | ARM::FeatureFPARMv8 }, + { "fp", Feature_HasV8, ARM::FeatureFPARMv8 }, + { "idiv", Feature_HasV7 | Feature_IsNotMClass, + ARM::FeatureHWDiv | ARM::FeatureHWDivARM }, + // FIXME: iWMMXT not supported + { "iwmmxt", Feature_None, 0 }, + // FIXME: iWMMXT2 not supported + { "iwmmxt2", Feature_None, 0 }, + // FIXME: Maverick not supported + { "maverick", Feature_None, 0 }, + { "mp", Feature_HasV7 | Feature_IsNotMClass, ARM::FeatureMP }, + // FIXME: ARMv6-m OS Extensions feature not checked + { "os", Feature_None, 0 }, + // FIXME: Also available in ARMv6-K + { "sec", Feature_HasV7, ARM::FeatureTrustZone }, + { "simd", Feature_HasV8, ARM::FeatureNEON | ARM::FeatureFPARMv8 }, + // FIXME: Only available in A-class, isel not predicated + { "virt", Feature_HasV7, ARM::FeatureVirtualization }, + // FIXME: xscale not supported + { "xscale", Feature_None, 0 }, +}; + +/// parseDirectiveArchExtension +/// ::= .arch_extension [no]feature +bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) { + if (getLexer().isNot(AsmToken::Identifier)) { + Error(getLexer().getLoc(), "unexpected token"); + Parser.eatToEndOfStatement(); + return false; + } + + StringRef Extension = Parser.getTok().getString(); + SMLoc ExtLoc = Parser.getTok().getLoc(); + getLexer().Lex(); + + bool EnableFeature = true; + if (Extension.startswith_lower("no")) { + EnableFeature = false; + Extension = Extension.substr(2); + } + + for (unsigned EI = 0, EE = array_lengthof(Extensions); EI != EE; ++EI) { + if (Extensions[EI].Extension != Extension) + continue; + + unsigned FB = getAvailableFeatures(); + if ((FB & Extensions[EI].ArchCheck) != Extensions[EI].ArchCheck) { + Error(ExtLoc, "architectural extension '" + Extension + "' is not " + "allowed for the current base architecture"); + return false; + } + + if (!Extensions[EI].Features) + report_fatal_error("unsupported architectural extension: " + Extension); + + if (EnableFeature) + FB |= ComputeAvailableFeatures(Extensions[EI].Features); + else + FB &= ~ComputeAvailableFeatures(Extensions[EI].Features); + + setAvailableFeatures(FB); + return false; + } + + Error(ExtLoc, "unknown architectural extension: " + Extension); + Parser.eatToEndOfStatement(); + return false; +} + // Define this matcher function after the auto-generated include so we // have the match class enum definitions. -unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp, +unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, unsigned Kind) { - ARMOperand *Op = static_cast<ARMOperand*>(AsmOp); + ARMOperand &Op = static_cast<ARMOperand &>(AsmOp); // If the kind is a token for a literal immediate, check if our asm // operand matches. 
This is for InstAliases which have a fixed-value // immediate in the syntax. - if (Kind == MCK__35_0 && Op->isImm()) { - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm()); - if (!CE) - return Match_InvalidOperand; - if (CE->getValue() == 0) + switch (Kind) { + default: break; + case MCK__35_0: + if (Op.isImm()) + if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op.getImm())) + if (CE->getValue() == 0) + return Match_Success; + break; + case MCK_ARMSOImm: + if (Op.isImm()) { + const MCExpr *SOExpr = Op.getImm(); + int64_t Value; + if (!SOExpr->EvaluateAsAbsolute(Value)) + return Match_Success; + assert((Value >= INT32_MIN && Value <= UINT32_MAX) && + "expression value must be representable in 32 bits"); + } + break; + case MCK_GPRPair: + if (Op.isReg() && + MRI->getRegClass(ARM::GPRRegClassID).contains(Op.getReg())) return Match_Success; + break; } return Match_InvalidOperand; } diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 9c7988f8a857..4d4038deac7c 100644 --- a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -7,8 +7,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "arm-disassembler" - #include "llvm/MC/MCDisassembler.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" @@ -29,6 +27,8 @@ using namespace llvm; +#define DEBUG_TYPE "arm-disassembler" + typedef MCDisassembler::DecodeStatus DecodeStatus; namespace { @@ -90,20 +90,18 @@ class ARMDisassembler : public MCDisassembler { public: /// Constructor - Initializes the disassembler. /// - ARMDisassembler(const MCSubtargetInfo &STI) : - MCDisassembler(STI) { + ARMDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) : + MCDisassembler(STI, Ctx) { } ~ARMDisassembler() { } /// getInstruction - See MCDisassembler. - DecodeStatus getInstruction(MCInst &instr, - uint64_t &size, - const MemoryObject ®ion, - uint64_t address, + DecodeStatus getInstruction(MCInst &instr, uint64_t &size, + const MemoryObject ®ion, uint64_t address, raw_ostream &vStream, - raw_ostream &cStream) const; + raw_ostream &cStream) const override; }; /// ThumbDisassembler - Thumb disassembler for all Thumb platforms. @@ -111,20 +109,18 @@ class ThumbDisassembler : public MCDisassembler { public: /// Constructor - Initializes the disassembler. /// - ThumbDisassembler(const MCSubtargetInfo &STI) : - MCDisassembler(STI) { + ThumbDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) : + MCDisassembler(STI, Ctx) { } ~ThumbDisassembler() { } /// getInstruction - See MCDisassembler. 
- DecodeStatus getInstruction(MCInst &instr, - uint64_t &size, - const MemoryObject ®ion, - uint64_t address, + DecodeStatus getInstruction(MCInst &instr, uint64_t &size, + const MemoryObject ®ion, uint64_t address, raw_ostream &vStream, - raw_ostream &cStream) const; + raw_ostream &cStream) const override; private: mutable ITStatus ITBlock; @@ -404,12 +400,16 @@ static DecodeStatus DecodeMRRC2(llvm::MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); #include "ARMGenDisassemblerTables.inc" -static MCDisassembler *createARMDisassembler(const Target &T, const MCSubtargetInfo &STI) { - return new ARMDisassembler(STI); +static MCDisassembler *createARMDisassembler(const Target &T, + const MCSubtargetInfo &STI, + MCContext &Ctx) { + return new ARMDisassembler(STI, Ctx); } -static MCDisassembler *createThumbDisassembler(const Target &T, const MCSubtargetInfo &STI) { - return new ThumbDisassembler(STI); +static MCDisassembler *createThumbDisassembler(const Target &T, + const MCSubtargetInfo &STI, + MCContext &Ctx) { + return new ThumbDisassembler(STI, Ctx); } DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, @@ -860,9 +860,13 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, extern "C" void LLVMInitializeARMDisassembler() { - TargetRegistry::RegisterMCDisassembler(TheARMTarget, + TargetRegistry::RegisterMCDisassembler(TheARMLETarget, + createARMDisassembler); + TargetRegistry::RegisterMCDisassembler(TheARMBETarget, createARMDisassembler); - TargetRegistry::RegisterMCDisassembler(TheThumbTarget, + TargetRegistry::RegisterMCDisassembler(TheThumbLETarget, + createThumbDisassembler); + TargetRegistry::RegisterMCDisassembler(TheThumbBETarget, createThumbDisassembler); } diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index f89702853d41..228fb5756ca8 100644 --- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "asm-printer" #include "ARMInstPrinter.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" @@ -23,6 +22,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "asm-printer" + #include "ARMGenAsmWriter.inc" /// translateShiftImm - Convert shift immediate from 0-31 to 1-32 for printing. @@ -307,17 +308,30 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, << markup(">"); } else { assert(Op.isExpr() && "unknown operand kind in printOperand"); - // If a symbolic branch target was added as a constant expression then print - // that address in hex. And only print 32 unsigned bits for the address. - const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr()); - int64_t Address; - if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) { - O << "0x"; - O.write_hex((uint32_t)Address); + const MCExpr *Expr = Op.getExpr(); + switch (Expr->getKind()) { + case MCExpr::Binary: + O << '#' << *Expr; + break; + case MCExpr::Constant: { + // If a symbolic branch target was added as a constant expression then + // print that address in hex. And only print 32 unsigned bits for the + // address. 
+ const MCConstantExpr *Constant = cast<MCConstantExpr>(Expr); + int64_t TargetAddress; + if (!Constant->EvaluateAsAbsolute(TargetAddress)) { + O << '#' << *Expr; + } else { + O << "0x"; + O.write_hex(static_cast<uint32_t>(TargetAddress)); + } + break; } - else { - // Otherwise, just print the expression. - O << *Op.getExpr(); + default: + // FIXME: Should we always treat this as if it is a constant literal and + // prefix it with '#'? + O << *Expr; + break; } } } @@ -1078,13 +1092,13 @@ void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum, if (isSub) { O << ", " << markup("<imm:") - << "#-" << -OffImm + << "#-" << formatImm(-OffImm) << markup(">"); } else if (AlwaysPrintImm0 || OffImm > 0) { O << ", " << markup("<imm:") - << "#" << OffImm + << "#" << formatImm(OffImm) << markup(">"); } O << "]" << markup(">"); diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index 15ae8d1267f7..f671fe49bb66 100644 --- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -26,8 +26,8 @@ public: ARMInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); - virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); - virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; + void printRegName(raw_ostream &OS, unsigned RegNo) const override; // Autogenerated by tblgen. void printInstruction(const MCInst *MI, raw_ostream &O); diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMArchName.def b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMArchName.def new file mode 100644 index 000000000000..9f007a035a88 --- /dev/null +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMArchName.def @@ -0,0 +1,50 @@ +//===-- ARMArchName.def - List of the ARM arch names ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the list of the supported ARM architecture names, +// i.e. the supported value for -march= option. +// +//===----------------------------------------------------------------------===// + +// NOTE: NO INCLUDE GUARD DESIRED! 
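The new ARMArchName.def above is a standard LLVM X-macro file: it deliberately has no include guard, because every consumer #includes it with its own definitions of ARM_ARCH_NAME and ARM_ARCH_ALIAS, and the file #undefs both macros at the end. As a hedged sketch of one plausible expansion (the ArchEntry table is invented for illustration; the real consumers in this patch, parseDirectiveArch and parseDirectiveObjectArch, expand the same entries into a StringSwitch instead):

    // Expand every architecture name and alias in the .def into a lookup table.
    struct ArchEntry { const char *Name; unsigned ID; };

    static const ArchEntry ARMArchNames[] = {
    #define ARM_ARCH_NAME(NAME, ID, DEFAULT_CPU_NAME, DEFAULT_CPU_ARCH) \
      { NAME, ARM::ID },
    #define ARM_ARCH_ALIAS(NAME, ID) { NAME, ARM::ID },
    #include "MCTargetDesc/ARMArchName.def"
    };
    // No trailing #undef is needed: ARMArchName.def undefines both macros itself.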
+ +#ifndef ARM_ARCH_NAME +#error "You must define ARM_ARCH_NAME before including ARMArchName.def" +#endif + +// ARM_ARCH_NAME(NAME, ID, DEFAULT_CPU_NAME, DEFAULT_CPU_ARCH) +ARM_ARCH_NAME("armv2", ARMV2, "2", v4) +ARM_ARCH_NAME("armv2a", ARMV2A, "2A", v4) +ARM_ARCH_NAME("armv3", ARMV3, "3", v4) +ARM_ARCH_NAME("armv3m", ARMV3M, "3M", v4) +ARM_ARCH_NAME("armv4", ARMV4, "4", v4) +ARM_ARCH_NAME("armv4t", ARMV4T, "4T", v4T) +ARM_ARCH_NAME("armv5", ARMV5, "5", v5T) +ARM_ARCH_NAME("armv5t", ARMV5T, "5T", v5T) +ARM_ARCH_NAME("armv5te", ARMV5TE, "5TE", v5TE) +ARM_ARCH_NAME("armv6", ARMV6, "6", v6) +ARM_ARCH_NAME("armv6j", ARMV6J, "6J", v6) +ARM_ARCH_NAME("armv6t2", ARMV6T2, "6T2", v6T2) +ARM_ARCH_NAME("armv6z", ARMV6Z, "6Z", v6KZ) +ARM_ARCH_NAME("armv6zk", ARMV6ZK, "6ZK", v6KZ) +ARM_ARCH_NAME("armv6-m", ARMV6M, "6-M", v6_M) +ARM_ARCH_NAME("armv7", ARMV7, "7", v7) +ARM_ARCH_NAME("armv7-a", ARMV7A, "7-A", v7) +ARM_ARCH_ALIAS("armv7a", ARMV7A) +ARM_ARCH_NAME("armv7-r", ARMV7R, "7-R", v7) +ARM_ARCH_ALIAS("armv7r", ARMV7R) +ARM_ARCH_NAME("armv7-m", ARMV7M, "7-M", v7) +ARM_ARCH_ALIAS("armv7m", ARMV7M) +ARM_ARCH_NAME("armv8-a", ARMV8A, "8-A", v8) +ARM_ARCH_ALIAS("armv8a", ARMV8A) +ARM_ARCH_NAME("iwmmxt", IWMMXT, "iwmmxt", v5TE) +ARM_ARCH_NAME("iwmmxt2", IWMMXT2, "iwmmxt2", v5TE) + +#undef ARM_ARCH_NAME +#undef ARM_ARCH_ALIAS diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMArchName.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMArchName.h new file mode 100644 index 000000000000..34b9fc126ff1 --- /dev/null +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMArchName.h @@ -0,0 +1,27 @@ +//===-- ARMArchName.h - List of the ARM arch names --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef ARMARCHNAME_H +#define ARMARCHNAME_H + +namespace llvm { +namespace ARM { + +enum ArchKind { + INVALID_ARCH = 0 + +#define ARM_ARCH_NAME(NAME, ID, DEFAULT_CPU_NAME, DEFAULT_CPU_ARCH) , ID +#define ARM_ARCH_ALIAS(NAME, ID) /* empty */ +#include "ARMArchName.def" +}; + +} // namespace ARM +} // namespace llvm + +#endif // ARMARCHNAME_H diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 5615b808fc11..7acd9cc4d302 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -41,24 +41,27 @@ public: class ARMAsmBackend : public MCAsmBackend { const MCSubtargetInfo* STI; - bool isThumbMode; // Currently emitting Thumb code. + bool isThumbMode; // Currently emitting Thumb code. + bool IsLittleEndian; // Big or little endian. 
public: - ARMAsmBackend(const Target &T, const StringRef TT) + ARMAsmBackend(const Target &T, const StringRef TT, bool IsLittle) : MCAsmBackend(), STI(ARM_MC::createARMMCSubtargetInfo(TT, "", "")), - isThumbMode(TT.startswith("thumb")) {} + isThumbMode(TT.startswith("thumb")), IsLittleEndian(IsLittle) {} ~ARMAsmBackend() { delete STI; } - unsigned getNumFixupKinds() const { return ARM::NumTargetFixupKinds; } + unsigned getNumFixupKinds() const override { + return ARM::NumTargetFixupKinds; + } bool hasNOP() const { return (STI->getFeatureBits() & ARM::HasV6T2Ops) != 0; } - const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { - const static MCFixupKindInfo Infos[ARM::NumTargetFixupKinds] = { + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override { + const static MCFixupKindInfo InfosLE[ARM::NumTargetFixupKinds] = { // This table *must* be in the order that the fixup_* kinds are defined in // ARMFixupKinds.h. // @@ -94,10 +97,43 @@ public: { "fixup_arm_movw_lo16", 0, 20, 0 }, { "fixup_t2_movt_hi16", 0, 20, 0 }, { "fixup_t2_movw_lo16", 0, 20, 0 }, -{ "fixup_arm_movt_hi16_pcrel", 0, 20, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_movw_lo16_pcrel", 0, 20, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_t2_movt_hi16_pcrel", 0, 20, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_t2_movw_lo16_pcrel", 0, 20, MCFixupKindInfo::FKF_IsPCRel }, + }; + const static MCFixupKindInfo InfosBE[ARM::NumTargetFixupKinds] = { +// This table *must* be in the order that the fixup_* kinds are defined in +// ARMFixupKinds.h. +// +// Name Offset (bits) Size (bits) Flags +{ "fixup_arm_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_t2_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, +{ "fixup_arm_pcrel_10_unscaled", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_arm_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_t2_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, +{ "fixup_thumb_adr_pcrel_10",8, 8, MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, +{ "fixup_arm_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_t2_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, +{ "fixup_arm_condbranch", 8, 24, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_arm_uncondbranch", 8, 24, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_arm_uncondbl", 8, 24, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_arm_condbl", 8, 24, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_arm_blx", 8, 24, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_arm_thumb_cp", 8, 8, MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, +{ "fixup_arm_thumb_bcc", 8, 8, MCFixupKindInfo::FKF_IsPCRel }, +// movw / movt: 16-bits immediate but scattered into two chunks 0 - 12, 16 - 19. 
+{ "fixup_arm_movt_hi16", 12, 20, 0 }, +{ "fixup_arm_movw_lo16", 12, 20, 0 }, +{ "fixup_t2_movt_hi16", 12, 20, 0 }, +{ "fixup_t2_movw_lo16", 12, 20, 0 }, }; if (Kind < FirstTargetFixupKind) @@ -105,32 +141,31 @@ public: assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && "Invalid kind!"); - return Infos[Kind - FirstTargetFixupKind]; + return (IsLittleEndian ? InfosLE : InfosBE)[Kind - FirstTargetFixupKind]; } /// processFixupValue - Target hook to process the literal value of a fixup /// if necessary. void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFixup &Fixup, const MCFragment *DF, - MCValue &Target, uint64_t &Value, - bool &IsResolved); + const MCValue &Target, uint64_t &Value, + bool &IsResolved) override; void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value) const; + uint64_t Value, bool IsPCRel) const override; - bool mayNeedRelaxation(const MCInst &Inst) const; + bool mayNeedRelaxation(const MCInst &Inst) const override; - bool fixupNeedsRelaxation(const MCFixup &Fixup, - uint64_t Value, + bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, const MCRelaxableFragment *DF, - const MCAsmLayout &Layout) const; + const MCAsmLayout &Layout) const override; - void relaxInstruction(const MCInst &Inst, MCInst &Res) const; + void relaxInstruction(const MCInst &Inst, MCInst &Res) const override; - bool writeNopData(uint64_t Count, MCObjectWriter *OW) const; + bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override; - void handleAssemblerFlag(MCAssemblerFlag Flag) { + void handleAssemblerFlag(MCAssemblerFlag Flag) override { switch (Flag) { default: break; case MCAF_Code16: @@ -145,6 +180,7 @@ public: unsigned getPointerSize() const { return 4; } bool isThumb() const { return isThumbMode; } void setIsThumb(bool it) { isThumbMode = it; } + bool isLittle() const { return IsLittleEndian; } }; } // end anonymous namespace @@ -155,6 +191,8 @@ static unsigned getRelaxedOpcode(unsigned Op) { case ARM::tLDRpci: return ARM::t2LDRpci; case ARM::tADR: return ARM::t2ADR; case ARM::tB: return ARM::t2B; + case ARM::tCBZ: return ARM::tHINT; + case ARM::tCBNZ: return ARM::tHINT; } } @@ -196,6 +234,12 @@ bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, int64_t Offset = int64_t(Value) - 4; return Offset > 1020 || Offset < 0 || Offset & 3; } + case ARM::fixup_arm_thumb_cb: + // If we have a Thumb CBZ or CBNZ instruction and its target is the next + // instruction it is is actually out of range for the instruction. + // It will be changed to a NOP. + int64_t Offset = (Value & ~1); + return Offset == 2; } llvm_unreachable("Unexpected fixup kind in fixupNeedsRelaxation()!"); } @@ -212,7 +256,18 @@ void ARMAsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const { report_fatal_error("unexpected instruction to relax: " + OS.str()); } - // The instructions we're relaxing have (so far) the same operands. + // If we are changing Thumb CBZ or CBNZ instruction to a NOP, aka tHINT, we + // have to change the operands too. + if ((Inst.getOpcode() == ARM::tCBZ || Inst.getOpcode() == ARM::tCBNZ) && + RelaxedOp == ARM::tHINT) { + Res.setOpcode(RelaxedOp); + Res.addOperand(MCOperand::CreateImm(0)); + Res.addOperand(MCOperand::CreateImm(14)); + Res.addOperand(MCOperand::CreateReg(0)); + return; + } + + // The rest of instructions we're relaxing have the same operands. // We just need to update to the proper opcode. 
Res = Inst; Res.setOpcode(RelaxedOp); @@ -251,8 +306,36 @@ bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { return true; } +static uint32_t swapHalfWords(uint32_t Value, bool IsLittleEndian) { + if (IsLittleEndian) { + // Note that the halfwords are stored high first and low second in thumb; + // so we need to swap the fixup value here to map properly. + uint32_t Swapped = (Value & 0xFFFF0000) >> 16; + Swapped |= (Value & 0x0000FFFF) << 16; + return Swapped; + } + else + return Value; +} + +static uint32_t joinHalfWords(uint32_t FirstHalf, uint32_t SecondHalf, + bool IsLittleEndian) { + uint32_t Value; + + if (IsLittleEndian) { + Value = (SecondHalf & 0xFFFF) << 16; + Value |= (FirstHalf & 0xFFFF); + } else { + Value = (SecondHalf & 0xFFFF); + Value |= (FirstHalf & 0xFFFF) << 16; + } + + return Value; +} + static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, - MCContext *Ctx = NULL) { + bool IsPCRel, MCContext *Ctx, + bool IsLittleEndian) { unsigned Kind = Fixup.getKind(); switch (Kind) { default: @@ -261,12 +344,15 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, case FK_Data_2: case FK_Data_4: return Value; + case FK_SecRel_2: + return Value; + case FK_SecRel_4: + return Value; case ARM::fixup_arm_movt_hi16: - Value >>= 16; + if (!IsPCRel) + Value >>= 16; // Fallthrough - case ARM::fixup_arm_movw_lo16: - case ARM::fixup_arm_movt_hi16_pcrel: - case ARM::fixup_arm_movw_lo16_pcrel: { + case ARM::fixup_arm_movw_lo16: { unsigned Hi4 = (Value & 0xF000) >> 12; unsigned Lo12 = Value & 0x0FFF; // inst{19-16} = Hi4; @@ -275,12 +361,10 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, return Value; } case ARM::fixup_t2_movt_hi16: - Value >>= 16; + if (!IsPCRel) + Value >>= 16; // Fallthrough - case ARM::fixup_t2_movw_lo16: - case ARM::fixup_t2_movt_hi16_pcrel: //FIXME: Shouldn't this be shifted like - // the other hi16 fixup? - case ARM::fixup_t2_movw_lo16_pcrel: { + case ARM::fixup_t2_movw_lo16: { unsigned Hi4 = (Value & 0xF000) >> 12; unsigned i = (Value & 0x800) >> 11; unsigned Mid3 = (Value & 0x700) >> 8; @@ -290,9 +374,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, // inst{14-12} = Mid3; // inst{7-0} = Lo8; Value = (Hi4 << 16) | (i << 26) | (Mid3 << 12) | (Lo8); - uint64_t swapped = (Value & 0xFFFF0000) >> 16; - swapped |= (Value & 0x0000FFFF) << 16; - return swapped; + return swapHalfWords(Value, IsLittleEndian); } case ARM::fixup_arm_ldst_pcrel_12: // ARM PC-relative values are offset by 8. @@ -312,11 +394,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, // Same addressing mode as fixup_arm_pcrel_10, // but with 16-bit halfwords swapped. - if (Kind == ARM::fixup_t2_ldst_pcrel_12) { - uint64_t swapped = (Value & 0xFFFF0000) >> 16; - swapped |= (Value & 0x0000FFFF) << 16; - return swapped; - } + if (Kind == ARM::fixup_t2_ldst_pcrel_12) + return swapHalfWords(Value, IsLittleEndian); return Value; } @@ -349,9 +428,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, out |= (Value & 0x700) << 4; out |= (Value & 0x0FF); - uint64_t swapped = (out & 0xFFFF0000) >> 16; - swapped |= (out & 0x0000FFFF) << 16; - return swapped; + return swapHalfWords(out, IsLittleEndian); } case ARM::fixup_arm_condbranch: @@ -361,6 +438,9 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, case ARM::fixup_arm_blx: // These values don't encode the low two bits since they're always zero. // Offset by 8 just as above. 
+ if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(Fixup.getValue())) + if (SRE->getKind() == MCSymbolRefExpr::VK_ARM_TLSCALL) + return 0; return 0xffffff & ((Value - 8) >> 2); case ARM::fixup_t2_uncondbranch: { Value = Value - 4; @@ -379,9 +459,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, out |= (Value & 0x1FF800) << 5; // imm6 field out |= (Value & 0x0007FF); // imm11 field - uint64_t swapped = (out & 0xFFFF0000) >> 16; - swapped |= (out & 0x0000FFFF) << 16; - return swapped; + return swapHalfWords(out, IsLittleEndian); } case ARM::fixup_t2_condbranch: { Value = Value - 4; @@ -394,70 +472,64 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, out |= (Value & 0x1F800) << 5; // imm6 field out |= (Value & 0x007FF); // imm11 field - uint32_t swapped = (out & 0xFFFF0000) >> 16; - swapped |= (out & 0x0000FFFF) << 16; - return swapped; + return swapHalfWords(out, IsLittleEndian); } case ARM::fixup_arm_thumb_bl: { - // The value doesn't encode the low bit (always zero) and is offset by - // four. The 32-bit immediate value is encoded as - // imm32 = SignExtend(S:I1:I2:imm10:imm11:0) - // where I1 = NOT(J1 ^ S) and I2 = NOT(J2 ^ S). - // The value is encoded into disjoint bit positions in the destination - // opcode. x = unchanged, I = immediate value bit, S = sign extension bit, - // J = either J1 or J2 bit - // - // BL: xxxxxSIIIIIIIIII xxJxJIIIIIIIIIII - // - // Note that the halfwords are stored high first, low second; so we need - // to transpose the fixup value here to map properly. - uint32_t offset = (Value - 4) >> 1; - uint32_t signBit = (offset & 0x800000) >> 23; - uint32_t I1Bit = (offset & 0x400000) >> 22; - uint32_t J1Bit = (I1Bit ^ 0x1) ^ signBit; - uint32_t I2Bit = (offset & 0x200000) >> 21; - uint32_t J2Bit = (I2Bit ^ 0x1) ^ signBit; - uint32_t imm10Bits = (offset & 0x1FF800) >> 11; - uint32_t imm11Bits = (offset & 0x000007FF); - - uint32_t Binary = 0; - uint32_t firstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10Bits); - uint32_t secondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) | - (uint16_t)imm11Bits); - Binary |= secondHalf << 16; - Binary |= firstHalf; - return Binary; - + // The value doesn't encode the low bit (always zero) and is offset by + // four. The 32-bit immediate value is encoded as + // imm32 = SignExtend(S:I1:I2:imm10:imm11:0) + // where I1 = NOT(J1 ^ S) and I2 = NOT(J2 ^ S). + // The value is encoded into disjoint bit positions in the destination + // opcode. x = unchanged, I = immediate value bit, S = sign extension bit, + // J = either J1 or J2 bit + // + // BL: xxxxxSIIIIIIIIII xxJxJIIIIIIIIIII + // + // Note that the halfwords are stored high first, low second; so we need + // to transpose the fixup value here to map properly. + uint32_t offset = (Value - 4) >> 1; + uint32_t signBit = (offset & 0x800000) >> 23; + uint32_t I1Bit = (offset & 0x400000) >> 22; + uint32_t J1Bit = (I1Bit ^ 0x1) ^ signBit; + uint32_t I2Bit = (offset & 0x200000) >> 21; + uint32_t J2Bit = (I2Bit ^ 0x1) ^ signBit; + uint32_t imm10Bits = (offset & 0x1FF800) >> 11; + uint32_t imm11Bits = (offset & 0x000007FF); + + uint32_t FirstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10Bits); + uint32_t SecondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) | + (uint16_t)imm11Bits); + return joinHalfWords(FirstHalf, SecondHalf, IsLittleEndian); } case ARM::fixup_arm_thumb_blx: { - // The value doesn't encode the low two bits (always zero) and is offset by - // four (see fixup_arm_thumb_cp). 
The 32-bit immediate value is encoded as - // imm32 = SignExtend(S:I1:I2:imm10H:imm10L:00) - // where I1 = NOT(J1 ^ S) and I2 = NOT(J2 ^ S). - // The value is encoded into disjoint bit positions in the destination - // opcode. x = unchanged, I = immediate value bit, S = sign extension bit, - // J = either J1 or J2 bit, 0 = zero. - // - // BLX: xxxxxSIIIIIIIIII xxJxJIIIIIIIIII0 - // - // Note that the halfwords are stored high first, low second; so we need - // to transpose the fixup value here to map properly. - uint32_t offset = (Value - 2) >> 2; - uint32_t signBit = (offset & 0x400000) >> 22; - uint32_t I1Bit = (offset & 0x200000) >> 21; - uint32_t J1Bit = (I1Bit ^ 0x1) ^ signBit; - uint32_t I2Bit = (offset & 0x100000) >> 20; - uint32_t J2Bit = (I2Bit ^ 0x1) ^ signBit; - uint32_t imm10HBits = (offset & 0xFFC00) >> 10; - uint32_t imm10LBits = (offset & 0x3FF); - - uint32_t Binary = 0; - uint32_t firstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10HBits); - uint32_t secondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) | - ((uint16_t)imm10LBits) << 1); - Binary |= secondHalf << 16; - Binary |= firstHalf; - return Binary; + // The value doesn't encode the low two bits (always zero) and is offset by + // four (see fixup_arm_thumb_cp). The 32-bit immediate value is encoded as + // imm32 = SignExtend(S:I1:I2:imm10H:imm10L:00) + // where I1 = NOT(J1 ^ S) and I2 = NOT(J2 ^ S). + // The value is encoded into disjoint bit positions in the destination + // opcode. x = unchanged, I = immediate value bit, S = sign extension bit, + // J = either J1 or J2 bit, 0 = zero. + // + // BLX: xxxxxSIIIIIIIIII xxJxJIIIIIIIIII0 + // + // Note that the halfwords are stored high first, low second; so we need + // to transpose the fixup value here to map properly. + uint32_t offset = (Value - 2) >> 2; + if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(Fixup.getValue())) + if (SRE->getKind() == MCSymbolRefExpr::VK_ARM_TLSCALL) + offset = 0; + uint32_t signBit = (offset & 0x400000) >> 22; + uint32_t I1Bit = (offset & 0x200000) >> 21; + uint32_t J1Bit = (I1Bit ^ 0x1) ^ signBit; + uint32_t I2Bit = (offset & 0x100000) >> 20; + uint32_t J2Bit = (I2Bit ^ 0x1) ^ signBit; + uint32_t imm10HBits = (offset & 0xFFC00) >> 10; + uint32_t imm10LBits = (offset & 0x3FF); + + uint32_t FirstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10HBits); + uint32_t SecondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) | + ((uint16_t)imm10LBits) << 1); + return joinHalfWords(FirstHalf, SecondHalf, IsLittleEndian); } case ARM::fixup_arm_thumb_cp: // Offset by 4, and don't encode the low two bits. Two bytes of that @@ -509,11 +581,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, // Same addressing mode as fixup_arm_pcrel_10, but with 16-bit halfwords // swapped. 
- if (Kind == ARM::fixup_t2_pcrel_10) { - uint32_t swapped = (Value & 0xFFFF0000) >> 16; - swapped |= (Value & 0x0000FFFF) << 16; - return swapped; - } + if (Kind == ARM::fixup_t2_pcrel_10) + return swapHalfWords(Value, IsLittleEndian); return Value; } @@ -524,7 +593,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFixup &Fixup, const MCFragment *DF, - MCValue &Target, uint64_t &Value, + const MCValue &Target, uint64_t &Value, bool &IsResolved) { const MCSymbolRefExpr *A = Target.getSymA(); // Some fixups to thumb function symbols need the low bit (thumb bit) @@ -541,11 +610,18 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, Value |= 1; } } + // For a Thumb1 BL instruction, it is possible to have a long jump between + // the basic blocks of the same function. Thus, we would like to resolve + // the offset when the destination is in the same MCFragment. + if (A && (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) { + const MCSymbol &Sym = A->getSymbol().AliasedSymbol(); + const MCSymbolData &SymData = Asm.getSymbolData(Sym); + IsResolved = (SymData.getFragment() == DF); + } // We must always generate a relocation for BL/BLX instructions if we have // a symbol to reference, as the linker relies on knowing the destination // symbol's thumb-ness to get interworking right. if (A && ((unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_blx || - (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl || (unsigned)Fixup.getKind() == ARM::fixup_arm_blx || (unsigned)Fixup.getKind() == ARM::fixup_arm_uncondbl || (unsigned)Fixup.getKind() == ARM::fixup_arm_condbl)) @@ -554,7 +630,8 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, // Try to get the encoded value for the fixup as if we're mapping it into // the instruction. This allows adjustFixupValue() to issue a diagnostic // if the value is invalid. - (void)adjustFixupValue(Fixup, Value, &Asm.getContext()); + (void)adjustFixupValue(Fixup, Value, false, &Asm.getContext(), + IsLittleEndian); } /// getFixupKindNumBytes - The number of bytes the fixup may change. @@ -595,33 +672,101 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { case ARM::fixup_arm_thumb_blx: case ARM::fixup_arm_movt_hi16: case ARM::fixup_arm_movw_lo16: - case ARM::fixup_arm_movt_hi16_pcrel: - case ARM::fixup_arm_movw_lo16_pcrel: case ARM::fixup_t2_movt_hi16: case ARM::fixup_t2_movw_lo16: - case ARM::fixup_t2_movt_hi16_pcrel: - case ARM::fixup_t2_movw_lo16_pcrel: + return 4; + + case FK_SecRel_2: + return 2; + case FK_SecRel_4: + return 4; + } +} + +/// getFixupKindContainerSizeBytes - The number of bytes of the +/// container involved in big endian. +static unsigned getFixupKindContainerSizeBytes(unsigned Kind) { + switch (Kind) { + default: + llvm_unreachable("Unknown fixup kind!"); + + case FK_Data_1: + return 1; + case FK_Data_2: + return 2; + case FK_Data_4: + return 4; + + case ARM::fixup_arm_thumb_bcc: + case ARM::fixup_arm_thumb_cp: + case ARM::fixup_thumb_adr_pcrel_10: + case ARM::fixup_arm_thumb_br: + case ARM::fixup_arm_thumb_cb: + // Instruction size is 2 bytes. 
+ return 2; + + case ARM::fixup_arm_pcrel_10_unscaled: + case ARM::fixup_arm_ldst_pcrel_12: + case ARM::fixup_arm_pcrel_10: + case ARM::fixup_arm_adr_pcrel_12: + case ARM::fixup_arm_uncondbl: + case ARM::fixup_arm_condbl: + case ARM::fixup_arm_blx: + case ARM::fixup_arm_condbranch: + case ARM::fixup_arm_uncondbranch: + case ARM::fixup_t2_ldst_pcrel_12: + case ARM::fixup_t2_condbranch: + case ARM::fixup_t2_uncondbranch: + case ARM::fixup_t2_pcrel_10: + case ARM::fixup_t2_adr_pcrel_12: + case ARM::fixup_arm_thumb_bl: + case ARM::fixup_arm_thumb_blx: + case ARM::fixup_arm_movt_hi16: + case ARM::fixup_arm_movw_lo16: + case ARM::fixup_t2_movt_hi16: + case ARM::fixup_t2_movw_lo16: + // Instruction size is 4 bytes. return 4; } } void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, - unsigned DataSize, uint64_t Value) const { + unsigned DataSize, uint64_t Value, + bool IsPCRel) const { unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); - Value = adjustFixupValue(Fixup, Value); + Value = adjustFixupValue(Fixup, Value, IsPCRel, nullptr, IsLittleEndian); if (!Value) return; // Doesn't change encoding. unsigned Offset = Fixup.getOffset(); assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); + // Used to point to big endian bytes. + unsigned FullSizeBytes; + if (!IsLittleEndian) { + FullSizeBytes = getFixupKindContainerSizeBytes(Fixup.getKind()); + assert((Offset + FullSizeBytes) <= DataSize && "Invalid fixup size!"); + assert(NumBytes <= FullSizeBytes && "Invalid fixup size!"); + } + // For each byte of the fragment that the fixup touches, mask in the bits from // the fixup value. The Value has been "split up" into the appropriate // bitfields above. - for (unsigned i = 0; i != NumBytes; ++i) - Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff); + for (unsigned i = 0; i != NumBytes; ++i) { + unsigned Idx = IsLittleEndian ? i : (FullSizeBytes - 1 - i); + Data[Offset + Idx] |= uint8_t((Value >> (i * 8)) & 0xff); + } } namespace { +// FIXME: This should be in a separate file. +class ARMWinCOFFAsmBackend : public ARMAsmBackend { +public: + ARMWinCOFFAsmBackend(const Target &T, const StringRef &Triple) + : ARMAsmBackend(T, Triple, true) { } + MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + return createARMWinCOFFObjectWriter(OS, /*Is64Bit=*/false); + } +}; // FIXME: This should be in a separate file. // ELF is an ELF of course... 
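The big-endian handling added to applyFixup() above is easy to misread: the fixup value is still generated low byte first, but each byte is stored mirrored within the container whose width getFixupKindContainerSizeBytes() reports. A self-contained sketch of just that indexing rule, with an invented buffer and sample value:

    #include <cstdint>
    #include <cstdio>

    // Mirrors the store loop in ARMAsmBackend::applyFixup(): byte i of Value
    // lands at Data[Offset + i] on little-endian targets and at the mirrored
    // index Data[Offset + FullSizeBytes - 1 - i] on big-endian targets.
    static void placeFixup(uint8_t *Data, unsigned Offset, uint32_t Value,
                           unsigned NumBytes, unsigned FullSizeBytes,
                           bool IsLittleEndian) {
      for (unsigned i = 0; i != NumBytes; ++i) {
        unsigned Idx = IsLittleEndian ? i : (FullSizeBytes - 1 - i);
        Data[Offset + Idx] |= uint8_t((Value >> (i * 8)) & 0xff);
      }
    }

    int main() {
      uint8_t LE[4] = {}, BE[4] = {};
      placeFixup(LE, 0, 0x11223344, 4, 4, true);   // LE bytes: 44 33 22 11
      placeFixup(BE, 0, 0x11223344, 4, 4, false);  // BE bytes: 11 22 33 44
      for (unsigned i = 0; i != 4; ++i) printf("%02x ", LE[i]);
      printf("| ");
      for (unsigned i = 0; i != 4; ++i) printf("%02x ", BE[i]);
      printf("\n");
      return 0;
    }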
@@ -629,11 +774,11 @@ class ELFARMAsmBackend : public ARMAsmBackend { public: uint8_t OSABI; ELFARMAsmBackend(const Target &T, const StringRef TT, - uint8_t _OSABI) - : ARMAsmBackend(T, TT), OSABI(_OSABI) { } + uint8_t OSABI, bool IsLittle) + : ARMAsmBackend(T, TT, IsLittle), OSABI(OSABI) { } - MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return createARMELFObjectWriter(OS, OSABI); + MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + return createARMELFObjectWriter(OS, OSABI, isLittle()); } }; @@ -643,29 +788,28 @@ public: const MachO::CPUSubTypeARM Subtype; DarwinARMAsmBackend(const Target &T, const StringRef TT, MachO::CPUSubTypeARM st) - : ARMAsmBackend(T, TT), Subtype(st) { + : ARMAsmBackend(T, TT, /* IsLittleEndian */ true), Subtype(st) { HasDataInCodeSupport = true; } - MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { return createARMMachObjectWriter(OS, /*Is64Bit=*/false, MachO::CPU_TYPE_ARM, Subtype); } - - virtual bool doesSectionRequireSymbols(const MCSection &Section) const { - return false; - } }; } // end anonymous namespace MCAsmBackend *llvm::createARMAsmBackend(const Target &T, const MCRegisterInfo &MRI, - StringRef TT, StringRef CPU) { + StringRef TT, StringRef CPU, + bool isLittle) { Triple TheTriple(TT); - if (TheTriple.isOSDarwin()) { + switch (TheTriple.getObjectFormat()) { + default: llvm_unreachable("unsupported object format"); + case Triple::MachO: { MachO::CPUSubTypeARM CS = StringSwitch<MachO::CPUSubTypeARM>(TheTriple.getArchName()) .Cases("armv4t", "thumbv4t", MachO::CPU_SUBTYPE_ARM_V4T) @@ -673,7 +817,6 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T, .Cases("armv6", "thumbv6", MachO::CPU_SUBTYPE_ARM_V6) .Cases("armv6m", "thumbv6m", MachO::CPU_SUBTYPE_ARM_V6M) .Cases("armv7em", "thumbv7em", MachO::CPU_SUBTYPE_ARM_V7EM) - .Cases("armv7f", "thumbv7f", MachO::CPU_SUBTYPE_ARM_V7F) .Cases("armv7k", "thumbv7k", MachO::CPU_SUBTYPE_ARM_V7K) .Cases("armv7m", "thumbv7m", MachO::CPU_SUBTYPE_ARM_V7M) .Cases("armv7s", "thumbv7s", MachO::CPU_SUBTYPE_ARM_V7S) @@ -681,13 +824,37 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T, return new DarwinARMAsmBackend(T, TT, CS); } + case Triple::COFF: + assert(TheTriple.isOSWindows() && "non-Windows ARM COFF is not supported"); + return new ARMWinCOFFAsmBackend(T, TT); + case Triple::ELF: + assert(TheTriple.isOSBinFormatELF() && "using ELF for non-ELF target"); + uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS()); + return new ELFARMAsmBackend(T, TT, OSABI, isLittle); + } +} -#if 0 - // FIXME: Introduce yet another checker but assert(0). 
- if (TheTriple.isOSBinFormatCOFF()) - assert(0 && "Windows not supported on ARM"); -#endif +MCAsmBackend *llvm::createARMLEAsmBackend(const Target &T, + const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU) { + return createARMAsmBackend(T, MRI, TT, CPU, true); +} + +MCAsmBackend *llvm::createARMBEAsmBackend(const Target &T, + const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU) { + return createARMAsmBackend(T, MRI, TT, CPU, false); +} - uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS()); - return new ELFARMAsmBackend(T, TT, OSABI); +MCAsmBackend *llvm::createThumbLEAsmBackend(const Target &T, + const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU) { + return createARMAsmBackend(T, MRI, TT, CPU, true); } + +MCAsmBackend *llvm::createThumbBEAsmBackend(const Target &T, + const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU) { + return createARMAsmBackend(T, MRI, TT, CPU, false); +} + diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h index af939fc19129..1686d76b8e1b 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h @@ -183,7 +183,8 @@ namespace ARM_ISB { inline static const char *InstSyncBOptToString(unsigned val) { switch (val) { - default: llvm_unreachable("Unkown memory operation"); + default: + llvm_unreachable("Unknown memory operation"); case RESERVED_0: return "#0x0"; case RESERVED_1: return "#0x1"; case RESERVED_2: return "#0x2"; @@ -278,42 +279,41 @@ namespace ARMII { //===------------------------------------------------------------------===// // ARM Specific MachineOperand flags. - MO_NO_FLAG, + MO_NO_FLAG = 0, /// MO_LO16 - On a symbol operand, this represents a relocation containing /// lower 16 bit of the address. Used only via movw instruction. - MO_LO16, + MO_LO16 = 0x1, /// MO_HI16 - On a symbol operand, this represents a relocation containing /// higher 16 bit of the address. Used only via movt instruction. - MO_HI16, - - /// MO_LO16_NONLAZY - On a symbol operand "FOO", this represents a - /// relocation containing lower 16 bit of the non-lazy-ptr indirect symbol, - /// i.e. "FOO$non_lazy_ptr". - /// Used only via movw instruction. - MO_LO16_NONLAZY, - - /// MO_HI16_NONLAZY - On a symbol operand "FOO", this represents a - /// relocation containing lower 16 bit of the non-lazy-ptr indirect symbol, - /// i.e. "FOO$non_lazy_ptr". Used only via movt instruction. - MO_HI16_NONLAZY, - - /// MO_LO16_NONLAZY_PIC - On a symbol operand "FOO", this represents a - /// relocation containing lower 16 bit of the PC relative address of the - /// non-lazy-ptr indirect symbol, i.e. "FOO$non_lazy_ptr - LABEL". - /// Used only via movw instruction. - MO_LO16_NONLAZY_PIC, - - /// MO_HI16_NONLAZY_PIC - On a symbol operand "FOO", this represents a - /// relocation containing lower 16 bit of the PC relative address of the - /// non-lazy-ptr indirect symbol, i.e. "FOO$non_lazy_ptr - LABEL". - /// Used only via movt instruction. - MO_HI16_NONLAZY_PIC, + MO_HI16 = 0x2, /// MO_PLT - On a symbol operand, this represents an ELF PLT reference on a /// call operand. - MO_PLT + MO_PLT = 0x3, + + /// MO_OPTION_MASK - Most flags are mutually exclusive; this mask selects + /// just that part of the flag set. + MO_OPTION_MASK = 0x3f, + + /// MO_DLLIMPORT - On a symbol operand, this represents that the reference + /// to the symbol is for an import stub. This is used for DLL import + /// storage class indication on Windows. 
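    /// (Values up to MO_OPTION_MASK form a mutually exclusive enumeration;
    /// MO_DLLIMPORT and MO_NONLAZY below are independent bits OR'ed on top.
    /// A hypothetical consumer would therefore split the flags as
    ///   unsigned Opt = Flags & ARMII::MO_OPTION_MASK; // e.g. MO_LO16
    ///   bool NonLazy = (Flags & ARMII::MO_NONLAZY) != 0;
    /// rather than comparing the whole flag word for equality.)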
+ MO_DLLIMPORT = 0x40, + + /// MO_NONLAZY - This is an independent flag, on a symbol operand "FOO" it + /// represents a symbol which, if indirect, will get special Darwin mangling + /// as a non-lazy-ptr indirect symbol (i.e. "L_FOO$non_lazy_ptr"). Can be + /// combined with MO_LO16, MO_HI16 or MO_NO_FLAG (in a constant-pool, for + /// example). + MO_NONLAZY = 0x80, + + // It's undefined behaviour if an enum overflows the range between its + // smallest and largest values, but since these are |ed together, it can + // happen. Put a sentinel in (values of this enum are stored as "unsigned + // char"). + MO_UNUSED_MAXIMUM = 0xff }; enum { diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index f98bbd204c7a..a86601b6bb5b 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -34,14 +34,11 @@ namespace { virtual ~ARMELFObjectWriter(); - virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, - bool IsPCRel, bool IsRelocWithSymbol, - int64_t Addend) const; - virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm, - const MCValue &Target, - const MCFragment &F, - const MCFixup &Fixup, - bool IsPCRel) const; + unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel) const override; + + bool needsRelocateWithSymbol(const MCSymbolData &SD, + unsigned Type) const override; }; } @@ -52,91 +49,19 @@ ARMELFObjectWriter::ARMELFObjectWriter(uint8_t OSABI) ARMELFObjectWriter::~ARMELFObjectWriter() {} -// In ARM, _MergedGlobals and other most symbols get emitted directly. -// I.e. not as an offset to a section symbol. -// This code is an approximation of what ARM/gcc does. - -STATISTIC(PCRelCount, "Total number of PIC Relocations"); -STATISTIC(NonPCRelCount, "Total number of non-PIC relocations"); - -const MCSymbol *ARMELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm, - const MCValue &Target, - const MCFragment &F, - const MCFixup &Fixup, - bool IsPCRel) const { - const MCSymbol &Symbol = Target.getSymA()->getSymbol().AliasedSymbol(); - bool EmitThisSym = false; - - const MCSectionELF &Section = - static_cast<const MCSectionELF&>(Symbol.getSection()); - bool InNormalSection = true; - unsigned RelocType = 0; - RelocType = GetRelocTypeInner(Target, Fixup, IsPCRel); - - DEBUG( - const MCSymbolRefExpr::VariantKind Kind = Target.getSymA()->getKind(); - MCSymbolRefExpr::VariantKind Kind2; - Kind2 = Target.getSymB() ? 
Target.getSymB()->getKind() : - MCSymbolRefExpr::VK_None; - dbgs() << "considering symbol " - << Section.getSectionName() << "/" - << Symbol.getName() << "/" - << " Rel:" << (unsigned)RelocType - << " Kind: " << (int)Kind << "/" << (int)Kind2 - << " Tmp:" - << Symbol.isAbsolute() << "/" << Symbol.isDefined() << "/" - << Symbol.isVariable() << "/" << Symbol.isTemporary() - << " Counts:" << PCRelCount << "/" << NonPCRelCount << "\n"); - - if (IsPCRel) { ++PCRelCount; - switch (RelocType) { - default: - // Most relocation types are emitted as explicit symbols - InNormalSection = - StringSwitch<bool>(Section.getSectionName()) - .Case(".data.rel.ro.local", false) - .Case(".data.rel", false) - .Case(".bss", false) - .Default(true); - EmitThisSym = true; - break; - case ELF::R_ARM_ABS32: - // But things get strange with R_ARM_ABS32 - // In this case, most things that go in .rodata show up - // as section relative relocations - InNormalSection = - StringSwitch<bool>(Section.getSectionName()) - .Case(".data.rel.ro.local", false) - .Case(".data.rel", false) - .Case(".rodata", false) - .Case(".bss", false) - .Default(true); - EmitThisSym = false; - break; - } - } else { - NonPCRelCount++; - InNormalSection = - StringSwitch<bool>(Section.getSectionName()) - .Case(".data.rel.ro.local", false) - .Case(".rodata", false) - .Case(".data.rel", false) - .Case(".bss", false) - .Default(true); - - switch (RelocType) { - default: EmitThisSym = true; break; - case ELF::R_ARM_ABS32: EmitThisSym = false; break; - case ELF::R_ARM_PREL31: EmitThisSym = false; break; - } - } - - if (EmitThisSym) - return &Symbol; - if (! Symbol.isTemporary() && InNormalSection) { - return &Symbol; +bool ARMELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD, + unsigned Type) const { + // FIXME: This is extremelly conservative. This really needs to use a + // whitelist with a clear explanation for why each realocation needs to + // point to the symbol, not to the section. + switch (Type) { + default: + return true; + + case ELF::R_ARM_PREL31: + case ELF::R_ARM_ABS32: + return false; } - return NULL; } // Need to examine the Fixup when determining whether to @@ -144,17 +69,14 @@ const MCSymbol *ARMELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm, // offset unsigned ARMELFObjectWriter::GetRelocType(const MCValue &Target, const MCFixup &Fixup, - bool IsPCRel, - bool IsRelocWithSymbol, - int64_t Addend) const { + bool IsPCRel) const { return GetRelocTypeInner(Target, Fixup, IsPCRel); } unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const { - MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ? 
- MCSymbolRefExpr::VK_None : Target.getSymA()->getKind(); + MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant(); unsigned Type = 0; if (IsPCRel) { @@ -166,19 +88,25 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_None: Type = ELF::R_ARM_REL32; break; - case MCSymbolRefExpr::VK_ARM_TLSGD: + case MCSymbolRefExpr::VK_TLSGD: llvm_unreachable("unimplemented"); - case MCSymbolRefExpr::VK_ARM_GOTTPOFF: + case MCSymbolRefExpr::VK_GOTTPOFF: Type = ELF::R_ARM_TLS_IE32; break; + case MCSymbolRefExpr::VK_GOTPCREL: + Type = ELF::R_ARM_GOT_PREL; + break; } break; case ARM::fixup_arm_blx: case ARM::fixup_arm_uncondbl: switch (Modifier) { - case MCSymbolRefExpr::VK_ARM_PLT: + case MCSymbolRefExpr::VK_PLT: Type = ELF::R_ARM_PLT32; break; + case MCSymbolRefExpr::VK_ARM_TLSCALL: + Type = ELF::R_ARM_TLS_CALL; + break; default: Type = ELF::R_ARM_CALL; break; @@ -194,24 +122,27 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, Type = ELF::R_ARM_THM_JUMP24; break; case ARM::fixup_arm_movt_hi16: - case ARM::fixup_arm_movt_hi16_pcrel: Type = ELF::R_ARM_MOVT_PREL; break; case ARM::fixup_arm_movw_lo16: - case ARM::fixup_arm_movw_lo16_pcrel: Type = ELF::R_ARM_MOVW_PREL_NC; break; case ARM::fixup_t2_movt_hi16: - case ARM::fixup_t2_movt_hi16_pcrel: Type = ELF::R_ARM_THM_MOVT_PREL; break; case ARM::fixup_t2_movw_lo16: - case ARM::fixup_t2_movw_lo16_pcrel: Type = ELF::R_ARM_THM_MOVW_PREL_NC; break; case ARM::fixup_arm_thumb_bl: case ARM::fixup_arm_thumb_blx: - Type = ELF::R_ARM_THM_CALL; + switch (Modifier) { + case MCSymbolRefExpr::VK_ARM_TLSCALL: + Type = ELF::R_ARM_THM_TLS_CALL; + break; + default: + Type = ELF::R_ARM_THM_CALL; + break; + } break; } } else { @@ -223,24 +154,27 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_ARM_NONE: Type = ELF::R_ARM_NONE; break; - case MCSymbolRefExpr::VK_ARM_GOT: + case MCSymbolRefExpr::VK_GOT: Type = ELF::R_ARM_GOT_BREL; break; - case MCSymbolRefExpr::VK_ARM_TLSGD: + case MCSymbolRefExpr::VK_TLSGD: Type = ELF::R_ARM_TLS_GD32; break; - case MCSymbolRefExpr::VK_ARM_TPOFF: + case MCSymbolRefExpr::VK_TPOFF: Type = ELF::R_ARM_TLS_LE32; break; - case MCSymbolRefExpr::VK_ARM_GOTTPOFF: + case MCSymbolRefExpr::VK_GOTTPOFF: Type = ELF::R_ARM_TLS_IE32; break; case MCSymbolRefExpr::VK_None: Type = ELF::R_ARM_ABS32; break; - case MCSymbolRefExpr::VK_ARM_GOTOFF: + case MCSymbolRefExpr::VK_GOTOFF: Type = ELF::R_ARM_GOTOFF32; break; + case MCSymbolRefExpr::VK_GOTPCREL: + Type = ELF::R_ARM_GOT_PREL; + break; case MCSymbolRefExpr::VK_ARM_TARGET1: Type = ELF::R_ARM_TARGET1; break; @@ -250,6 +184,18 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_ARM_PREL31: Type = ELF::R_ARM_PREL31; break; + case MCSymbolRefExpr::VK_ARM_TLSLDO: + Type = ELF::R_ARM_TLS_LDO32; + break; + case MCSymbolRefExpr::VK_ARM_TLSCALL: + Type = ELF::R_ARM_TLS_CALL; + break; + case MCSymbolRefExpr::VK_ARM_TLSDESC: + Type = ELF::R_ARM_TLS_GOTDESC; + break; + case MCSymbolRefExpr::VK_ARM_TLSDESCSEQ: + Type = ELF::R_ARM_TLS_DESCSEQ; + break; } break; case ARM::fixup_arm_ldst_pcrel_12: @@ -283,7 +229,8 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, } MCObjectWriter *llvm::createARMELFObjectWriter(raw_ostream &OS, - uint8_t OSABI) { + uint8_t OSABI, + bool IsLittleEndian) { MCELFObjectTargetWriter *MOTW = new ARMELFObjectWriter(OSABI); - return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true); + return 
createELFObjectWriter(MOTW, OS, IsLittleEndian); } diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 471897de5c1c..7b5d8b01dfe6 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -13,14 +13,14 @@ // //===----------------------------------------------------------------------===// -#include "ARMBuildAttrs.h" +#include "ARMArchName.h" #include "ARMFPUName.h" #include "ARMRegisterInfo.h" -#include "ARMUnwindOp.h" #include "ARMUnwindOpAsm.h" -#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" @@ -30,6 +30,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSection.h" @@ -37,16 +38,20 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" +#include "llvm/Support/ARMBuildAttributes.h" +#include "llvm/Support/ARMEHABI.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ELF.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/LEB128.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> using namespace llvm; static std::string GetAEABIUnwindPersonalityName(unsigned Index) { - assert(Index < NUM_PERSONALITY_INDEX && "Invalid personality index"); + assert(Index < ARM::EHABI::NUM_PERSONALITY_INDEX && + "Invalid personality index"); return (Twine("__aeabi_unwind_cpp_pr") + Twine(Index)).str(); } @@ -58,7 +63,46 @@ static const char *GetFPUName(unsigned ID) { #define ARM_FPU_NAME(NAME, ID) case ARM::ID: return NAME; #include "ARMFPUName.def" } - return NULL; + return nullptr; +} + +static const char *GetArchName(unsigned ID) { + switch (ID) { + default: + llvm_unreachable("Unknown ARCH kind"); + break; +#define ARM_ARCH_NAME(NAME, ID, DEFAULT_CPU_NAME, DEFAULT_CPU_ARCH) \ + case ARM::ID: return NAME; +#define ARM_ARCH_ALIAS(NAME, ID) /* empty */ +#include "ARMArchName.def" + } + return nullptr; +} + +static const char *GetArchDefaultCPUName(unsigned ID) { + switch (ID) { + default: + llvm_unreachable("Unknown ARCH kind"); + break; +#define ARM_ARCH_NAME(NAME, ID, DEFAULT_CPU_NAME, DEFAULT_CPU_ARCH) \ + case ARM::ID: return DEFAULT_CPU_NAME; +#define ARM_ARCH_ALIAS(NAME, ID) /* empty */ +#include "ARMArchName.def" + } + return nullptr; +} + +static unsigned GetArchDefaultCPUArch(unsigned ID) { + switch (ID) { + default: + llvm_unreachable("Unknown ARCH kind"); + break; +#define ARM_ARCH_NAME(NAME, ID, DEFAULT_CPU_NAME, DEFAULT_CPU_ARCH) \ + case ARM::ID: return ARMBuildAttrs::DEFAULT_CPU_ARCH; +#define ARM_ARCH_ALIAS(NAME, ID) /* empty */ +#include "ARMArchName.def" + } + return 0; } namespace { @@ -68,36 +112,56 @@ class ARMELFStreamer; class ARMTargetAsmStreamer : public ARMTargetStreamer { formatted_raw_ostream &OS; MCInstPrinter &InstPrinter; - - virtual void emitFnStart(); - virtual void emitFnEnd(); - virtual void emitCantUnwind(); - virtual void emitPersonality(const MCSymbol *Personality); - virtual void emitHandlerData(); - virtual void emitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset = 0); - virtual void emitPad(int64_t Offset); - virtual void 
emitRegSave(const SmallVectorImpl<unsigned> &RegList, - bool isVector); - - virtual void switchVendor(StringRef Vendor); - virtual void emitAttribute(unsigned Attribute, unsigned Value); - virtual void emitTextAttribute(unsigned Attribute, StringRef String); - virtual void emitFPU(unsigned FPU); - virtual void finishAttributeSection(); + bool IsVerboseAsm; + + void emitFnStart() override; + void emitFnEnd() override; + void emitCantUnwind() override; + void emitPersonality(const MCSymbol *Personality) override; + void emitPersonalityIndex(unsigned Index) override; + void emitHandlerData() override; + void emitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset = 0) override; + void emitMovSP(unsigned Reg, int64_t Offset = 0) override; + void emitPad(int64_t Offset) override; + void emitRegSave(const SmallVectorImpl<unsigned> &RegList, + bool isVector) override; + void emitUnwindRaw(int64_t Offset, + const SmallVectorImpl<uint8_t> &Opcodes) override; + + void switchVendor(StringRef Vendor) override; + void emitAttribute(unsigned Attribute, unsigned Value) override; + void emitTextAttribute(unsigned Attribute, StringRef String) override; + void emitIntTextAttribute(unsigned Attribute, unsigned IntValue, + StringRef StrinValue) override; + void emitArch(unsigned Arch) override; + void emitObjectArch(unsigned Arch) override; + void emitFPU(unsigned FPU) override; + void emitInst(uint32_t Inst, char Suffix = '\0') override; + void finishAttributeSection() override; + + void AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) override; + void emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) override; public: - ARMTargetAsmStreamer(formatted_raw_ostream &OS, MCInstPrinter &InstPrinter); + ARMTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS, + MCInstPrinter &InstPrinter, bool VerboseAsm); }; -ARMTargetAsmStreamer::ARMTargetAsmStreamer(formatted_raw_ostream &OS, - MCInstPrinter &InstPrinter) - : OS(OS), InstPrinter(InstPrinter) {} +ARMTargetAsmStreamer::ARMTargetAsmStreamer(MCStreamer &S, + formatted_raw_ostream &OS, + MCInstPrinter &InstPrinter, + bool VerboseAsm) + : ARMTargetStreamer(S), OS(OS), InstPrinter(InstPrinter), + IsVerboseAsm(VerboseAsm) {} void ARMTargetAsmStreamer::emitFnStart() { OS << "\t.fnstart\n"; } void ARMTargetAsmStreamer::emitFnEnd() { OS << "\t.fnend\n"; } void ARMTargetAsmStreamer::emitCantUnwind() { OS << "\t.cantunwind\n"; } void ARMTargetAsmStreamer::emitPersonality(const MCSymbol *Personality) { OS << "\t.personality " << Personality->getName() << '\n'; } +void ARMTargetAsmStreamer::emitPersonalityIndex(unsigned Index) { + OS << "\t.personalityindex " << Index << '\n'; +} void ARMTargetAsmStreamer::emitHandlerData() { OS << "\t.handlerdata\n"; } void ARMTargetAsmStreamer::emitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset) { @@ -109,6 +173,16 @@ void ARMTargetAsmStreamer::emitSetFP(unsigned FpReg, unsigned SpReg, OS << ", #" << Offset; OS << '\n'; } +void ARMTargetAsmStreamer::emitMovSP(unsigned Reg, int64_t Offset) { + assert((Reg != ARM::SP && Reg != ARM::PC) && + "the operand of .movsp cannot be either sp or pc"); + + OS << "\t.movsp\t"; + InstPrinter.printRegName(OS, Reg); + if (Offset) + OS << ", #" << Offset; + OS << '\n'; +} void ARMTargetAsmStreamer::emitPad(int64_t Offset) { OS << "\t.pad\t#" << Offset << '\n'; } @@ -132,22 +206,82 @@ void ARMTargetAsmStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList, void ARMTargetAsmStreamer::switchVendor(StringRef Vendor) { } void ARMTargetAsmStreamer::emitAttribute(unsigned Attribute, 
unsigned Value) { - OS << "\t.eabi_attribute\t" << Attribute << ", " << Twine(Value) << "\n"; + OS << "\t.eabi_attribute\t" << Attribute << ", " << Twine(Value); + if (IsVerboseAsm) { + StringRef Name = ARMBuildAttrs::AttrTypeAsString(Attribute); + if (!Name.empty()) + OS << "\t@ " << Name; + } + OS << "\n"; } void ARMTargetAsmStreamer::emitTextAttribute(unsigned Attribute, StringRef String) { switch (Attribute) { - default: llvm_unreachable("Unsupported Text attribute in ASM Mode"); case ARMBuildAttrs::CPU_name: - OS << "\t.cpu\t" << String.lower() << "\n"; + OS << "\t.cpu\t" << String.lower(); + break; + default: + OS << "\t.eabi_attribute\t" << Attribute << ", \"" << String << "\""; + if (IsVerboseAsm) { + StringRef Name = ARMBuildAttrs::AttrTypeAsString(Attribute); + if (!Name.empty()) + OS << "\t@ " << Name; + } break; } + OS << "\n"; +} +void ARMTargetAsmStreamer::emitIntTextAttribute(unsigned Attribute, + unsigned IntValue, + StringRef StringValue) { + switch (Attribute) { + default: llvm_unreachable("unsupported multi-value attribute in asm mode"); + case ARMBuildAttrs::compatibility: + OS << "\t.eabi_attribute\t" << Attribute << ", " << IntValue; + if (!StringValue.empty()) + OS << ", \"" << StringValue << "\""; + if (IsVerboseAsm) + OS << "\t@ " << ARMBuildAttrs::AttrTypeAsString(Attribute); + break; + } + OS << "\n"; +} +void ARMTargetAsmStreamer::emitArch(unsigned Arch) { + OS << "\t.arch\t" << GetArchName(Arch) << "\n"; +} +void ARMTargetAsmStreamer::emitObjectArch(unsigned Arch) { + OS << "\t.object_arch\t" << GetArchName(Arch) << '\n'; } void ARMTargetAsmStreamer::emitFPU(unsigned FPU) { OS << "\t.fpu\t" << GetFPUName(FPU) << "\n"; } void ARMTargetAsmStreamer::finishAttributeSection() { } +void +ARMTargetAsmStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *S) { + OS << "\t.tlsdescseq\t" << S->getSymbol().getName(); +} + +void ARMTargetAsmStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) { + OS << "\t.thumb_set\t" << *Symbol << ", " << *Value << '\n'; +} + +void ARMTargetAsmStreamer::emitInst(uint32_t Inst, char Suffix) { + OS << "\t.inst"; + if (Suffix) + OS << "." << Suffix; + OS << "\t0x" << utohexstr(Inst) << "\n"; +} + +void ARMTargetAsmStreamer::emitUnwindRaw(int64_t Offset, + const SmallVectorImpl<uint8_t> &Opcodes) { + OS << "\t.unwind_raw " << Offset; + for (SmallVectorImpl<uint8_t>::const_iterator OCI = Opcodes.begin(), + OCE = Opcodes.end(); + OCI != OCE; ++OCI) + OS << ", 0x" << utohexstr(*OCI); + OS << '\n'; +} class ARMTargetELFStreamer : public ARMTargetStreamer { private: @@ -158,7 +292,8 @@ private: enum { HiddenAttribute = 0, NumericAttribute, - TextAttribute + TextAttribute, + NumericAndTextAttributes } Type; unsigned Tag; unsigned IntValue; @@ -171,26 +306,17 @@ private: StringRef CurrentVendor; unsigned FPU; + unsigned Arch; + unsigned EmittedArch; SmallVector<AttributeItem, 64> Contents; const MCSection *AttributeSection; - // FIXME: this should be in a more generic place, but - // getULEBSize() is in MCAsmInfo and will be moved to MCDwarf - static size_t getULEBSize(int Value) { - size_t Size = 0; - do { - Value >>= 7; - Size += sizeof(int8_t); // Is this really necessary? 
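      // (The replacement, getULEB128Size() from llvm/Support/LEB128.h,
      // computes the same quantity: one byte per started 7-bit group,
      // so getULEB128Size(127) == 1 and getULEB128Size(128) == 2.)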
- } while (Value); - return Size; - } - AttributeItem *getAttributeItem(unsigned Attribute) { for (size_t i = 0; i < Contents.size(); ++i) if (Contents[i].Tag == Attribute) return &Contents[i]; - return 0; + return nullptr; } void setAttributeItem(unsigned Attribute, unsigned Value, @@ -199,6 +325,7 @@ private: if (AttributeItem *Item = getAttributeItem(Attribute)) { if (!OverwriteExisting) return; + Item->Type = AttributeItem::NumericAttribute; Item->IntValue = Value; return; } @@ -219,6 +346,7 @@ private: if (AttributeItem *Item = getAttributeItem(Attribute)) { if (!OverwriteExisting) return; + Item->Type = AttributeItem::TextAttribute; Item->StringValue = Value; return; } @@ -233,33 +361,69 @@ private: Contents.push_back(Item); } + void setAttributeItems(unsigned Attribute, unsigned IntValue, + StringRef StringValue, bool OverwriteExisting) { + // Look for existing attribute item + if (AttributeItem *Item = getAttributeItem(Attribute)) { + if (!OverwriteExisting) + return; + Item->Type = AttributeItem::NumericAndTextAttributes; + Item->IntValue = IntValue; + Item->StringValue = StringValue; + return; + } + + // Create new attribute item + AttributeItem Item = { + AttributeItem::NumericAndTextAttributes, + Attribute, + IntValue, + StringValue + }; + Contents.push_back(Item); + } + + void emitArchDefaultAttributes(); void emitFPUDefaultAttributes(); ARMELFStreamer &getStreamer(); - virtual void emitFnStart(); - virtual void emitFnEnd(); - virtual void emitCantUnwind(); - virtual void emitPersonality(const MCSymbol *Personality); - virtual void emitHandlerData(); - virtual void emitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset = 0); - virtual void emitPad(int64_t Offset); - virtual void emitRegSave(const SmallVectorImpl<unsigned> &RegList, - bool isVector); - - virtual void switchVendor(StringRef Vendor); - virtual void emitAttribute(unsigned Attribute, unsigned Value); - virtual void emitTextAttribute(unsigned Attribute, StringRef String); - virtual void emitFPU(unsigned FPU); - virtual void finishAttributeSection(); + void emitFnStart() override; + void emitFnEnd() override; + void emitCantUnwind() override; + void emitPersonality(const MCSymbol *Personality) override; + void emitPersonalityIndex(unsigned Index) override; + void emitHandlerData() override; + void emitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset = 0) override; + void emitMovSP(unsigned Reg, int64_t Offset = 0) override; + void emitPad(int64_t Offset) override; + void emitRegSave(const SmallVectorImpl<unsigned> &RegList, + bool isVector) override; + void emitUnwindRaw(int64_t Offset, + const SmallVectorImpl<uint8_t> &Opcodes) override; + + void switchVendor(StringRef Vendor) override; + void emitAttribute(unsigned Attribute, unsigned Value) override; + void emitTextAttribute(unsigned Attribute, StringRef String) override; + void emitIntTextAttribute(unsigned Attribute, unsigned IntValue, + StringRef StringValue) override; + void emitArch(unsigned Arch) override; + void emitObjectArch(unsigned Arch) override; + void emitFPU(unsigned FPU) override; + void emitInst(uint32_t Inst, char Suffix = '\0') override; + void finishAttributeSection() override; + void emitLabel(MCSymbol *Symbol) override; + + void AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) override; + void emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) override; size_t calculateContentSize() const; public: - ARMTargetELFStreamer() - : ARMTargetStreamer(), CurrentVendor("aeabi"), FPU(ARM::INVALID_FPU), - AttributeSection(0) { - } + 
ARMTargetELFStreamer(MCStreamer &S) + : ARMTargetStreamer(S), CurrentVendor("aeabi"), FPU(ARM::INVALID_FPU), + Arch(ARM::INVALID_ARCH), EmittedArch(ARM::INVALID_ARCH), + AttributeSection(nullptr) {} }; /// Extend the generic ELFStreamer class so that it can emit mapping symbols at @@ -278,30 +442,32 @@ class ARMELFStreamer : public MCELFStreamer { public: friend class ARMTargetELFStreamer; - ARMELFStreamer(MCContext &Context, MCTargetStreamer *TargetStreamer, - MCAsmBackend &TAB, raw_ostream &OS, MCCodeEmitter *Emitter, - bool IsThumb) - : MCELFStreamer(Context, TargetStreamer, TAB, OS, Emitter), - IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None) { + ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, + MCCodeEmitter *Emitter, bool IsThumb) + : MCELFStreamer(Context, TAB, OS, Emitter), IsThumb(IsThumb), + MappingSymbolCounter(0), LastEMS(EMS_None) { Reset(); } ~ARMELFStreamer() {} - virtual void FinishImpl(); + void FinishImpl() override; // ARM exception handling directives void emitFnStart(); void emitFnEnd(); void emitCantUnwind(); void emitPersonality(const MCSymbol *Per); + void emitPersonalityIndex(unsigned index); void emitHandlerData(); void emitSetFP(unsigned NewFpReg, unsigned NewSpReg, int64_t Offset = 0); + void emitMovSP(unsigned Reg, int64_t Offset = 0); void emitPad(int64_t Offset); void emitRegSave(const SmallVectorImpl<unsigned> &RegList, bool isVector); + void emitUnwindRaw(int64_t Offset, const SmallVectorImpl<uint8_t> &Opcodes); - virtual void ChangeSection(const MCSection *Section, - const MCExpr *Subsection) { + void ChangeSection(const MCSection *Section, + const MCExpr *Subsection) override { // We have to keep track of the mapping symbol state of any sections we // use. Each one should start off as EMS_None, which is provided as the // default constructor by DenseMap::lookup. @@ -314,19 +480,58 @@ public: /// This function is the one used to emit instruction data into the ELF /// streamer. We override it to add the appropriate mapping symbol if /// necessary. - virtual void EmitInstruction(const MCInst& Inst) { + void EmitInstruction(const MCInst& Inst, + const MCSubtargetInfo &STI) override { if (IsThumb) EmitThumbMappingSymbol(); else EmitARMMappingSymbol(); - MCELFStreamer::EmitInstruction(Inst); + MCELFStreamer::EmitInstruction(Inst, STI); + } + + void emitInst(uint32_t Inst, char Suffix) { + unsigned Size; + char Buffer[4]; + const bool LittleEndian = getContext().getAsmInfo()->isLittleEndian(); + + switch (Suffix) { + case '\0': + Size = 4; + + assert(!IsThumb); + EmitARMMappingSymbol(); + for (unsigned II = 0, IE = Size; II != IE; II++) { + const unsigned I = LittleEndian ? (Size - II - 1) : II; + Buffer[Size - II - 1] = uint8_t(Inst >> I * CHAR_BIT); + } + + break; + case 'n': + case 'w': + Size = (Suffix == 'n' ? 2 : 4); + + assert(IsThumb); + EmitThumbMappingSymbol(); + for (unsigned II = 0, IE = Size; II != IE; II = II + 2) { + const unsigned I0 = LittleEndian ? II + 0 : (Size - II - 1); + const unsigned I1 = LittleEndian ? II + 1 : (Size - II - 2); + Buffer[Size - II - 2] = uint8_t(Inst >> I0 * CHAR_BIT); + Buffer[Size - II - 1] = uint8_t(Inst >> I1 * CHAR_BIT); + } + + break; + default: + llvm_unreachable("Invalid Suffix"); + } + + MCELFStreamer::EmitBytes(StringRef(Buffer, Size)); } /// This is one of the functions used to emit data into an ELF section, so the /// ARM streamer overrides it to add the appropriate mapping symbol ($d) if /// necessary. 
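  /// (The $a/$t/$d mapping symbols required by the ARM ELF supplement are
  /// what let disassemblers and linkers tell instructions from literal
  /// data; that is why every data-emission path here funnels through
  /// EmitDataMappingSymbol() before writing bytes.)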
- virtual void EmitBytes(StringRef Data) { + void EmitBytes(StringRef Data) override { EmitDataMappingSymbol(); MCELFStreamer::EmitBytes(Data); } @@ -334,12 +539,13 @@ public: /// This is one of the functions used to emit data into an ELF section, so the /// ARM streamer overrides it to add the appropriate mapping symbol ($d) if /// necessary. - virtual void EmitValueImpl(const MCExpr *Value, unsigned Size) { + void EmitValueImpl(const MCExpr *Value, unsigned Size, + const SMLoc &Loc) override { EmitDataMappingSymbol(); MCELFStreamer::EmitValueImpl(Value, Size); } - virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) { + void EmitAssemblerFlag(MCAssemblerFlag Flag) override { MCELFStreamer::EmitAssemblerFlag(Flag); switch (Flag) { @@ -402,13 +608,9 @@ private: Symbol->setVariableValue(Value); } - void EmitThumbFunc(MCSymbol *Func) { - // FIXME: Anything needed here to flag the function as thumb? - + void EmitThumbFunc(MCSymbol *Func) override { getAssembler().setIsThumbFunc(Func); - - MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Func); - SD.setFlags(SD.getFlags() | ELF_Other_ThumbFunc); + EmitSymbolAttribute(Func, MCSA_ELF_TypeFunction); } // Helper functions for ARM exception handling directives @@ -423,6 +625,8 @@ private: void SwitchToExTabSection(const MCSymbol &FnStart); void SwitchToExIdxSection(const MCSymbol &FnStart); + void EmitFixup(const MCExpr *Expr, MCFixupKind Kind); + bool IsThumb; int64_t MappingSymbolCounter; @@ -446,8 +650,7 @@ private: } // end anonymous namespace ARMELFStreamer &ARMTargetELFStreamer::getStreamer() { - ARMELFStreamer *S = static_cast<ARMELFStreamer *>(Streamer); - return *S; + return static_cast<ARMELFStreamer &>(Streamer); } void ARMTargetELFStreamer::emitFnStart() { getStreamer().emitFnStart(); } @@ -456,6 +659,9 @@ void ARMTargetELFStreamer::emitCantUnwind() { getStreamer().emitCantUnwind(); } void ARMTargetELFStreamer::emitPersonality(const MCSymbol *Personality) { getStreamer().emitPersonality(Personality); } +void ARMTargetELFStreamer::emitPersonalityIndex(unsigned Index) { + getStreamer().emitPersonalityIndex(Index); +} void ARMTargetELFStreamer::emitHandlerData() { getStreamer().emitHandlerData(); } @@ -463,6 +669,9 @@ void ARMTargetELFStreamer::emitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset) { getStreamer().emitSetFP(FpReg, SpReg, Offset); } +void ARMTargetELFStreamer::emitMovSP(unsigned Reg, int64_t Offset) { + getStreamer().emitMovSP(Reg, Offset); +} void ARMTargetELFStreamer::emitPad(int64_t Offset) { getStreamer().emitPad(Offset); } @@ -470,6 +679,10 @@ void ARMTargetELFStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList, bool isVector) { getStreamer().emitRegSave(RegList, isVector); } +void ARMTargetELFStreamer::emitUnwindRaw(int64_t Offset, + const SmallVectorImpl<uint8_t> &Opcodes) { + getStreamer().emitUnwindRaw(Offset, Opcodes); +} void ARMTargetELFStreamer::switchVendor(StringRef Vendor) { assert(!Vendor.empty() && "Vendor cannot be empty."); @@ -491,6 +704,108 @@ void ARMTargetELFStreamer::emitTextAttribute(unsigned Attribute, StringRef Value) { setAttributeItem(Attribute, Value, /* OverwriteExisting= */ true); } +void ARMTargetELFStreamer::emitIntTextAttribute(unsigned Attribute, + unsigned IntValue, + StringRef StringValue) { + setAttributeItems(Attribute, IntValue, StringValue, + /* OverwriteExisting= */ true); +} +void ARMTargetELFStreamer::emitArch(unsigned Value) { + Arch = Value; +} +void ARMTargetELFStreamer::emitObjectArch(unsigned Value) { + EmittedArch = Value; +} +void 
ARMTargetELFStreamer::emitArchDefaultAttributes() { + using namespace ARMBuildAttrs; + + setAttributeItem(CPU_name, GetArchDefaultCPUName(Arch), false); + if (EmittedArch == ARM::INVALID_ARCH) + setAttributeItem(CPU_arch, GetArchDefaultCPUArch(Arch), false); + else + setAttributeItem(CPU_arch, GetArchDefaultCPUArch(EmittedArch), false); + + switch (Arch) { + case ARM::ARMV2: + case ARM::ARMV2A: + case ARM::ARMV3: + case ARM::ARMV3M: + case ARM::ARMV4: + case ARM::ARMV5: + setAttributeItem(ARM_ISA_use, Allowed, false); + break; + + case ARM::ARMV4T: + case ARM::ARMV5T: + case ARM::ARMV5TE: + case ARM::ARMV6: + case ARM::ARMV6J: + setAttributeItem(ARM_ISA_use, Allowed, false); + setAttributeItem(THUMB_ISA_use, Allowed, false); + break; + + case ARM::ARMV6T2: + setAttributeItem(ARM_ISA_use, Allowed, false); + setAttributeItem(THUMB_ISA_use, AllowThumb32, false); + break; + + case ARM::ARMV6Z: + case ARM::ARMV6ZK: + setAttributeItem(ARM_ISA_use, Allowed, false); + setAttributeItem(THUMB_ISA_use, Allowed, false); + setAttributeItem(Virtualization_use, AllowTZ, false); + break; + + case ARM::ARMV6M: + setAttributeItem(THUMB_ISA_use, Allowed, false); + break; + + case ARM::ARMV7: + setAttributeItem(THUMB_ISA_use, AllowThumb32, false); + break; + + case ARM::ARMV7A: + setAttributeItem(CPU_arch_profile, ApplicationProfile, false); + setAttributeItem(ARM_ISA_use, Allowed, false); + setAttributeItem(THUMB_ISA_use, AllowThumb32, false); + break; + + case ARM::ARMV7R: + setAttributeItem(CPU_arch_profile, RealTimeProfile, false); + setAttributeItem(ARM_ISA_use, Allowed, false); + setAttributeItem(THUMB_ISA_use, AllowThumb32, false); + break; + + case ARM::ARMV7M: + setAttributeItem(CPU_arch_profile, MicroControllerProfile, false); + setAttributeItem(THUMB_ISA_use, AllowThumb32, false); + break; + + case ARM::ARMV8A: + setAttributeItem(CPU_arch_profile, ApplicationProfile, false); + setAttributeItem(ARM_ISA_use, Allowed, false); + setAttributeItem(THUMB_ISA_use, AllowThumb32, false); + setAttributeItem(MPextension_use, Allowed, false); + setAttributeItem(Virtualization_use, AllowTZVirtualization, false); + break; + + case ARM::IWMMXT: + setAttributeItem(ARM_ISA_use, Allowed, false); + setAttributeItem(THUMB_ISA_use, Allowed, false); + setAttributeItem(WMMX_arch, AllowWMMXv1, false); + break; + + case ARM::IWMMXT2: + setAttributeItem(ARM_ISA_use, Allowed, false); + setAttributeItem(THUMB_ISA_use, Allowed, false); + setAttributeItem(WMMX_arch, AllowWMMXv2, false); + break; + + default: + report_fatal_error("Unknown Arch: " + Twine(Arch)); + break; + } +} void ARMTargetELFStreamer::emitFPU(unsigned Value) { FPU = Value; } @@ -498,43 +813,43 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() { switch (FPU) { case ARM::VFP: case ARM::VFPV2: - setAttributeItem(ARMBuildAttrs::VFP_arch, + setAttributeItem(ARMBuildAttrs::FP_arch, ARMBuildAttrs::AllowFPv2, /* OverwriteExisting= */ false); break; case ARM::VFPV3: - setAttributeItem(ARMBuildAttrs::VFP_arch, + setAttributeItem(ARMBuildAttrs::FP_arch, ARMBuildAttrs::AllowFPv3A, /* OverwriteExisting= */ false); break; case ARM::VFPV3_D16: - setAttributeItem(ARMBuildAttrs::VFP_arch, + setAttributeItem(ARMBuildAttrs::FP_arch, ARMBuildAttrs::AllowFPv3B, /* OverwriteExisting= */ false); break; case ARM::VFPV4: - setAttributeItem(ARMBuildAttrs::VFP_arch, + setAttributeItem(ARMBuildAttrs::FP_arch, ARMBuildAttrs::AllowFPv4A, /* OverwriteExisting= */ false); break; case ARM::VFPV4_D16: - setAttributeItem(ARMBuildAttrs::VFP_arch, + setAttributeItem(ARMBuildAttrs::FP_arch, 
ARMBuildAttrs::AllowFPv4B, /* OverwriteExisting= */ false); break; case ARM::FP_ARMV8: - setAttributeItem(ARMBuildAttrs::VFP_arch, + setAttributeItem(ARMBuildAttrs::FP_arch, ARMBuildAttrs::AllowFPARMv8A, /* OverwriteExisting= */ false); break; case ARM::NEON: - setAttributeItem(ARMBuildAttrs::VFP_arch, + setAttributeItem(ARMBuildAttrs::FP_arch, ARMBuildAttrs::AllowFPv3A, /* OverwriteExisting= */ false); setAttributeItem(ARMBuildAttrs::Advanced_SIMD_arch, @@ -543,7 +858,7 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() { break; case ARM::NEON_VFPV4: - setAttributeItem(ARMBuildAttrs::VFP_arch, + setAttributeItem(ARMBuildAttrs::FP_arch, ARMBuildAttrs::AllowFPv4A, /* OverwriteExisting= */ false); setAttributeItem(ARMBuildAttrs::Advanced_SIMD_arch, @@ -553,7 +868,7 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() { case ARM::NEON_FP_ARMV8: case ARM::CRYPTO_NEON_FP_ARMV8: - setAttributeItem(ARMBuildAttrs::VFP_arch, + setAttributeItem(ARMBuildAttrs::FP_arch, ARMBuildAttrs::AllowFPARMv8A, /* OverwriteExisting= */ false); setAttributeItem(ARMBuildAttrs::Advanced_SIMD_arch, @@ -561,6 +876,9 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() { /* OverwriteExisting= */ false); break; + case ARM::SOFTVFP: + break; + default: report_fatal_error("Unknown FPU: " + Twine(FPU)); break; @@ -574,13 +892,18 @@ size_t ARMTargetELFStreamer::calculateContentSize() const { case AttributeItem::HiddenAttribute: break; case AttributeItem::NumericAttribute: - Result += getULEBSize(item.Tag); - Result += getULEBSize(item.IntValue); + Result += getULEB128Size(item.Tag); + Result += getULEB128Size(item.IntValue); break; case AttributeItem::TextAttribute: - Result += getULEBSize(item.Tag); + Result += getULEB128Size(item.Tag); Result += item.StringValue.size() + 1; // string + '\0' break; + case AttributeItem::NumericAndTextAttributes: + Result += getULEB128Size(item.Tag); + Result += getULEB128Size(item.IntValue); + Result += item.StringValue.size() + 1; // string + '\0'; + break; } } return Result; @@ -597,6 +920,9 @@ void ARMTargetELFStreamer::finishAttributeSection() { if (FPU != ARM::INVALID_FPU) emitFPUDefaultAttributes(); + if (Arch != ARM::INVALID_ARCH) + emitArchDefaultAttributes(); + if (Contents.empty()) return; @@ -648,6 +974,11 @@ void ARMTargetELFStreamer::finishAttributeSection() { Streamer.EmitBytes(item.StringValue.upper()); Streamer.EmitIntValue(0, 1); // '\0' break; + case AttributeItem::NumericAndTextAttributes: + Streamer.EmitULEB128IntValue(item.IntValue); + Streamer.EmitBytes(item.StringValue.upper()); + Streamer.EmitIntValue(0, 1); // '\0' + break; } } @@ -655,8 +986,41 @@ void ARMTargetELFStreamer::finishAttributeSection() { FPU = ARM::INVALID_FPU; } +void ARMTargetELFStreamer::emitLabel(MCSymbol *Symbol) { + ARMELFStreamer &Streamer = getStreamer(); + if (!Streamer.IsThumb) + return; + + const MCSymbolData &SD = Streamer.getOrCreateSymbolData(Symbol); + unsigned Type = MCELF::GetType(SD); + if (Type == ELF_STT_Func || Type == ELF_STT_GnuIFunc) + Streamer.EmitThumbFunc(Symbol); +} + +void +ARMTargetELFStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *S) { + getStreamer().EmitFixup(S, FK_Data_4); +} + +void ARMTargetELFStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) { + if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(Value)) { + const MCSymbol &Sym = SRE->getSymbol(); + if (!Sym.isDefined()) { + getStreamer().EmitAssignment(Symbol, Value); + return; + } + } + + getStreamer().EmitThumbFunc(Symbol); + getStreamer().EmitAssignment(Symbol, 
Value); +} + +void ARMTargetELFStreamer::emitInst(uint32_t Inst, char Suffix) { + getStreamer().emitInst(Inst, Suffix); +} + void ARMELFStreamer::FinishImpl() { - MCTargetStreamer &TS = getTargetStreamer(); + MCTargetStreamer &TS = *getTargetStreamer(); ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS); ATS.finishAttributeSection(); @@ -679,7 +1043,7 @@ inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix, } // Get .ARM.extab or .ARM.exidx section - const MCSectionELF *EHSection = NULL; + const MCSectionELF *EHSection = nullptr; if (const MCSymbol *Group = FnSection.getGroup()) { EHSection = getContext().getELFSection( EHSecName, Type, Flags | ELF::SHF_GROUP, Kind, @@ -691,7 +1055,7 @@ inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix, // Switch to .ARM.extab or .ARM.exidx section SwitchSection(EHSection); - EmitCodeAlignment(4, 0); + EmitCodeAlignment(4); } inline void ARMELFStreamer::SwitchToExTabSection(const MCSymbol &FnStart) { @@ -709,12 +1073,17 @@ inline void ARMELFStreamer::SwitchToExIdxSection(const MCSymbol &FnStart) { SectionKind::getDataRel(), FnStart); } +void ARMELFStreamer::EmitFixup(const MCExpr *Expr, MCFixupKind Kind) { + MCDataFragment *Frag = getOrCreateDataFragment(); + Frag->getFixups().push_back(MCFixup::Create(Frag->getContents().size(), Expr, + Kind)); +} void ARMELFStreamer::Reset() { - ExTab = NULL; - FnStart = NULL; - Personality = NULL; - PersonalityIndex = NUM_PERSONALITY_INDEX; + ExTab = nullptr; + FnStart = nullptr; + Personality = nullptr; + PersonalityIndex = ARM::EHABI::NUM_PERSONALITY_INDEX; FPReg = ARM::SP; FPOffset = 0; SPOffset = 0; @@ -727,13 +1096,13 @@ void ARMELFStreamer::Reset() { } void ARMELFStreamer::emitFnStart() { - assert(FnStart == 0); + assert(FnStart == nullptr); FnStart = getContext().CreateTempSymbol(); EmitLabel(FnStart); } void ARMELFStreamer::emitFnEnd() { - assert(FnStart && ".fnstart must preceeds .fnend"); + assert(FnStart && ".fnstart must precedes .fnend"); // Emit unwind opcodes if there is no .handlerdata directive if (!ExTab && !CantUnwind) @@ -742,7 +1111,7 @@ void ARMELFStreamer::emitFnEnd() { // Emit the exception index table entry SwitchToExIdxSection(*FnStart); - if (PersonalityIndex < NUM_PERSONALITY_INDEX) + if (PersonalityIndex < ARM::EHABI::NUM_PERSONALITY_INDEX) EmitPersonalityFixup(GetAEABIUnwindPersonalityName(PersonalityIndex)); const MCSymbolRefExpr *FnStartRef = @@ -753,7 +1122,7 @@ void ARMELFStreamer::emitFnEnd() { EmitValue(FnStartRef, 4); if (CantUnwind) { - EmitIntValue(EXIDX_CANTUNWIND, 4); + EmitIntValue(ARM::EHABI::EXIDX_CANTUNWIND, 4); } else if (ExTab) { // Emit a reference to the unwind opcodes in the ".ARM.extab" section. const MCSymbolRefExpr *ExTabEntryRef = @@ -765,12 +1134,15 @@ void ARMELFStreamer::emitFnEnd() { // For the __aeabi_unwind_cpp_pr0, we have to emit the unwind opcodes in // the second word of exception index table entry. The size of the unwind // opcodes should always be 4 bytes. 
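      // (Note the switch below from EmitBytes() to EmitIntValue(): the raw
      // byte copy hard-wired little-endian order, while EmitIntValue honours
      // the target's endianness, so big-endian EHABI tables now come out
      // with correctly ordered 32-bit unwind words.)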
- assert(PersonalityIndex == AEABI_UNWIND_CPP_PR0 && - "Compact model must use __aeabi_cpp_unwind_pr0 as personality"); + assert(PersonalityIndex == ARM::EHABI::AEABI_UNWIND_CPP_PR0 && + "Compact model must use __aeabi_unwind_cpp_pr0 as personality"); assert(Opcodes.size() == 4u && - "Unwind opcode size for __aeabi_cpp_unwind_pr0 must be equal to 4"); - EmitBytes(StringRef(reinterpret_cast<const char*>(Opcodes.data()), - Opcodes.size())); + "Unwind opcode size for __aeabi_unwind_cpp_pr0 must be equal to 4"); + uint64_t Intval = Opcodes[0] | + Opcodes[1] << 8 | + Opcodes[2] << 16 | + Opcodes[3] << 24; + EmitIntValue(Intval, Opcodes.size()); } // Switch to the section containing FnStart @@ -789,7 +1161,7 @@ void ARMELFStreamer::EmitPersonalityFixup(StringRef Name) { const MCSymbolRefExpr *PersonalityRef = MCSymbolRefExpr::Create( PersonalitySym, MCSymbolRefExpr::VK_ARM_NONE, getContext()); - AddValueSymbols(PersonalityRef); + visitUsedExpr(*PersonalityRef); MCDataFragment *DF = getOrCreateDataFragment(); DF->getFixups().push_back(MCFixup::Create(DF->getContents().size(), PersonalityRef, @@ -820,7 +1192,7 @@ void ARMELFStreamer::FlushUnwindOpcodes(bool NoHandlerData) { // For compact model 0, we have to emit the unwind opcodes in the .ARM.exidx // section. Thus, we don't have to create an entry in the .ARM.extab // section. - if (NoHandlerData && PersonalityIndex == AEABI_UNWIND_CPP_PR0) + if (NoHandlerData && PersonalityIndex == ARM::EHABI::AEABI_UNWIND_CPP_PR0) return; // Switch to .ARM.extab section. @@ -842,8 +1214,15 @@ void ARMELFStreamer::FlushUnwindOpcodes(bool NoHandlerData) { } // Emit unwind opcodes - EmitBytes(StringRef(reinterpret_cast<const char *>(Opcodes.data()), - Opcodes.size())); + assert((Opcodes.size() % 4) == 0 && + "Unwind opcode size for __aeabi_cpp_unwind_pr0 must be multiple of 4"); + for (unsigned I = 0; I != Opcodes.size(); I += 4) { + uint64_t Intval = Opcodes[I] | + Opcodes[I + 1] << 8 | + Opcodes[I + 2] << 16 | + Opcodes[I + 3] << 24; + EmitIntValue(Intval, 4); + } // According to ARM EHABI section 9.2, if the __aeabi_unwind_cpp_pr1() or // __aeabi_unwind_cpp_pr2() is used, then the handler data must be emitted @@ -863,6 +1242,11 @@ void ARMELFStreamer::emitPersonality(const MCSymbol *Per) { UnwindOpAsm.setPersonality(Per); } +void ARMELFStreamer::emitPersonalityIndex(unsigned Index) { + assert(Index < ARM::EHABI::NUM_PERSONALITY_INDEX && "invalid index"); + PersonalityIndex = Index; +} + void ARMELFStreamer::emitSetFP(unsigned NewFPReg, unsigned NewSPReg, int64_t Offset) { assert((NewSPReg == ARM::SP || NewSPReg == FPReg) && @@ -877,6 +1261,20 @@ void ARMELFStreamer::emitSetFP(unsigned NewFPReg, unsigned NewSPReg, FPOffset += Offset; } +void ARMELFStreamer::emitMovSP(unsigned Reg, int64_t Offset) { + assert((Reg != ARM::SP && Reg != ARM::PC) && + "the operand of .movsp cannot be either sp or pc"); + assert(FPReg == ARM::SP && "current FP must be SP"); + + FlushPendingOffset(); + + FPReg = Reg; + FPOffset = SPOffset + Offset; + + const MCRegisterInfo *MRI = getContext().getRegisterInfo(); + UnwindOpAsm.EmitSetSP(MRI->getEncodingValue(FPReg)); +} + void ARMELFStreamer::emitPad(int64_t Offset) { // Track the change of the $sp offset SPOffset -= Offset; @@ -916,27 +1314,37 @@ void ARMELFStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList, UnwindOpAsm.EmitRegSave(Mask); } +void ARMELFStreamer::emitUnwindRaw(int64_t Offset, + const SmallVectorImpl<uint8_t> &Opcodes) { + FlushPendingOffset(); + SPOffset = SPOffset - Offset; + UnwindOpAsm.EmitRaw(Opcodes); 
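  // (.unwind_raw hands pre-encoded EHABI opcodes straight to the table;
  // the Offset parameter records the net stack adjustment those opcodes
  // imply, so that later .pad/.setfp bookkeeping stays consistent.)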
+} + namespace llvm { MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, - bool isVerboseAsm, bool useLoc, bool useCFI, - bool useDwarfDirectory, + bool isVerboseAsm, bool useDwarfDirectory, MCInstPrinter *InstPrint, MCCodeEmitter *CE, MCAsmBackend *TAB, bool ShowInst) { - ARMTargetAsmStreamer *S = new ARMTargetAsmStreamer(OS, *InstPrint); + MCStreamer *S = llvm::createAsmStreamer( + Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst); + new ARMTargetAsmStreamer(*S, OS, *InstPrint, isVerboseAsm); + return S; +} - return llvm::createAsmStreamer(Ctx, S, OS, isVerboseAsm, useLoc, useCFI, - useDwarfDirectory, InstPrint, CE, TAB, - ShowInst); +MCStreamer *createARMNullStreamer(MCContext &Ctx) { + MCStreamer *S = llvm::createNullStreamer(Ctx); + new ARMTargetStreamer(*S); + return S; } MCELFStreamer* createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, MCCodeEmitter *Emitter, bool RelaxAll, bool NoExecStack, bool IsThumb) { - ARMTargetELFStreamer *TS = new ARMTargetELFStreamer(); - ARMELFStreamer *S = - new ARMELFStreamer(Context, TS, TAB, OS, Emitter, IsThumb); + ARMELFStreamer *S = new ARMELFStreamer(Context, TAB, OS, Emitter, IsThumb); + new ARMTargetELFStreamer(*S); // FIXME: This should eventually end up somewhere else where more // intelligent flag decisions can be made. For now we are just maintaining // the status quo for ARM and setting EF_ARM_EABI_VER5 as the default. diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h index 0085feb82069..bfd9e3364825 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h @@ -100,15 +100,6 @@ enum Fixups { fixup_t2_movt_hi16, // :upper16: fixup_t2_movw_lo16, // :lower16: - // It is possible to create an "immediate" that happens to be pcrel. - // movw r0, :lower16:Foo-(Bar+8) and movt r0, :upper16:Foo-(Bar+8) - // result in different reloc tags than the above two. 
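  // (These duplicate kinds became redundant once GetRelocType started
  // receiving an explicit IsPCRel flag: as the ARMELFObjectWriter hunk
  // earlier shows, the plain movw/movt fixups now map to
  // R_ARM_MOVW_PREL_NC / R_ARM_MOVT_PREL themselves whenever the fixup
  // is pc-relative.)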
- // Needed to support ELF::R_ARM_MOVT_PREL and ELF::R_ARM_MOVW_PREL_NC - fixup_arm_movt_hi16_pcrel, // :upper16: - fixup_arm_movw_lo16_pcrel, // :lower16: - fixup_t2_movt_hi16_pcrel, // :upper16: - fixup_t2_movw_lo16_pcrel, // :lower16: - // Marker LastTargetFixupKind, NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp index ad796e660e96..7a19208cffc7 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp @@ -13,19 +13,19 @@ #include "ARMMCAsmInfo.h" #include "llvm/Support/CommandLine.h" +#include "llvm/ADT/Triple.h" using namespace llvm; -cl::opt<bool> -EnableARMEHABI("arm-enable-ehabi", cl::Hidden, - cl::desc("Generate ARM EHABI tables"), - cl::init(false)); - - void ARMMCAsmInfoDarwin::anchor() { } -ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin() { - Data64bitsDirective = 0; +ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin(StringRef TT) { + Triple TheTriple(TT); + if ((TheTriple.getArch() == Triple::armeb) || + (TheTriple.getArch() == Triple::thumbeb)) + IsLittleEndian = false; + + Data64bitsDirective = nullptr; CommentString = "@"; Code16Directive = ".code\t16"; Code32Directive = ".code\t32"; @@ -35,17 +35,23 @@ ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin() { // Exceptions handling ExceptionsType = ExceptionHandling::SjLj; + + UseIntegratedAssembler = true; } void ARMELFMCAsmInfo::anchor() { } -ARMELFMCAsmInfo::ARMELFMCAsmInfo() { +ARMELFMCAsmInfo::ARMELFMCAsmInfo(StringRef TT) { + Triple TheTriple(TT); + if ((TheTriple.getArch() == Triple::armeb) || + (TheTriple.getArch() == Triple::thumbeb)) + IsLittleEndian = false; + // ".comm align is in bytes but .align is pow-2." AlignmentIsInBytes = false; - Data64bitsDirective = 0; + Data64bitsDirective = nullptr; CommentString = "@"; - PrivateGlobalPrefix = ".L"; Code16Directive = ".code\t16"; Code32Directive = ".code\t32"; @@ -53,6 +59,56 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo() { SupportsDebugInformation = true; // Exceptions handling - if (EnableARMEHABI) + switch (TheTriple.getOS()) { + case Triple::NetBSD: + ExceptionsType = ExceptionHandling::DwarfCFI; + break; + default: ExceptionsType = ExceptionHandling::ARM; + break; + } + + // foo(plt) instead of foo@plt + UseParensForSymbolVariant = true; + + UseIntegratedAssembler = true; +} + +void ARMELFMCAsmInfo::setUseIntegratedAssembler(bool Value) { + UseIntegratedAssembler = Value; + if (!UseIntegratedAssembler) { + // gas doesn't handle VFP register names in cfi directives, + // so don't use register names with external assembler. 
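    // (With DwarfRegNumForCFI set, .cfi_offset prints the raw DWARF
    // register number (264 for d8 under the ARM numbering), which gas
    // does accept.)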
+ // See https://sourceware.org/bugzilla/show_bug.cgi?id=16694 + DwarfRegNumForCFI = true; + } +} + +void ARMCOFFMCAsmInfoMicrosoft::anchor() { } + +ARMCOFFMCAsmInfoMicrosoft::ARMCOFFMCAsmInfoMicrosoft() { + AlignmentIsInBytes = false; + + PrivateGlobalPrefix = "$M"; } + +void ARMCOFFMCAsmInfoGNU::anchor() { } + +ARMCOFFMCAsmInfoGNU::ARMCOFFMCAsmInfoGNU() { + AlignmentIsInBytes = false; + HasSingleParameterDotFile = true; + + CommentString = "@"; + Code16Directive = ".code\t16"; + Code32Directive = ".code\t32"; + PrivateGlobalPrefix = ".L"; + + HasLEB128 = true; + SupportsDebugInformation = true; + ExceptionsType = ExceptionHandling::None; + UseParensForSymbolVariant = true; + + UseIntegratedAssembler = false; + DwarfRegNumForCFI = true; +} + diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h index e1f716d936ad..51cfa0adc1a9 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h @@ -14,21 +14,36 @@ #ifndef LLVM_ARMTARGETASMINFO_H #define LLVM_ARMTARGETASMINFO_H +#include "llvm/MC/MCAsmInfoCOFF.h" #include "llvm/MC/MCAsmInfoDarwin.h" #include "llvm/MC/MCAsmInfoELF.h" namespace llvm { class ARMMCAsmInfoDarwin : public MCAsmInfoDarwin { - virtual void anchor(); + void anchor() override; public: - explicit ARMMCAsmInfoDarwin(); + explicit ARMMCAsmInfoDarwin(StringRef TT); }; class ARMELFMCAsmInfo : public MCAsmInfoELF { - virtual void anchor(); + void anchor() override; public: - explicit ARMELFMCAsmInfo(); + explicit ARMELFMCAsmInfo(StringRef TT); + + void setUseIntegratedAssembler(bool Value) override; + }; + + class ARMCOFFMCAsmInfoMicrosoft : public MCAsmInfoMicrosoft { + void anchor() override; + public: + explicit ARMCOFFMCAsmInfoMicrosoft(); + }; + + class ARMCOFFMCAsmInfoGNU : public MCAsmInfoGNUCOFF { + void anchor() override; + public: + explicit ARMCOFFMCAsmInfoGNU(); }; } // namespace llvm diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index 4382d0d97144..b8ee55574972 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "mccodeemitter" #include "MCTargetDesc/ARMMCTargetDesc.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" @@ -26,10 +25,13 @@ #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "mccodeemitter" + STATISTIC(MCNumEmitted, "Number of MC instructions emitted."); STATISTIC(MCNumCPRelocations, "Number of constant pool relocations created."); @@ -38,27 +40,25 @@ class ARMMCCodeEmitter : public MCCodeEmitter { ARMMCCodeEmitter(const ARMMCCodeEmitter &) LLVM_DELETED_FUNCTION; void operator=(const ARMMCCodeEmitter &) LLVM_DELETED_FUNCTION; const MCInstrInfo &MCII; - const MCSubtargetInfo &STI; const MCContext &CTX; + bool IsLittleEndian; public: - ARMMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, - MCContext &ctx) - : MCII(mcii), STI(sti), CTX(ctx) { + ARMMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx, bool IsLittle) + : MCII(mcii), CTX(ctx), IsLittleEndian(IsLittle) { } ~ARMMCCodeEmitter() {} - 
bool isThumb() const { - // FIXME: Can tablegen auto-generate this? + bool isThumb(const MCSubtargetInfo &STI) const { return (STI.getFeatureBits() & ARM::ModeThumb) != 0; } - bool isThumb2() const { - return isThumb() && (STI.getFeatureBits() & ARM::FeatureThumb2) != 0; + bool isThumb2(const MCSubtargetInfo &STI) const { + return isThumb(STI) && (STI.getFeatureBits() & ARM::FeatureThumb2) != 0; } - bool isTargetDarwin() const { + bool isTargetMachO(const MCSubtargetInfo &STI) const { Triple TT(STI.getTargetTriple()); - return TT.isOSDarwin(); + return TT.isOSBinFormatMachO(); } unsigned getMachineSoImmOpValue(unsigned SoImm) const; @@ -66,107 +66,131 @@ public: // getBinaryCodeForInstr - TableGen'erated function for getting the // binary encoding for an instruction. uint64_t getBinaryCodeForInstr(const MCInst &MI, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; /// getMachineOpValue - Return binary encoding of operand. If the machine /// operand requires relocation, record the relocation and return zero. unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; /// getHiLo16ImmOpValue - Return the encoding for the hi / low 16-bit of /// the specified operand. This is used for operands with :lower16: and /// :upper16: prefixes. uint32_t getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; bool EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx, unsigned &Reg, unsigned &Imm, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; /// getThumbBLTargetOpValue - Return encoding info for Thumb immediate /// BL branch target. uint32_t getThumbBLTargetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; /// getThumbBLXTargetOpValue - Return encoding info for Thumb immediate /// BLX branch target. uint32_t getThumbBLXTargetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; /// getThumbBRTargetOpValue - Return encoding info for Thumb branch target. uint32_t getThumbBRTargetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; /// getThumbBCCTargetOpValue - Return encoding info for Thumb branch target. uint32_t getThumbBCCTargetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; /// getThumbCBTargetOpValue - Return encoding info for Thumb branch target. uint32_t getThumbCBTargetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; /// getBranchTargetOpValue - Return encoding info for 24-bit immediate /// branch target. uint32_t getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; /// getUnconditionalBranchTargetOpValue - Return encoding info for 24-bit /// immediate Thumb2 direct branch target. 
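/// (Editor's note, for reference: in the Thumb2 B.W encoding the offset is
/// scattered across the instruction as S:imm10 in the first halfword and
/// J1:J2:imm11 in the second, with I1 = NOT(J1 XOR S) and I2 = NOT(J2 XOR S)
/// per the ARM ARM, which is why this operand needs its own encoder rather
/// than a plain shifted immediate.)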
uint32_t getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; /// getARMBranchTargetOpValue - Return encoding info for 24-bit immediate /// branch target. uint32_t getARMBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; uint32_t getARMBLTargetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; uint32_t getARMBLXTargetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; /// getAdrLabelOpValue - Return encoding info for 12-bit immediate /// ADR label target. uint32_t getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; uint32_t getThumbAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; uint32_t getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; /// getAddrModeImm12OpValue - Return encoding info for 'reg +/- imm12' /// operand. uint32_t getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; /// getThumbAddrModeRegRegOpValue - Return encoding for 'reg + reg' operand. uint32_t getThumbAddrModeRegRegOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups)const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; /// getT2AddrModeImm8s4OpValue - Return encoding info for 'reg +/- imm8<<2' /// operand. uint32_t getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; /// getT2AddrModeImm0_1020s4OpValue - Return encoding info for 'reg + imm8<<2' /// operand. uint32_t getT2AddrModeImm0_1020s4OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; /// getT2Imm8s4OpValue - Return encoding info for '+/- imm8<<2' /// operand. uint32_t getT2Imm8s4OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; /// getLdStSORegOpValue - Return encoding info for 'reg +/- reg shop imm' /// operand as needed by load/store instructions. uint32_t getLdStSORegOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; /// getLdStmModeOpValue - Return encoding for load/store multiple mode. 
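/// (Editor's note: the ia/ib/da/db sub-modes correspond to the P and U bits
/// of the LDM/STM encoding, where U selects increment vs. decrement and P
/// selects before vs. after, so the switch below only has to map each
/// sub-mode to a two-bit value.)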
uint32_t getLdStmModeOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { ARM_AM::AMSubMode Mode = (ARM_AM::AMSubMode)MI.getOperand(OpIdx).getImm(); switch (Mode) { default: llvm_unreachable("Unknown addressing sub-mode!"); @@ -192,44 +216,54 @@ public: /// getAddrMode2OpValue - Return encoding for addrmode2 operands. uint32_t getAddrMode2OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; /// getAddrMode2OffsetOpValue - Return encoding for am2offset operands. uint32_t getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; /// getPostIdxRegOpValue - Return encoding for postidx_reg operands. uint32_t getPostIdxRegOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; /// getAddrMode3OffsetOpValue - Return encoding for am3offset operands. uint32_t getAddrMode3OffsetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; /// getAddrMode3OpValue - Return encoding for addrmode3 operands. uint32_t getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; /// getAddrModeThumbSPOpValue - Return encoding info for 'reg +/- imm12' /// operand. uint32_t getAddrModeThumbSPOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; /// getAddrModeISOpValue - Encode the t_addrmode_is# operands. uint32_t getAddrModeISOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; /// getAddrModePCOpValue - Return encoding for t_addrmode_pc operands. uint32_t getAddrModePCOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; /// getAddrMode5OpValue - Return encoding info for 'reg +/- imm8' operand. uint32_t getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; /// getCCOutOpValue - Return encoding of the 's' bit. unsigned getCCOutOpValue(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { // The operand is either reg0 or CPSR. The 's' bit is encoded as '0' or // '1' respectively. return MI.getOperand(Op).getReg() == ARM::CPSR; @@ -237,8 +271,27 @@ public: /// getSOImmOpValue - Return an encoded 12-bit shifted-immediate value. unsigned getSOImmOpValue(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups) const { - unsigned SoImm = MI.getOperand(Op).getImm(); + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + + const MCOperand &MO = MI.getOperand(Op); + + // We expect MO to be an immediate or an expression. If it is an immediate, + // just encode the value directly. Otherwise, create a fixup.
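// (Editor's note: an ARM so_imm packs an 8-bit value and an even rotate-right
// amount into 12 bits: {11-8} = rot, {7-0} = imm8, value = imm8 ROR (2*rot).
// A minimal standalone sketch of the immediate path, assuming only the
// contract of ARM_AM::getSOImmVal (returns -1 when unencodable); the helper
// name encodeSOImm is hypothetical and for illustration only:
//
//   static int encodeSOImm(uint32_t V) {
//     for (unsigned Rot = 0; Rot != 16; ++Rot) {
//       // Rotating V left by 2*Rot undoes the ROR in the encoding.
//       uint32_t Imm = Rot ? ((V << (2*Rot)) | (V >> (32 - 2*Rot))) : V;
//       if (Imm <= 0xFF)
//         return (Rot << 8) | Imm;   // e.g. 0xFF000000 encodes as 0x4FF
//     }
//     return -1; // not representable as an so_imm
//   }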
+ if (MO.isExpr()) { + const MCExpr *Expr = MO.getExpr(); + // In instruction code this value is always encoded as the lowest 12 bits, + // so we don't have to perform any specific adjustments. + // Due to the requirements of relocatable records we have to use FK_Data_4. + // See ARMELFObjectWriter::ExplicitRelSym and + // ARMELFObjectWriter::GetRelocTypeInner for more details. + MCFixupKind Kind = MCFixupKind(FK_Data_4); + Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc())); + return 0; + } + + unsigned SoImm = MO.getImm(); int SoImmVal = ARM_AM::getSOImmVal(SoImm); assert(SoImmVal != -1 && "Not a valid so_imm value!"); @@ -253,7 +306,8 @@ public: /// getT2SOImmOpValue - Return an encoded 12-bit shifted-immediate value. unsigned getT2SOImmOpValue(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { unsigned SoImm = MI.getOperand(Op).getImm(); unsigned Encoded = ARM_AM::getT2SOImmVal(SoImm); assert(Encoded != ~0U && "Not a Thumb2 so_imm value?"); @@ -261,64 +315,88 @@ public: } unsigned getT2AddrModeSORegOpValue(const MCInst &MI, unsigned OpNum, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; unsigned getT2AddrModeImm8OpValue(const MCInst &MI, unsigned OpNum, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; unsigned getT2AddrModeImm8OffsetOpValue(const MCInst &MI, unsigned OpNum, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; unsigned getT2AddrModeImm12OffsetOpValue(const MCInst &MI, unsigned OpNum, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; /// getSORegOpValue - Return an encoded so_reg shifted register value.
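/// (Editor's note: so_reg is ARM's "flexible second operand": Rm shifted
/// either by an immediate or by a register, with a two-bit shift type
/// (LSL/LSR/ASR/ROR); the two encoders below handle the register-shifted
/// and immediate-shifted forms respectively.)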
unsigned getSORegRegOpValue(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; unsigned getSORegImmOpValue(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; unsigned getT2SORegOpValue(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; unsigned getNEONVcvtImm32OpValue(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { return 64 - MI.getOperand(Op).getImm(); } unsigned getBitfieldInvertedMaskOpValue(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; unsigned getRegisterListOpValue(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; unsigned getAddrMode6AddressOpValue(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; unsigned getAddrMode6OneLane32AddressOpValue(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; unsigned getAddrMode6DupAddressOpValue(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; unsigned getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; unsigned getShiftRight8Imm(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; unsigned getShiftRight16Imm(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; unsigned getShiftRight32Imm(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; unsigned getShiftRight64Imm(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; unsigned getThumbSRImmOpValue(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; unsigned NEONThumb2DataIPostEncoder(const MCInst &MI, - unsigned EncodedValue) const; + unsigned EncodedValue, + const MCSubtargetInfo &STI) const; unsigned NEONThumb2LoadStorePostEncoder(const MCInst &MI, - unsigned EncodedValue) const; + unsigned EncodedValue, + const MCSubtargetInfo &STI) const; unsigned NEONThumb2DupPostEncoder(const MCInst &MI, - unsigned EncodedValue) const; + unsigned EncodedValue, + const MCSubtargetInfo &STI) const; unsigned NEONThumb2V8PostEncoder(const MCInst &MI, - unsigned EncodedValue) const; + unsigned EncodedValue, + const MCSubtargetInfo &STI) const; unsigned VFPThumb2PostEncoder(const MCInst &MI, - unsigned EncodedValue) const; + unsigned EncodedValue, + const MCSubtargetInfo &STI) const; void EmitByte(unsigned char C, raw_ostream &OS) const { OS << (char)C; @@ -327,30 
+405,39 @@ public: void EmitConstant(uint64_t Val, unsigned Size, raw_ostream &OS) const { // Output the constant in little or big endian byte order, per IsLittleEndian. for (unsigned i = 0; i != Size; ++i) { - EmitByte(Val & 255, OS); - Val >>= 8; + unsigned Shift = IsLittleEndian ? i * 8 : (Size - 1 - i) * 8; + EmitByte((Val >> Shift) & 0xff, OS); } } void EncodeInstruction(const MCInst &MI, raw_ostream &OS, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const override; }; } // end anonymous namespace -MCCodeEmitter *llvm::createARMMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, - MCContext &Ctx) { - return new ARMMCCodeEmitter(MCII, STI, Ctx); +MCCodeEmitter *llvm::createARMLEMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI, + MCContext &Ctx) { + return new ARMMCCodeEmitter(MCII, Ctx, true); +} + +MCCodeEmitter *llvm::createARMBEMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI, + MCContext &Ctx) { + return new ARMMCCodeEmitter(MCII, Ctx, false); } /// NEONThumb2DataIPostEncoder - Post-process encoded NEON data-processing /// instructions, and rewrite them to their Thumb2 form if we are currently in /// Thumb2 mode. unsigned ARMMCCodeEmitter::NEONThumb2DataIPostEncoder(const MCInst &MI, - unsigned EncodedValue) const { - if (isThumb2()) { + unsigned EncodedValue, + const MCSubtargetInfo &STI) const { + if (isThumb2(STI)) { // NEON Thumb2 data-processing encodings are very simple: bit 24 is moved // to bit 12 of the high half-word (i.e. bit 28), and bits 27-24 are // set to 1111. @@ -368,8 +455,9 @@ unsigned ARMMCCodeEmitter::NEONThumb2DataIPostEncoder(const MCInst &MI, /// instructions, and rewrite them to their Thumb2 form if we are currently in /// Thumb2 mode. unsigned ARMMCCodeEmitter::NEONThumb2LoadStorePostEncoder(const MCInst &MI, - unsigned EncodedValue) const { - if (isThumb2()) { + unsigned EncodedValue, + const MCSubtargetInfo &STI) const { + if (isThumb2(STI)) { EncodedValue &= 0xF0FFFFFF; EncodedValue |= 0x09000000; } @@ -381,8 +469,9 @@ unsigned ARMMCCodeEmitter::NEONThumb2LoadStorePostEncoder(const MCInst &MI, /// instructions, and rewrite them to their Thumb2 form if we are currently in /// Thumb2 mode. unsigned ARMMCCodeEmitter::NEONThumb2DupPostEncoder(const MCInst &MI, - unsigned EncodedValue) const { - if (isThumb2()) { + unsigned EncodedValue, + const MCSubtargetInfo &STI) const { + if (isThumb2(STI)) { EncodedValue &= 0x00FFFFFF; EncodedValue |= 0xEE000000; } @@ -393,8 +482,9 @@ unsigned ARMMCCodeEmitter::NEONThumb2DupPostEncoder(const MCInst &MI, /// Post-process encoded NEON v8 instructions, and rewrite them to Thumb2 form /// if we are in Thumb2. unsigned ARMMCCodeEmitter::NEONThumb2V8PostEncoder(const MCInst &MI, - unsigned EncodedValue) const { - if (isThumb2()) { + unsigned EncodedValue, + const MCSubtargetInfo &STI) const { + if (isThumb2(STI)) { EncodedValue |= 0xC000000; // Set bits 27-26 } @@ -404,8 +494,9 @@ unsigned ARMMCCodeEmitter::NEONThumb2V8PostEncoder(const MCInst &MI, /// VFPThumb2PostEncoder - Post-process encoded VFP instructions and rewrite /// them to their Thumb2 form if we are currently in Thumb2 mode.
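/// (Editor's note: concretely, an ARM-mode VFP encoding carries a condition
/// code in bits {31-28}, while the equivalent Thumb2 encoding fixes that
/// field to 0b1110, so the post-encoder below masks off the top nibble and
/// ORs in 0xE0000000.)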
unsigned ARMMCCodeEmitter:: -VFPThumb2PostEncoder(const MCInst &MI, unsigned EncodedValue) const { - if (isThumb2()) { +VFPThumb2PostEncoder(const MCInst &MI, unsigned EncodedValue, + const MCSubtargetInfo &STI) const { + if (isThumb2(STI)) { EncodedValue &= 0x0FFFFFFF; EncodedValue |= 0xE0000000; } @@ -416,7 +507,8 @@ VFPThumb2PostEncoder(const MCInst &MI, unsigned EncodedValue) const { /// operand requires relocation, record the relocation and return zero. unsigned ARMMCCodeEmitter:: getMachineOpValue(const MCInst &MI, const MCOperand &MO, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { if (MO.isReg()) { unsigned Reg = MO.getReg(); unsigned RegNo = CTX.getRegisterInfo()->getEncodingValue(Reg); @@ -444,7 +536,8 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, /// getAddrModeImmOpValue - Return encoding info for 'reg +/- imm' operand. bool ARMMCCodeEmitter:: EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx, unsigned &Reg, - unsigned &Imm, SmallVectorImpl<MCFixup> &Fixups) const { + unsigned &Imm, SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx + 1); @@ -473,7 +566,8 @@ EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx, unsigned &Reg, /// which is either an immediate or requires a fixup. static uint32_t getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, unsigned FixupKind, - SmallVectorImpl<MCFixup> &Fixups) { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) { const MCOperand &MO = MI.getOperand(OpIdx); // If the destination is an immediate, we have nothing to do. @@ -509,11 +603,12 @@ static int32_t encodeThumbBLOffset(int32_t offset) { /// getThumbBLTargetOpValue - Return encoding info for immediate branch target. uint32_t ARMMCCodeEmitter:: getThumbBLTargetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand MO = MI.getOperand(OpIdx); if (MO.isExpr()) return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_bl, - Fixups); + Fixups, STI); return encodeThumbBLOffset(MO.getImm()); } @@ -521,43 +616,47 @@ getThumbBLTargetOpValue(const MCInst &MI, unsigned OpIdx, /// BLX branch target. uint32_t ARMMCCodeEmitter:: getThumbBLXTargetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand MO = MI.getOperand(OpIdx); if (MO.isExpr()) return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_blx, - Fixups); + Fixups, STI); return encodeThumbBLOffset(MO.getImm()); } /// getThumbBRTargetOpValue - Return encoding info for Thumb branch target. uint32_t ARMMCCodeEmitter:: getThumbBRTargetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand MO = MI.getOperand(OpIdx); if (MO.isExpr()) return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_br, - Fixups); + Fixups, STI); return (MO.getImm() >> 1); } /// getThumbBCCTargetOpValue - Return encoding info for Thumb branch target. 
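/// (Editor's note: the conditional Thumb branch, B<cond> encoding T1, carries
/// a signed offset counted in halfwords, which is why the resolved case below
/// returns MO.getImm() >> 1 while the unresolved case emits a
/// fixup_arm_thumb_bcc instead.)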
uint32_t ARMMCCodeEmitter:: getThumbBCCTargetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand MO = MI.getOperand(OpIdx); if (MO.isExpr()) return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_bcc, - Fixups); + Fixups, STI); return (MO.getImm() >> 1); } /// getThumbCBTargetOpValue - Return encoding info for Thumb branch target. uint32_t ARMMCCodeEmitter:: getThumbCBTargetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand MO = MI.getOperand(OpIdx); if (MO.isExpr()) - return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_cb, Fixups); + return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_cb, Fixups, STI); return (MO.getImm() >> 1); } @@ -582,27 +681,29 @@ static bool HasConditionalBranch(const MCInst &MI) { /// target. uint32_t ARMMCCodeEmitter:: getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { // FIXME: This really, really shouldn't use TargetMachine. We don't want // coupling between MC and TM anywhere we can help it. - if (isThumb2()) + if (isThumb2(STI)) return - ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_condbranch, Fixups); - return getARMBranchTargetOpValue(MI, OpIdx, Fixups); + ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_condbranch, Fixups, STI); + return getARMBranchTargetOpValue(MI, OpIdx, Fixups, STI); } /// getBranchTargetOpValue - Return encoding info for 24-bit immediate branch /// target. uint32_t ARMMCCodeEmitter:: getARMBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand MO = MI.getOperand(OpIdx); if (MO.isExpr()) { if (HasConditionalBranch(MI)) return ::getBranchTargetOpValue(MI, OpIdx, - ARM::fixup_arm_condbranch, Fixups); + ARM::fixup_arm_condbranch, Fixups, STI); return ::getBranchTargetOpValue(MI, OpIdx, - ARM::fixup_arm_uncondbranch, Fixups); + ARM::fixup_arm_uncondbranch, Fixups, STI); } return MO.getImm() >> 2; @@ -610,13 +711,14 @@ getARMBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, uint32_t ARMMCCodeEmitter:: getARMBLTargetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand MO = MI.getOperand(OpIdx); if (MO.isExpr()) { if (HasConditionalBranch(MI)) return ::getBranchTargetOpValue(MI, OpIdx, - ARM::fixup_arm_condbl, Fixups); - return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_uncondbl, Fixups); + ARM::fixup_arm_condbl, Fixups, STI); + return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_uncondbl, Fixups, STI); } return MO.getImm() >> 2; @@ -624,10 +726,11 @@ getARMBLTargetOpValue(const MCInst &MI, unsigned OpIdx, uint32_t ARMMCCodeEmitter:: getARMBLXTargetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand MO = MI.getOperand(OpIdx); if (MO.isExpr()) - return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_blx, Fixups); + return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_blx, Fixups, STI); return MO.getImm() >> 1; } @@ -636,12 +739,13 @@ 
getARMBLXTargetOpValue(const MCInst &MI, unsigned OpIdx, /// immediate branch target. uint32_t ARMMCCodeEmitter:: getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { unsigned Val = 0; const MCOperand MO = MI.getOperand(OpIdx); if(MO.isExpr()) - return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_uncondbranch, Fixups); + return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_uncondbranch, Fixups, STI); else Val = MO.getImm() >> 1; @@ -665,11 +769,12 @@ getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, /// ADR label target. uint32_t ARMMCCodeEmitter:: getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand MO = MI.getOperand(OpIdx); if (MO.isExpr()) return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_adr_pcrel_12, - Fixups); + Fixups, STI); int64_t offset = MO.getImm(); uint32_t Val = 0x2000; @@ -705,11 +810,12 @@ getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, /// target. uint32_t ARMMCCodeEmitter:: getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand MO = MI.getOperand(OpIdx); if (MO.isExpr()) return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_adr_pcrel_12, - Fixups); + Fixups, STI); int32_t Val = MO.getImm(); if (Val == INT32_MIN) Val = 0x1000; @@ -724,11 +830,12 @@ getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx, /// target. uint32_t ARMMCCodeEmitter:: getThumbAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand MO = MI.getOperand(OpIdx); if (MO.isExpr()) return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_thumb_adr_pcrel_10, - Fixups); + Fixups, STI); return MO.getImm(); } @@ -736,7 +843,8 @@ getThumbAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, /// operand. uint32_t ARMMCCodeEmitter:: getThumbAddrModeRegRegOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &) const { + SmallVectorImpl<MCFixup> &, + const MCSubtargetInfo &STI) const { // [Rn, Rm] // {5-3} = Rm // {2-0} = Rn @@ -750,7 +858,8 @@ getThumbAddrModeRegRegOpValue(const MCInst &MI, unsigned OpIdx, /// getAddrModeImm12OpValue - Return encoding info for 'reg +/- imm12' operand. uint32_t ARMMCCodeEmitter:: getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { // {17-13} = reg // {12} = (U)nsigned (add == '1', sub == '0') // {11-0} = imm12 @@ -767,7 +876,7 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx, isAdd = false ; // 'U' bit is set as part of the fixup. MCFixupKind Kind; - if (isThumb2()) + if (isThumb2(STI)) Kind = MCFixupKind(ARM::fixup_t2_ldst_pcrel_12); else Kind = MCFixupKind(ARM::fixup_arm_ldst_pcrel_12); @@ -787,7 +896,7 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx, Imm12 = Offset; } } else - isAdd = EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm12, Fixups); + isAdd = EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm12, Fixups, STI); uint32_t Binary = Imm12 & 0xfff; // Immediate is always encoded as positive. The 'U' bit controls add vs sub. 
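// (Editor's note: a worked example of the {17-13} = Rn, {12} = U,
// {11-0} = imm12 layout described above, assuming the usual continuation of
// this function that ORs in the U bit and Rn: for an operand like [r3, #-8]
// we get Reg = 3, isAdd = false, Imm12 = 8, and the final value is
// 8 | (0 << 12) | (3 << 13) = 0x6008.)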
@@ -801,7 +910,8 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx, /// '+/- imm8<<2' operand. uint32_t ARMMCCodeEmitter:: getT2Imm8s4OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { // FIXME: The immediate operand should have already been encoded like this // before ever getting here. The encoder method should just need to combine // the MI operands for the register and the offset into a single @@ -832,7 +942,8 @@ getT2Imm8s4OpValue(const MCInst &MI, unsigned OpIdx, /// 'reg +/- imm8<<2' operand. uint32_t ARMMCCodeEmitter:: getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { // {12-9} = reg // {8} = (U)nsigned (add == '1', sub == '0') // {7-0} = imm8 @@ -852,7 +963,7 @@ getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx, ++MCNumCPRelocations; } else - isAdd = EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm8, Fixups); + isAdd = EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm8, Fixups, STI); // FIXME: The immediate operand should have already been encoded like this // before ever getting here. The encoder method should just need to combine @@ -872,7 +983,8 @@ getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx, /// 'reg + imm8<<2' operand. uint32_t ARMMCCodeEmitter:: getT2AddrModeImm0_1020s4OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { // {11-8} = reg // {7-0} = imm8 const MCOperand &MO = MI.getOperand(OpIdx); @@ -882,22 +994,10 @@ getT2AddrModeImm0_1020s4OpValue(const MCInst &MI, unsigned OpIdx, return (Reg << 8) | Imm8; } -// FIXME: This routine assumes that a binary -// expression will always result in a PCRel expression -// In reality, its only true if one or more subexpressions -// is itself a PCRel (i.e. "." in asm or some other pcrel construct) -// but this is good enough for now. -static bool EvaluateAsPCRel(const MCExpr *Expr) { - switch (Expr->getKind()) { - default: llvm_unreachable("Unexpected expression type"); - case MCExpr::SymbolRef: return false; - case MCExpr::Binary: return true; - } -} - uint32_t ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { // {20-16} = imm{15-12} // {11-0} = imm{11-0} const MCOperand &MO = MI.getOperand(OpIdx); @@ -912,51 +1012,48 @@ ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx, const ARMMCExpr *ARM16Expr = cast<ARMMCExpr>(E); E = ARM16Expr->getSubExpr(); + if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(E)) { + const int64_t Value = MCE->getValue(); + if (Value > UINT32_MAX) + report_fatal_error("constant value truncated (limited to 32-bit)"); + + switch (ARM16Expr->getKind()) { + case ARMMCExpr::VK_ARM_HI16: + return (int32_t(Value) & 0xffff0000) >> 16; + case ARMMCExpr::VK_ARM_LO16: + return (int32_t(Value) & 0x0000ffff); + default: llvm_unreachable("Unsupported ARMFixup"); + } + } + switch (ARM16Expr->getKind()) { default: llvm_unreachable("Unsupported ARMFixup"); case ARMMCExpr::VK_ARM_HI16: - if (!isTargetDarwin() && EvaluateAsPCRel(E)) - Kind = MCFixupKind(isThumb2() - ? ARM::fixup_t2_movt_hi16_pcrel - : ARM::fixup_arm_movt_hi16_pcrel); - else - Kind = MCFixupKind(isThumb2() - ? 
ARM::fixup_t2_movt_hi16 - : ARM::fixup_arm_movt_hi16); + Kind = MCFixupKind(isThumb2(STI) ? ARM::fixup_t2_movt_hi16 + : ARM::fixup_arm_movt_hi16); break; case ARMMCExpr::VK_ARM_LO16: - if (!isTargetDarwin() && EvaluateAsPCRel(E)) - Kind = MCFixupKind(isThumb2() - ? ARM::fixup_t2_movw_lo16_pcrel - : ARM::fixup_arm_movw_lo16_pcrel); - else - Kind = MCFixupKind(isThumb2() - ? ARM::fixup_t2_movw_lo16 - : ARM::fixup_arm_movw_lo16); + Kind = MCFixupKind(isThumb2(STI) ? ARM::fixup_t2_movw_lo16 + : ARM::fixup_arm_movw_lo16); break; } + Fixups.push_back(MCFixup::Create(0, E, Kind, MI.getLoc())); return 0; } // If the expression doesn't have :upper16: or :lower16: on it, - // it's just a plain immediate expression, and those evaluate to + // it's just a plain immediate expression, previously those evaluated to // the lower 16 bits of the expression regardless of whether - // we have a movt or a movw. - if (!isTargetDarwin() && EvaluateAsPCRel(E)) - Kind = MCFixupKind(isThumb2() - ? ARM::fixup_t2_movw_lo16_pcrel - : ARM::fixup_arm_movw_lo16_pcrel); - else - Kind = MCFixupKind(isThumb2() - ? ARM::fixup_t2_movw_lo16 - : ARM::fixup_arm_movw_lo16); - Fixups.push_back(MCFixup::Create(0, E, Kind, MI.getLoc())); - return 0; + // we have a movt or a movw, but that led to misleading results. + // This is now disallowed in the AsmParser in validateInstruction(), + // so this should never happen. + llvm_unreachable("expression without :upper16: or :lower16:"); } uint32_t ARMMCCodeEmitter:: getLdStSORegOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx+1); const MCOperand &MO2 = MI.getOperand(OpIdx+2); @@ -989,21 +1086,23 @@ getLdStSORegOpValue(const MCInst &MI, unsigned OpIdx, uint32_t ARMMCCodeEmitter:: getAddrMode2OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { // {17-14} Rn // {13} 1 == imm12, 0 == Rm // {12} isAdd // {11-0} imm12/Rm const MCOperand &MO = MI.getOperand(OpIdx); unsigned Rn = CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); - uint32_t Binary = getAddrMode2OffsetOpValue(MI, OpIdx + 1, Fixups); + uint32_t Binary = getAddrMode2OffsetOpValue(MI, OpIdx + 1, Fixups, STI); Binary |= Rn << 14; return Binary; } uint32_t ARMMCCodeEmitter:: getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { // {13} 1 == imm12, 0 == Rm // {12} isAdd // {11-0} imm12/Rm @@ -1025,7 +1124,8 @@ getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx, uint32_t ARMMCCodeEmitter:: getPostIdxRegOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { // {4} isAdd // {3-0} Rm const MCOperand &MO = MI.getOperand(OpIdx); @@ -1036,7 +1136,8 @@ getPostIdxRegOpValue(const MCInst &MI, unsigned OpIdx, uint32_t ARMMCCodeEmitter:: getAddrMode3OffsetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { // {9} 1 == imm8, 0 == Rm // {8} isAdd // {7-4} imm7_4/zero @@ -1055,7 +1156,8 @@ getAddrMode3OffsetOpValue(const MCInst &MI, unsigned OpIdx, uint32_t ARMMCCodeEmitter:: getAddrMode3OpValue(const
MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { // {13} 1 == imm8, 0 == Rm // {12-9} Rn // {8} isAdd @@ -1091,7 +1193,8 @@ getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx, /// getAddrModeThumbSPOpValue - Encode the t_addrmode_sp operands. uint32_t ARMMCCodeEmitter:: getAddrModeThumbSPOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { // [SP, #imm] // {7-0} = imm8 const MCOperand &MO1 = MI.getOperand(OpIdx + 1); @@ -1106,7 +1209,8 @@ getAddrModeThumbSPOpValue(const MCInst &MI, unsigned OpIdx, /// getAddrModeISOpValue - Encode the t_addrmode_is# operands. uint32_t ARMMCCodeEmitter:: getAddrModeISOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { // [Rn, #imm] // {7-3} = imm5 // {2-0} = Rn @@ -1120,17 +1224,19 @@ getAddrModeISOpValue(const MCInst &MI, unsigned OpIdx, /// getAddrModePCOpValue - Return encoding for t_addrmode_pc operands. uint32_t ARMMCCodeEmitter:: getAddrModePCOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand MO = MI.getOperand(OpIdx); if (MO.isExpr()) - return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_cp, Fixups); + return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_cp, Fixups, STI); return (MO.getImm() >> 2); } /// getAddrMode5OpValue - Return encoding info for 'reg +/- imm10' operand. uint32_t ARMMCCodeEmitter:: getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { // {12-9} = reg // {8} = (U)nsigned (add == '1', sub == '0') // {7-0} = imm8 @@ -1146,7 +1252,7 @@ getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx, assert(MO.isExpr() && "Unexpected machine operand type!"); const MCExpr *Expr = MO.getExpr(); MCFixupKind Kind; - if (isThumb2()) + if (isThumb2(STI)) Kind = MCFixupKind(ARM::fixup_t2_pcrel_10); else Kind = MCFixupKind(ARM::fixup_arm_pcrel_10); @@ -1154,7 +1260,7 @@ getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx, ++MCNumCPRelocations; } else { - EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm8, Fixups); + EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm8, Fixups, STI); isAdd = ARM_AM::getAM5Op(Imm8) == ARM_AM::add; } @@ -1168,7 +1274,8 @@ getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx, unsigned ARMMCCodeEmitter:: getSORegRegOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { // Sub-operands are [reg, reg, imm]. The first register is Rm, the reg to be // shifted. The second is Rs, the amount to shift by, and the third specifies // the type of the shift. @@ -1215,7 +1322,8 @@ getSORegRegOpValue(const MCInst &MI, unsigned OpIdx, unsigned ARMMCCodeEmitter:: getSORegImmOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { // Sub-operands are [reg, imm]. The first register is Rm, the reg to be // shifted. The second is the amount to shift by. 
// @@ -1261,7 +1369,8 @@ getSORegImmOpValue(const MCInst &MI, unsigned OpIdx, unsigned ARMMCCodeEmitter:: getT2AddrModeSORegOpValue(const MCInst &MI, unsigned OpNum, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO1 = MI.getOperand(OpNum); const MCOperand &MO2 = MI.getOperand(OpNum+1); const MCOperand &MO3 = MI.getOperand(OpNum+2); @@ -1279,7 +1388,8 @@ getT2AddrModeSORegOpValue(const MCInst &MI, unsigned OpNum, unsigned ARMMCCodeEmitter:: getT2AddrModeImm8OpValue(const MCInst &MI, unsigned OpNum, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO1 = MI.getOperand(OpNum); const MCOperand &MO2 = MI.getOperand(OpNum+1); @@ -1300,7 +1410,8 @@ getT2AddrModeImm8OpValue(const MCInst &MI, unsigned OpNum, unsigned ARMMCCodeEmitter:: getT2AddrModeImm8OffsetOpValue(const MCInst &MI, unsigned OpNum, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO1 = MI.getOperand(OpNum); // FIXME: Needs fixup support. @@ -1316,7 +1427,8 @@ getT2AddrModeImm8OffsetOpValue(const MCInst &MI, unsigned OpNum, unsigned ARMMCCodeEmitter:: getT2AddrModeImm12OffsetOpValue(const MCInst &MI, unsigned OpNum, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO1 = MI.getOperand(OpNum); // FIXME: Needs fixup support. @@ -1332,7 +1444,8 @@ getT2AddrModeImm12OffsetOpValue(const MCInst &MI, unsigned OpNum, unsigned ARMMCCodeEmitter:: getT2SORegOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { // Sub-operands are [reg, imm]. The first register is Rm, the reg to be // shifted. The second is the amount to shift by. // @@ -1374,7 +1487,8 @@ getT2SORegOpValue(const MCInst &MI, unsigned OpIdx, unsigned ARMMCCodeEmitter:: getBitfieldInvertedMaskOpValue(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { // 10 bits. lower 5 bits are the lsb of the mask, high five bits are the // msb of the mask. const MCOperand &MO = MI.getOperand(Op); @@ -1387,7 +1501,8 @@ getBitfieldInvertedMaskOpValue(const MCInst &MI, unsigned Op, unsigned ARMMCCodeEmitter:: getRegisterListOpValue(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { // VLDM/VSTM: // {12-8} = Vd // {7-0} = Number of registers @@ -1423,7 +1538,8 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op, /// with the alignment operand. unsigned ARMMCCodeEmitter:: getAddrMode6AddressOpValue(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &Reg = MI.getOperand(Op); const MCOperand &Imm = MI.getOperand(Op + 1); @@ -1446,7 +1562,8 @@ getAddrMode6AddressOpValue(const MCInst &MI, unsigned Op, /// along with the alignment operand for use in VST1 and VLD1 with size 32.
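/// (Editor's note: addrmode6 packs an encoded alignment field alongside Rn,
/// and single-lane VLD1/VST1 with 32-bit elements maps the legal alignments
/// differently from the whole-vector forms, hence the dedicated encoder
/// below.)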
unsigned ARMMCCodeEmitter:: getAddrMode6OneLane32AddressOpValue(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &Reg = MI.getOperand(Op); const MCOperand &Imm = MI.getOperand(Op + 1); @@ -1472,7 +1589,8 @@ getAddrMode6OneLane32AddressOpValue(const MCInst &MI, unsigned Op, /// different for VLD4-dup. unsigned ARMMCCodeEmitter:: getAddrMode6DupAddressOpValue(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &Reg = MI.getOperand(Op); const MCOperand &Imm = MI.getOperand(Op + 1); @@ -1492,7 +1610,8 @@ getAddrMode6DupAddressOpValue(const MCInst &MI, unsigned Op, unsigned ARMMCCodeEmitter:: getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(Op); if (MO.getReg() == 0) return 0x0D; return CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); @@ -1500,31 +1619,36 @@ getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op, unsigned ARMMCCodeEmitter:: getShiftRight8Imm(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { return 8 - MI.getOperand(Op).getImm(); } unsigned ARMMCCodeEmitter:: getShiftRight16Imm(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { return 16 - MI.getOperand(Op).getImm(); } unsigned ARMMCCodeEmitter:: getShiftRight32Imm(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { return 32 - MI.getOperand(Op).getImm(); } unsigned ARMMCCodeEmitter:: getShiftRight64Imm(const MCInst &MI, unsigned Op, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { return 64 - MI.getOperand(Op).getImm(); } void ARMMCCodeEmitter:: EncodeInstruction(const MCInst &MI, raw_ostream &OS, - SmallVectorImpl<MCFixup> &Fixups) const { + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { // Pseudo instructions don't get encoded. const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); uint64_t TSFlags = Desc.TSFlags; @@ -1537,10 +1661,10 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, else llvm_unreachable("Unexpected instruction size!"); - uint32_t Binary = getBinaryCodeForInstr(MI, Fixups); + uint32_t Binary = getBinaryCodeForInstr(MI, Fixups, STI); // Thumb 32-bit wide instructions need to emit the high order halfword // first. 
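// (Editor's note: a worked example of how this interacts with EmitConstant's
// IsLittleEndian handling above: for a 32-bit Thumb2 encoding
// Binary = 0xAABBCCDD on a little-endian target, the halves are emitted as
// 0xAABB then 0xCCDD, i.e. the byte stream BB AA DD CC; a big-endian target
// emits AA BB CC DD. ARM-mode instructions go out as a single 32-bit unit
// instead.)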
- if (isThumb() && Size == 4) { + if (isThumb(STI) && Size == 4) { EmitConstant(Binary >> 16, 2, OS); EmitConstant(Binary & 0xffff, 2, OS); } else diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp index fc8505b052bd..e545e3c2f301 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp @@ -7,12 +7,13 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "armmcexpr" #include "ARMMCExpr.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" using namespace llvm; +#define DEBUG_TYPE "armmcexpr" + const ARMMCExpr* ARMMCExpr::Create(VariantKind Kind, const MCExpr *Expr, MCContext &Ctx) { @@ -40,33 +41,6 @@ ARMMCExpr::EvaluateAsRelocatableImpl(MCValue &Res, return false; } -// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps -// that method should be made public? -static void AddValueSymbols_(const MCExpr *Value, MCAssembler *Asm) { - switch (Value->getKind()) { - case MCExpr::Target: - llvm_unreachable("Can't handle nested target expr!"); - - case MCExpr::Constant: - break; - - case MCExpr::Binary: { - const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value); - AddValueSymbols_(BE->getLHS(), Asm); - AddValueSymbols_(BE->getRHS(), Asm); - break; - } - - case MCExpr::SymbolRef: - Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol()); - break; - - case MCExpr::Unary: - AddValueSymbols_(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm); - break; - } -} - -void ARMMCExpr::AddValueSymbols(MCAssembler *Asm) const { - AddValueSymbols_(getSubExpr(), Asm); +void ARMMCExpr::visitUsedExpr(MCStreamer &Streamer) const { + Streamer.visitUsedExpr(*getSubExpr()); } diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h index cd4067a52955..c5c0b10f8ad9 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h @@ -56,16 +56,16 @@ public: /// @} - void PrintImpl(raw_ostream &OS) const; + void PrintImpl(raw_ostream &OS) const override; bool EvaluateAsRelocatableImpl(MCValue &Res, - const MCAsmLayout *Layout) const; - void AddValueSymbols(MCAssembler *) const; - const MCSection *FindAssociatedSection() const { + const MCAsmLayout *Layout) const override; + void visitUsedExpr(MCStreamer &Streamer) const override; + const MCSection *FindAssociatedSection() const override { return getSubExpr()->FindAssociatedSection(); } // There are no TLS ARMMCExprs at the moment. 
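// (Editor's note: this hook exists so ELF targets can mark symbols referenced
// by TLS-model expressions as thread-local; since ARMMCExpr currently has no
// TLS variant kinds, the override below is deliberately empty.)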
- void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {} + void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {} static bool classof(const MCExpr *E) { return E->getKind() == MCExpr::Target; diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index a99de0e78230..6a3ec8fcc486 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -21,6 +21,7 @@ #include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" @@ -83,89 +84,87 @@ static bool getITDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI, std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) { Triple triple(TT); - // Set the boolean corresponding to the current target triple, or the default - // if one cannot be determined, to true. - unsigned Len = TT.size(); - unsigned Idx = 0; - - // FIXME: Enhance Triple helper class to extract ARM version. - bool isThumb = false; - if (Len >= 5 && TT.substr(0, 4) == "armv") - Idx = 4; - else if (Len >= 6 && TT.substr(0, 5) == "thumb") { - isThumb = true; - if (Len >= 7 && TT[5] == 'v') - Idx = 6; - } + bool isThumb = triple.getArch() == Triple::thumb || + triple.getArch() == Triple::thumbeb; bool NoCPU = CPU == "generic" || CPU.empty(); std::string ARMArchFeature; - if (Idx) { - unsigned SubVer = TT[Idx]; - if (SubVer == '8') { - if (NoCPU) - // v8a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2, FeatureMP, - // FeatureHWDiv, FeatureHWDivARM, FeatureTrustZone, FeatureT2XtPk, FeatureCrypto, FeatureCRC - ARMArchFeature = "+v8,+db,+fp-armv8,+neon,+t2dsp,+mp,+hwdiv,+hwdiv-arm,+trustzone,+t2xtpk,+crypto,+crc"; - else - // Use CPU to figure out the exact features - ARMArchFeature = "+v8"; - } else if (SubVer == '7') { - if (Len >= Idx+2 && TT[Idx+1] == 'm') { - isThumb = true; - if (NoCPU) - // v7m: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureMClass - ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+mclass"; - else - // Use CPU to figure out the exact features. - ARMArchFeature = "+v7"; - } else if (Len >= Idx+3 && TT[Idx+1] == 'e'&& TT[Idx+2] == 'm') { - if (NoCPU) - // v7em: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureDSPThumb2, - // FeatureT2XtPk, FeatureMClass - ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+t2dsp,t2xtpk,+mclass"; - else - // Use CPU to figure out the exact features. - ARMArchFeature = "+v7"; - } else if (Len >= Idx+2 && TT[Idx+1] == 's') { - if (NoCPU) - // v7s: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureT2XtPk - // Swift - ARMArchFeature = "+v7,+swift,+neon,+db,+t2dsp,+t2xtpk"; - else - // Use CPU to figure out the exact features. - ARMArchFeature = "+v7"; - } else { - // v7 CPUs have lots of different feature sets. If no CPU is specified, - // then assume v7a (e.g. cortex-a8) feature set. Otherwise, return - // the "minimum" feature set and use CPU string to figure out the exact - // features. - if (NoCPU) - // v7a: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureT2XtPk - ARMArchFeature = "+v7,+neon,+db,+t2dsp,+t2xtpk"; - else - // Use CPU to figure out the exact features. 
- ARMArchFeature = "+v7"; - } - } else if (SubVer == '6') { - if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2') - ARMArchFeature = "+v6t2"; - else if (Len >= Idx+2 && TT[Idx+1] == 'm') { - isThumb = true; - if (NoCPU) - // v6m: FeatureNoARM, FeatureMClass - ARMArchFeature = "+v6m,+noarm,+mclass"; - else - ARMArchFeature = "+v6"; - } else - ARMArchFeature = "+v6"; - } else if (SubVer == '5') { - if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == 'e') - ARMArchFeature = "+v5te"; - else - ARMArchFeature = "+v5t"; - } else if (SubVer == '4' && Len >= Idx+2 && TT[Idx+1] == 't') - ARMArchFeature = "+v4t"; + switch (triple.getSubArch()) { + case Triple::ARMSubArch_v8: + if (NoCPU) + // v8a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2, + // FeatureMP, FeatureHWDiv, FeatureHWDivARM, FeatureTrustZone, + // FeatureT2XtPk, FeatureCrypto, FeatureCRC + ARMArchFeature = "+v8,+db,+fp-armv8,+neon,+t2dsp,+mp,+hwdiv,+hwdiv-arm," + "+trustzone,+t2xtpk,+crypto,+crc"; + else + // Use CPU to figure out the exact features + ARMArchFeature = "+v8"; + break; + case Triple::ARMSubArch_v7m: + isThumb = true; + if (NoCPU) + // v7m: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureMClass + ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+mclass"; + else + // Use CPU to figure out the exact features. + ARMArchFeature = "+v7"; + break; + case Triple::ARMSubArch_v7em: + if (NoCPU) + // v7em: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureDSPThumb2, + // FeatureT2XtPk, FeatureMClass + ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+t2dsp,t2xtpk,+mclass"; + else + // Use CPU to figure out the exact features. + ARMArchFeature = "+v7"; + break; + case Triple::ARMSubArch_v7s: + if (NoCPU) + // v7s: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureHasRAS + // Swift + ARMArchFeature = "+v7,+swift,+neon,+db,+t2dsp,+ras"; + else + // Use CPU to figure out the exact features. + ARMArchFeature = "+v7"; + break; + case Triple::ARMSubArch_v7: + // v7 CPUs have lots of different feature sets. If no CPU is specified, + // then assume v7a (e.g. cortex-a8) feature set. Otherwise, return + // the "minimum" feature set and use CPU string to figure out the exact + // features. + if (NoCPU) + // v7a: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureT2XtPk + ARMArchFeature = "+v7,+neon,+db,+t2dsp,+t2xtpk"; + else + // Use CPU to figure out the exact features. 
+ ARMArchFeature = "+v7"; + break; + case Triple::ARMSubArch_v6t2: + ARMArchFeature = "+v6t2"; + break; + case Triple::ARMSubArch_v6m: + isThumb = true; + if (NoCPU) + // v6m: FeatureNoARM, FeatureMClass + ARMArchFeature = "+v6m,+noarm,+mclass"; + else + ARMArchFeature = "+v6"; + break; + case Triple::ARMSubArch_v6: + ARMArchFeature = "+v6"; + break; + case Triple::ARMSubArch_v5te: + ARMArchFeature = "+v5te"; + break; + case Triple::ARMSubArch_v5: + ARMArchFeature = "+v5t"; + break; + case Triple::ARMSubArch_v4t: + ARMArchFeature = "+v4t"; + break; + case Triple::NoSubArch: + break; } if (isThumb) { @@ -215,10 +214,37 @@ static MCRegisterInfo *createARMMCRegisterInfo(StringRef Triple) { static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { Triple TheTriple(TT); - if (TheTriple.isOSDarwin()) - return new ARMMCAsmInfoDarwin(); + MCAsmInfo *MAI; + switch (TheTriple.getOS()) { + case llvm::Triple::Darwin: + case llvm::Triple::IOS: + case llvm::Triple::MacOSX: + MAI = new ARMMCAsmInfoDarwin(TT); + break; + case llvm::Triple::Win32: + switch (TheTriple.getEnvironment()) { + case llvm::Triple::Itanium: + MAI = new ARMCOFFMCAsmInfoGNU(); + break; + case llvm::Triple::MSVC: + MAI = new ARMCOFFMCAsmInfoMicrosoft(); + break; + default: + llvm_unreachable("invalid environment"); + } + break; + default: + if (TheTriple.isOSBinFormatMachO()) + MAI = new ARMMCAsmInfoDarwin(TT); + else + MAI = new ARMELFMCAsmInfo(TT); + break; + } + + unsigned Reg = MRI.getDwarfRegNum(ARM::SP, true); + MAI->addInitialFrameState(MCCFIInstruction::createDefCfa(nullptr, Reg, 0)); - return new ARMELFMCAsmInfo(); + return MAI; } static MCCodeGenInfo *createARMMCCodeGenInfo(StringRef TT, Reloc::Model RM, @@ -239,19 +265,25 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, MCContext &Ctx, MCAsmBackend &MAB, raw_ostream &OS, MCCodeEmitter *Emitter, + const MCSubtargetInfo &STI, bool RelaxAll, bool NoExecStack) { Triple TheTriple(TT); - if (TheTriple.isOSDarwin()) - return createMachOStreamer(Ctx, MAB, OS, Emitter, false); - - if (TheTriple.isOSWindows()) { - llvm_unreachable("ARM does not support Windows COFF format"); + switch (TheTriple.getObjectFormat()) { + default: llvm_unreachable("unsupported object format"); + case Triple::MachO: { + MCStreamer *S = createMachOStreamer(Ctx, MAB, OS, Emitter, false); + new ARMTargetStreamer(*S); + return S; + } + case Triple::COFF: + assert(TheTriple.isOSWindows() && "non-Windows ARM COFF is not supported"); + return createARMWinCOFFStreamer(Ctx, MAB, *Emitter, OS); + case Triple::ELF: + return createARMELFStreamer(Ctx, MAB, OS, Emitter, false, NoExecStack, + TheTriple.getArch() == Triple::thumb); } - - return createARMELFStreamer(Ctx, MAB, OS, Emitter, false, NoExecStack, - TheTriple.getArch() == Triple::thumb); } static MCInstPrinter *createARMMCInstPrinter(const Target &T, @@ -262,13 +294,13 @@ static MCInstPrinter *createARMMCInstPrinter(const Target &T, const MCSubtargetInfo &STI) { if (SyntaxVariant == 0) return new ARMInstPrinter(MAI, MII, MRI, STI); - return 0; + return nullptr; } static MCRelocationInfo *createARMMCRelocationInfo(StringRef TT, MCContext &Ctx) { Triple TheTriple(TT); - if (TheTriple.isEnvironmentMachO()) + if (TheTriple.isOSBinFormatMachO()) return createARMMachORelocationInfo(Ctx); // Default to the stock relocation info. 
return llvm::createMCRelocationInfo(TT, Ctx); @@ -280,14 +312,14 @@ class ARMMCInstrAnalysis : public MCInstrAnalysis { public: ARMMCInstrAnalysis(const MCInstrInfo *Info) : MCInstrAnalysis(Info) {} - virtual bool isUnconditionalBranch(const MCInst &Inst) const { + bool isUnconditionalBranch(const MCInst &Inst) const override { // BCCs with the "always" predicate are unconditional branches. if (Inst.getOpcode() == ARM::Bcc && Inst.getOperand(1).getImm()==ARMCC::AL) return true; return MCInstrAnalysis::isUnconditionalBranch(Inst); } - virtual bool isConditionalBranch(const MCInst &Inst) const { + bool isConditionalBranch(const MCInst &Inst) const override { // BCCs with the "always" predicate are unconditional branches. if (Inst.getOpcode() == ARM::Bcc && Inst.getOperand(1).getImm()==ARMCC::AL) return false; @@ -295,7 +327,7 @@ public: } bool evaluateBranch(const MCInst &Inst, uint64_t Addr, - uint64_t Size, uint64_t &Target) const { + uint64_t Size, uint64_t &Target) const override { // We only handle PCRel branches for now. if (Info->get(Inst.getOpcode()).OpInfo[0].OperandType!=MCOI::OPERAND_PCREL) return false; @@ -316,56 +348,100 @@ static MCInstrAnalysis *createARMMCInstrAnalysis(const MCInstrInfo *Info) { // Force static initialization. extern "C" void LLVMInitializeARMTargetMC() { // Register the MC asm info. - RegisterMCAsmInfoFn A(TheARMTarget, createARMMCAsmInfo); - RegisterMCAsmInfoFn B(TheThumbTarget, createARMMCAsmInfo); + RegisterMCAsmInfoFn X(TheARMLETarget, createARMMCAsmInfo); + RegisterMCAsmInfoFn Y(TheARMBETarget, createARMMCAsmInfo); + RegisterMCAsmInfoFn A(TheThumbLETarget, createARMMCAsmInfo); + RegisterMCAsmInfoFn B(TheThumbBETarget, createARMMCAsmInfo); // Register the MC codegen info. - TargetRegistry::RegisterMCCodeGenInfo(TheARMTarget, createARMMCCodeGenInfo); - TargetRegistry::RegisterMCCodeGenInfo(TheThumbTarget, createARMMCCodeGenInfo); + TargetRegistry::RegisterMCCodeGenInfo(TheARMLETarget, createARMMCCodeGenInfo); + TargetRegistry::RegisterMCCodeGenInfo(TheARMBETarget, createARMMCCodeGenInfo); + TargetRegistry::RegisterMCCodeGenInfo(TheThumbLETarget, createARMMCCodeGenInfo); + TargetRegistry::RegisterMCCodeGenInfo(TheThumbBETarget, createARMMCCodeGenInfo); // Register the MC instruction info. - TargetRegistry::RegisterMCInstrInfo(TheARMTarget, createARMMCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(TheThumbTarget, createARMMCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(TheARMLETarget, createARMMCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(TheARMBETarget, createARMMCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(TheThumbLETarget, createARMMCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(TheThumbBETarget, createARMMCInstrInfo); // Register the MC register info. - TargetRegistry::RegisterMCRegInfo(TheARMTarget, createARMMCRegisterInfo); - TargetRegistry::RegisterMCRegInfo(TheThumbTarget, createARMMCRegisterInfo); + TargetRegistry::RegisterMCRegInfo(TheARMLETarget, createARMMCRegisterInfo); + TargetRegistry::RegisterMCRegInfo(TheARMBETarget, createARMMCRegisterInfo); + TargetRegistry::RegisterMCRegInfo(TheThumbLETarget, createARMMCRegisterInfo); + TargetRegistry::RegisterMCRegInfo(TheThumbBETarget, createARMMCRegisterInfo); // Register the MC subtarget info. 
- TargetRegistry::RegisterMCSubtargetInfo(TheARMTarget, + TargetRegistry::RegisterMCSubtargetInfo(TheARMLETarget, + ARM_MC::createARMMCSubtargetInfo); + TargetRegistry::RegisterMCSubtargetInfo(TheARMBETarget, ARM_MC::createARMMCSubtargetInfo); - TargetRegistry::RegisterMCSubtargetInfo(TheThumbTarget, + TargetRegistry::RegisterMCSubtargetInfo(TheThumbLETarget, + ARM_MC::createARMMCSubtargetInfo); + TargetRegistry::RegisterMCSubtargetInfo(TheThumbBETarget, ARM_MC::createARMMCSubtargetInfo); // Register the MC instruction analyzer. - TargetRegistry::RegisterMCInstrAnalysis(TheARMTarget, + TargetRegistry::RegisterMCInstrAnalysis(TheARMLETarget, + createARMMCInstrAnalysis); + TargetRegistry::RegisterMCInstrAnalysis(TheARMBETarget, + createARMMCInstrAnalysis); + TargetRegistry::RegisterMCInstrAnalysis(TheThumbLETarget, createARMMCInstrAnalysis); - TargetRegistry::RegisterMCInstrAnalysis(TheThumbTarget, + TargetRegistry::RegisterMCInstrAnalysis(TheThumbBETarget, createARMMCInstrAnalysis); // Register the MC Code Emitter - TargetRegistry::RegisterMCCodeEmitter(TheARMTarget, createARMMCCodeEmitter); - TargetRegistry::RegisterMCCodeEmitter(TheThumbTarget, createARMMCCodeEmitter); + TargetRegistry::RegisterMCCodeEmitter(TheARMLETarget, + createARMLEMCCodeEmitter); + TargetRegistry::RegisterMCCodeEmitter(TheARMBETarget, + createARMBEMCCodeEmitter); + TargetRegistry::RegisterMCCodeEmitter(TheThumbLETarget, + createARMLEMCCodeEmitter); + TargetRegistry::RegisterMCCodeEmitter(TheThumbBETarget, + createARMBEMCCodeEmitter); // Register the asm backend. - TargetRegistry::RegisterMCAsmBackend(TheARMTarget, createARMAsmBackend); - TargetRegistry::RegisterMCAsmBackend(TheThumbTarget, createARMAsmBackend); + TargetRegistry::RegisterMCAsmBackend(TheARMLETarget, createARMLEAsmBackend); + TargetRegistry::RegisterMCAsmBackend(TheARMBETarget, createARMBEAsmBackend); + TargetRegistry::RegisterMCAsmBackend(TheThumbLETarget, + createThumbLEAsmBackend); + TargetRegistry::RegisterMCAsmBackend(TheThumbBETarget, + createThumbBEAsmBackend); // Register the object streamer. - TargetRegistry::RegisterMCObjectStreamer(TheARMTarget, createMCStreamer); - TargetRegistry::RegisterMCObjectStreamer(TheThumbTarget, createMCStreamer); + TargetRegistry::RegisterMCObjectStreamer(TheARMLETarget, createMCStreamer); + TargetRegistry::RegisterMCObjectStreamer(TheARMBETarget, createMCStreamer); + TargetRegistry::RegisterMCObjectStreamer(TheThumbLETarget, createMCStreamer); + TargetRegistry::RegisterMCObjectStreamer(TheThumbBETarget, createMCStreamer); // Register the asm streamer. - TargetRegistry::RegisterAsmStreamer(TheARMTarget, createMCAsmStreamer); - TargetRegistry::RegisterAsmStreamer(TheThumbTarget, createMCAsmStreamer); + TargetRegistry::RegisterAsmStreamer(TheARMLETarget, createMCAsmStreamer); + TargetRegistry::RegisterAsmStreamer(TheARMBETarget, createMCAsmStreamer); + TargetRegistry::RegisterAsmStreamer(TheThumbLETarget, createMCAsmStreamer); + TargetRegistry::RegisterAsmStreamer(TheThumbBETarget, createMCAsmStreamer); + + // Register the null streamer. + TargetRegistry::RegisterNullStreamer(TheARMLETarget, createARMNullStreamer); + TargetRegistry::RegisterNullStreamer(TheARMBETarget, createARMNullStreamer); + TargetRegistry::RegisterNullStreamer(TheThumbLETarget, createARMNullStreamer); + TargetRegistry::RegisterNullStreamer(TheThumbBETarget, createARMNullStreamer); // Register the MCInstPrinter. 
- TargetRegistry::RegisterMCInstPrinter(TheARMTarget, createARMMCInstPrinter); - TargetRegistry::RegisterMCInstPrinter(TheThumbTarget, createARMMCInstPrinter); + TargetRegistry::RegisterMCInstPrinter(TheARMLETarget, createARMMCInstPrinter); + TargetRegistry::RegisterMCInstPrinter(TheARMBETarget, createARMMCInstPrinter); + TargetRegistry::RegisterMCInstPrinter(TheThumbLETarget, + createARMMCInstPrinter); + TargetRegistry::RegisterMCInstPrinter(TheThumbBETarget, + createARMMCInstPrinter); // Register the MC relocation info. - TargetRegistry::RegisterMCRelocationInfo(TheARMTarget, + TargetRegistry::RegisterMCRelocationInfo(TheARMLETarget, + createARMMCRelocationInfo); + TargetRegistry::RegisterMCRelocationInfo(TheARMBETarget, + createARMMCRelocationInfo); + TargetRegistry::RegisterMCRelocationInfo(TheThumbLETarget, createARMMCRelocationInfo); - TargetRegistry::RegisterMCRelocationInfo(TheThumbTarget, + TargetRegistry::RegisterMCRelocationInfo(TheThumbBETarget, createARMMCRelocationInfo); } diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h index 959be8b55c3a..5326e564f363 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h @@ -33,7 +33,8 @@ class StringRef; class Target; class raw_ostream; -extern Target TheARMTarget, TheThumbTarget; +extern Target TheARMLETarget, TheThumbLETarget; +extern Target TheARMBETarget, TheThumbBETarget; namespace ARM_MC { std::string ParseARMTriple(StringRef TT, StringRef CPU); @@ -46,22 +47,47 @@ namespace ARM_MC { } MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, - bool isVerboseAsm, bool useLoc, bool useCFI, - bool useDwarfDirectory, + bool isVerboseAsm, bool useDwarfDirectory, MCInstPrinter *InstPrint, MCCodeEmitter *CE, MCAsmBackend *TAB, bool ShowInst); -MCCodeEmitter *createARMMCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, - MCContext &Ctx); +MCStreamer *createARMNullStreamer(MCContext &Ctx); + +MCCodeEmitter *createARMLEMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI, + MCContext &Ctx); + +MCCodeEmitter *createARMBEMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI, + MCContext &Ctx); MCAsmBackend *createARMAsmBackend(const Target &T, const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU, + bool IsLittleEndian); + +MCAsmBackend *createARMLEAsmBackend(const Target &T, const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU); + +MCAsmBackend *createARMBEAsmBackend(const Target &T, const MCRegisterInfo &MRI, StringRef TT, StringRef CPU); +MCAsmBackend *createThumbLEAsmBackend(const Target &T, const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU); + +MCAsmBackend *createThumbBEAsmBackend(const Target &T, const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU); + +/// createARMWinCOFFStreamer - Construct a PE/COFF machine code streamer which +/// will generate a PE/COFF object file. +MCStreamer *createARMWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB, + MCCodeEmitter &Emitter, raw_ostream &OS); + /// createARMELFObjectWriter - Construct an ELF object writer. MCObjectWriter *createARMELFObjectWriter(raw_ostream &OS, - uint8_t OSABI); + uint8_t OSABI, + bool IsLittleEndian); /// createARMMachObjectWriter - Construct an ARM Mach-O object writer.
MCObjectWriter *createARMMachObjectWriter(raw_ostream &OS, @@ -69,6 +95,8 @@ MCObjectWriter *createARMMachObjectWriter(raw_ostream &OS, uint32_t CPUType, uint32_t CPUSubtype); +/// createARMWinCOFFObjectWriter - Construct an ARM PE/COFF object writer. +MCObjectWriter *createARMWinCOFFObjectWriter(raw_ostream &OS, bool Is64Bit); /// createARMMachORelocationInfo - Construct ARM Mach-O relocation info. MCRelocationInfo *createARMMachORelocationInfo(MCContext &Ctx); diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp index 807c9483bc38..d4b00e6e4fb5 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp @@ -9,10 +9,10 @@ #include "MCTargetDesc/ARMMCTargetDesc.h" #include "ARMMCExpr.h" +#include "llvm-c/Disassembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCRelocationInfo.h" -#include "llvm-c/Disassembler.h" using namespace llvm; using namespace object; @@ -23,7 +23,7 @@ public: ARMMachORelocationInfo(MCContext &Ctx) : MCRelocationInfo(Ctx) {} const MCExpr *createExprForCAPIVariantKind(const MCExpr *SubExpr, - unsigned VariantKind) { + unsigned VariantKind) override { switch(VariantKind) { case LLVMDisassembler_VariantKind_ARM_HI16: return ARMMCExpr::CreateUpper16(SubExpr, Ctx); diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index 1f681bac2242..186776a1944f 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -32,6 +32,7 @@ class ARMMachObjectWriter : public MCMachObjectTargetWriter { const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, + unsigned Type, unsigned Log2Size, uint64_t &FixedValue); void RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, @@ -56,7 +57,7 @@ public: void RecordRelocation(MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, - MCValue Target, uint64_t &FixedValue); + MCValue Target, uint64_t &FixedValue) override; }; } @@ -82,10 +83,14 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType, Log2Size = llvm::Log2_32(8); return true; - // Handle 24-bit branch kinds. + // These fixups are expected to always be resolvable at assembly time and + // have no relocations supported. case ARM::fixup_arm_ldst_pcrel_12: case ARM::fixup_arm_pcrel_10: case ARM::fixup_arm_adr_pcrel_12: + return false; + + // Handle 24-bit branch kinds. 
case ARM::fixup_arm_condbranch: case ARM::fixup_arm_uncondbranch: case ARM::fixup_arm_uncondbl: @@ -119,23 +124,19 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType, // 0 - arm instructions // 1 - thumb instructions case ARM::fixup_arm_movt_hi16: - case ARM::fixup_arm_movt_hi16_pcrel: RelocType = unsigned(MachO::ARM_RELOC_HALF); Log2Size = 1; return true; case ARM::fixup_t2_movt_hi16: - case ARM::fixup_t2_movt_hi16_pcrel: RelocType = unsigned(MachO::ARM_RELOC_HALF); Log2Size = 3; return true; case ARM::fixup_arm_movw_lo16: - case ARM::fixup_arm_movw_lo16_pcrel: RelocType = unsigned(MachO::ARM_RELOC_HALF); Log2Size = 0; return true; case ARM::fixup_t2_movw_lo16: - case ARM::fixup_t2_movw_lo16_pcrel: RelocType = unsigned(MachO::ARM_RELOC_HALF); Log2Size = 2; return true; @@ -156,7 +157,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, // See <reloc.h>. const MCSymbol *A = &Target.getSymA()->getSymbol(); - MCSymbolData *A_SD = &Asm.getSymbolData(*A); + const MCSymbolData *A_SD = &Asm.getSymbolData(*A); if (!A_SD->getFragment()) Asm.getContext().FatalError(Fixup.getLoc(), @@ -170,7 +171,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, FixedValue += SecAddr; if (const MCSymbolRefExpr *B = Target.getSymB()) { - MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); + const MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); if (!B_SD->getFragment()) Asm.getContext().FatalError(Fixup.getLoc(), @@ -202,22 +203,19 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, switch ((unsigned)Fixup.getKind()) { default: break; case ARM::fixup_arm_movt_hi16: - case ARM::fixup_arm_movt_hi16_pcrel: MovtBit = 1; // The thumb bit shouldn't be set in the 'other-half' bit of the // relocation, but it will be set in FixedValue if the base symbol // is a thumb function. Clear it out here. - if (A_SD->getFlags() & SF_ThumbFunc) + if (Asm.isThumbFunc(A)) FixedValue &= 0xfffffffe; break; case ARM::fixup_t2_movt_hi16: - case ARM::fixup_t2_movt_hi16_pcrel: - if (A_SD->getFlags() & SF_ThumbFunc) + if (Asm.isThumbFunc(A)) FixedValue &= 0xfffffffe; MovtBit = 1; // Fallthrough case ARM::fixup_t2_movw_lo16: - case ARM::fixup_t2_movw_lo16_pcrel: ThumbBit = 1; break; } @@ -254,15 +252,15 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, + unsigned Type, unsigned Log2Size, uint64_t &FixedValue) { uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); - unsigned Type = MachO::ARM_RELOC_VANILLA; // See <reloc.h>. 
const MCSymbol *A = &Target.getSymA()->getSymbol(); - MCSymbolData *A_SD = &Asm.getSymbolData(*A); + const MCSymbolData *A_SD = &Asm.getSymbolData(*A); if (!A_SD->getFragment()) Asm.getContext().FatalError(Fixup.getLoc(), @@ -275,7 +273,8 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, uint32_t Value2 = 0; if (const MCSymbolRefExpr *B = Target.getSymB()) { - MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); + assert(Type == MachO::ARM_RELOC_VANILLA && "invalid reloc for 2 symbols"); + const MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); if (!B_SD->getFragment()) Asm.getContext().FatalError(Fixup.getLoc(), @@ -377,11 +376,12 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, return RecordARMScatteredHalfRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, FixedValue); return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, - Target, Log2Size, FixedValue); + Target, RelocType, Log2Size, + FixedValue); } // Get the symbol data, if any. - MCSymbolData *SD = 0; + const MCSymbolData *SD = nullptr; if (Target.getSymA()) SD = &Asm.getSymbolData(Target.getSymA()->getSymbol()); @@ -395,7 +395,8 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, Offset += 1 << Log2Size; if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD)) return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, - Target, Log2Size, FixedValue); + Target, RelocType, Log2Size, + FixedValue); // See <reloc.h>. uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); @@ -461,15 +462,11 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, switch ((unsigned)Fixup.getKind()) { default: break; case ARM::fixup_arm_movw_lo16: - case ARM::fixup_arm_movw_lo16_pcrel: case ARM::fixup_t2_movw_lo16: - case ARM::fixup_t2_movw_lo16_pcrel: Value = (FixedValue >> 16) & 0xffff; break; case ARM::fixup_arm_movt_hi16: - case ARM::fixup_arm_movt_hi16_pcrel: case ARM::fixup_t2_movt_hi16: - case ARM::fixup_t2_movt_hi16_pcrel: Value = FixedValue & 0xffff; break; } diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp new file mode 100644 index 000000000000..8acd7aff6bca --- /dev/null +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp @@ -0,0 +1,73 @@ +//===- ARMTargetStreamer.cpp - ARMTargetStreamer class --*- C++ -*---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ARMTargetStreamer class. +// +//===----------------------------------------------------------------------===// +#include "llvm/ADT/MapVector.h" +#include "llvm/MC/ConstantPools.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCStreamer.h" + +using namespace llvm; +// +// ARMTargetStreamer Implementation +// +ARMTargetStreamer::ARMTargetStreamer(MCStreamer &S) + : MCTargetStreamer(S), ConstantPools(new AssemblerConstantPools()) {} + +ARMTargetStreamer::~ARMTargetStreamer() {} + +// The constant pool handling is shared by all ARMTargetStreamer +// implementations.
+const MCExpr *ARMTargetStreamer::addConstantPoolEntry(const MCExpr *Expr) { + return ConstantPools->addEntry(Streamer, Expr, 4); +} + +void ARMTargetStreamer::emitCurrentConstantPool() { + ConstantPools->emitForCurrentSection(Streamer); +} + +// finish() - write out any non-empty assembler constant pools. +void ARMTargetStreamer::finish() { ConstantPools->emitAll(Streamer); } + +// The remaining callbacks should be handled separately by each +// streamer. +void ARMTargetStreamer::emitFnStart() {} +void ARMTargetStreamer::emitFnEnd() {} +void ARMTargetStreamer::emitCantUnwind() {} +void ARMTargetStreamer::emitPersonality(const MCSymbol *Personality) {} +void ARMTargetStreamer::emitPersonalityIndex(unsigned Index) {} +void ARMTargetStreamer::emitHandlerData() {} +void ARMTargetStreamer::emitSetFP(unsigned FpReg, unsigned SpReg, + int64_t Offset) {} +void ARMTargetStreamer::emitMovSP(unsigned Reg, int64_t Offset) {} +void ARMTargetStreamer::emitPad(int64_t Offset) {} +void ARMTargetStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList, + bool isVector) {} +void ARMTargetStreamer::emitUnwindRaw(int64_t StackOffset, + const SmallVectorImpl<uint8_t> &Opcodes) { +} +void ARMTargetStreamer::switchVendor(StringRef Vendor) {} +void ARMTargetStreamer::emitAttribute(unsigned Attribute, unsigned Value) {} +void ARMTargetStreamer::emitTextAttribute(unsigned Attribute, + StringRef String) {} +void ARMTargetStreamer::emitIntTextAttribute(unsigned Attribute, + unsigned IntValue, + StringRef StringValue) {} +void ARMTargetStreamer::emitArch(unsigned Arch) {} +void ARMTargetStreamer::emitObjectArch(unsigned Arch) {} +void ARMTargetStreamer::emitFPU(unsigned FPU) {} +void ARMTargetStreamer::finishAttributeSection() {} +void ARMTargetStreamer::emitInst(uint32_t Inst, char Suffix) {} +void +ARMTargetStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) {} + +void ARMTargetStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) {} diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h deleted file mode 100644 index fa4add65a8df..000000000000 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h +++ /dev/null @@ -1,125 +0,0 @@ -//===-- ARMUnwindOp.h - ARM Unwind Opcodes ----------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the constants for the ARM unwind opcodes and exception -// handling table entry kinds. 
-// -//===----------------------------------------------------------------------===// - -#ifndef ARM_UNWIND_OP_H -#define ARM_UNWIND_OP_H - -namespace llvm { - - /// ARM exception handling table entry kinds - enum ARMEHTEntryKind { - EHT_GENERIC = 0x00, - EHT_COMPACT = 0x80 - }; - - enum { - /// Special entry for the function never unwind - EXIDX_CANTUNWIND = 0x1 - }; - - /// ARM-defined frame unwinding opcodes - enum ARMUnwindOpcodes { - // Format: 00xxxxxx - // Purpose: vsp = vsp + ((x << 2) + 4) - UNWIND_OPCODE_INC_VSP = 0x00, - - // Format: 01xxxxxx - // Purpose: vsp = vsp - ((x << 2) + 4) - UNWIND_OPCODE_DEC_VSP = 0x40, - - // Format: 10000000 00000000 - // Purpose: refuse to unwind - UNWIND_OPCODE_REFUSE = 0x8000, - - // Format: 1000xxxx xxxxxxxx - // Purpose: pop r[15:12], r[11:4] - // Constraint: x != 0 - UNWIND_OPCODE_POP_REG_MASK_R4 = 0x8000, - - // Format: 1001xxxx - // Purpose: vsp = r[x] - // Constraint: x != 13 && x != 15 - UNWIND_OPCODE_SET_VSP = 0x90, - - // Format: 10100xxx - // Purpose: pop r[(4+x):4] - UNWIND_OPCODE_POP_REG_RANGE_R4 = 0xa0, - - // Format: 10101xxx - // Purpose: pop r14, r[(4+x):4] - UNWIND_OPCODE_POP_REG_RANGE_R4_R14 = 0xa8, - - // Format: 10110000 - // Purpose: finish - UNWIND_OPCODE_FINISH = 0xb0, - - // Format: 10110001 0000xxxx - // Purpose: pop r[3:0] - // Constraint: x != 0 - UNWIND_OPCODE_POP_REG_MASK = 0xb100, - - // Format: 10110010 x(uleb128) - // Purpose: vsp = vsp + ((x << 2) + 0x204) - UNWIND_OPCODE_INC_VSP_ULEB128 = 0xb2, - - // Format: 10110011 xxxxyyyy - // Purpose: pop d[(x+y):x] - UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDX = 0xb300, - - // Format: 10111xxx - // Purpose: pop d[(8+x):8] - UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDX_D8 = 0xb8, - - // Format: 11000xxx - // Purpose: pop wR[(10+x):10] - UNWIND_OPCODE_POP_WIRELESS_MMX_REG_RANGE_WR10 = 0xc0, - - // Format: 11000110 xxxxyyyy - // Purpose: pop wR[(x+y):x] - UNWIND_OPCODE_POP_WIRELESS_MMX_REG_RANGE = 0xc600, - - // Format: 11000111 0000xxxx - // Purpose: pop wCGR[3:0] - // Constraint: x != 0 - UNWIND_OPCODE_POP_WIRELESS_MMX_REG_MASK = 0xc700, - - // Format: 11001000 xxxxyyyy - // Purpose: pop d[(16+x+y):(16+x)] - UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D16 = 0xc800, - - // Format: 11001001 xxxxyyyy - // Purpose: pop d[(x+y):x] - UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD = 0xc900, - - // Format: 11010xxx - // Purpose: pop d[(8+x):8] - UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D8 = 0xd0 - }; - - /// ARM-defined Personality Routine Index - enum ARMPersonalityRoutineIndex { - // To make the exception handling table become more compact, ARM defined - // several personality routines in EHABI. There are 3 different - // personality routines in ARM EHABI currently. It is possible to have 16 - // pre-defined personality routines at most. 
- AEABI_UNWIND_CPP_PR0 = 0, - AEABI_UNWIND_CPP_PR1 = 1, - AEABI_UNWIND_CPP_PR2 = 2, - - NUM_PERSONALITY_INDEX - }; - -} - -#endif // ARM_UNWIND_OP_H diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp index c94337081884..593fe349b1d4 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp @@ -13,8 +13,7 @@ //===----------------------------------------------------------------------===// #include "ARMUnwindOpAsm.h" - -#include "ARMUnwindOp.h" +#include "llvm/Support/ARMEHABI.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LEB128.h" @@ -50,14 +49,15 @@ namespace { /// Emit the personality index prefix. inline void EmitPersonalityIndex(unsigned PI) { - assert(PI < NUM_PERSONALITY_INDEX && "Invalid personality prefix"); - EmitByte(EHT_COMPACT | PI); + assert(PI < ARM::EHABI::NUM_PERSONALITY_INDEX && + "Invalid personality prefix"); + EmitByte(ARM::EHABI::EHT_COMPACT | PI); } /// Fill the rest of bytes with FINISH opcode. inline void FillFinishOpcode() { while (Pos < Vec.size()) - EmitByte(UNWIND_OPCODE_FINISH); + EmitByte(ARM::EHABI::UNWIND_OPCODE_FINISH); } }; } @@ -85,22 +85,22 @@ void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) { uint32_t UnmaskedReg = RegSave & 0xfff0u & (~Mask); if (UnmaskedReg == 0u) { // Pop r[4 : (4 + n)] - EmitInt8(UNWIND_OPCODE_POP_REG_RANGE_R4 | Range); + EmitInt8(ARM::EHABI::UNWIND_OPCODE_POP_REG_RANGE_R4 | Range); RegSave &= 0x000fu; } else if (UnmaskedReg == (1u << 14)) { // Pop r[14] + r[4 : (4 + n)] - EmitInt8(UNWIND_OPCODE_POP_REG_RANGE_R4_R14 | Range); + EmitInt8(ARM::EHABI::UNWIND_OPCODE_POP_REG_RANGE_R4_R14 | Range); RegSave &= 0x000fu; } } // Two bytes opcode to save register r15-r4 if ((RegSave & 0xfff0u) != 0) - EmitInt16(UNWIND_OPCODE_POP_REG_MASK_R4 | (RegSave >> 4)); + EmitInt16(ARM::EHABI::UNWIND_OPCODE_POP_REG_MASK_R4 | (RegSave >> 4)); // Opcode to save register r3-r0 if ((RegSave & 0x000fu) != 0) - EmitInt16(UNWIND_OPCODE_POP_REG_MASK | (RegSave & 0x000fu)); + EmitInt16(ARM::EHABI::UNWIND_OPCODE_POP_REG_MASK | (RegSave & 0x000fu)); } /// Emit unwind opcodes for .vsave directives @@ -125,7 +125,7 @@ void UnwindOpcodeAssembler::EmitVFPRegSave(uint32_t VFPRegSave) { Bit >>= 1; } - EmitInt16(UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D16 | + EmitInt16(ARM::EHABI::UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D16 | ((i - 16) << 4) | Range); } @@ -147,34 +147,36 @@ void UnwindOpcodeAssembler::EmitVFPRegSave(uint32_t VFPRegSave) { Bit >>= 1; } - EmitInt16(UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD | (i << 4) | Range); + EmitInt16(ARM::EHABI::UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD | (i << 4) | + Range); } } /// Emit unwind opcodes to copy address from source register to $sp. void UnwindOpcodeAssembler::EmitSetSP(uint16_t Reg) { - EmitInt8(UNWIND_OPCODE_SET_VSP | Reg); + EmitInt8(ARM::EHABI::UNWIND_OPCODE_SET_VSP | Reg); } /// Emit unwind opcodes to add $sp with an offset. 
void UnwindOpcodeAssembler::EmitSPOffset(int64_t Offset) { if (Offset > 0x200) { uint8_t Buff[16]; - Buff[0] = UNWIND_OPCODE_INC_VSP_ULEB128; + Buff[0] = ARM::EHABI::UNWIND_OPCODE_INC_VSP_ULEB128; size_t ULEBSize = encodeULEB128((Offset - 0x204) >> 2, Buff + 1); EmitBytes(Buff, ULEBSize + 1); } else if (Offset > 0) { if (Offset > 0x100) { - EmitInt8(UNWIND_OPCODE_INC_VSP | 0x3fu); + EmitInt8(ARM::EHABI::UNWIND_OPCODE_INC_VSP | 0x3fu); Offset -= 0x100; } - EmitInt8(UNWIND_OPCODE_INC_VSP | static_cast<uint8_t>((Offset - 4) >> 2)); + EmitInt8(ARM::EHABI::UNWIND_OPCODE_INC_VSP | + static_cast<uint8_t>((Offset - 4) >> 2)); } else if (Offset < 0) { while (Offset < -0x100) { - EmitInt8(UNWIND_OPCODE_DEC_VSP | 0x3fu); + EmitInt8(ARM::EHABI::UNWIND_OPCODE_DEC_VSP | 0x3fu); Offset += 0x100; } - EmitInt8(UNWIND_OPCODE_DEC_VSP | + EmitInt8(ARM::EHABI::UNWIND_OPCODE_DEC_VSP | static_cast<uint8_t>(((-Offset) - 4) >> 2)); } } @@ -186,20 +188,23 @@ void UnwindOpcodeAssembler::Finalize(unsigned &PersonalityIndex, if (HasPersonality) { // User-specified personality routine: [ SIZE , OP1 , OP2 , ... ] - PersonalityIndex = NUM_PERSONALITY_INDEX; + PersonalityIndex = ARM::EHABI::NUM_PERSONALITY_INDEX; size_t TotalSize = Ops.size() + 1; size_t RoundUpSize = (TotalSize + 3) / 4 * 4; Result.resize(RoundUpSize); OpStreamer.EmitSize(RoundUpSize); } else { - if (Ops.size() <= 3) { + // If no personality index is specified, select one + if (PersonalityIndex == ARM::EHABI::NUM_PERSONALITY_INDEX) + PersonalityIndex = (Ops.size() <= 3) ? ARM::EHABI::AEABI_UNWIND_CPP_PR0 + : ARM::EHABI::AEABI_UNWIND_CPP_PR1; + if (PersonalityIndex == ARM::EHABI::AEABI_UNWIND_CPP_PR0) { // __aeabi_unwind_cpp_pr0: [ 0x80 , OP1 , OP2 , OP3 ] - PersonalityIndex = AEABI_UNWIND_CPP_PR0; + assert(Ops.size() <= 3 && "too many opcodes for __aeabi_unwind_cpp_pr0"); Result.resize(4); OpStreamer.EmitPersonalityIndex(PersonalityIndex); } else { - // __aeabi_unwind_cpp_pr1: [ 0x81 , SIZE , OP1 , OP2 , ... ] - PersonalityIndex = AEABI_UNWIND_CPP_PR1; + // __aeabi_unwind_cpp_pr{1,2}: [ {0x81,0x82} , SIZE , OP1 , OP2 , ... ] size_t TotalSize = Ops.size() + 2; size_t RoundUpSize = (TotalSize + 3) / 4 * 4; Result.resize(RoundUpSize); diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h index ac67c6efabb7..cd5875924300 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h @@ -15,10 +15,8 @@ #ifndef ARM_UNWIND_OP_ASM_H #define ARM_UNWIND_OP_ASM_H -#include "ARMUnwindOp.h" - #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" +#include "llvm/Support/ARMEHABI.h" #include "llvm/Support/DataTypes.h" namespace llvm { @@ -45,7 +43,7 @@ public: HasPersonality = 0; } - /// Set the personality index + /// Set the personality void setPersonality(const MCSymbol *Per) { HasPersonality = 1; } @@ -62,6 +60,12 @@ public: /// Emit unwind opcodes to add $sp with an offset.
void EmitSPOffset(int64_t Offset); + /// Emit unwind raw opcodes + void EmitRaw(const SmallVectorImpl<uint8_t> &Opcodes) { + Ops.insert(Ops.end(), Opcodes.begin(), Opcodes.end()); + OpBegins.push_back(OpBegins.back() + Opcodes.size()); + } + /// Finalize the unwind opcode sequence for EmitBytes() void Finalize(unsigned &PersonalityIndex, SmallVectorImpl<uint8_t> &Result); diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp new file mode 100644 index 000000000000..d31f1f41c697 --- /dev/null +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp @@ -0,0 +1,82 @@ +//===-- ARMWinCOFFObjectWriter.cpp - ARM Windows COFF Object Writer -- C++ -==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/ARMFixupKinds.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCValue.h" +#include "llvm/MC/MCWinCOFFObjectWriter.h" +#include "llvm/Support/COFF.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +namespace { +class ARMWinCOFFObjectWriter : public MCWinCOFFObjectTargetWriter { +public: + ARMWinCOFFObjectWriter(bool Is64Bit) + : MCWinCOFFObjectTargetWriter(COFF::IMAGE_FILE_MACHINE_ARMNT) { + assert(!Is64Bit && "AArch64 support not yet implemented"); + } + virtual ~ARMWinCOFFObjectWriter() { } + + unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsCrossSection) const override; + + bool recordRelocation(const MCFixup &) const override; +}; + +unsigned ARMWinCOFFObjectWriter::getRelocType(const MCValue &Target, + const MCFixup &Fixup, + bool IsCrossSection) const { + assert(getMachine() == COFF::IMAGE_FILE_MACHINE_ARMNT && + "AArch64 support not yet implemented"); + + MCSymbolRefExpr::VariantKind Modifier = + Target.isAbsolute() ? 
MCSymbolRefExpr::VK_None : Target.getSymA()->getKind(); + + switch (static_cast<unsigned>(Fixup.getKind())) { + default: llvm_unreachable("unsupported relocation type"); + case FK_Data_4: + switch (Modifier) { + case MCSymbolRefExpr::VK_COFF_IMGREL32: + return COFF::IMAGE_REL_ARM_ADDR32NB; + case MCSymbolRefExpr::VK_SECREL: + return COFF::IMAGE_REL_ARM_SECREL; + default: + return COFF::IMAGE_REL_ARM_ADDR32; + } + case FK_SecRel_2: + return COFF::IMAGE_REL_ARM_SECTION; + case FK_SecRel_4: + return COFF::IMAGE_REL_ARM_SECREL; + case ARM::fixup_t2_condbranch: + return COFF::IMAGE_REL_ARM_BRANCH20T; + case ARM::fixup_t2_uncondbranch: + return COFF::IMAGE_REL_ARM_BRANCH24T; + case ARM::fixup_arm_thumb_bl: + case ARM::fixup_arm_thumb_blx: + return COFF::IMAGE_REL_ARM_BLX23T; + case ARM::fixup_t2_movw_lo16: + case ARM::fixup_t2_movt_hi16: + return COFF::IMAGE_REL_ARM_MOV32T; + } +} + +bool ARMWinCOFFObjectWriter::recordRelocation(const MCFixup &Fixup) const { + return static_cast<unsigned>(Fixup.getKind()) != ARM::fixup_t2_movt_hi16; +} +} + +namespace llvm { +MCObjectWriter *createARMWinCOFFObjectWriter(raw_ostream &OS, bool Is64Bit) { + MCWinCOFFObjectTargetWriter *MOTW = new ARMWinCOFFObjectWriter(Is64Bit); + return createWinCOFFObjectWriter(MOTW, OS); +} +} + diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp new file mode 100644 index 000000000000..b344ced2f67a --- /dev/null +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp @@ -0,0 +1,46 @@ +//===-- ARMWinCOFFStreamer.cpp - ARM Target WinCOFF Streamer ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "ARMMCTargetDesc.h" +#include "llvm/MC/MCWinCOFFStreamer.h" + +using namespace llvm; + +namespace { +class ARMWinCOFFStreamer : public MCWinCOFFStreamer { +public: + ARMWinCOFFStreamer(MCContext &C, MCAsmBackend &AB, MCCodeEmitter &CE, + raw_ostream &OS) + : MCWinCOFFStreamer(C, AB, CE, OS) { } + + void EmitAssemblerFlag(MCAssemblerFlag Flag) override; + void EmitThumbFunc(MCSymbol *Symbol) override; +}; + +void ARMWinCOFFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { + switch (Flag) { + default: llvm_unreachable("not implemented"); + case MCAF_SyntaxUnified: + case MCAF_Code16: + break; + } +} + +void ARMWinCOFFStreamer::EmitThumbFunc(MCSymbol *Symbol) { + getAssembler().setIsThumbFunc(Symbol); +} +} + +namespace llvm { +MCStreamer *createARMWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB, + MCCodeEmitter &Emitter, raw_ostream &OS) { + return new ARMWinCOFFStreamer(Context, MAB, Emitter, OS); +} +} + diff --git a/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp index 2e266c2e9624..f6d24e9e4f93 100644 --- a/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp +++ b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "mlx-expansion" #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMSubtarget.h" @@ -28,6 +27,8 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "mlx-expansion" + static cl::opt<bool> ForceExapnd("expand-all-fp-mlx", cl::init(false), cl::Hidden); static cl::opt<unsigned> @@ -40,9 +41,9 @@ namespace { static char ID; MLxExpansion() : MachineFunctionPass(ID) {} - virtual bool runOnMachineFunction(MachineFunction &Fn); + bool runOnMachineFunction(MachineFunction &Fn) override; - virtual const char *getPassName() const { + const char *getPassName() const override { return "ARM MLA / MLS expansion pass"; } @@ -73,7 +74,7 @@ namespace { } void MLxExpansion::clearStack() { - std::fill(LastMIs, LastMIs + 4, (MachineInstr*)0); + std::fill(LastMIs, LastMIs + 4, nullptr); MIIdx = 0; } @@ -88,7 +89,7 @@ MachineInstr *MLxExpansion::getAccDefMI(MachineInstr *MI) const { // real definition MI. This is important for _sfp instructions. 
unsigned Reg = MI->getOperand(1).getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) - return 0; + return nullptr; MachineBasicBlock *MBB = MI->getParent(); MachineInstr *DefMI = MRI->getVRegDef(Reg); @@ -120,7 +121,7 @@ unsigned MLxExpansion::getDefReg(MachineInstr *MI) const { return Reg; MachineBasicBlock *MBB = MI->getParent(); - MachineInstr *UseMI = &*MRI->use_nodbg_begin(Reg); + MachineInstr *UseMI = &*MRI->use_instr_nodbg_begin(Reg); if (UseMI->getParent() != MBB) return Reg; @@ -129,7 +130,7 @@ unsigned MLxExpansion::getDefReg(MachineInstr *MI) const { if (TargetRegisterInfo::isPhysicalRegister(Reg) || !MRI->hasOneNonDBGUse(Reg)) return Reg; - UseMI = &*MRI->use_nodbg_begin(Reg); + UseMI = &*MRI->use_instr_nodbg_begin(Reg); if (UseMI->getParent() != MBB) return Reg; } @@ -312,9 +313,9 @@ MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI, dbgs() << "Expanding: " << *MI; dbgs() << " to:\n"; MachineBasicBlock::iterator MII = MI; - MII = llvm::prior(MII); + MII = std::prev(MII); MachineInstr &MI2 = *MII; - MII = llvm::prior(MII); + MII = std::prev(MII); MachineInstr &MI1 = *MII; dbgs() << " " << MI1; dbgs() << " " << MI2; @@ -335,7 +336,7 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) { while (MII != E) { MachineInstr *MI = &*MII; - if (MI->isLabel() || MI->isImplicitDef() || MI->isCopy()) { + if (MI->isPosition() || MI->isImplicitDef() || MI->isCopy()) { ++MII; continue; } @@ -352,7 +353,7 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) { if (Domain == ARMII::DomainGeneral) { if (++Skip == 2) // Assume dual issues of non-VFP / NEON instructions. - pushStack(0); + pushStack(nullptr); } else { Skip = 0; @@ -385,11 +386,8 @@ bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) { isSwift = STI->isSwift(); bool Modified = false; - for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; - ++MFI) { - MachineBasicBlock &MBB = *MFI; + for (MachineBasicBlock &MBB : Fn) Modified |= ExpandFPMLxInstructions(MBB); - } return Modified; } diff --git a/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp b/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp index fa5681fb12bf..e4646713d4d7 100644 --- a/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp @@ -7,17 +7,22 @@ // //===----------------------------------------------------------------------===// -#include "ARM.h" +#include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm/IR/Module.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; -Target llvm::TheARMTarget, llvm::TheThumbTarget; +Target llvm::TheARMLETarget, llvm::TheARMBETarget; +Target llvm::TheThumbLETarget, llvm::TheThumbBETarget; extern "C" void LLVMInitializeARMTargetInfo() { RegisterTarget<Triple::arm, /*HasJIT=*/true> - X(TheARMTarget, "arm", "ARM"); + X(TheARMLETarget, "arm", "ARM"); + RegisterTarget<Triple::armeb, /*HasJIT=*/true> + Y(TheARMBETarget, "armeb", "ARM (big endian)"); RegisterTarget<Triple::thumb, /*HasJIT=*/true> - Y(TheThumbTarget, "thumb", "Thumb"); + A(TheThumbLETarget, "thumb", "Thumb"); + RegisterTarget<Triple::thumbeb, /*HasJIT=*/true> + B(TheThumbBETarget, "thumbeb", "Thumb (big endian)"); } diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp index cfb33f5b8212..baa97a7c479a 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp @@ 
-16,10 +16,14 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" using namespace llvm; +Thumb1FrameLowering::Thumb1FrameLowering(const ARMSubtarget &sti) + : ARMFrameLowering(sti) {} + bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const{ const MachineFrameInfo *FFI = MF.getFrameInfo(); unsigned CFSize = FFI->getMaxCallFrameSize(); @@ -83,6 +87,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + MachineModuleInfo &MMI = MF.getMMI(); + const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); const Thumb1RegisterInfo *RegInfo = static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo()); const Thumb1InstrInfo &TII = @@ -91,10 +97,13 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); unsigned NumBytes = MFI->getStackSize(); + assert(NumBytes >= ArgRegsSaveSize && + "ArgRegsSaveSize is included in NumBytes"); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); unsigned FramePtr = RegInfo->getFrameRegister(MF); unsigned BasePtr = RegInfo->getBaseRegister(); + int CFAOffset = 0; // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4. NumBytes = (NumBytes + 3) & ~3; @@ -105,14 +114,26 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; int FramePtrSpillFI = 0; - if (ArgRegsSaveSize) + if (ArgRegsSaveSize) { emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -ArgRegsSaveSize, MachineInstr::FrameSetup); + CFAOffset -= ArgRegsSaveSize; + unsigned CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } if (!AFI->hasStackFrame()) { - if (NumBytes != 0) - emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes, + if (NumBytes - ArgRegsSaveSize != 0) { + emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -(NumBytes - ArgRegsSaveSize), MachineInstr::FrameSetup); + CFAOffset -= NumBytes - ArgRegsSaveSize; + unsigned CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } return; } @@ -120,6 +141,15 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { unsigned Reg = CSI[i].getReg(); int FI = CSI[i].getFrameIdx(); switch (Reg) { + case ARM::R8: + case ARM::R9: + case ARM::R10: + case ARM::R11: + if (STI.isTargetMachO()) { + GPRCS2Size += 4; + break; + } + // fallthrough case ARM::R4: case ARM::R5: case ARM::R6: @@ -129,17 +159,6 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { FramePtrSpillFI = FI; GPRCS1Size += 4; break; - case ARM::R8: - case ARM::R9: - case ARM::R10: - case ARM::R11: - if (Reg == FramePtr) - FramePtrSpillFI = FI; - if (STI.isTargetIOS()) - GPRCS2Size += 4; - else - GPRCS1Size += 4; - break; default: DPRCSSize += 8; } @@ -152,7 +171,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { } 
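The hunk that follows reworks how Thumb1FrameLowering::emitPrologue derives the spill-area offsets: the argument-register save area is now counted in NumBytes (see the assert added earlier in this file's diff) and must be carved out before the callee-saved GPR/DPR areas. As a reading aid, a minimal self-contained sketch of that layout arithmetic, with names mirroring the variables in the diff; the helper itself is illustrative only, not part of the patch:

// Sketch only: mirrors the offset computation in the hunk below.
// Top-of-frame layout: [arg-reg save][GPR area 1][GPR area 2][DPR area][locals].
struct SpillLayout {
  unsigned DPRCSOffset;
  unsigned GPRCS2Offset;
  unsigned GPRCS1Offset;
};

static SpillLayout computeSpillOffsets(unsigned NumBytes, // whole frame, arg save included
                                       unsigned ArgRegsSaveSize,
                                       unsigned GPRCS1Size,
                                       unsigned GPRCS2Size,
                                       unsigned DPRCSSize) {
  SpillLayout L;
  // ArgRegsSaveSize is subtracted first; before this patch it was not,
  // because it was not yet included in NumBytes.
  L.DPRCSOffset = NumBytes - ArgRegsSaveSize -
                  (GPRCS1Size + GPRCS2Size + DPRCSSize);
  L.GPRCS2Offset = L.DPRCSOffset + DPRCSSize;
  L.GPRCS1Offset = L.GPRCS2Offset + GPRCS2Size;
  return L;
}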
// Determine starting offsets of spill areas. - unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); + unsigned DPRCSOffset = NumBytes - ArgRegsSaveSize - (GPRCS1Size + GPRCS2Size + DPRCSSize); unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; bool HasFP = hasFP(MF); @@ -165,27 +184,89 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { NumBytes = DPRCSOffset; int FramePtrOffsetInBlock = 0; - if (tryFoldSPUpdateIntoPushPop(MF, prior(MBBI), NumBytes)) { + unsigned adjustedGPRCS1Size = GPRCS1Size; + if (tryFoldSPUpdateIntoPushPop(STI, MF, std::prev(MBBI), NumBytes)) { FramePtrOffsetInBlock = NumBytes; + adjustedGPRCS1Size += NumBytes; NumBytes = 0; } + if (adjustedGPRCS1Size) { + CFAOffset -= adjustedGPRCS1Size; + unsigned CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } + for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(), + E = CSI.end(); I != E; ++I) { + unsigned Reg = I->getReg(); + int FI = I->getFrameIdx(); + switch (Reg) { + case ARM::R8: + case ARM::R9: + case ARM::R10: + case ARM::R11: + case ARM::R12: + if (STI.isTargetMachO()) + break; + // fallthrough + case ARM::R0: + case ARM::R1: + case ARM::R2: + case ARM::R3: + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + case ARM::LR: + unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( + nullptr, MRI->getDwarfRegNum(Reg, true), MFI->getObjectOffset(FI))); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + break; + } + } + + // Adjust FP so it points to the stack slot that contains the previous FP. if (HasFP) { - FramePtrOffsetInBlock += MFI->getObjectOffset(FramePtrSpillFI) + GPRCS1Size; + FramePtrOffsetInBlock += MFI->getObjectOffset(FramePtrSpillFI) + + GPRCS1Size + ArgRegsSaveSize; AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) .addReg(ARM::SP).addImm(FramePtrOffsetInBlock / 4) .setMIFlags(MachineInstr::FrameSetup)); + if(FramePtrOffsetInBlock) { + CFAOffset += FramePtrOffsetInBlock; + unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa( + nullptr, MRI->getDwarfRegNum(FramePtr, true), CFAOffset)); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } else { + unsigned CFIIndex = + MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister( + nullptr, MRI->getDwarfRegNum(FramePtr, true))); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } if (NumBytes > 508) // If offset is > 508 then sp cannot be adjusted in a single instruction, // try restoring from fp instead. AFI->setShouldRestoreSPFromFP(true); } - if (NumBytes) + if (NumBytes) { // Insert it after all the callee-save spills.
emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes, MachineInstr::FrameSetup); + if (!HasFP) { + CFAOffset -= NumBytes; + unsigned CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } + } if (STI.isTargetELF() && HasFP) MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - @@ -215,7 +296,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { AFI->setShouldRestoreSPFromFP(true); } -static bool isCSRestore(MachineInstr *MI, const uint16_t *CSRegs) { +static bool isCSRestore(MachineInstr *MI, const MCPhysReg *CSRegs) { if (MI->getOpcode() == ARM::tLDRspi && MI->getOperand(1).isFI() && isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs)) @@ -248,12 +329,14 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); int NumBytes = (int)MFI->getStackSize(); - const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(); + assert((unsigned)NumBytes >= ArgRegsSaveSize && + "ArgRegsSaveSize is included in NumBytes"); + const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(); unsigned FramePtr = RegInfo->getFrameRegister(MF); if (!AFI->hasStackFrame()) { - if (NumBytes != 0) - emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes); + if (NumBytes - ArgRegsSaveSize != 0) + emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes - ArgRegsSaveSize); } else { // Unwind MBBI to point to first LDR / VLDRD. if (MBBI != MBB.begin()) { @@ -267,7 +350,8 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, // Move SP to start of FP callee save spill area. NumBytes -= (AFI->getGPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea2Size() + - AFI->getDPRCalleeSavedAreaSize()); + AFI->getDPRCalleeSavedAreaSize() + + ArgRegsSaveSize); if (AFI->shouldRestoreSPFromFP()) { NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; @@ -289,11 +373,11 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, } else { if (MBBI->getOpcode() == ARM::tBX_RET && &MBB.front() != MBBI && - prior(MBBI)->getOpcode() == ARM::tPOP) { - MachineBasicBlock::iterator PMBBI = prior(MBBI); - if (!tryFoldSPUpdateIntoPushPop(MF, PMBBI, NumBytes)) + std::prev(MBBI)->getOpcode() == ARM::tPOP) { + MachineBasicBlock::iterator PMBBI = std::prev(MBBI); + if (!tryFoldSPUpdateIntoPushPop(STI, MF, PMBBI, NumBytes)) emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes); - } else if (!tryFoldSPUpdateIntoPushPop(MF, MBBI, NumBytes)) + } else if (!tryFoldSPUpdateIntoPushPop(STI, MF, MBBI, NumBytes)) emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes); } } @@ -304,9 +388,9 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, // we need to update the SP after popping the value. Therefore, we // pop the old LR into R3 as a temporary. - // Move back past the callee-saved register restoration - while (MBBI != MBB.end() && isCSRestore(MBBI, CSRegs)) - ++MBBI; + // Get the last instruction, tBX_RET + MBBI = MBB.getLastNonDebugInstr(); + assert (MBBI->getOpcode() == ARM::tBX_RET); // Epilogue for vararg functions: pop LR to R3 and branch off it. 
     AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
       .addReg(ARM::R3, RegState::Define);
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
index 5a300afd5d36..a227f8ece73f 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
+++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
@@ -11,44 +11,40 @@
 //
 //===----------------------------------------------------------------------===//

-#ifndef __THUMB_FRAMEINFO_H_
-#define __THUMB_FRAMEINFO_H_
+#ifndef LLVM_ARM_THUMB1FRAMELOWERING_H
+#define LLVM_ARM_THUMB1FRAMELOWERING_H

-#include "ARM.h"
 #include "ARMFrameLowering.h"
-#include "ARMSubtarget.h"
 #include "Thumb1InstrInfo.h"
 #include "Thumb1RegisterInfo.h"
 #include "llvm/Target/TargetFrameLowering.h"

 namespace llvm {
-  class ARMSubtarget;

 class Thumb1FrameLowering : public ARMFrameLowering {
 public:
-  explicit Thumb1FrameLowering(const ARMSubtarget &sti)
-    : ARMFrameLowering(sti) {
-  }
+  explicit Thumb1FrameLowering(const ARMSubtarget &sti);

   /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
   /// the function.
-  void emitPrologue(MachineFunction &MF) const;
-  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+  void emitPrologue(MachineFunction &MF) const override;
+  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;

   bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MI,
                                  const std::vector<CalleeSavedInfo> &CSI,
-                                 const TargetRegisterInfo *TRI) const;
+                                 const TargetRegisterInfo *TRI) const override;
   bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                   MachineBasicBlock::iterator MI,
-                                   const std::vector<CalleeSavedInfo> &CSI,
-                                   const TargetRegisterInfo *TRI) const;
+                                   MachineBasicBlock::iterator MI,
+                                   const std::vector<CalleeSavedInfo> &CSI,
+                                   const TargetRegisterInfo *TRI) const override;

-  bool hasReservedCallFrame(const MachineFunction &MF) const;
+  bool hasReservedCallFrame(const MachineFunction &MF) const override;

-  void eliminateCallFramePseudoInstr(MachineFunction &MF,
-                                     MachineBasicBlock &MBB,
-                                     MachineBasicBlock::iterator MI) const;
+  void
+  eliminateCallFramePseudoInstr(MachineFunction &MF,
+                                MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator MI) const override;
 };

 } // End llvm namespace
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
index 22a925e0ffbc..68cbb5c580df 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -12,7 +12,6 @@
 //===----------------------------------------------------------------------===//

 #include "Thumb1InstrInfo.h"
-#include "ARM.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h
index 36af20492d4e..c5845b75e587 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h
@@ -14,7 +14,6 @@
 #ifndef THUMB1INSTRUCTIONINFO_H
 #define THUMB1INSTRUCTIONINFO_H

-#include "ARM.h"
 #include "ARMBaseInstrInfo.h"
 #include "Thumb1RegisterInfo.h"

@@ -27,33 +26,33 @@ public:
   explicit Thumb1InstrInfo(const ARMSubtarget &STI);

   /// getNoopForMachoTarget - Return the noop instruction to use for a noop.
-  void getNoopForMachoTarget(MCInst &NopInst) const;
+  void getNoopForMachoTarget(MCInst &NopInst) const override;

   // Return the non-pre/post incrementing version of 'Opc'. Return 0
   // if there is not such an opcode.
-  unsigned getUnindexedOpcode(unsigned Opc) const;
+  unsigned getUnindexedOpcode(unsigned Opc) const override;

   /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
   /// such, whenever a client has an instance of instruction info, it should
   /// always be able to get register info as well (through this method).
   ///
-  const Thumb1RegisterInfo &getRegisterInfo() const { return RI; }
+  const Thumb1RegisterInfo &getRegisterInfo() const override { return RI; }

   void copyPhysReg(MachineBasicBlock &MBB,
                    MachineBasicBlock::iterator I, DebugLoc DL,
                    unsigned DestReg, unsigned SrcReg,
-                   bool KillSrc) const;
+                   bool KillSrc) const override;
   void storeRegToStackSlot(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
                            unsigned SrcReg, bool isKill, int FrameIndex,
                            const TargetRegisterClass *RC,
-                           const TargetRegisterInfo *TRI) const;
+                           const TargetRegisterInfo *TRI) const override;

   void loadRegFromStackSlot(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MBBI,
                             unsigned DestReg, int FrameIndex,
                             const TargetRegisterClass *RC,
-                            const TargetRegisterInfo *TRI) const;
+                            const TargetRegisterInfo *TRI) const override;
 };
 }
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp
index 65a7221d5db7..f907b143ef1b 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -13,7 +13,6 @@
 //===----------------------------------------------------------------------===//

 #include "Thumb1RegisterInfo.h"
-#include "ARM.h"
 #include "ARMBaseInstrInfo.h"
 #include "ARMMachineFunctionInfo.h"
 #include "ARMSubtarget.h"
@@ -30,7 +29,6 @@
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetMachine.h"

@@ -421,7 +419,7 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
       MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Mask);
     }
     Offset = (Offset - Mask * Scale);
-    MachineBasicBlock::iterator NII = llvm::next(II);
+    MachineBasicBlock::iterator NII = std::next(II);
     emitThumbRegPlusImmediate(MBB, NII, dl, DestReg, DestReg, Offset, TII,
                               *this);
   } else {
@@ -484,10 +482,8 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
   return Offset == 0;
 }

-void
-Thumb1RegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
-                                      unsigned BaseReg, int64_t Offset) const {
-  MachineInstr &MI = *I;
+void Thumb1RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
+                                           int64_t Offset) const {
   const ARMBaseInstrInfo &TII =
     *static_cast<const ARMBaseInstrInfo*>(
       MI.getParent()->getParent()->getTarget().getInstrInfo());
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h
index 9689b23146a0..0c0abbe99042 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -15,7 +15,6 @@
 #ifndef THUMB1REGISTERINFO_H
 #define THUMB1REGISTERINFO_H

-#include "ARM.h"
 #include "ARMBaseRegisterInfo.h"
 #include "llvm/Target/TargetRegisterInfo.h"

@@ -27,21 +26,20 @@ struct Thumb1RegisterInfo : public ARMBaseRegisterInfo {
 public:
   Thumb1RegisterInfo(const ARMSubtarget &STI);

-  const TargetRegisterClass*
-  getLargestLegalSuperClass(const TargetRegisterClass *RC) const;
+  const TargetRegisterClass *
+  getLargestLegalSuperClass(const TargetRegisterClass *RC) const override;

-  const TargetRegisterClass*
-  getPointerRegClass(const MachineFunction &MF, unsigned Kind = 0) const;
+  const TargetRegisterClass *
+  getPointerRegClass(const MachineFunction &MF,
+                     unsigned Kind = 0) const override;

   /// emitLoadConstPool - Emits a load from constpool to materialize the
   /// specified immediate.
-  void emitLoadConstPool(MachineBasicBlock &MBB,
-                         MachineBasicBlock::iterator &MBBI,
-                         DebugLoc dl,
-                         unsigned DestReg, unsigned SubIdx, int Val,
-                         ARMCC::CondCodes Pred = ARMCC::AL,
-                         unsigned PredReg = 0,
-                         unsigned MIFlags = MachineInstr::NoFlags) const;
+  void
+  emitLoadConstPool(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+                    DebugLoc dl, unsigned DestReg, unsigned SubIdx, int Val,
+                    ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0,
+                    unsigned MIFlags = MachineInstr::NoFlags) const override;

   // rewrite MI to access 'Offset' bytes from the FP. Update Offset to be
   // however much remains to be handled. Return 'true' if no further
@@ -49,16 +47,16 @@ public:
   bool rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
                          unsigned FrameReg, int &Offset,
                          const ARMBaseInstrInfo &TII) const;
-  void resolveFrameIndex(MachineBasicBlock::iterator I,
-                         unsigned BaseReg, int64_t Offset) const;
+  void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
+                         int64_t Offset) const override;
   bool saveScavengerRegister(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I,
                              MachineBasicBlock::iterator &UseMI,
                              const TargetRegisterClass *RC,
-                             unsigned Reg) const;
+                             unsigned Reg) const override;
   void eliminateFrameIndex(MachineBasicBlock::iterator II,
                            int SPAdj, unsigned FIOperandNum,
-                           RegScavenger *RS = NULL) const;
+                           RegScavenger *RS = nullptr) const override;
 };
 }
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
index 0b7d3bb7754f..edb9ff3a24f2 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -7,7 +7,6 @@
 //
 //===----------------------------------------------------------------------===//

-#define DEBUG_TYPE "thumb2-it"
 #include "ARM.h"
 #include "ARMMachineFunctionInfo.h"
 #include "Thumb2InstrInfo.h"
@@ -19,6 +18,8 @@
 #include "llvm/CodeGen/MachineInstrBundle.h"
 using namespace llvm;

+#define DEBUG_TYPE "thumb2-it"
+
 STATISTIC(NumITs,        "Number of IT blocks inserted");
 STATISTIC(NumMovedInsts, "Number of predicated instructions moved");

@@ -33,9 +34,9 @@ namespace {
     const TargetRegisterInfo *TRI;
     ARMFunctionInfo *AFI;

-    virtual bool runOnMachineFunction(MachineFunction &Fn);
+    bool runOnMachineFunction(MachineFunction &Fn) override;

-    virtual const char *getPassName() const {
+    const char *getPassName() const override {
       return "Thumb IT blocks insertion pass";
     }

@@ -242,7 +243,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {

     // Finalize the bundle.
     MachineBasicBlock::instr_iterator LI = LastITMI;
-    finalizeBundle(MBB, InsertPos.getInstrIterator(), llvm::next(LI));
+    finalizeBundle(MBB, InsertPos.getInstrIterator(), std::next(LI));

     Modified = true;
     ++NumITs;
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index 91788ac4f893..a9df006fb8c1 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -12,7 +12,6 @@
 //===----------------------------------------------------------------------===//

 #include "Thumb2InstrInfo.h"
-#include "ARM.h"
 #include "ARMConstantPoolValue.h"
 #include "ARMMachineFunctionInfo.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h
index 2cdcd06e9350..34d45d307ff4 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h
@@ -14,7 +14,6 @@
 #ifndef THUMB2INSTRUCTIONINFO_H
 #define THUMB2INSTRUCTIONINFO_H

-#include "ARM.h"
 #include "ARMBaseInstrInfo.h"
 #include "Thumb2RegisterInfo.h"

@@ -28,40 +27,40 @@ public:
   explicit Thumb2InstrInfo(const ARMSubtarget &STI);

   /// getNoopForMachoTarget - Return the noop instruction to use for a noop.
-  void getNoopForMachoTarget(MCInst &NopInst) const;
+  void getNoopForMachoTarget(MCInst &NopInst) const override;

   // Return the non-pre/post incrementing version of 'Opc'. Return 0
   // if there is not such an opcode.
-  unsigned getUnindexedOpcode(unsigned Opc) const;
+  unsigned getUnindexedOpcode(unsigned Opc) const override;

   void ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
-                               MachineBasicBlock *NewDest) const;
+                               MachineBasicBlock *NewDest) const override;

   bool isLegalToSplitMBBAt(MachineBasicBlock &MBB,
-                           MachineBasicBlock::iterator MBBI) const;
+                           MachineBasicBlock::iterator MBBI) const override;

   void copyPhysReg(MachineBasicBlock &MBB,
                    MachineBasicBlock::iterator I, DebugLoc DL,
                    unsigned DestReg, unsigned SrcReg,
-                   bool KillSrc) const;
+                   bool KillSrc) const override;

   void storeRegToStackSlot(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
                            unsigned SrcReg, bool isKill, int FrameIndex,
                            const TargetRegisterClass *RC,
-                           const TargetRegisterInfo *TRI) const;
+                           const TargetRegisterInfo *TRI) const override;

   void loadRegFromStackSlot(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MBBI,
                             unsigned DestReg, int FrameIndex,
                             const TargetRegisterClass *RC,
-                            const TargetRegisterInfo *TRI) const;
+                            const TargetRegisterInfo *TRI) const override;

   /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
   /// such, whenever a client has an instance of instruction info, it should
   /// always be able to get register info as well (through this method).
   ///
-  const Thumb2RegisterInfo &getRegisterInfo() const { return RI; }
+  const Thumb2RegisterInfo &getRegisterInfo() const override { return RI; }
 };

 /// getITInstrPredicate - Valid only in Thumb2 mode. This function is identical
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp
index 4cb827f308ef..782d81fd424d 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp
@@ -14,7 +14,6 @@

 #include "Thumb2RegisterInfo.h"
 #include "ARM.h"
-#include "ARMBaseInstrInfo.h"
 #include "ARMSubtarget.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -22,6 +21,8 @@
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
 using namespace llvm;

 Thumb2RegisterInfo::Thumb2RegisterInfo(const ARMSubtarget &sti)
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h b/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h
index b1d63fa86d01..8a33e6cea919 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h
+++ b/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h
@@ -15,9 +15,7 @@
 #ifndef THUMB2REGISTERINFO_H
 #define THUMB2REGISTERINFO_H

-#include "ARM.h"
 #include "ARMBaseRegisterInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"

 namespace llvm {

@@ -29,13 +27,11 @@ public:

   /// emitLoadConstPool - Emits a load from constpool to materialize the
   /// specified immediate.
-  void emitLoadConstPool(MachineBasicBlock &MBB,
-                         MachineBasicBlock::iterator &MBBI,
-                         DebugLoc dl,
-                         unsigned DestReg, unsigned SubIdx, int Val,
-                         ARMCC::CondCodes Pred = ARMCC::AL,
-                         unsigned PredReg = 0,
-                         unsigned MIFlags = MachineInstr::NoFlags) const;
+  void
+  emitLoadConstPool(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+                    DebugLoc dl, unsigned DestReg, unsigned SubIdx, int Val,
+                    ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0,
+                    unsigned MIFlags = MachineInstr::NoFlags) const override;
 };
 }
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
index 4795aae12fd6..09debe76f1b6 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -7,10 +7,8 @@
 //
 //===----------------------------------------------------------------------===//

-#define DEBUG_TYPE "t2-reduce-size"
 #include "ARM.h"
 #include "ARMBaseInstrInfo.h"
-#include "ARMBaseRegisterInfo.h"
 #include "ARMSubtarget.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
 #include "Thumb2InstrInfo.h"
@@ -23,9 +21,11 @@
 #include "llvm/IR/Function.h" // To access Function attributes
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
 using namespace llvm;

+#define DEBUG_TYPE "t2-reduce-size"
+
 STATISTIC(NumNarrows,  "Number of 32-bit instrs reduced to 16-bit ones");
 STATISTIC(Num2Addrs,   "Number of 32-bit instrs reduced to 2addr 16-bit ones");
 STATISTIC(NumLdSts,    "Number of 32-bit load / store reduced to 16-bit ones");
@@ -137,9 +137,9 @@ namespace {
     const Thumb2InstrInfo *TII;
     const ARMSubtarget *STI;

-    virtual bool runOnMachineFunction(MachineFunction &MF);
+    bool runOnMachineFunction(MachineFunction &MF) override;

-    virtual const char *getPassName() const {
+    const char *getPassName() const override {
       return "Thumb2 instruction size reduction pass";
     }

@@ -256,8 +256,7 @@ Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Use, bool FirstInSelfLoop) {
     return HighLatencyCPSR || FirstInSelfLoop;

   SmallSet<unsigned, 2> Defs;
-  for (unsigned i = 0, e = CPSRDef->getNumOperands(); i != e; ++i) {
-    const MachineOperand &MO = CPSRDef->getOperand(i);
+  for (const MachineOperand &MO : CPSRDef->operands()) {
     if (!MO.isReg() || MO.isUndef() || MO.isUse())
       continue;
     unsigned Reg = MO.getReg();
@@ -266,8 +265,7 @@ Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Use, bool FirstInSelfLoop) {
     Defs.insert(Reg);
   }

-  for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
-    const MachineOperand &MO = Use->getOperand(i);
+  for (const MachineOperand &MO : Use->operands()) {
     if (!MO.isReg() || MO.isUndef() || MO.isDef())
       continue;
     unsigned Reg = MO.getReg();
@@ -858,8 +856,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,

 static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR, bool &DefCPSR) {
   bool HasDef = false;
-  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
-    const MachineOperand &MO = MI.getOperand(i);
+  for (const MachineOperand &MO : MI.operands()) {
     if (!MO.isReg() || MO.isUndef() || MO.isUse())
       continue;
     if (MO.getReg() != ARM::CPSR)
@@ -874,8 +871,7 @@ static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR, bool &DefCPSR) {
 }

 static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
-  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
-    const MachineOperand &MO = MI.getOperand(i);
+  for (const MachineOperand &MO : MI.operands()) {
     if (!MO.isReg() || MO.isUndef() || MO.isDef())
       continue;
     if (MO.getReg() != ARM::CPSR)
@@ -920,15 +916,14 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
   // Yes, CPSR could be livein.
   bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
-  MachineInstr *BundleMI = 0;
+  MachineInstr *BundleMI = nullptr;

-  CPSRDef = 0;
+  CPSRDef = nullptr;
   HighLatencyCPSR = false;

   // Check predecessors for the latest CPSRDef.
-  for (MachineBasicBlock::pred_iterator
-       I = MBB.pred_begin(), E = MBB.pred_end(); I != E; ++I) {
-    const MBBInfo &PInfo = BlockInfo[(*I)->getNumber()];
+  for (auto *Pred : MBB.predecessors()) {
+    const MBBInfo &PInfo = BlockInfo[Pred->getNumber()];
     if (!PInfo.Visited) {
       // Since blocks are visited in RPO, this must be a back-edge.
       continue;
@@ -945,7 +940,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
   MachineBasicBlock::instr_iterator MII = MBB.instr_begin(),E = MBB.instr_end();
   MachineBasicBlock::instr_iterator NextMII;
   for (; MII != E; MII = NextMII) {
-    NextMII = llvm::next(MII);
+    NextMII = std::next(MII);

     MachineInstr *MI = &*MII;
     if (MI->isBundle()) {
@@ -962,7 +957,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {

     if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop)) {
       Modified = true;
-      MachineBasicBlock::instr_iterator I = prior(NextMII);
+      MachineBasicBlock::instr_iterator I = std::prev(NextMII);
       MI = &*I;
       // Removing and reinserting the first instruction in a bundle will break
       // up the bundle. Fix the bundling if it was broken.
@@ -980,13 +975,16 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
       MachineOperand *MO = BundleMI->findRegisterDefOperand(ARM::CPSR);
       if (MO && !MO->isDead())
         LiveCPSR = true;
+      MO = BundleMI->findRegisterUseOperand(ARM::CPSR);
+      if (MO && !MO->isKill())
+        LiveCPSR = true;
     }

     bool DefCPSR = false;
     LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
     if (MI->isCall()) {
       // Calls don't really set CPSR.
-      CPSRDef = 0;
+      CPSRDef = nullptr;
       HighLatencyCPSR = false;
       IsSelfLoop = false;
     } else if (DefCPSR) {
@@ -1012,8 +1010,8 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
   AttributeSet FnAttrs = MF.getFunction()->getAttributes();
   OptimizeSize = FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
                                       Attribute::OptimizeForSize);
-  MinimizeSize = FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
-                                      Attribute::MinSize);
+  MinimizeSize =
+      FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);

   BlockInfo.clear();
   BlockInfo.resize(MF.getNumBlockIDs());