| author | Dimitry Andric <dim@FreeBSD.org> | 2010-09-17 15:48:55 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2010-09-17 15:48:55 +0000 |
| commit | d39c594d39df7f283c2fb8a704a3f31c501180d9 | |
| tree | 36453626c792cccd91f783a38a169d610a6b9db9 /lib/Target | |
| parent | 6144c1de6a7674dad94290650e4e14f24d42e421 | |
Vendor import of llvm r114020 (from the release_28 branch):
http://llvm.org/svn/llvm-project/llvm/branches/release_28@114020
Approved by: rpaulo (mentor)
Notes:
svn path=/vendor/llvm/dist/; revision=212793
svn path=/vendor/llvm/llvm-r114020/; revision=212794; tag=vendor/llvm/llvm-r114020
Diffstat (limited to 'lib/Target')
217 files changed, 10107 insertions, 7598 deletions
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index 14825a785649..271ca44c2b69 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -30,22 +30,22 @@ class formatted_raw_ostream; namespace ARMCC { // The CondCodes constants map directly to the 4-bit encoding of the // condition field for predicated instructions. - enum CondCodes { - EQ, - NE, - HS, - LO, - MI, - PL, - VS, - VC, - HI, - LS, - GE, - LT, - GT, - LE, - AL + enum CondCodes { // Meaning (integer) Meaning (floating-point) + EQ, // Equal Equal + NE, // Not equal Not equal, or unordered + HS, // Carry set >, ==, or unordered + LO, // Carry clear Less than + MI, // Minus, negative Less than + PL, // Plus, positive or zero >, ==, or unordered + VS, // Overflow Unordered + VC, // No overflow Not unordered + HI, // Unsigned higher Greater than, or unordered + LS, // Unsigned lower or same Less than or equal + GE, // Greater than or equal Greater than or equal + LT, // Less than Less than, or unordered + GT, // Greater than Greater than + LE, // Less than or equal <, ==, or unordered + AL // Always (unconditional) Always (unconditional) }; inline static CondCodes getOppositeCondition(CondCodes CC) { @@ -90,6 +90,33 @@ inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) { } } +namespace ARM_MB { + // The Memory Barrier Option constants map directly to the 4-bit encoding of + // the option field for memory barrier operations. + enum MemBOpt { + ST = 14, + ISH = 11, + ISHST = 10, + NSH = 7, + NSHST = 6, + OSH = 3, + OSHST = 2 + }; + + inline static const char *MemBOptToString(unsigned val) { + switch (val) { + default: llvm_unreachable("Unknown memory opetion"); + case ST: return "st"; + case ISH: return "ish"; + case ISHST: return "ishst"; + case NSH: return "nsh"; + case NSHST: return "nshst"; + case OSH: return "osh"; + case OSHST: return "oshst"; + } + } +} // namespace ARM_MB + FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM, CodeGenOpt::Level OptLevel); @@ -98,6 +125,7 @@ FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM, FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false); FunctionPass *createARMExpandPseudoPass(); +FunctionPass *createARMGlobalMergePass(const TargetLowering* tli); FunctionPass *createARMConstantIslandPass(); FunctionPass *createNEONPreAllocPass(); FunctionPass *createNEONMoveFixPass(); diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index fa64d6c2a4b4..d6a8f19724dc 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -1,4 +1,4 @@ -//===- ARM.td - Describe the ARM Target Machine -----------------*- C++ -*-===// +//===- ARM.td - Describe the ARM Target Machine ------------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -20,20 +20,6 @@ include "llvm/Target/Target.td" // ARM Subtarget features. 
// -def ArchV4T : SubtargetFeature<"v4t", "ARMArchVersion", "V4T", - "ARM v4T">; -def ArchV5T : SubtargetFeature<"v5t", "ARMArchVersion", "V5T", - "ARM v5T">; -def ArchV5TE : SubtargetFeature<"v5te", "ARMArchVersion", "V5TE", - "ARM v5TE, v5TEj, v5TExp">; -def ArchV6 : SubtargetFeature<"v6", "ARMArchVersion", "V6", - "ARM v6">; -def ArchV6T2 : SubtargetFeature<"v6t2", "ARMArchVersion", "V6T2", - "ARM v6t2">; -def ArchV7A : SubtargetFeature<"v7a", "ARMArchVersion", "V7A", - "ARM v7A">; -def ArchV7M : SubtargetFeature<"v7m", "ARMArchVersion", "V7M", - "ARM v7M">; def FeatureVFP2 : SubtargetFeature<"vfp2", "ARMFPUType", "VFPv2", "Enable VFP2 instructions">; def FeatureVFP3 : SubtargetFeature<"vfp3", "ARMFPUType", "VFPv3", @@ -42,14 +28,20 @@ def FeatureNEON : SubtargetFeature<"neon", "ARMFPUType", "NEON", "Enable NEON instructions">; def FeatureThumb2 : SubtargetFeature<"thumb2", "ThumbMode", "Thumb2", "Enable Thumb2 instructions">; +def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true", + "Does not support ARM mode execution">; def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true", "Enable half-precision floating point">; def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true", "Enable divide instructions">; -def FeatureT2ExtractPack: SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true", +def FeatureT2XtPk : SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true", "Enable Thumb2 extract and pack instructions">; +def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true", + "Has data barrier (dmb / dsb) instructions">; def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true", "FP compare + branch is slow">; +def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true", + "Floating point unit supports single precision only">; // Some processors have multiply-accumulate instructions that don't // play nicely with other VFP instructions, and it's generally better @@ -57,14 +49,41 @@ def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true", // FIXME: Currently, this is only flagged for Cortex-A8. It may be true for // others as well. We should do more benchmarking and confirm one way or // the other. -def FeatureHasSlowVMLx : SubtargetFeature<"vmlx", "SlowVMLx", "true", - "Disable VFP MAC instructions">; +def FeatureHasSlowVMLx : SubtargetFeature<"vmlx", "SlowVMLx", "true", + "Disable VFP MAC instructions">; // Some processors benefit from using NEON instructions for scalar // single-precision FP operations. def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP", "true", "Use NEON for single precision FP">; +// Disable 32-bit to 16-bit narrowing for experimentation. +def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true", + "Prefer 32-bit Thumb instrs">; + + +// ARM architectures. 
+def ArchV4T : SubtargetFeature<"v4t", "ARMArchVersion", "V4T", + "ARM v4T">; +def ArchV5T : SubtargetFeature<"v5t", "ARMArchVersion", "V5T", + "ARM v5T">; +def ArchV5TE : SubtargetFeature<"v5te", "ARMArchVersion", "V5TE", + "ARM v5TE, v5TEj, v5TExp">; +def ArchV6 : SubtargetFeature<"v6", "ARMArchVersion", "V6", + "ARM v6">; +def ArchV6M : SubtargetFeature<"v6m", "ARMArchVersion", "V6M", + "ARM v6m", + [FeatureNoARM, FeatureDB]>; +def ArchV6T2 : SubtargetFeature<"v6t2", "ARMArchVersion", "V6T2", + "ARM v6t2", + [FeatureThumb2]>; +def ArchV7A : SubtargetFeature<"v7a", "ARMArchVersion", "V7A", + "ARM v7A", + [FeatureThumb2, FeatureNEON, FeatureDB]>; +def ArchV7M : SubtargetFeature<"v7m", "ARMArchVersion", "V7M", + "ARM v7M", + [FeatureThumb2, FeatureNoARM, FeatureDB, + FeatureHWDiv]>; //===----------------------------------------------------------------------===// // ARM Processors supported. @@ -122,20 +141,23 @@ def : Processor<"arm1176jzf-s", ARMV6Itineraries, [ArchV6, FeatureVFP2]>; def : Processor<"mpcorenovfp", ARMV6Itineraries, [ArchV6]>; def : Processor<"mpcore", ARMV6Itineraries, [ArchV6, FeatureVFP2]>; +// V6M Processors. +def : Processor<"cortex-m0", ARMV6Itineraries, [ArchV6M]>; + // V6T2 Processors. -def : Processor<"arm1156t2-s", ARMV6Itineraries, - [ArchV6T2, FeatureThumb2]>; -def : Processor<"arm1156t2f-s", ARMV6Itineraries, - [ArchV6T2, FeatureThumb2, FeatureVFP2]>; +def : Processor<"arm1156t2-s", ARMV6Itineraries, [ArchV6T2]>; +def : Processor<"arm1156t2f-s", ARMV6Itineraries, [ArchV6T2, FeatureVFP2]>; // V7 Processors. def : Processor<"cortex-a8", CortexA8Itineraries, - [ArchV7A, FeatureThumb2, FeatureNEON, FeatureHasSlowVMLx, - FeatureSlowFPBrcc, FeatureNEONForFP, FeatureT2ExtractPack]>; + [ArchV7A, FeatureHasSlowVMLx, + FeatureSlowFPBrcc, FeatureNEONForFP, FeatureT2XtPk]>; def : Processor<"cortex-a9", CortexA9Itineraries, - [ArchV7A, FeatureThumb2, FeatureNEON, FeatureT2ExtractPack]>; -def : ProcNoItin<"cortex-m3", [ArchV7M, FeatureThumb2, FeatureHWDiv]>; -def : ProcNoItin<"cortex-m4", [ArchV7M, FeatureThumb2, FeatureHWDiv]>; + [ArchV7A, FeatureT2XtPk]>; + +// V7M Processors. +def : ProcNoItin<"cortex-m3", [ArchV7M]>; +def : ProcNoItin<"cortex-m4", [ArchV7M, FeatureVFP2, FeatureVFPOnlySP]>; //===----------------------------------------------------------------------===// // Register File Description diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h index 92a13f1d751c..db481005b3a4 100644 --- a/lib/Target/ARM/ARMAddressingModes.h +++ b/lib/Target/ARM/ARMAddressingModes.h @@ -458,6 +458,7 @@ namespace ARM_AM { // IB - Increment before // DA - Decrement after // DB - Decrement before + // For VFP instructions, only the IA and DB modes are valid. static inline AMSubMode getAM4SubMode(unsigned Mode) { return (AMSubMode)(Mode & 0x7); @@ -477,14 +478,6 @@ namespace ARM_AM { // // The first operand is always a Reg. The second operand encodes the // operation in bit 8 and the immediate in bits 0-7. - // - // This is also used for FP load/store multiple ops. The second operand - // encodes the number of registers (or 2 times the number of registers - // for DPR ops) in bits 0-7. In addition, bits 8-10 encode one of the - // following two sub-modes: - // - // IA - Increment after - // DB - Decrement before /// getAM5Opc - This function encodes the addrmode5 opc field. static inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) { @@ -498,17 +491,6 @@ namespace ARM_AM { return ((AM5Opc >> 8) & 1) ? 
sub : add; } - /// getAM5Opc - This function encodes the addrmode5 opc field for VLDM and - /// VSTM instructions. - static inline unsigned getAM5Opc(AMSubMode SubMode, unsigned char Offset) { - assert((SubMode == ia || SubMode == db) && - "Illegal addressing mode 5 sub-mode!"); - return ((int)SubMode << 8) | Offset; - } - static inline AMSubMode getAM5SubMode(unsigned AM5Opc) { - return (AMSubMode)((AM5Opc >> 8) & 0x7); - } - //===--------------------------------------------------------------------===// // Addressing Mode #6 //===--------------------------------------------------------------------===// diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 946f4744f5bb..6cfd5961149f 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -17,7 +17,7 @@ #include "ARMBuildAttrs.h" #include "ARMAddressingModes.h" #include "ARMConstantPoolValue.h" -#include "ARMInstPrinter.h" +#include "AsmPrinter/ARMInstPrinter.h" #include "ARMMachineFunctionInfo.h" #include "ARMMCInstLower.h" #include "ARMTargetMachine.h" @@ -47,6 +47,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include <cctype> @@ -56,6 +57,15 @@ static cl::opt<bool> EnableMCInst("enable-arm-mcinst-printer", cl::Hidden, cl::desc("enable experimental asmprinter gunk in the arm backend")); +namespace llvm { + namespace ARM { + enum DW_ISA { + DW_ISA_ARM_thumb = 1, + DW_ISA_ARM_arm = 2 + }; + } +} + namespace { class ARMAsmPrinter : public AsmPrinter { @@ -80,9 +90,9 @@ namespace { virtual const char *getPassName() const { return "ARM Assembly Printer"; } - + void printInstructionThroughMCStreamer(const MachineInstr *MI); - + void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O, const char *Modifier = 0); @@ -110,8 +120,12 @@ namespace { void printAddrModePCOperand(const MachineInstr *MI, int OpNum, raw_ostream &O, const char *Modifier = 0); - void printBitfieldInvMaskImmOperand (const MachineInstr *MI, int OpNum, - raw_ostream &O); + void printBitfieldInvMaskImmOperand(const MachineInstr *MI, int OpNum, + raw_ostream &O); + void printMemBOption(const MachineInstr *MI, int OpNum, + raw_ostream &O); + void printShiftImmOperand(const MachineInstr *MI, int OpNum, + raw_ostream &O); void printThumbS4ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O); @@ -190,12 +204,32 @@ namespace { virtual void EmitInstruction(const MachineInstr *MI); bool runOnMachineFunction(MachineFunction &F); - + virtual void EmitConstantPool() {} // we emit constant pools customly! virtual void EmitFunctionEntryLabel(); void EmitStartOfAsmFile(Module &M); void EmitEndOfAsmFile(Module &M); + MachineLocation getDebugValueLocation(const MachineInstr *MI) const { + MachineLocation Location; + assert (MI->getNumOperands() == 4 && "Invalid no. of machine operands!"); + // Frame address. Currently handles register +- offset only. + if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm()) + Location.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm()); + else { + DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n"); + } + return Location; + } + + virtual unsigned getISAEncoding() { + // ARM/Darwin adds ISA to the DWARF info for each function. + if (!Subtarget->isTargetDarwin()) + return 0; + return Subtarget->isThumb() ? 
+ llvm::ARM::DW_ISA_ARM_thumb : llvm::ARM::DW_ISA_ARM_arm; + } + MCSymbol *GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2, const MachineBasicBlock *MBB) const; MCSymbol *GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const; @@ -208,7 +242,7 @@ namespace { EmitMachineConstantPoolValue(MCPV, OS); OutStreamer.EmitRawText(OS.str()); } - + void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV, raw_ostream &O) { switch (TM.getTargetData()->getTypeAllocSize(MCPV->getType())) { @@ -234,7 +268,7 @@ namespace { // FIXME: Remove this when Darwin transition to @GOT like syntax. MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); O << *Sym; - + MachineModuleInfoMachO &MMIMachO = MMI->getObjFileInfo<MachineModuleInfoMachO>(); MachineModuleInfoImpl::StubValueTy &StubSym = @@ -278,7 +312,7 @@ void ARMAsmPrinter::EmitFunctionEntryLabel() { OutStreamer.EmitRawText(OS.str()); } } - + OutStreamer.EmitLabel(CurrentFnSym); } @@ -358,7 +392,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, case MachineOperand::MO_ExternalSymbol: { bool isCallOp = Modifier && !strcmp(Modifier, "call"); O << *GetExternalSymbolSymbol(MO.getSymbolName()); - + if (isCallOp && Subtarget->isTargetELF() && TM.getRelocationModel() == Reloc::PIC_) O << "(PLT)"; @@ -438,15 +472,13 @@ void ARMAsmPrinter::printSORegOperand(const MachineInstr *MI, int Op, O << getRegisterName(MO1.getReg()); // Print the shift opc. - O << ", " - << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm())) - << " "; - + ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO3.getImm()); + O << ", " << ARM_AM::getShiftOpcStr(ShOpc); if (MO2.getReg()) { - O << getRegisterName(MO2.getReg()); + O << ' ' << getRegisterName(MO2.getReg()); assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0); - } else { - O << "#" << ARM_AM::getSORegOffset(MO3.getImm()); + } else if (ShOpc != ARM_AM::rrx) { + O << " #" << ARM_AM::getSORegOffset(MO3.getImm()); } } @@ -575,16 +607,6 @@ void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op, assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg())); - if (Modifier && strcmp(Modifier, "submode") == 0) { - ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MO2.getImm()); - O << ARM_AM::getAMSubModeStr(Mode); - return; - } else if (Modifier && strcmp(Modifier, "base") == 0) { - // Used for FSTM{D|S} and LSTM{D|S} operations. 
- O << getRegisterName(MO1.getReg()); - return; - } - O << "[" << getRegisterName(MO1.getReg()); if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) { @@ -641,6 +663,32 @@ ARMAsmPrinter::printBitfieldInvMaskImmOperand(const MachineInstr *MI, int Op, O << "#" << lsb << ", #" << width; } +void +ARMAsmPrinter::printMemBOption(const MachineInstr *MI, int OpNum, + raw_ostream &O) { + unsigned val = MI->getOperand(OpNum).getImm(); + O << ARM_MB::MemBOptToString(val); +} + +void ARMAsmPrinter::printShiftImmOperand(const MachineInstr *MI, int OpNum, + raw_ostream &O) { + unsigned ShiftOp = MI->getOperand(OpNum).getImm(); + ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp); + switch (Opc) { + case ARM_AM::no_shift: + return; + case ARM_AM::lsl: + O << ", lsl #"; + break; + case ARM_AM::asr: + O << ", asr #"; + break; + default: + assert(0 && "unexpected shift opcode for shift immediate operand"); + } + O << ARM_AM::getSORegOffset(ShiftOp); +} + //===--------------------------------------------------------------------===// void ARMAsmPrinter::printThumbS4ImmOperand(const MachineInstr *MI, int Op, @@ -737,12 +785,11 @@ void ARMAsmPrinter::printT2SOOperand(const MachineInstr *MI, int OpNum, O << getRegisterName(Reg); // Print the shift opc. - O << ", " - << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm())) - << " "; - assert(MO2.isImm() && "Not a valid t2_so_reg value!"); - O << "#" << ARM_AM::getSORegOffset(MO2.getImm()); + ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm()); + O << ", " << ARM_AM::getShiftOpcStr(ShOpc); + if (ShOpc != ARM_AM::rrx) + O << " #" << ARM_AM::getSORegOffset(MO2.getImm()); } void ARMAsmPrinter::printT2AddrModeImm12Operand(const MachineInstr *MI, @@ -916,12 +963,12 @@ void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNum, const MachineOperand &MO1 = MI->getOperand(OpNum); const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id - + unsigned JTI = MO1.getIndex(); MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm()); // Can't use EmitLabel until instprinter happens, label comes out in the wrong // order. - O << *JTISymbol << ":\n"; + O << "\n" << *JTISymbol << ":\n"; const char *JTEntryDirective = MAI->getData32bitsDirective(); @@ -958,12 +1005,12 @@ void ARMAsmPrinter::printJT2BlockOperand(const MachineInstr *MI, int OpNum, const MachineOperand &MO1 = MI->getOperand(OpNum); const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id unsigned JTI = MO1.getIndex(); - + MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm()); - + // Can't use EmitLabel until instprinter happens, label comes out in the wrong // order. 
- O << *JTISymbol << ":\n"; + O << "\n" << *JTISymbol << ":\n"; const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); @@ -980,7 +1027,7 @@ void ARMAsmPrinter::printJT2BlockOperand(const MachineInstr *MI, int OpNum, O << MAI->getData8bitsDirective(); else if (HalfWordOffset) O << MAI->getData16bitsDirective(); - + if (ByteOffset || HalfWordOffset) O << '(' << *MBB->getSymbol() << "-" << *JTISymbol << ")/2"; else @@ -1086,10 +1133,10 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { printInstructionThroughMCStreamer(MI); return; } - + if (MI->getOpcode() == ARM::CONSTPOOL_ENTRY) EmitAlignment(2); - + SmallString<128> Str; raw_svector_ostream OS(Str); if (MI->getOpcode() == ARM::DBG_VALUE) { @@ -1112,7 +1159,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { printInstruction(MI, OS); OutStreamer.EmitRawText(OS.str()); - + // Make sure the instruction that follows TBB is 2-byte aligned. // FIXME: Constant island pass should insert an "ALIGN" instruction instead. if (MI->getOpcode() == ARM::t2TBB) @@ -1129,7 +1176,7 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { // avoid out-of-range branches that are due a fundamental limitation of // the way symbol offsets are encoded with the current Darwin ARM // relocations. - const TargetLoweringObjectFileMachO &TLOFMacho = + const TargetLoweringObjectFileMachO &TLOFMacho = static_cast<const TargetLoweringObjectFileMachO &>( getObjFileLowering()); OutStreamer.SwitchSection(TLOFMacho.getTextSection()); @@ -1148,6 +1195,12 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { 16, SectionKind::getText()); OutStreamer.SwitchSection(sect); } + const MCSection *StaticInitSect = + OutContext.getMachOSection("__TEXT", "__StaticInit", + MCSectionMachO::S_REGULAR | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + SectionKind::getText()); + OutStreamer.SwitchSection(StaticInitSect); } } @@ -1173,8 +1226,8 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { OutStreamer.EmitRawText("\t.eabi_attribute " + Twine(ARMBuildAttrs::ABI_FP_exceptions) + ", 1"); } - - if (FiniteOnlyFPMath()) + + if (NoInfsFPMath && NoNaNsFPMath) OutStreamer.EmitRawText("\t.eabi_attribute " + Twine(ARMBuildAttrs::ABI_FP_number_model)+ ", 1"); else @@ -1280,7 +1333,7 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { // LPC0: // add r0, pc, r0 // This adds the address of LPC0 to r0. - + // Emit the label. // FIXME: MOVE TO SHARED PLACE. unsigned Id = (unsigned)MI->getOperand(2).getImm(); @@ -1288,8 +1341,8 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { MCSymbol *Label =OutContext.GetOrCreateSymbol(Twine(Prefix) + "PC" + Twine(getFunctionNumber()) + "_" + Twine(Id)); OutStreamer.EmitLabel(Label); - - + + // Form and emit tha dd. MCInst AddInst; AddInst.setOpcode(ARM::ADDrr); @@ -1315,7 +1368,7 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal); else EmitGlobalConstant(MCPE.Val.ConstVal); - + return; } case ARM::MOVi2pieces: { // FIXME: Remove asmstring from td file. 
@@ -1325,13 +1378,13 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { unsigned SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal); unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal); - + { MCInst TmpInst; TmpInst.setOpcode(ARM::MOVi); TmpInst.addOperand(MCOperand::CreateReg(DstReg)); TmpInst.addOperand(MCOperand::CreateImm(SOImmValV1)); - + // Predicate. TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm())); TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg())); @@ -1349,11 +1402,11 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { // Predicate. TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm())); TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg())); - + TmpInst.addOperand(MCOperand::CreateReg(0)); // cc_out OutStreamer.EmitInstruction(TmpInst); } - return; + return; } case ARM::MOVi32imm: { // FIXME: Remove asmstring from td file. // This is a hack that lowers as a two instruction sequence. @@ -1384,32 +1437,32 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { TmpInst.setOpcode(ARM::MOVi16); TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // dstreg TmpInst.addOperand(V1); // lower16(imm) - + // Predicate. TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm())); TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg())); - + OutStreamer.EmitInstruction(TmpInst); } - + { MCInst TmpInst; TmpInst.setOpcode(ARM::MOVTi16); TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // dstreg TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // srcreg TmpInst.addOperand(V2); // upper16(imm) - + // Predicate. TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm())); TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg())); - + OutStreamer.EmitInstruction(TmpInst); } - + return; } } - + MCInst TmpInst; MCInstLowering.Lower(MI, TmpInst); OutStreamer.EmitInstruction(TmpInst); diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 49c16f3e0720..3a8bebe0dd24 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -15,9 +15,9 @@ #include "ARM.h" #include "ARMAddressingModes.h" #include "ARMConstantPoolValue.h" -#include "ARMGenInstrInfo.inc" #include "ARMMachineFunctionInfo.h" #include "ARMRegisterInfo.h" +#include "ARMGenInstrInfo.inc" #include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/GlobalValue.h" @@ -501,7 +501,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { llvm_unreachable("Unknown or unset size field for instr!"); case TargetOpcode::IMPLICIT_DEF: case TargetOpcode::KILL: - case TargetOpcode::DBG_LABEL: + case TargetOpcode::PROLOG_LABEL: case TargetOpcode::EH_LABEL: case TargetOpcode::DBG_VALUE: return 0; @@ -573,48 +573,6 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { return 0; // Not reached } -/// Return true if the instruction is a register to register move and -/// leave the source and dest operands in the passed parameters. 
-/// -bool -ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned& SrcSubIdx, unsigned& DstSubIdx) const { - switch (MI.getOpcode()) { - default: break; - case ARM::VMOVS: - case ARM::VMOVD: - case ARM::VMOVDneon: - case ARM::VMOVQ: - case ARM::VMOVQQ : { - SrcReg = MI.getOperand(1).getReg(); - DstReg = MI.getOperand(0).getReg(); - SrcSubIdx = MI.getOperand(1).getSubReg(); - DstSubIdx = MI.getOperand(0).getSubReg(); - return true; - } - case ARM::MOVr: - case ARM::MOVr_TC: - case ARM::tMOVr: - case ARM::tMOVgpr2tgpr: - case ARM::tMOVtgpr2gpr: - case ARM::tMOVgpr2gpr: - case ARM::t2MOVr: { - assert(MI.getDesc().getNumOperands() >= 2 && - MI.getOperand(0).isReg() && - MI.getOperand(1).isReg() && - "Invalid ARM MOV instruction"); - SrcReg = MI.getOperand(1).getReg(); - DstReg = MI.getOperand(0).getReg(); - SrcSubIdx = MI.getOperand(1).getSubReg(); - DstSubIdx = MI.getOperand(0).getSubReg(); - return true; - } - } - - return false; -} - unsigned ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { @@ -763,8 +721,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Align); // tGPR is used sometimes in ARM instructions that need to avoid using - // certain registers. Just treat it as GPR here. - if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass) + // certain registers. Just treat it as GPR here. Likewise, rGPR. + if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass + || RC == ARM::rGPRRegisterClass) RC = ARM::GPRRegisterClass; switch (RC->getID()) { @@ -798,7 +757,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQ)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI) - .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)) + .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)) .addMemOperand(MMO)); } break; @@ -818,7 +777,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineInstrBuilder MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD)) .addFrameIndex(FI) - .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))) + .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))) .addMemOperand(MMO); MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); @@ -830,7 +789,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineInstrBuilder MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD)) .addFrameIndex(FI) - .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))) + .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))) .addMemOperand(MMO); MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); @@ -865,7 +824,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, // tGPR is used sometimes in ARM instructions that need to avoid using // certain registers. Just treat it as GPR here. 
- if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass) + if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass + || RC == ARM::rGPRRegisterClass) RC = ARM::GPRRegisterClass; switch (RC->getID()) { @@ -893,7 +853,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, } else { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQ), DestReg) .addFrameIndex(FI) - .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)) + .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)) .addMemOperand(MMO)); } break; @@ -910,7 +870,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineInstrBuilder MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD)) .addFrameIndex(FI) - .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))) + .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))) .addMemOperand(MMO); MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI); @@ -922,7 +882,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineInstrBuilder MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD)) .addFrameIndex(FI) - .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))) + .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))) .addMemOperand(MMO); MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI); @@ -963,6 +923,11 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) { unsigned PCLabelId = AFI->createConstPoolEntryUId(); ARMConstantPoolValue *NewCPV = 0; + // FIXME: The below assumes PIC relocation model and that the function + // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and + // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR + // instructions, so that's probably OK, but is PIC always correct when + // we get here? if (ACPV->isGlobalValue()) NewCPV = new ARMConstantPoolValue(ACPV->getGV(), PCLabelId, ARMCP::CPValue, 4); @@ -972,6 +937,9 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) { else if (ACPV->isBlockAddress()) NewCPV = new ARMConstantPoolValue(ACPV->getBlockAddress(), PCLabelId, ARMCP::CPBlockAddress, 4); + else if (ACPV->isLSDA()) + NewCPV = new ARMConstantPoolValue(MF.getFunction(), PCLabelId, + ARMCP::CPLSDA, 4); else llvm_unreachable("Unexpected ARM constantpool value type!!"); CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment()); @@ -1393,3 +1361,63 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, Offset = (isSub) ? -Offset : Offset; return Offset == 0; } + +bool ARMBaseInstrInfo:: +AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, int &CmpValue) const { + switch (MI->getOpcode()) { + default: break; + case ARM::CMPri: + case ARM::CMPzri: + case ARM::t2CMPri: + case ARM::t2CMPzri: + SrcReg = MI->getOperand(0).getReg(); + CmpValue = MI->getOperand(1).getImm(); + return true; + } + + return false; +} + +/// ConvertToSetZeroFlag - Convert the instruction to set the "zero" flag so +/// that we can remove a "comparison with zero". +bool ARMBaseInstrInfo:: +ConvertToSetZeroFlag(MachineInstr *MI, MachineInstr *CmpInstr) const { + // Conservatively refuse to convert an instruction which isn't in the same BB + // as the comparison. + if (MI->getParent() != CmpInstr->getParent()) + return false; + + // Check that CPSR isn't set between the comparison instruction and the one we + // want to change. 
+ MachineBasicBlock::const_iterator I = CmpInstr, E = MI; + --I; + for (; I != E; --I) { + const MachineInstr &Instr = *I; + + for (unsigned IO = 0, EO = Instr.getNumOperands(); IO != EO; ++IO) { + const MachineOperand &MO = Instr.getOperand(IO); + if (!MO.isReg() || !MO.isDef()) continue; + + // This instruction modifies CPSR before the one we want to change. We + // can't do this transformation. + if (MO.getReg() == ARM::CPSR) + return false; + } + } + + // Set the "zero" bit in CPSR. + switch (MI->getOpcode()) { + default: break; + case ARM::ADDri: + case ARM::SUBri: + case ARM::t2ADDri: + case ARM::t2SUBri: + MI->RemoveOperand(5); + MachineInstrBuilder(MI) + .addReg(ARM::CPSR, RegState::Define | RegState::Implicit); + CmpInstr->eraseFromParent(); + return true; + } + + return false; +} diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 89a2db74a75e..b4f4a33a70ad 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -15,11 +15,12 @@ #define ARMBASEINSTRUCTIONINFO_H #include "ARM.h" -#include "ARMRegisterInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/Target/TargetInstrInfo.h" namespace llvm { + class ARMSubtarget; + class ARMBaseRegisterInfo; /// ARMII - This namespace holds all of the target specific flags that /// instruction info tracks. @@ -97,44 +98,45 @@ namespace ARMII { // Miscellaneous arithmetic instructions ArithMiscFrm = 12 << FormShift, + SatFrm = 13 << FormShift, // Extend instructions - ExtFrm = 13 << FormShift, + ExtFrm = 14 << FormShift, // VFP formats - VFPUnaryFrm = 14 << FormShift, - VFPBinaryFrm = 15 << FormShift, - VFPConv1Frm = 16 << FormShift, - VFPConv2Frm = 17 << FormShift, - VFPConv3Frm = 18 << FormShift, - VFPConv4Frm = 19 << FormShift, - VFPConv5Frm = 20 << FormShift, - VFPLdStFrm = 21 << FormShift, - VFPLdStMulFrm = 22 << FormShift, - VFPMiscFrm = 23 << FormShift, + VFPUnaryFrm = 15 << FormShift, + VFPBinaryFrm = 16 << FormShift, + VFPConv1Frm = 17 << FormShift, + VFPConv2Frm = 18 << FormShift, + VFPConv3Frm = 19 << FormShift, + VFPConv4Frm = 20 << FormShift, + VFPConv5Frm = 21 << FormShift, + VFPLdStFrm = 22 << FormShift, + VFPLdStMulFrm = 23 << FormShift, + VFPMiscFrm = 24 << FormShift, // Thumb format - ThumbFrm = 24 << FormShift, + ThumbFrm = 25 << FormShift, // Miscelleaneous format - MiscFrm = 25 << FormShift, + MiscFrm = 26 << FormShift, // NEON formats - NGetLnFrm = 26 << FormShift, - NSetLnFrm = 27 << FormShift, - NDupFrm = 28 << FormShift, - NLdStFrm = 29 << FormShift, - N1RegModImmFrm= 30 << FormShift, - N2RegFrm = 31 << FormShift, - NVCVTFrm = 32 << FormShift, - NVDupLnFrm = 33 << FormShift, - N2RegVShLFrm = 34 << FormShift, - N2RegVShRFrm = 35 << FormShift, - N3RegFrm = 36 << FormShift, - N3RegVShFrm = 37 << FormShift, - NVExtFrm = 38 << FormShift, - NVMulSLFrm = 39 << FormShift, - NVTBLFrm = 40 << FormShift, + NGetLnFrm = 27 << FormShift, + NSetLnFrm = 28 << FormShift, + NDupFrm = 29 << FormShift, + NLdStFrm = 30 << FormShift, + N1RegModImmFrm= 31 << FormShift, + N2RegFrm = 32 << FormShift, + NVCVTFrm = 33 << FormShift, + NVDupLnFrm = 34 << FormShift, + N2RegVShLFrm = 35 << FormShift, + N2RegVShRFrm = 36 << FormShift, + N3RegFrm = 37 << FormShift, + N3RegVShFrm = 38 << FormShift, + NVExtFrm = 39 << FormShift, + NVMulSLFrm = 40 << FormShift, + NVTBLFrm = 41 << FormShift, //===------------------------------------------------------------------===// // Misc flags. 
@@ -198,7 +200,7 @@ namespace ARMII { } class ARMBaseInstrInfo : public TargetInstrInfoImpl { - const ARMSubtarget& Subtarget; + const ARMSubtarget &Subtarget; protected: // Can be only subclassed. explicit ARMBaseInstrInfo(const ARMSubtarget &STI); @@ -223,7 +225,7 @@ public: virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, - bool AllowModify) const; + bool AllowModify = false) const; virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, @@ -262,12 +264,6 @@ public: /// virtual unsigned GetInstSizeInBytes(const MachineInstr* MI) const; - /// Return true if the instruction is a register to register move and return - /// the source and dest operands and their sub-register indices by reference. - virtual bool isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const; - virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; virtual unsigned isStoreToStackSlot(const MachineInstr *MI, @@ -341,6 +337,17 @@ public: unsigned NumInstrs) const { return NumInstrs && NumInstrs == 1; } + + /// AnalyzeCompare - For a comparison instruction, return the source register + /// in SrcReg and the value it compares against in CmpValue. Return true if + /// the comparison instruction can be analyzed. + virtual bool AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, + int &CmpValue) const; + + /// ConvertToSetZeroFlag - Convert the instruction to set the zero flag so + /// that we can remove a "comparison with zero". + virtual bool ConvertToSetZeroFlag(MachineInstr *Instr, + MachineInstr *CmpInstr) const; }; static inline diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 182bd9937145..eceafad63f17 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -40,13 +40,20 @@ #include "llvm/Support/CommandLine.h" namespace llvm { -cl::opt<bool> -ReuseFrameIndexVals("arm-reuse-frame-index-vals", cl::Hidden, cl::init(true), - cl::desc("Reuse repeated frame index values")); +static cl::opt<bool> +ForceAllBaseRegAlloc("arm-force-base-reg-alloc", cl::Hidden, cl::init(false), + cl::desc("Force use of virtual base registers for stack load/store")); +static cl::opt<bool> +EnableLocalStackAlloc("enable-local-stack-alloc", cl::init(true), cl::Hidden, + cl::desc("Enable pre-regalloc stack frame index allocation")); } using namespace llvm; +static cl::opt<bool> +EnableBasePointer("arm-use-base-pointer", cl::Hidden, cl::init(true), + cl::desc("Enable use of a base pointer for complex stack frames")); + unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum, bool *isSPVFP) { if (isSPVFP) @@ -143,7 +150,8 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &sti) : ARMGenRegisterInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP), TII(tii), STI(sti), - FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11) { + FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? 
ARM::R7 : ARM::R11), + BasePtr(ARM::R6) { } const unsigned* @@ -176,8 +184,11 @@ getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); Reserved.set(ARM::SP); Reserved.set(ARM::PC); - if (STI.isTargetDarwin() || hasFP(MF)) + Reserved.set(ARM::FPSCR); + if (hasFP(MF)) Reserved.set(FramePtr); + if (hasBasePointer(MF)) + Reserved.set(BasePtr); // Some targets reserve R9. if (STI.isR9Reserved()) Reserved.set(ARM::R9); @@ -191,9 +202,13 @@ bool ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF, case ARM::SP: case ARM::PC: return true; + case ARM::R6: + if (hasBasePointer(MF)) + return true; + break; case ARM::R7: case ARM::R11: - if (FramePtr == Reg && (STI.isTargetDarwin() || hasFP(MF))) + if (FramePtr == Reg && hasFP(MF)) return true; break; case ARM::R9: @@ -510,7 +525,7 @@ ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC, return std::make_pair(RC->allocation_order_begin(MF), RC->allocation_order_end(MF)); - if (!STI.isTargetDarwin() && !hasFP(MF)) { + if (!hasFP(MF)) { if (!STI.isR9Reserved()) return std::make_pair(GPREven1, GPREven1 + (sizeof(GPREven1)/sizeof(unsigned))); @@ -539,7 +554,7 @@ ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC, return std::make_pair(RC->allocation_order_begin(MF), RC->allocation_order_end(MF)); - if (!STI.isTargetDarwin() && !hasFP(MF)) { + if (!hasFP(MF)) { if (!STI.isR9Reserved()) return std::make_pair(GPROdd1, GPROdd1 + (sizeof(GPROdd1)/sizeof(unsigned))); @@ -609,30 +624,68 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg, /// or if frame pointer elimination is disabled. /// bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const { + // Mac OS X requires FP not to be clobbered for backtracing purpose. + if (STI.isTargetDarwin()) + return true; + const MachineFrameInfo *MFI = MF.getFrameInfo(); - return ((DisableFramePointerElim(MF) && MFI->adjustsStack())|| + // Always eliminate non-leaf frame pointers. + return ((DisableFramePointerElim(MF) && MFI->hasCalls()) || needsStackRealignment(MF) || MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()); } +bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + + if (!EnableBasePointer) + return false; + + if (needsStackRealignment(MF) && MFI->hasVarSizedObjects()) + return true; + + // Thumb has trouble with negative offsets from the FP. Thumb2 has a limited + // negative range for ldr/str (255), and thumb1 is positive offsets only. + // It's going to be better to use the SP or Base Pointer instead. When there + // are variable sized objects, we can't reference off of the SP, so we + // reserve a Base Pointer. + if (AFI->isThumbFunction() && MFI->hasVarSizedObjects()) { + // Conservatively estimate whether the negative offset from the frame + // pointer will be sufficient to reach. If a function has a smallish + // frame, it's less likely to have lots of spills and callee saved + // space, so it's all more likely to be within range of the frame pointer. + // If it's wrong, the scavenger will still enable access to work, it just + // won't be optimal. 
+ if (AFI->isThumb2Function() && MFI->getLocalFrameSize() < 128) + return false; + return true; + } + + return false; +} + bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - return (RealignStack && - !AFI->isThumb1OnlyFunction() && - !MFI->hasVarSizedObjects()); + // We can't realign the stack if: + // 1. Dynamic stack realignment is explicitly disabled, + // 2. This is a Thumb1 function (it's not useful, so we don't bother), or + // 3. There are VLAs in the function and the base pointer is disabled. + return (RealignStack && !AFI->isThumb1OnlyFunction() && + (!MFI->hasVarSizedObjects() || EnableBasePointer)); } bool ARMBaseRegisterInfo:: needsStackRealignment(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + const Function *F = MF.getFunction(); unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); - return (RealignStack && - !AFI->isThumb1OnlyFunction() && - (MFI->getMaxAlignment() > StackAlign) && - !MFI->hasVarSizedObjects()); + bool requiresRealignment = ((MFI->getLocalFrameMaxAlign() > StackAlign) || + F->hasFnAttr(Attribute::StackAlignment)); + + return requiresRealignment && canRealignStack(MF); } bool ARMBaseRegisterInfo:: @@ -668,6 +721,7 @@ static unsigned estimateStackSize(MachineFunction &MF) { /// instructions will require a scratch register during their expansion later. unsigned ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const { + const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned Limit = (1 << 12) - 1; for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) { for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); @@ -693,7 +747,10 @@ ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const { Limit = std::min(Limit, ((1U << 8) - 1) * 4); break; case ARMII::AddrModeT2_i12: - if (hasFP(MF)) Limit = std::min(Limit, (1U << 8) - 1); + // i12 supports only positive offset so these will be converted to + // i8 opcodes. See llvm::rewriteT2FrameIndex. + if (hasFP(MF) && AFI->hasStackFrame()) + Limit = std::min(Limit, (1U << 8) - 1); break; case ARMII::AddrMode6: // Addressing mode 6 (load/store) instructions can't encode an @@ -710,6 +767,19 @@ ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const { return Limit; } +static unsigned GetFunctionSizeInBytes(const MachineFunction &MF, + const ARMBaseInstrInfo &TII) { + unsigned FnSize = 0; + for (MachineFunction::const_iterator MBBI = MF.begin(), E = MF.end(); + MBBI != E; ++MBBI) { + const MachineBasicBlock &MBB = *MBBI; + for (MachineBasicBlock::const_iterator I = MBB.begin(),E = MBB.end(); + I != E; ++I) + FnSize += TII.GetInstSizeInBytes(I); + } + return FnSize; +} + void ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { @@ -737,6 +807,10 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, if (AFI->isThumb1OnlyFunction() && AFI->getVarArgsRegSaveSize() > 0) MF.getRegInfo().setPhysRegUsed(ARM::LR); + // Spill the BasePtr if it's used. + if (hasBasePointer(MF)) + MF.getRegInfo().setPhysRegUsed(BasePtr); + // Don't spill FP if the frame can be eliminated. This is determined // by scanning the callee-save registers to see if any is used. 
const unsigned *CSRegs = getCalleeSavedRegs(); @@ -807,7 +881,7 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, bool ForceLRSpill = false; if (!LRSpilled && AFI->isThumb1OnlyFunction()) { - unsigned FnSize = TII.GetFunctionSizeInBytes(MF); + unsigned FnSize = GetFunctionSizeInBytes(MF, TII); // Force LR to be spilled if the Thumb function size is > 2048. This enables // use of BL to implement far jump. If it turns out that it's not needed // then the branch fix up path will undo it. @@ -824,13 +898,19 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // slot of the previous FP. Also, if we have variable sized objects in the // function, stack slot references will often be negative, and some of // our instructions are positive-offset only, so conservatively consider - // that case to want a spill slot (or register) as well. + // that case to want a spill slot (or register) as well. Similarly, if + // the function adjusts the stack pointer during execution and the + // adjustments aren't already part of our stack size estimate, our offset + // calculations may be off, so be conservative. // FIXME: We could add logic to be more precise about negative offsets // and which instructions will need a scratch register for them. Is it // worth the effort and added fragility? bool BigStack = - (RS && (estimateStackSize(MF) + (hasFP(MF) ? 4:0) >= - estimateRSStackSizeLimit(MF))) || MFI->hasVarSizedObjects(); + (RS && + (estimateStackSize(MF) + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >= + estimateRSStackSizeLimit(MF))) + || MFI->hasVarSizedObjects() + || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF)); bool ExtraCSSpill = false; if (BigStack || !CanEliminateFrame || cannotEliminateFrame(MF)) { @@ -848,9 +928,7 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, ExtraCSSpill = true; } - // Darwin ABI requires FP to point to the stack slot that contains the - // previous FP. - if (STI.isTargetDarwin() || hasFP(MF)) { + if (hasFP(MF)) { MF.getRegInfo().setPhysRegUsed(FramePtr); NumGPRSpills++; } @@ -941,55 +1019,88 @@ unsigned ARMBaseRegisterInfo::getRARegister() const { return ARM::LR; } -unsigned +unsigned ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const { - if (STI.isTargetDarwin() || hasFP(MF)) + if (hasFP(MF)) return FramePtr; return ARM::SP; } +// Provide a base+offset reference to an FI slot for debug info. It's the +// same as what we use for resolving the code-gen references for now. +// FIXME: This can go wrong when references are SP-relative and simple call +// frames aren't used. 
int ARMBaseRegisterInfo::getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const { + return ResolveFrameIndexReference(MF, FI, FrameReg, 0); +} + +int +ARMBaseRegisterInfo::ResolveFrameIndexReference(const MachineFunction &MF, + int FI, + unsigned &FrameReg, + int SPAdj) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize(); + int FPOffset = Offset - AFI->getFramePtrSpillOffset(); bool isFixed = MFI->isFixedObjectIndex(FI); FrameReg = ARM::SP; + Offset += SPAdj; if (AFI->isGPRCalleeSavedArea1Frame(FI)) - Offset -= AFI->getGPRCalleeSavedArea1Offset(); + return Offset - AFI->getGPRCalleeSavedArea1Offset(); else if (AFI->isGPRCalleeSavedArea2Frame(FI)) - Offset -= AFI->getGPRCalleeSavedArea2Offset(); + return Offset - AFI->getGPRCalleeSavedArea2Offset(); else if (AFI->isDPRCalleeSavedAreaFrame(FI)) - Offset -= AFI->getDPRCalleeSavedAreaOffset(); - else if (needsStackRealignment(MF)) { - // When dynamically realigning the stack, use the frame pointer for - // parameters, and the stack pointer for locals. + return Offset - AFI->getDPRCalleeSavedAreaOffset(); + + // When dynamically realigning the stack, use the frame pointer for + // parameters, and the stack/base pointer for locals. + if (needsStackRealignment(MF)) { assert (hasFP(MF) && "dynamic stack realignment without a FP!"); if (isFixed) { FrameReg = getFrameRegister(MF); - Offset -= AFI->getFramePtrSpillOffset(); + Offset = FPOffset; + } else if (MFI->hasVarSizedObjects()) { + assert(hasBasePointer(MF) && + "VLAs and dynamic stack alignment, but missing base pointer!"); + FrameReg = BasePtr; } - } else if (hasFP(MF) && AFI->hasStackFrame()) { - if (isFixed || MFI->hasVarSizedObjects()) { - // Use frame pointer to reference fixed objects unless this is a - // frameless function. + return Offset; + } + + // If there is a frame pointer, use it when we can. + if (hasFP(MF) && AFI->hasStackFrame()) { + // Use frame pointer to reference fixed objects. Use it for locals if + // there are VLAs (and thus the SP isn't reliable as a base). + if (isFixed || (MFI->hasVarSizedObjects() && !hasBasePointer(MF))) { FrameReg = getFrameRegister(MF); - Offset -= AFI->getFramePtrSpillOffset(); + return FPOffset; + } else if (MFI->hasVarSizedObjects()) { + assert(hasBasePointer(MF) && "missing base pointer!"); + // Use the base register since we have it. + FrameReg = BasePtr; } else if (AFI->isThumb2Function()) { - // In Thumb2 mode, the negative offset is very limited. - int FPOffset = Offset - AFI->getFramePtrSpillOffset(); + // In Thumb2 mode, the negative offset is very limited. Try to avoid + // out of range references. if (FPOffset >= -255 && FPOffset < 0) { FrameReg = getFrameRegister(MF); - Offset = FPOffset; + return FPOffset; } + } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) { + // Otherwise, use SP or FP, whichever is closer to the stack slot. + FrameReg = getFrameRegister(MF); + return FPOffset; } } + // Use the base pointer if we have one. + if (hasBasePointer(MF)) + FrameReg = BasePtr; return Offset; } - int ARMBaseRegisterInfo::getFrameIndexOffset(const MachineFunction &MF, int FI) const { @@ -1024,7 +1135,8 @@ unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg, case ARM::R5: return ARM::R4; case ARM::R7: - return isReservedReg(MF, ARM::R7) ? 0 : ARM::R6; + return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6)) + ? 
0 : ARM::R6; case ARM::R9: return isReservedReg(MF, ARM::R9) ? 0 :ARM::R8; case ARM::R11: @@ -1113,7 +1225,8 @@ unsigned ARMBaseRegisterInfo::getRegisterPairOdd(unsigned Reg, case ARM::R4: return ARM::R5; case ARM::R6: - return isReservedReg(MF, ARM::R7) ? 0 : ARM::R7; + return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6)) + ? 0 : ARM::R7; case ARM::R8: return isReservedReg(MF, ARM::R9) ? 0 :ARM::R9; case ARM::R10: @@ -1220,13 +1333,18 @@ requiresFrameIndexScavenging(const MachineFunction &MF) const { return true; } +bool ARMBaseRegisterInfo:: +requiresVirtualBaseRegisters(const MachineFunction &MF) const { + return EnableLocalStackAlloc; +} + // hasReservedCallFrame - Under normal circumstances, when a frame pointer is // not required, we reserve argument space for call sites in the function // immediately on entry to the current function. This eliminates the need for // add/sub sp brackets around call sites. Returns true if the call frame is // included as part of the stack frame. bool ARMBaseRegisterInfo:: -hasReservedCallFrame(MachineFunction &MF) const { +hasReservedCallFrame(const MachineFunction &MF) const { const MachineFrameInfo *FFI = MF.getFrameInfo(); unsigned CFSize = FFI->getMaxCallFrameSize(); // It's not always a good idea to include the call frame as part of the @@ -1244,7 +1362,7 @@ hasReservedCallFrame(MachineFunction &MF) const { // is not sufficient here since we still may reference some objects via SP // even when FP is available in Thumb2 mode. bool ARMBaseRegisterInfo:: -canSimplifyCallFramePseudos(MachineFunction &MF) const { +canSimplifyCallFramePseudos(const MachineFunction &MF) const { return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects(); } @@ -1305,10 +1423,258 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MBB.erase(I); } -unsigned +int64_t ARMBaseRegisterInfo:: +getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const { + const TargetInstrDesc &Desc = MI->getDesc(); + unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); + int64_t InstrOffs = 0;; + int Scale = 1; + unsigned ImmIdx = 0; + switch (AddrMode) { + case ARMII::AddrModeT2_i8: + case ARMII::AddrModeT2_i12: + // i8 supports only negative, and i12 supports only positive, so + // based on Offset sign, consider the appropriate instruction + InstrOffs = MI->getOperand(Idx+1).getImm(); + Scale = 1; + break; + case ARMII::AddrMode5: { + // VFP address mode. + const MachineOperand &OffOp = MI->getOperand(Idx+1); + InstrOffs = ARM_AM::getAM5Offset(OffOp.getImm()); + if (ARM_AM::getAM5Op(OffOp.getImm()) == ARM_AM::sub) + InstrOffs = -InstrOffs; + Scale = 4; + break; + } + case ARMII::AddrMode2: { + ImmIdx = Idx+2; + InstrOffs = ARM_AM::getAM2Offset(MI->getOperand(ImmIdx).getImm()); + if (ARM_AM::getAM2Op(MI->getOperand(ImmIdx).getImm()) == ARM_AM::sub) + InstrOffs = -InstrOffs; + break; + } + case ARMII::AddrMode3: { + ImmIdx = Idx+2; + InstrOffs = ARM_AM::getAM3Offset(MI->getOperand(ImmIdx).getImm()); + if (ARM_AM::getAM3Op(MI->getOperand(ImmIdx).getImm()) == ARM_AM::sub) + InstrOffs = -InstrOffs; + break; + } + case ARMII::AddrModeT1_s: { + ImmIdx = Idx+1; + InstrOffs = MI->getOperand(ImmIdx).getImm(); + Scale = 4; + break; + } + default: + llvm_unreachable("Unsupported addressing mode!"); + break; + } + + return InstrOffs * Scale; +} + +/// needsFrameBaseReg - Returns true if the instruction's frame index +/// reference would be better served by a base register other than FP +/// or SP. 
Used by LocalStackFrameAllocation to determine which frame index +/// references it should create new base registers for. +bool ARMBaseRegisterInfo:: +needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { + for (unsigned i = 0; !MI->getOperand(i).isFI(); ++i) { + assert(i < MI->getNumOperands() &&"Instr doesn't have FrameIndex operand!"); + } + + // It's the load/store FI references that cause issues, as it can be difficult + // to materialize the offset if it won't fit in the literal field. Estimate + // based on the size of the local frame and some conservative assumptions + // about the rest of the stack frame (note, this is pre-regalloc, so + // we don't know everything for certain yet) whether this offset is likely + // to be out of range of the immediate. Return true if so. + + // We only generate virtual base registers for loads and stores, so + // return false for everything else. + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case ARM::LDR: case ARM::LDRH: case ARM::LDRB: + case ARM::STR: case ARM::STRH: case ARM::STRB: + case ARM::t2LDRi12: case ARM::t2LDRi8: + case ARM::t2STRi12: case ARM::t2STRi8: + case ARM::VLDRS: case ARM::VLDRD: + case ARM::VSTRS: case ARM::VSTRD: + case ARM::tSTRspi: case ARM::tLDRspi: + if (ForceAllBaseRegAlloc) + return true; + break; + default: + return false; + } + + // Without a virtual base register, if the function has variable sized + // objects, all fixed-size local references will be via the frame pointer, + // Approximate the offset and see if it's legal for the instruction. + // Note that the incoming offset is based on the SP value at function entry, + // so it'll be negative. + MachineFunction &MF = *MI->getParent()->getParent(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + + // Estimate an offset from the frame pointer. + // Conservatively assume all callee-saved registers get pushed. R4-R6 + // will be earlier than the FP, so we ignore those. + // R7, LR + int64_t FPOffset = Offset - 8; + // ARM and Thumb2 functions also need to consider R8-R11 and D8-D15 + if (!AFI->isThumbFunction() || !AFI->isThumb1OnlyFunction()) + FPOffset -= 80; + // Estimate an offset from the stack pointer. + // The incoming offset is relating to the SP at the start of the function, + // but when we access the local it'll be relative to the SP after local + // allocation, so adjust our SP-relative offset by that allocation size. + Offset = -Offset; + Offset += MFI->getLocalFrameSize(); + // Assume that we'll have at least some spill slots allocated. + // FIXME: This is a total SWAG number. We should run some statistics + // and pick a real one. + Offset += 128; // 128 bytes of spill slots + + // If there is a frame pointer, try using it. + // The FP is only available if there is no dynamic realignment. We + // don't know for sure yet whether we'll need that, so we guess based + // on whether there are any local variables that would trigger it. + unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); + if (hasFP(MF) && + !((MFI->getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) { + if (isFrameOffsetLegal(MI, FPOffset)) + return false; + } + // If we can reference via the stack pointer, try that. + // FIXME: This (and the code that resolves the references) can be improved + // to only disallow SP relative references in the live range of + // the VLA(s). In practice, it's unclear how much difference that + // would make, but it may be worth doing. 
+ if (!MFI->hasVarSizedObjects() && isFrameOffsetLegal(MI, Offset)) + return false; + + // The offset likely isn't legal, we want to allocate a virtual base register. + return true; +} + +/// materializeFrameBaseRegister - Insert defining instruction(s) for +/// BaseReg to be a pointer to FrameIdx before insertion point I. +void ARMBaseRegisterInfo:: +materializeFrameBaseRegister(MachineBasicBlock::iterator I, unsigned BaseReg, + int FrameIdx, int64_t Offset) const { + ARMFunctionInfo *AFI = + I->getParent()->getParent()->getInfo<ARMFunctionInfo>(); + unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : + (AFI->isThumb1OnlyFunction() ? ARM::tADDrSPi : ARM::t2ADDri); + + MachineInstrBuilder MIB = + BuildMI(*I->getParent(), I, I->getDebugLoc(), TII.get(ADDriOpc), BaseReg) + .addFrameIndex(FrameIdx).addImm(Offset); + if (!AFI->isThumb1OnlyFunction()) + AddDefaultCC(AddDefaultPred(MIB)); +} + +void +ARMBaseRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I, + unsigned BaseReg, int64_t Offset) const { + MachineInstr &MI = *I; + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + int Off = Offset; // ARM doesn't need the general 64-bit offsets + unsigned i = 0; + + assert(!AFI->isThumb1OnlyFunction() && + "This resolveFrameIndex does not support Thumb1!"); + + while (!MI.getOperand(i).isFI()) { + ++i; + assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); + } + bool Done = false; + if (!AFI->isThumbFunction()) + Done = rewriteARMFrameIndex(MI, i, BaseReg, Off, TII); + else { + assert(AFI->isThumb2Function()); + Done = rewriteT2FrameIndex(MI, i, BaseReg, Off, TII); + } + assert (Done && "Unable to resolve frame index!"); +} + +bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, + int64_t Offset) const { + const TargetInstrDesc &Desc = MI->getDesc(); + unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); + unsigned i = 0; + + while (!MI->getOperand(i).isFI()) { + ++i; + assert(i < MI->getNumOperands() &&"Instr doesn't have FrameIndex operand!"); + } + + // AddrMode4 and AddrMode6 cannot handle any offset. + if (AddrMode == ARMII::AddrMode4 || AddrMode == ARMII::AddrMode6) + return Offset == 0; + + unsigned NumBits = 0; + unsigned Scale = 1; + bool isSigned = true; + switch (AddrMode) { + case ARMII::AddrModeT2_i8: + case ARMII::AddrModeT2_i12: + // i8 supports only negative, and i12 supports only positive, so + // based on Offset sign, consider the appropriate instruction + Scale = 1; + if (Offset < 0) { + NumBits = 8; + Offset = -Offset; + } else { + NumBits = 12; + } + break; + case ARMII::AddrMode5: + // VFP address mode. + NumBits = 8; + Scale = 4; + break; + case ARMII::AddrMode2: + NumBits = 12; + break; + case ARMII::AddrMode3: + NumBits = 8; + break; + case ARMII::AddrModeT1_s: + NumBits = 5; + Scale = 4; + isSigned = false; + break; + default: + llvm_unreachable("Unsupported addressing mode!"); + break; + } + + Offset += getFrameIndexInstrOffset(MI, i); + // Make sure the offset is encodable for instructions that scale the + // immediate. 
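// (A scaled addressing mode can only encode offsets that are a multiple of
// its access size: a multiple of 4 for the VFP (AddrMode5) and Thumb1
// SP-relative (AddrModeT1_s) forms handled above.)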
+ if ((Offset & (Scale-1)) != 0) + return false; + + if (isSigned && Offset < 0) + Offset = -Offset; + + unsigned Mask = (1 << NumBits) - 1; + if ((unsigned)Offset <= Mask * Scale) + return true; + + return false; +} + +void ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value, - RegScavenger *RS) const { + int SPAdj, RegScavenger *RS) const { unsigned i = 0; MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); @@ -1325,16 +1691,13 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int FrameIndex = MI.getOperand(i).getIndex(); unsigned FrameReg; - int Offset = getFrameIndexReference(MF, FrameIndex, FrameReg); - if (FrameReg != ARM::SP) - SPAdj = 0; - Offset += SPAdj; + int Offset = ResolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj); // Special handling of dbg_value instructions. if (MI.isDebugValue()) { MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/); MI.getOperand(i+1).ChangeToImmediate(Offset); - return 0; + return; } // Modify MI as necessary to handle as much of 'Offset' as possible @@ -1346,7 +1709,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Done = rewriteT2FrameIndex(MI, i, FrameReg, Offset, TII); } if (Done) - return 0; + return; // If we get here, the immediate doesn't fit into the instruction. We folded // as much as possible above, handle the rest, providing a register that is @@ -1366,10 +1729,6 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(i).ChangeToRegister(FrameReg, false, false, false); else { ScratchReg = MF.getRegInfo().createVirtualRegister(ARM::GPRRegisterClass); - if (Value) { - Value->first = FrameReg; // use the frame register as a kind indicator - Value->second = Offset; - } if (!AFI->isThumbFunction()) emitARMRegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, Pred, PredReg, TII); @@ -1379,10 +1738,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Offset, Pred, PredReg, TII); } MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true); - if (!ReuseFrameIndexVals) - ScratchReg = 0; } - return ScratchReg; } /// Move iterator past the next bunch of callee save load / store ops for @@ -1494,7 +1850,8 @@ emitPrologue(MachineFunction &MF) const { // Otherwise, if this is not Darwin, all the callee-saved registers go // into spill area 1, including the FP in R11. In either case, it is // now safe to emit this assignment. - if (STI.isTargetDarwin() || hasFP(MF)) { + bool HasFP = hasFP(MF); + if (HasFP) { unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : ARM::t2ADDri; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(ADDriOpc), FramePtr) @@ -1513,7 +1870,7 @@ emitPrologue(MachineFunction &MF) const { unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; - if (STI.isTargetDarwin() || hasFP(MF)) + if (HasFP) AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes); AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); @@ -1525,18 +1882,22 @@ emitPrologue(MachineFunction &MF) const { if (NumBytes) { // Adjust SP after all the callee-save spills. 
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes); + if (HasFP) + AFI->setShouldRestoreSPFromFP(true); } if (STI.isTargetELF() && hasFP(MF)) { MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - AFI->getFramePtrSpillOffset()); + AFI->setShouldRestoreSPFromFP(true); } AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); AFI->setDPRCalleeSavedAreaSize(DPRCSSize); - // If we need dynamic stack realignment, do it here. + // If we need dynamic stack realignment, do it here. Be paranoid and make + // sure if we also have VLAs, we have a base pointer for frame access. if (needsStackRealignment(MF)) { unsigned MaxAlign = MFI->getMaxAlignment(); assert (!AFI->isThumb1OnlyFunction()); @@ -1562,7 +1923,28 @@ emitPrologue(MachineFunction &MF) const { BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP) .addReg(ARM::R4, RegState::Kill); } + + AFI->setShouldRestoreSPFromFP(true); + } + + // If we need a base pointer, set it up here. It's whatever the value + // of the stack pointer is at this point. Any variable size objects + // will be allocated after this, so we can still use the base pointer + // to reference locals. + if (hasBasePointer(MF)) { + if (isARM) + BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), BasePtr) + .addReg(ARM::SP) + .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); + else + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), BasePtr) + .addReg(ARM::SP); } + + // If the frame has variable sized objects then the epilogue must restore + // the sp from fp. + if (!AFI->shouldRestoreSPFromFP() && MFI->hasVarSizedObjects()) + AFI->setShouldRestoreSPFromFP(true); } static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { @@ -1617,34 +1999,25 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { AFI->getGPRCalleeSavedArea2Size() + AFI->getDPRCalleeSavedAreaSize()); - // Darwin ABI requires FP to point to the stack slot that contains the - // previous FP. - bool HasFP = hasFP(MF); - if ((STI.isTargetDarwin() && NumBytes) || HasFP) { + // Reset SP based on frame pointer only if the stack frame extends beyond + // frame pointer stack slot or target is ELF and the function has FP. + if (AFI->shouldRestoreSPFromFP()) { NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; - // Reset SP based on frame pointer only if the stack frame extends beyond - // frame pointer stack slot or target is ELF and the function has FP. - if (HasFP || - AFI->getGPRCalleeSavedArea2Size() || - AFI->getDPRCalleeSavedAreaSize() || - AFI->getDPRCalleeSavedAreaOffset()) { - if (NumBytes) { - if (isARM) - emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes, - ARMCC::AL, 0, TII); - else - emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes, - ARMCC::AL, 0, TII); - } else { - // Thumb2 or ARM. - if (isARM) - BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP) - .addReg(FramePtr) - .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); - else - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP) - .addReg(FramePtr); - } + if (NumBytes) { + if (isARM) + emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes, + ARMCC::AL, 0, TII); + else + emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes, + ARMCC::AL, 0, TII); + } else { + // Thumb2 or ARM. 
+ if (isARM) + BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP) + .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); + else + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP) + .addReg(FramePtr); } } else if (NumBytes) emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); @@ -1670,7 +2043,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { // Jump to label or value in register. if (RetOpcode == ARM::TCRETURNdi) { - BuildMI(MBB, MBBI, dl, + BuildMI(MBB, MBBI, dl, TII.get(STI.isThumb() ? ARM::TAILJMPdt : ARM::TAILJMPd)). addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), JumpTarget.getTargetFlags()); @@ -1685,7 +2058,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { } else if (RetOpcode == ARM::TCRETURNriND) { BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPrND)). addReg(JumpTarget.getReg(), RegState::Kill); - } + } MachineInstr *NewMI = prior(MBBI); for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i) diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index f7ee0d5cc66d..fa2eb6c10498 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -44,7 +44,7 @@ static inline bool isARMLowRegister(unsigned Reg) { } } -struct ARMBaseRegisterInfo : public ARMGenRegisterInfo { +class ARMBaseRegisterInfo : public ARMGenRegisterInfo { protected: const ARMBaseInstrInfo &TII; const ARMSubtarget &STI; @@ -52,6 +52,11 @@ protected: /// FramePtr - ARM physical register used as frame ptr. unsigned FramePtr; + /// BasePtr - ARM physical register used as a base ptr in complex stack + /// frames. I.e., when we need a 3rd base, not just SP and FP, due to + /// variable size stack objects. + unsigned BasePtr; + // Can be only subclassed. explicit ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI); @@ -102,9 +107,18 @@ public: MachineFunction &MF) const; bool hasFP(const MachineFunction &MF) const; + bool hasBasePointer(const MachineFunction &MF) const; bool canRealignStack(const MachineFunction &MF) const; bool needsStackRealignment(const MachineFunction &MF) const; + int64_t getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const; + bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const; + void materializeFrameBaseRegister(MachineBasicBlock::iterator I, + unsigned BaseReg, int FrameIdx, + int64_t Offset) const; + void resolveFrameIndex(MachineBasicBlock::iterator I, + unsigned BaseReg, int64_t Offset) const; + bool isFrameOffsetLegal(const MachineInstr *MI, int64_t Offset) const; bool cannotEliminateFrame(const MachineFunction &MF) const; @@ -116,6 +130,8 @@ public: unsigned getFrameRegister(const MachineFunction &MF) const; int getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const; + int ResolveFrameIndexReference(const MachineFunction &MF, int FI, + unsigned &FrameReg, int SPAdj) const; int getFrameIndexOffset(const MachineFunction &MF, int FI) const; // Exception handling queries. 
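The frame-index hooks declared just above (needsFrameBaseReg, materializeFrameBaseRegister, resolveFrameIndex, isFrameOffsetLegal) are only the target-side half of the mechanism; the caller lives in target-independent frame-lowering code that is not part of this patch. A minimal sketch, assuming illustrative names for the iterator, frame index and offset supplied by that caller, of how the hooks are meant to be driven:

  // Rough sketch only; MII/FrameIdx/LocalOffset come from the pass's own
  // scan of the function, not from anything in this diff.
  static void rewriteOneFrameRef(const ARMBaseRegisterInfo *TRI,
                                 MachineRegisterInfo &MRI,
                                 MachineBasicBlock::iterator MII,
                                 int FrameIdx, int64_t LocalOffset) {
    if (!TRI->needsFrameBaseReg(&*MII, LocalOffset))
      return;
    // Define a fresh base register pointing at the slot, right before MII...
    unsigned BaseReg = MRI.createVirtualRegister(ARM::GPRRegisterClass);
    TRI->materializeFrameBaseRegister(MII, BaseReg, FrameIdx, /*Offset=*/0);
    // ...then rewrite this reference (and any nearby ones whose remaining
    // offset still passes isFrameOffsetLegal) to be BaseReg-relative.
    TRI->resolveFrameIndex(MII, BaseReg, LocalOffset);
  }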
@@ -144,16 +160,17 @@ public: virtual bool requiresFrameIndexScavenging(const MachineFunction &MF) const; - virtual bool hasReservedCallFrame(MachineFunction &MF) const; - virtual bool canSimplifyCallFramePseudos(MachineFunction &MF) const; + virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const; + + virtual bool hasReservedCallFrame(const MachineFunction &MF) const; + virtual bool canSimplifyCallFramePseudos(const MachineFunction &MF) const; virtual void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - virtual unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value = NULL, - RegScavenger *RS = NULL) const; + virtual void eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS = NULL) const; virtual void emitPrologue(MachineFunction &MF) const; virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index 8fdb07f81626..293e32aa5376 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -1,4 +1,4 @@ -//===- ARMCallingConv.td - Calling Conventions for ARM ----------*- C++ -*-===// +//===- ARMCallingConv.td - Calling Conventions for ARM -----*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -68,7 +68,7 @@ def CC_ARM_AAPCS_Common : CallingConv<[ "ArgFlags.getOrigAlign() != 8", CCAssignToReg<[R0, R1, R2, R3]>>>, - CCIfType<[i32], CCIfAlign<"8", CCAssignToStack<4, 8>>>, + CCIfType<[i32], CCIfAlign<"8", CCAssignToStackWithShadow<4, 8, R3>>>, CCIfType<[i32, f32], CCAssignToStack<4, 4>>, CCIfType<[f64], CCAssignToStack<8, 8>>, CCIfType<[v2f64], CCAssignToStack<16, 8>> diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 7895cb071922..b1a702f90cfc 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -65,7 +65,7 @@ namespace { static char ID; public: ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) - : MachineFunctionPass(&ID), JTI(0), + : MachineFunctionPass(ID), JTI(0), II((const ARMInstrInfo *)tm.getInstrInfo()), TD(tm.getTargetData()), TM(tm), MCE(mce), MCPEs(0), MJTEs(0), @@ -124,6 +124,8 @@ namespace { void emitMiscArithInstruction(const MachineInstr &MI); + void emitSaturateInstruction(const MachineInstr &MI); + void emitBranchInstruction(const MachineInstr &MI); void emitInlineJumpTable(unsigned JTIndex); @@ -389,6 +391,9 @@ void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) { case ARMII::ArithMiscFrm: emitMiscArithInstruction(MI); break; + case ARMII::SatFrm: + emitSaturateInstruction(MI); + break; case ARMII::BrFrm: emitBranchInstruction(MI); break; @@ -654,6 +659,19 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) { switch (Opcode) { default: llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction"); + case ARM::BX: + case ARM::BMOVPCRX: + case ARM::BXr9: + case ARM::BMOVPCRXr9: { + // First emit mov lr, pc + unsigned Binary = 0x01a0e00f; + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + emitWordLE(Binary); + + // and then emit the branch. + emitMiscBranchInstruction(MI); + break; + } case TargetOpcode::INLINEASM: { // We allow inline assembler nodes with empty bodies - they can // implicitly define registers, which is ok for JIT. 
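The fixed word 0x01a0e00f emitted just above for the BX/BMOVPCRX pseudos is the ARM data-processing encoding of "mov lr, pc" with the condition field left at zero; the emitter then ORs the instruction's predicate into bits 31..28. A tiny standalone check of that arithmetic (values only, independent of the JIT):

  #include <cstdio>

  int main() {
    unsigned Binary = 0x01a0e00fu;   // MOV, Rd = lr (r14), operand2 = pc (r15),
                                     // condition field cleared
    unsigned ALPred = 14;            // ARMCC::AL == 0b1110
    Binary |= ALPred << 28;          // predicate lives in the top four bits
    std::printf("0x%08X\n", Binary); // prints 0xE1A0E00F, i.e. "mov lr, pc"
    return 0;
  }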
@@ -662,7 +680,7 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) { } break; } - case TargetOpcode::DBG_LABEL: + case TargetOpcode::PROLOG_LABEL: case TargetOpcode::EH_LABEL: MCE.emitLabel(MI.getOperand(0).getMCSymbol()); break; @@ -1209,12 +1227,58 @@ void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) { // Encode shift_imm. unsigned ShiftAmt = MI.getOperand(OpIdx).getImm(); + if (TID.Opcode == ARM::PKHTB) { + assert(ShiftAmt != 0 && "PKHTB shift_imm is 0!"); + if (ShiftAmt == 32) + ShiftAmt = 0; + } assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!"); Binary |= ShiftAmt << ARMII::ShiftShift; emitWordLE(Binary); } +void ARMCodeEmitter::emitSaturateInstruction(const MachineInstr &MI) { + const TargetInstrDesc &TID = MI.getDesc(); + + // Part of binary is determined by TableGen. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode Rd + Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift; + + // Encode saturate bit position. + unsigned Pos = MI.getOperand(1).getImm(); + if (TID.Opcode == ARM::SSAT || TID.Opcode == ARM::SSAT16) + Pos -= 1; + assert((Pos < 16 || (Pos < 32 && + TID.Opcode != ARM::SSAT16 && + TID.Opcode != ARM::USAT16)) && + "saturate bit position out of range"); + Binary |= Pos << 16; + + // Encode Rm + Binary |= getMachineOpValue(MI, 2); + + // Encode shift_imm. + if (TID.getNumOperands() == 4) { + unsigned ShiftOp = MI.getOperand(3).getImm(); + ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp); + if (Opc == ARM_AM::asr) + Binary |= (1 << 6); + unsigned ShiftAmt = MI.getOperand(3).getImm(); + if (ShiftAmt == 32 && Opc == ARM_AM::asr) + ShiftAmt = 0; + assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!"); + Binary |= ShiftAmt << ARMII::ShiftShift; + } + + emitWordLE(Binary); +} + void ARMCodeEmitter::emitBranchInstruction(const MachineInstr &MI) { const TargetInstrDesc &TID = MI.getDesc(); @@ -1485,7 +1549,7 @@ ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) { // Set addressing mode by modifying bits U(23) and P(24) const MachineOperand &MO = MI.getOperand(OpIdx++); - Binary |= getAddrModeUPBits(ARM_AM::getAM5SubMode(MO.getImm())); + Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(MO.getImm())); // Set bit W(21) if (IsUpdating) @@ -1494,7 +1558,7 @@ ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) { // First register is encoded in Dd. Binary |= encodeVFPRd(MI, OpIdx+2); - // Number of registers are encoded in offset field. + // Count the number of registers. unsigned NumRegs = 1; for (unsigned i = OpIdx+3, e = MI.getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI.getOperand(i); diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 65a3da6f1617..60e923bd2c38 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -18,9 +18,9 @@ #include "ARMAddressingModes.h" #include "ARMMachineFunctionInfo.h" #include "ARMInstrInfo.h" +#include "Thumb2InstrInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" @@ -165,7 +165,7 @@ namespace { /// HasInlineAsm - True if the function contains inline assembly. 
bool HasInlineAsm; - const TargetInstrInfo *TII; + const ARMInstrInfo *TII; const ARMSubtarget *STI; ARMFunctionInfo *AFI; bool isThumb; @@ -173,7 +173,7 @@ namespace { bool isThumb2; public: static char ID; - ARMConstantIslands() : MachineFunctionPass(&ID) {} + ARMConstantIslands() : MachineFunctionPass(ID) {} virtual bool runOnMachineFunction(MachineFunction &MF); @@ -272,7 +272,7 @@ FunctionPass *llvm::createARMConstantIslandPass() { bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { MachineConstantPool &MCP = *MF.getConstantPool(); - TII = MF.getTarget().getInstrInfo(); + TII = (const ARMInstrInfo*)MF.getTarget().getInstrInfo(); AFI = MF.getInfo<ARMFunctionInfo>(); STI = &MF.getTarget().getSubtarget<ARMSubtarget>(); @@ -323,6 +323,8 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { // constant pool users. InitialFunctionScan(MF, CPEMIs); CPEMIs.clear(); + DEBUG(dumpBBs()); + /// Remove dead constant pool entries. RemoveUnusedCPEntries(); @@ -355,7 +357,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { } // Shrink 32-bit Thumb2 branch, load, and store instructions. - if (isThumb2) + if (isThumb2 && !STI->prefers32BitThumb()) MadeChange |= OptimizeThumb2Instructions(MF); // After a while, this might be made debug-only, but it is not expensive. @@ -366,6 +368,8 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { if (isThumb && !HasFarJump && AFI->isLRSpilledForFarJump()) MadeChange |= UndoLRSpillRestore(); + DEBUG(errs() << '\n'; dumpBBs()); + BBSizes.clear(); BBOffsets.clear(); WaterList.clear(); @@ -509,6 +513,10 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF, case ARM::tBR_JTr: // A Thumb1 table jump may involve padding; for the offsets to // be right, functions containing these must be 4-byte aligned. + // tBR_JTr expands to a mov pc followed by .align 2 and then the jump + // table entries. So this code checks whether offset of tBR_JTr + 2 + // is aligned. That is held in Offset+MBBSize, which already has + // 2 added in for the size of the mov pc instruction. MF.EnsureAlignment(2U); if ((Offset+MBBSize)%4 != 0 || HasInlineAsm) // FIXME: Add a pseudo ALIGN instruction instead. @@ -768,28 +776,54 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { WaterList.insert(IP, OrigBB); NewWaterList.insert(OrigBB); - // Figure out how large the first NewMBB is. (It cannot - // contain a constpool_entry or tablejump.) - unsigned NewBBSize = 0; - for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end(); - I != E; ++I) - NewBBSize += TII->GetInstSizeInBytes(I); - unsigned OrigBBI = OrigBB->getNumber(); unsigned NewBBI = NewBB->getNumber(); - // Set the size of NewBB in BBSizes. - BBSizes[NewBBI] = NewBBSize; - // We removed instructions from UserMBB, subtract that off from its size. - // Add 2 or 4 to the block to count the unconditional branch we added to it. int delta = isThumb1 ? 2 : 4; - BBSizes[OrigBBI] -= NewBBSize - delta; + + // Figure out how large the OrigBB is. As the first half of the original + // block, it cannot contain a tablejump. The size includes + // the new jump we added. (It should be possible to do this without + // recounting everything, but it's very confusing, and this is rarely + // executed.) 
+ unsigned OrigBBSize = 0; + for (MachineBasicBlock::iterator I = OrigBB->begin(), E = OrigBB->end(); + I != E; ++I) + OrigBBSize += TII->GetInstSizeInBytes(I); + BBSizes[OrigBBI] = OrigBBSize; // ...and adjust BBOffsets for NewBB accordingly. BBOffsets[NewBBI] = BBOffsets[OrigBBI] + BBSizes[OrigBBI]; + // Figure out how large the NewMBB is. As the second half of the original + // block, it may contain a tablejump. + unsigned NewBBSize = 0; + for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end(); + I != E; ++I) + NewBBSize += TII->GetInstSizeInBytes(I); + // Set the size of NewBB in BBSizes. It does not include any padding now. + BBSizes[NewBBI] = NewBBSize; + + MachineInstr* ThumbJTMI = prior(NewBB->end()); + if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) { + // We've added another 2-byte instruction before this tablejump, which + // means we will always need padding if we didn't before, and vice versa. + + // The original offset of the jump instruction was: + unsigned OrigOffset = BBOffsets[OrigBBI] + BBSizes[OrigBBI] - delta; + if (OrigOffset%4 == 0) { + // We had padding before and now we don't. No net change in code size. + delta = 0; + } else { + // We didn't have padding before and now we do. + BBSizes[NewBBI] += 2; + delta = 4; + } + } + // All BBOffsets following these blocks must be modified. - AdjustBBOffsetsAfter(NewBB, delta); + if (delta) + AdjustBBOffsetsAfter(NewBB, delta); return NewBB; } @@ -915,6 +949,10 @@ void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB, } // Thumb1 jump tables require padding. They should be at the end; // following unconditional branches are removed by AnalyzeBranch. + // tBR_JTr expands to a mov pc followed by .align 2 and then the jump + // table entries. So this code checks whether offset of tBR_JTr + // is aligned; if it is, the offset of the jump table following the + // instruction will not be aligned, and we need padding. MachineInstr *ThumbJTMI = prior(MBB->end()); if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) { unsigned NewMIOffset = GetOffsetOf(ThumbJTMI); @@ -1143,11 +1181,13 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, MachineBasicBlock::iterator MI = UserMI; ++MI; unsigned CPUIndex = CPUserIndex+1; + unsigned NumCPUsers = CPUsers.size(); + MachineInstr *LastIT = 0; for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI); Offset < BaseInsertOffset; Offset += TII->GetInstSizeInBytes(MI), - MI = llvm::next(MI)) { - if (CPUIndex < CPUsers.size() && CPUsers[CPUIndex].MI == MI) { + MI = llvm::next(MI)) { + if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) { CPUser &U = CPUsers[CPUIndex]; if (!OffsetIsInRange(Offset, EndInsertOffset, U.MaxDisp, U.NegOk, U.IsSoImm)) { @@ -1159,9 +1199,23 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, EndInsertOffset += CPUsers[CPUIndex].CPEMI->getOperand(2).getImm(); CPUIndex++; } + + // Remember the last IT instruction. + if (MI->getOpcode() == ARM::t2IT) + LastIT = MI; } + DEBUG(errs() << "Split in middle of big block\n"); - NewMBB = SplitBlockBeforeInstr(prior(MI)); + --MI; + + // Avoid splitting an IT block. 
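// (Placing the split point, and hence the inserted unconditional branch,
// between a t2IT and the instructions it predicates would tear the IT block
// apart, which is not a legal Thumb2 sequence; instead back up and split
// before the IT instruction itself.)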
+ if (LastIT) { + unsigned PredReg = 0; + ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg); + if (CC != ARMCC::AL) + MI = LastIT; + } + NewMBB = SplitBlockBeforeInstr(MI); } } diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 9c62597b4323..fc2e3c3fadae 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -19,14 +19,21 @@ #include "ARMBaseInstrInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" - +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; namespace { class ARMExpandPseudo : public MachineFunctionPass { + // Constants for register spacing in NEON load/store instructions. + enum NEONRegSpacing { + SingleSpc, + EvenDblSpc, + OddDblSpc + }; + public: static char ID; - ARMExpandPseudo() : MachineFunctionPass(&ID) {} + ARMExpandPseudo() : MachineFunctionPass(ID) {} const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; @@ -41,6 +48,10 @@ namespace { void TransferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI); bool ExpandMBB(MachineBasicBlock &MBB); + void ExpandVLD(MachineBasicBlock::iterator &MBBI, unsigned Opc, + bool hasWriteBack, NEONRegSpacing RegSpc, unsigned NumRegs); + void ExpandVST(MachineBasicBlock::iterator &MBBI, unsigned Opc, + bool hasWriteBack, NEONRegSpacing RegSpc, unsigned NumRegs); }; char ARMExpandPseudo::ID = 0; } @@ -63,6 +74,129 @@ void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI, } } +/// ExpandVLD - Translate VLD pseudo instructions with Q, QQ or QQQQ register +/// operands to real VLD instructions with D register operands. +void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI, + unsigned Opc, bool hasWriteBack, + NEONRegSpacing RegSpc, unsigned NumRegs) { + MachineInstr &MI = *MBBI; + MachineBasicBlock &MBB = *MI.getParent(); + + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)); + unsigned OpIdx = 0; + + bool DstIsDead = MI.getOperand(OpIdx).isDead(); + unsigned DstReg = MI.getOperand(OpIdx++).getReg(); + unsigned D0, D1, D2, D3; + if (RegSpc == SingleSpc) { + D0 = TRI->getSubReg(DstReg, ARM::dsub_0); + D1 = TRI->getSubReg(DstReg, ARM::dsub_1); + D2 = TRI->getSubReg(DstReg, ARM::dsub_2); + D3 = TRI->getSubReg(DstReg, ARM::dsub_3); + } else if (RegSpc == EvenDblSpc) { + D0 = TRI->getSubReg(DstReg, ARM::dsub_0); + D1 = TRI->getSubReg(DstReg, ARM::dsub_2); + D2 = TRI->getSubReg(DstReg, ARM::dsub_4); + D3 = TRI->getSubReg(DstReg, ARM::dsub_6); + } else { + assert(RegSpc == OddDblSpc && "unknown register spacing for VLD"); + D0 = TRI->getSubReg(DstReg, ARM::dsub_1); + D1 = TRI->getSubReg(DstReg, ARM::dsub_3); + D2 = TRI->getSubReg(DstReg, ARM::dsub_5); + D3 = TRI->getSubReg(DstReg, ARM::dsub_7); + } + MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(D1, RegState::Define | getDeadRegState(DstIsDead)); + if (NumRegs > 2) + MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead)); + if (NumRegs > 3) + MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead)); + + if (hasWriteBack) { + bool WBIsDead = MI.getOperand(OpIdx).isDead(); + unsigned WBReg = MI.getOperand(OpIdx++).getReg(); + MIB.addReg(WBReg, RegState::Define | getDeadRegState(WBIsDead)); + } + // Copy the addrmode6 operands. 
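// (For these loads that is the base address register followed by its
// alignment immediate.)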
+ bool AddrIsKill = MI.getOperand(OpIdx).isKill(); + MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(AddrIsKill)); + MIB.addImm(MI.getOperand(OpIdx++).getImm()); + if (hasWriteBack) { + // Copy the am6offset operand. + bool OffsetIsKill = MI.getOperand(OpIdx).isKill(); + MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(OffsetIsKill)); + } + + MIB = AddDefaultPred(MIB); + TransferImpOps(MI, MIB, MIB); + // For an instruction writing the odd subregs, add an implicit use of the + // super-register because the even subregs were loaded separately. + if (RegSpc == OddDblSpc) + MIB.addReg(DstReg, RegState::Implicit); + // Add an implicit def for the super-register. + MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead)); + MI.eraseFromParent(); +} + +/// ExpandVST - Translate VST pseudo instructions with Q, QQ or QQQQ register +/// operands to real VST instructions with D register operands. +void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI, + unsigned Opc, bool hasWriteBack, + NEONRegSpacing RegSpc, unsigned NumRegs) { + MachineInstr &MI = *MBBI; + MachineBasicBlock &MBB = *MI.getParent(); + + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)); + unsigned OpIdx = 0; + if (hasWriteBack) { + bool DstIsDead = MI.getOperand(OpIdx).isDead(); + unsigned DstReg = MI.getOperand(OpIdx++).getReg(); + MIB.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)); + } + // Copy the addrmode6 operands. + bool AddrIsKill = MI.getOperand(OpIdx).isKill(); + MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(AddrIsKill)); + MIB.addImm(MI.getOperand(OpIdx++).getImm()); + if (hasWriteBack) { + // Copy the am6offset operand. + bool OffsetIsKill = MI.getOperand(OpIdx).isKill(); + MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(OffsetIsKill)); + } + + bool SrcIsKill = MI.getOperand(OpIdx).isKill(); + unsigned SrcReg = MI.getOperand(OpIdx).getReg(); + unsigned D0, D1, D2, D3; + if (RegSpc == SingleSpc) { + D0 = TRI->getSubReg(SrcReg, ARM::dsub_0); + D1 = TRI->getSubReg(SrcReg, ARM::dsub_1); + D2 = TRI->getSubReg(SrcReg, ARM::dsub_2); + D3 = TRI->getSubReg(SrcReg, ARM::dsub_3); + } else if (RegSpc == EvenDblSpc) { + D0 = TRI->getSubReg(SrcReg, ARM::dsub_0); + D1 = TRI->getSubReg(SrcReg, ARM::dsub_2); + D2 = TRI->getSubReg(SrcReg, ARM::dsub_4); + D3 = TRI->getSubReg(SrcReg, ARM::dsub_6); + } else { + assert(RegSpc == OddDblSpc && "unknown register spacing for VST"); + D0 = TRI->getSubReg(SrcReg, ARM::dsub_1); + D1 = TRI->getSubReg(SrcReg, ARM::dsub_3); + D2 = TRI->getSubReg(SrcReg, ARM::dsub_5); + D3 = TRI->getSubReg(SrcReg, ARM::dsub_7); + } + + MIB.addReg(D0).addReg(D1); + if (NumRegs > 2) + MIB.addReg(D2); + if (NumRegs > 3) + MIB.addReg(D3); + MIB = AddDefaultPred(MIB); + TransferImpOps(MI, MIB, MIB); + if (SrcIsKill) + // Add an implicit kill for the super-reg. 
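// (Only the D subregisters appear as explicit operands on the real VST, so
// record here that the whole Q/QQ/QQQQ source register dies, for the benefit
// of later passes.)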
+ (*MIB).addRegisterKilled(SrcReg, TRI, true); + MI.eraseFromParent(); +} + bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { bool Modified = false; @@ -71,9 +205,13 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { MachineInstr &MI = *MBBI; MachineBasicBlock::iterator NMBBI = llvm::next(MBBI); + bool ModifiedOp = true; unsigned Opcode = MI.getOpcode(); switch (Opcode) { - default: break; + default: + ModifiedOp = false; + break; + case ARM::tLDRpci_pic: case ARM::t2LDRpci_pic: { unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic) @@ -92,10 +230,10 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { .addOperand(MI.getOperand(2)); TransferImpOps(MI, MIB1, MIB2); MI.eraseFromParent(); - Modified = true; break; } + case ARM::MOVi32imm: case ARM::t2MOVi32imm: { unsigned PredReg = 0; ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg); @@ -104,9 +242,13 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { const MachineOperand &MO = MI.getOperand(1); MachineInstrBuilder LO16, HI16; - LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVi16), + LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(Opcode == ARM::MOVi32imm ? + ARM::MOVi16 : ARM::t2MOVi16), DstReg); - HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVTi16)) + HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(Opcode == ARM::MOVi32imm ? + ARM::MOVTi16 : ARM::t2MOVTi16)) .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead)) .addReg(DstReg); @@ -128,7 +270,6 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { HI16.addImm(Pred).addReg(PredReg); TransferImpOps(MI, LO16, HI16); MI.eraseFromParent(); - Modified = true; break; } @@ -155,9 +296,211 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { .addReg(OddSrc, getKillRegState(SrcIsKill))); TransferImpOps(MI, Even, Odd); MI.eraseFromParent(); - Modified = true; } + + case ARM::VLD1q8Pseudo: + ExpandVLD(MBBI, ARM::VLD1q8, false, SingleSpc, 2); break; + case ARM::VLD1q16Pseudo: + ExpandVLD(MBBI, ARM::VLD1q16, false, SingleSpc, 2); break; + case ARM::VLD1q32Pseudo: + ExpandVLD(MBBI, ARM::VLD1q32, false, SingleSpc, 2); break; + case ARM::VLD1q64Pseudo: + ExpandVLD(MBBI, ARM::VLD1q64, false, SingleSpc, 2); break; + case ARM::VLD1q8Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD1q8, true, SingleSpc, 2); break; + case ARM::VLD1q16Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD1q16, true, SingleSpc, 2); break; + case ARM::VLD1q32Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD1q32, true, SingleSpc, 2); break; + case ARM::VLD1q64Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD1q64, true, SingleSpc, 2); break; + + case ARM::VLD2d8Pseudo: + ExpandVLD(MBBI, ARM::VLD2d8, false, SingleSpc, 2); break; + case ARM::VLD2d16Pseudo: + ExpandVLD(MBBI, ARM::VLD2d16, false, SingleSpc, 2); break; + case ARM::VLD2d32Pseudo: + ExpandVLD(MBBI, ARM::VLD2d32, false, SingleSpc, 2); break; + case ARM::VLD2q8Pseudo: + ExpandVLD(MBBI, ARM::VLD2q8, false, SingleSpc, 4); break; + case ARM::VLD2q16Pseudo: + ExpandVLD(MBBI, ARM::VLD2q16, false, SingleSpc, 4); break; + case ARM::VLD2q32Pseudo: + ExpandVLD(MBBI, ARM::VLD2q32, false, SingleSpc, 4); break; + case ARM::VLD2d8Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD2d8, true, SingleSpc, 2); break; + case ARM::VLD2d16Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD2d16, true, SingleSpc, 2); break; + case ARM::VLD2d32Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD2d32, true, SingleSpc, 2); break; + case ARM::VLD2q8Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD2q8, true, SingleSpc, 4); break; + case ARM::VLD2q16Pseudo_UPD: + ExpandVLD(MBBI, 
ARM::VLD2q16, true, SingleSpc, 4); break; + case ARM::VLD2q32Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD2q32, true, SingleSpc, 4); break; + + case ARM::VLD3d8Pseudo: + ExpandVLD(MBBI, ARM::VLD3d8, false, SingleSpc, 3); break; + case ARM::VLD3d16Pseudo: + ExpandVLD(MBBI, ARM::VLD3d16, false, SingleSpc, 3); break; + case ARM::VLD3d32Pseudo: + ExpandVLD(MBBI, ARM::VLD3d32, false, SingleSpc, 3); break; + case ARM::VLD1d64TPseudo: + ExpandVLD(MBBI, ARM::VLD1d64T, false, SingleSpc, 3); break; + case ARM::VLD3d8Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD3d8_UPD, true, SingleSpc, 3); break; + case ARM::VLD3d16Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD3d16_UPD, true, SingleSpc, 3); break; + case ARM::VLD3d32Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD3d32_UPD, true, SingleSpc, 3); break; + case ARM::VLD1d64TPseudo_UPD: + ExpandVLD(MBBI, ARM::VLD1d64T_UPD, true, SingleSpc, 3); break; + case ARM::VLD3q8Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD3q8_UPD, true, EvenDblSpc, 3); break; + case ARM::VLD3q16Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD3q16_UPD, true, EvenDblSpc, 3); break; + case ARM::VLD3q32Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD3q32_UPD, true, EvenDblSpc, 3); break; + case ARM::VLD3q8oddPseudo_UPD: + ExpandVLD(MBBI, ARM::VLD3q8_UPD, true, OddDblSpc, 3); break; + case ARM::VLD3q16oddPseudo_UPD: + ExpandVLD(MBBI, ARM::VLD3q16_UPD, true, OddDblSpc, 3); break; + case ARM::VLD3q32oddPseudo_UPD: + ExpandVLD(MBBI, ARM::VLD3q32_UPD, true, OddDblSpc, 3); break; + + case ARM::VLD4d8Pseudo: + ExpandVLD(MBBI, ARM::VLD4d8, false, SingleSpc, 4); break; + case ARM::VLD4d16Pseudo: + ExpandVLD(MBBI, ARM::VLD4d16, false, SingleSpc, 4); break; + case ARM::VLD4d32Pseudo: + ExpandVLD(MBBI, ARM::VLD4d32, false, SingleSpc, 4); break; + case ARM::VLD1d64QPseudo: + ExpandVLD(MBBI, ARM::VLD1d64Q, false, SingleSpc, 4); break; + case ARM::VLD4d8Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD4d8_UPD, true, SingleSpc, 4); break; + case ARM::VLD4d16Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD4d16_UPD, true, SingleSpc, 4); break; + case ARM::VLD4d32Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD4d32_UPD, true, SingleSpc, 4); break; + case ARM::VLD1d64QPseudo_UPD: + ExpandVLD(MBBI, ARM::VLD1d64Q_UPD, true, SingleSpc, 4); break; + case ARM::VLD4q8Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD4q8_UPD, true, EvenDblSpc, 4); break; + case ARM::VLD4q16Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD4q16_UPD, true, EvenDblSpc, 4); break; + case ARM::VLD4q32Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD4q32_UPD, true, EvenDblSpc, 4); break; + case ARM::VLD4q8oddPseudo_UPD: + ExpandVLD(MBBI, ARM::VLD4q8_UPD, true, OddDblSpc, 4); break; + case ARM::VLD4q16oddPseudo_UPD: + ExpandVLD(MBBI, ARM::VLD4q16_UPD, true, OddDblSpc, 4); break; + case ARM::VLD4q32oddPseudo_UPD: + ExpandVLD(MBBI, ARM::VLD4q32_UPD, true, OddDblSpc, 4); break; + + case ARM::VST1q8Pseudo: + ExpandVST(MBBI, ARM::VST1q8, false, SingleSpc, 2); break; + case ARM::VST1q16Pseudo: + ExpandVST(MBBI, ARM::VST1q16, false, SingleSpc, 2); break; + case ARM::VST1q32Pseudo: + ExpandVST(MBBI, ARM::VST1q32, false, SingleSpc, 2); break; + case ARM::VST1q64Pseudo: + ExpandVST(MBBI, ARM::VST1q64, false, SingleSpc, 2); break; + case ARM::VST1q8Pseudo_UPD: + ExpandVST(MBBI, ARM::VST1q8_UPD, true, SingleSpc, 2); break; + case ARM::VST1q16Pseudo_UPD: + ExpandVST(MBBI, ARM::VST1q16_UPD, true, SingleSpc, 2); break; + case ARM::VST1q32Pseudo_UPD: + ExpandVST(MBBI, ARM::VST1q32_UPD, true, SingleSpc, 2); break; + case ARM::VST1q64Pseudo_UPD: + ExpandVST(MBBI, ARM::VST1q64_UPD, true, SingleSpc, 2); break; + + case ARM::VST2d8Pseudo: + ExpandVST(MBBI, ARM::VST2d8, false, 
SingleSpc, 2); break; + case ARM::VST2d16Pseudo: + ExpandVST(MBBI, ARM::VST2d16, false, SingleSpc, 2); break; + case ARM::VST2d32Pseudo: + ExpandVST(MBBI, ARM::VST2d32, false, SingleSpc, 2); break; + case ARM::VST2q8Pseudo: + ExpandVST(MBBI, ARM::VST2q8, false, SingleSpc, 4); break; + case ARM::VST2q16Pseudo: + ExpandVST(MBBI, ARM::VST2q16, false, SingleSpc, 4); break; + case ARM::VST2q32Pseudo: + ExpandVST(MBBI, ARM::VST2q32, false, SingleSpc, 4); break; + case ARM::VST2d8Pseudo_UPD: + ExpandVST(MBBI, ARM::VST2d8_UPD, true, SingleSpc, 2); break; + case ARM::VST2d16Pseudo_UPD: + ExpandVST(MBBI, ARM::VST2d16_UPD, true, SingleSpc, 2); break; + case ARM::VST2d32Pseudo_UPD: + ExpandVST(MBBI, ARM::VST2d32_UPD, true, SingleSpc, 2); break; + case ARM::VST2q8Pseudo_UPD: + ExpandVST(MBBI, ARM::VST2q8_UPD, true, SingleSpc, 4); break; + case ARM::VST2q16Pseudo_UPD: + ExpandVST(MBBI, ARM::VST2q16_UPD, true, SingleSpc, 4); break; + case ARM::VST2q32Pseudo_UPD: + ExpandVST(MBBI, ARM::VST2q32_UPD, true, SingleSpc, 4); break; + + case ARM::VST3d8Pseudo: + ExpandVST(MBBI, ARM::VST3d8, false, SingleSpc, 3); break; + case ARM::VST3d16Pseudo: + ExpandVST(MBBI, ARM::VST3d16, false, SingleSpc, 3); break; + case ARM::VST3d32Pseudo: + ExpandVST(MBBI, ARM::VST3d32, false, SingleSpc, 3); break; + case ARM::VST1d64TPseudo: + ExpandVST(MBBI, ARM::VST1d64T, false, SingleSpc, 3); break; + case ARM::VST3d8Pseudo_UPD: + ExpandVST(MBBI, ARM::VST3d8_UPD, true, SingleSpc, 3); break; + case ARM::VST3d16Pseudo_UPD: + ExpandVST(MBBI, ARM::VST3d16_UPD, true, SingleSpc, 3); break; + case ARM::VST3d32Pseudo_UPD: + ExpandVST(MBBI, ARM::VST3d32_UPD, true, SingleSpc, 3); break; + case ARM::VST1d64TPseudo_UPD: + ExpandVST(MBBI, ARM::VST1d64T_UPD, true, SingleSpc, 3); break; + case ARM::VST3q8Pseudo_UPD: + ExpandVST(MBBI, ARM::VST3q8_UPD, true, EvenDblSpc, 3); break; + case ARM::VST3q16Pseudo_UPD: + ExpandVST(MBBI, ARM::VST3q16_UPD, true, EvenDblSpc, 3); break; + case ARM::VST3q32Pseudo_UPD: + ExpandVST(MBBI, ARM::VST3q32_UPD, true, EvenDblSpc, 3); break; + case ARM::VST3q8oddPseudo_UPD: + ExpandVST(MBBI, ARM::VST3q8_UPD, true, OddDblSpc, 3); break; + case ARM::VST3q16oddPseudo_UPD: + ExpandVST(MBBI, ARM::VST3q16_UPD, true, OddDblSpc, 3); break; + case ARM::VST3q32oddPseudo_UPD: + ExpandVST(MBBI, ARM::VST3q32_UPD, true, OddDblSpc, 3); break; + + case ARM::VST4d8Pseudo: + ExpandVST(MBBI, ARM::VST4d8, false, SingleSpc, 4); break; + case ARM::VST4d16Pseudo: + ExpandVST(MBBI, ARM::VST4d16, false, SingleSpc, 4); break; + case ARM::VST4d32Pseudo: + ExpandVST(MBBI, ARM::VST4d32, false, SingleSpc, 4); break; + case ARM::VST1d64QPseudo: + ExpandVST(MBBI, ARM::VST1d64Q, false, SingleSpc, 4); break; + case ARM::VST4d8Pseudo_UPD: + ExpandVST(MBBI, ARM::VST4d8_UPD, true, SingleSpc, 4); break; + case ARM::VST4d16Pseudo_UPD: + ExpandVST(MBBI, ARM::VST4d16_UPD, true, SingleSpc, 4); break; + case ARM::VST4d32Pseudo_UPD: + ExpandVST(MBBI, ARM::VST4d32_UPD, true, SingleSpc, 4); break; + case ARM::VST1d64QPseudo_UPD: + ExpandVST(MBBI, ARM::VST1d64Q_UPD, true, SingleSpc, 4); break; + case ARM::VST4q8Pseudo_UPD: + ExpandVST(MBBI, ARM::VST4q8_UPD, true, EvenDblSpc, 4); break; + case ARM::VST4q16Pseudo_UPD: + ExpandVST(MBBI, ARM::VST4q16_UPD, true, EvenDblSpc, 4); break; + case ARM::VST4q32Pseudo_UPD: + ExpandVST(MBBI, ARM::VST4q32_UPD, true, EvenDblSpc, 4); break; + case ARM::VST4q8oddPseudo_UPD: + ExpandVST(MBBI, ARM::VST4q8_UPD, true, OddDblSpc, 4); break; + case ARM::VST4q16oddPseudo_UPD: + ExpandVST(MBBI, ARM::VST4q16_UPD, true, OddDblSpc, 4); break; + 
case ARM::VST4q32oddPseudo_UPD: + ExpandVST(MBBI, ARM::VST4q32_UPD, true, OddDblSpc, 4); break; } + + if (ModifiedOp) + Modified = true; MBBI = NMBBI; } diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp new file mode 100644 index 000000000000..4892eae95833 --- /dev/null +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -0,0 +1,665 @@ +//===-- ARMFastISel.cpp - ARM FastISel implementation ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the ARM-specific support for the FastISel class. Some +// of the target-specific code is generated by tablegen in the file +// ARMGenFastISel.inc, which is #included here. +// +//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "ARMBaseInstrInfo.h" +#include "ARMRegisterInfo.h" +#include "ARMTargetMachine.h" +#include "ARMSubtarget.h" +#include "llvm/CallingConv.h" +#include "llvm/DerivedTypes.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +using namespace llvm; + +static cl::opt<bool> +EnableARMFastISel("arm-fast-isel", + cl::desc("Turn on experimental ARM fast-isel support"), + cl::init(false), cl::Hidden); + +namespace { + +class ARMFastISel : public FastISel { + + /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can + /// make the right decision when generating code for different targets. + const ARMSubtarget *Subtarget; + const TargetMachine &TM; + const TargetInstrInfo &TII; + const TargetLowering &TLI; + const ARMFunctionInfo *AFI; + + // Convenience variable to avoid checking all the time. + bool isThumb; + + public: + explicit ARMFastISel(FunctionLoweringInfo &funcInfo) + : FastISel(funcInfo), + TM(funcInfo.MF->getTarget()), + TII(*TM.getInstrInfo()), + TLI(*TM.getTargetLowering()) { + Subtarget = &TM.getSubtarget<ARMSubtarget>(); + AFI = funcInfo.MF->getInfo<ARMFunctionInfo>(); + isThumb = AFI->isThumbFunction(); + } + + // Code from FastISel.cpp. 
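// These overrides mirror the generic FastISel emitters, but funnel every
// instruction through AddOptionalDefs below so that ARM's predicate operands
// and optional CPSR/CCR defs get filled in.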
+ virtual unsigned FastEmitInst_(unsigned MachineInstOpcode, + const TargetRegisterClass *RC); + virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill); + virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill); + virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + uint64_t Imm); + virtual unsigned FastEmitInst_rf(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + const ConstantFP *FPImm); + virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + uint64_t Imm); + virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill, + uint64_t Imm); + virtual unsigned FastEmitInst_extractsubreg(MVT RetVT, + unsigned Op0, bool Op0IsKill, + uint32_t Idx); + + // Backend specific FastISel code. + virtual bool TargetSelectInstruction(const Instruction *I); + virtual unsigned TargetMaterializeConstant(const Constant *C); + + #include "ARMGenFastISel.inc" + + // Instruction selection routines. + virtual bool ARMSelectLoad(const Instruction *I); + virtual bool ARMSelectStore(const Instruction *I); + virtual bool ARMSelectBranch(const Instruction *I); + + // Utility routines. + private: + bool isTypeLegal(const Type *Ty, EVT &VT); + bool isLoadTypeLegal(const Type *Ty, EVT &VT); + bool ARMEmitLoad(EVT VT, unsigned &ResultReg, unsigned Reg, int Offset); + bool ARMEmitStore(EVT VT, unsigned SrcReg, unsigned Reg, int Offset); + bool ARMLoadAlloca(const Instruction *I); + bool ARMStoreAlloca(const Instruction *I, unsigned SrcReg); + bool ARMComputeRegOffset(const Value *Obj, unsigned &Reg, int &Offset); + bool ARMMaterializeConstant(const ConstantInt *Val, unsigned &Reg); + + bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR); + const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB); +}; + +} // end anonymous namespace + +// #include "ARMGenCallingConv.inc" + +// DefinesOptionalPredicate - This is different from DefinesPredicate in that +// we don't care about implicit defs here, just places we'll need to add a +// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR. +bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) { + const TargetInstrDesc &TID = MI->getDesc(); + if (!TID.hasOptionalDef()) + return false; + + // Look to see if our OptionalDef is defining CPSR or CCR. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) continue; + if (MO.getReg() == ARM::CPSR) + *CPSR = true; + } + return true; +} + +// If the machine is predicable go ahead and add the predicate operands, if +// it needs default CC operands add those. +const MachineInstrBuilder & +ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) { + MachineInstr *MI = &*MIB; + + // Do we use a predicate? + if (TII.isPredicable(MI)) + AddDefaultPred(MIB); + + // Do we optionally set a predicate? Preds is size > 0 iff the predicate + // defines CPSR. All other OptionalDefines in ARM are the CCR register. 
+ bool CPSR = false; + if (DefinesOptionalPredicate(MI, &CPSR)) { + if (CPSR) + AddDefaultT1CC(MIB); + else + AddDefaultCC(MIB); + } + return MIB; +} + +unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode, + const TargetRegisterClass* RC) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)); + return ResultReg; +} + +unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill)); + else { + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(II.ImplicitDefs[0])); + } + return ResultReg; +} + +unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill)); + else { + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(II.ImplicitDefs[0])); + } + return ResultReg; +} + +unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + uint64_t Imm) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addImm(Imm)); + else { + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addImm(Imm)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(II.ImplicitDefs[0])); + } + return ResultReg; +} + +unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + const ConstantFP *FPImm) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addFPImm(FPImm)); + else { + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addFPImm(FPImm)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(II.ImplicitDefs[0])); + } + return ResultReg; +} + +unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, 
bool Op1IsKill, + uint64_t Imm) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addImm(Imm)); + else { + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addImm(Imm)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(II.ImplicitDefs[0])); + } + return ResultReg; +} + +unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + uint64_t Imm) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addImm(Imm)); + else { + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addImm(Imm)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(II.ImplicitDefs[0])); + } + return ResultReg; +} + +unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT, + unsigned Op0, bool Op0IsKill, + uint32_t Idx) { + unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); + assert(TargetRegisterInfo::isVirtualRegister(Op0) && + "Cannot yet extract from physregs"); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, + DL, TII.get(TargetOpcode::COPY), ResultReg) + .addReg(Op0, getKillRegState(Op0IsKill), Idx)); + return ResultReg; +} + +unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) { + EVT VT = TLI.getValueType(C->getType(), true); + + // Only handle simple types. + if (!VT.isSimple()) return 0; + + // TODO: This should be safe for fp because they're just bits from the + // Constant. + // TODO: Theoretically we could materialize fp constants with instructions + // from VFP3. + + // MachineConstantPool wants an explicit alignment. + unsigned Align = TD.getPrefTypeAlignment(C->getType()); + if (Align == 0) { + // TODO: Figure out if this is correct. + Align = TD.getTypeAllocSize(C->getType()); + } + unsigned Idx = MCP.getConstantPoolIndex(C, Align); + + unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); + // Different addressing modes between ARM/Thumb2 for constant pool loads. + if (isThumb) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(ARM::t2LDRpci)) + .addReg(DestReg).addConstantPoolIndex(Idx)); + else + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(ARM::LDRcp)) + .addReg(DestReg).addConstantPoolIndex(Idx) + .addReg(0).addImm(0)); + + return DestReg; +} + +bool ARMFastISel::isTypeLegal(const Type *Ty, EVT &VT) { + VT = TLI.getValueType(Ty, true); + + // Only handle simple types. + if (VT == MVT::Other || !VT.isSimple()) return false; + + // Handle all legal types, i.e. a register that will directly hold this + // value. + return TLI.isTypeLegal(VT); +} + +bool ARMFastISel::isLoadTypeLegal(const Type *Ty, EVT &VT) { + if (isTypeLegal(Ty, VT)) return true; + + // If this is a type than can be sign or zero-extended to a basic operation + // go ahead and accept it now. + if (VT == MVT::i8 || VT == MVT::i16) + return true; + + return false; +} + +// Computes the Reg+Offset to get to an object. 
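// Returns false when the address cannot be reduced to a single base register
// plus a simple offset, e.g. when the value is defined in another basic block
// or is a global.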
+bool ARMFastISel::ARMComputeRegOffset(const Value *Obj, unsigned &Reg, + int &Offset) { + // Some boilerplate from the X86 FastISel. + const User *U = NULL; + unsigned Opcode = Instruction::UserOp1; + if (const Instruction *I = dyn_cast<Instruction>(Obj)) { + // Don't walk into other basic blocks; it's possible we haven't + // visited them yet, so the instructions may not yet be assigned + // virtual registers. + if (FuncInfo.MBBMap[I->getParent()] != FuncInfo.MBB) + return false; + + Opcode = I->getOpcode(); + U = I; + } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) { + Opcode = C->getOpcode(); + U = C; + } + + if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType())) + if (Ty->getAddressSpace() > 255) + // Fast instruction selection doesn't support the special + // address spaces. + return false; + + switch (Opcode) { + default: + //errs() << "Failing Opcode is: " << *Op1 << "\n"; + break; + case Instruction::Alloca: { + assert(false && "Alloca should have been handled earlier!"); + return false; + } + } + + if (const GlobalValue *GV = dyn_cast<GlobalValue>(Obj)) { + //errs() << "Failing GV is: " << GV << "\n"; + (void)GV; + return false; + } + + // Try to get this in a register if nothing else has worked. + Reg = getRegForValue(Obj); + if (Reg == 0) return false; + + // Since the offset may be too large for the load instruction + // get the reg+offset into a register. + // TODO: Verify the additions work, otherwise we'll need to add the + // offset instead of 0 to the instructions and do all sorts of operand + // munging. + // TODO: Optimize this somewhat. + if (Offset != 0) { + ARMCC::CondCodes Pred = ARMCC::AL; + unsigned PredReg = 0; + + if (!isThumb) + emitARMRegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + Reg, Reg, Offset, Pred, PredReg, + static_cast<const ARMBaseInstrInfo&>(TII)); + else { + assert(AFI->isThumb2Function()); + emitT2RegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + Reg, Reg, Offset, Pred, PredReg, + static_cast<const ARMBaseInstrInfo&>(TII)); + } + } + + return true; +} + +bool ARMFastISel::ARMLoadAlloca(const Instruction *I) { + Value *Op0 = I->getOperand(0); + + // Verify it's an alloca. + if (const AllocaInst *AI = dyn_cast<AllocaInst>(Op0)) { + DenseMap<const AllocaInst*, int>::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + + if (SI != FuncInfo.StaticAllocaMap.end()) { + TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy()); + unsigned ResultReg = createResultReg(RC); + TII.loadRegFromStackSlot(*FuncInfo.MBB, *FuncInfo.InsertPt, + ResultReg, SI->second, RC, + TM.getRegisterInfo()); + UpdateValueMap(I, ResultReg); + return true; + } + } + return false; +} + +bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, + unsigned Reg, int Offset) { + + assert(VT.isSimple() && "Non-simple types are invalid here!"); + unsigned Opc; + + switch (VT.getSimpleVT().SimpleTy) { + default: + assert(false && "Trying to emit for an unhandled type!"); + return false; + case MVT::i16: + Opc = isThumb ? ARM::tLDRH : ARM::LDRH; + VT = MVT::i32; + break; + case MVT::i8: + Opc = isThumb ? ARM::tLDRB : ARM::LDRB; + VT = MVT::i32; + break; + case MVT::i32: + Opc = isThumb ? ARM::tLDR : ARM::LDR; + break; + } + + ResultReg = createResultReg(TLI.getRegClassFor(VT)); + + // TODO: Fix the Addressing modes so that these can share some code. + // Since this is a Thumb1 load this will work in Thumb1 or 2 mode. 
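// Note the operand order differs between the two forms below: the Thumb
// encodings take (base, immediate offset, offset register) while the ARM
// encodings take (base, offset register, addrmode2 immediate).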
+ if (isThumb) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(Opc), ResultReg) + .addReg(Reg).addImm(Offset).addReg(0)); + else + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(Opc), ResultReg) + .addReg(Reg).addReg(0).addImm(Offset)); + + return true; +} + +bool ARMFastISel::ARMStoreAlloca(const Instruction *I, unsigned SrcReg) { + Value *Op1 = I->getOperand(1); + + // Verify it's an alloca. + if (const AllocaInst *AI = dyn_cast<AllocaInst>(Op1)) { + DenseMap<const AllocaInst*, int>::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + + if (SI != FuncInfo.StaticAllocaMap.end()) { + TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy()); + assert(SrcReg != 0 && "Nothing to store!"); + TII.storeRegToStackSlot(*FuncInfo.MBB, *FuncInfo.InsertPt, + SrcReg, true /*isKill*/, SI->second, RC, + TM.getRegisterInfo()); + return true; + } + } + return false; +} + +bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, + unsigned DstReg, int Offset) { + unsigned StrOpc; + switch (VT.getSimpleVT().SimpleTy) { + default: return false; + case MVT::i1: + case MVT::i8: StrOpc = isThumb ? ARM::tSTRB : ARM::STRB; break; + case MVT::i16: StrOpc = isThumb ? ARM::tSTRH : ARM::STRH; break; + case MVT::i32: StrOpc = isThumb ? ARM::tSTR : ARM::STR; break; + case MVT::f32: + if (!Subtarget->hasVFP2()) return false; + StrOpc = ARM::VSTRS; + break; + case MVT::f64: + if (!Subtarget->hasVFP2()) return false; + StrOpc = ARM::VSTRD; + break; + } + + if (isThumb) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(StrOpc), SrcReg) + .addReg(DstReg).addImm(Offset).addReg(0)); + else + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(StrOpc), SrcReg) + .addReg(DstReg).addReg(0).addImm(Offset)); + + return true; +} + +bool ARMFastISel::ARMSelectStore(const Instruction *I) { + Value *Op0 = I->getOperand(0); + unsigned SrcReg = 0; + + // Yay type legalization + EVT VT; + if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT)) + return false; + + // Get the value to be stored into a register. + SrcReg = getRegForValue(Op0); + if (SrcReg == 0) + return false; + + // If we're an alloca we know we have a frame index and can emit the store + // quickly. + if (ARMStoreAlloca(I, SrcReg)) + return true; + + // Our register and offset with innocuous defaults. + unsigned Reg = 0; + int Offset = 0; + + // See if we can handle this as Reg + Offset + if (!ARMComputeRegOffset(I->getOperand(1), Reg, Offset)) + return false; + + if (!ARMEmitStore(VT, SrcReg, Reg, Offset /* 0 */)) return false; + + return false; + +} + +bool ARMFastISel::ARMSelectLoad(const Instruction *I) { + // If we're an alloca we know we have a frame index and can emit the load + // directly in short order. + if (ARMLoadAlloca(I)) + return true; + + // Verify we have a legal type before going any further. + EVT VT; + if (!isLoadTypeLegal(I->getType(), VT)) + return false; + + // Our register and offset with innocuous defaults. 
+ unsigned Reg = 0; + int Offset = 0; + + // See if we can handle this as Reg + Offset + if (!ARMComputeRegOffset(I->getOperand(0), Reg, Offset)) + return false; + + unsigned ResultReg; + if (!ARMEmitLoad(VT, ResultReg, Reg, Offset /* 0 */)) return false; + + UpdateValueMap(I, ResultReg); + return true; +} + +bool ARMFastISel::ARMSelectBranch(const Instruction *I) { + const BranchInst *BI = cast<BranchInst>(I); + MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; + MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; + + // Simple branch support. + unsigned CondReg = getRegForValue(BI->getCondition()); + if (CondReg == 0) return false; + + unsigned CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr; + unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc; + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) + .addReg(CondReg).addReg(CondReg)); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc)) + .addMBB(TBB).addImm(ARMCC::NE).addReg(ARM::CPSR); + FastEmitBranch(FBB, DL); + FuncInfo.MBB->addSuccessor(TBB); + return true; +} + +// TODO: SoftFP support. +bool ARMFastISel::TargetSelectInstruction(const Instruction *I) { + // No Thumb-1 for now. + if (isThumb && !AFI->isThumb2Function()) return false; + + switch (I->getOpcode()) { + case Instruction::Load: + return ARMSelectLoad(I); + case Instruction::Store: + return ARMSelectStore(I); + case Instruction::Br: + return ARMSelectBranch(I); + default: break; + } + return false; +} + +namespace llvm { + llvm::FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) { + if (EnableARMFastISel) return new ARMFastISel(funcInfo); + return 0; + } +} diff --git a/lib/Target/ARM/ARMGlobalMerge.cpp b/lib/Target/ARM/ARMGlobalMerge.cpp new file mode 100644 index 000000000000..85b0c6c248d0 --- /dev/null +++ b/lib/Target/ARM/ARMGlobalMerge.cpp @@ -0,0 +1,212 @@ +//===-- ARMGlobalMerge.cpp - Internal globals merging --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This pass merges globals with internal linkage into one. This way all of the +// merged globals can be addressed using offsets from the same base pointer (no +// need for a separate base pointer for each global). Such a transformation can +// significantly reduce register pressure when many globals are involved. +// +// For example, consider the code which touches several global variables at once: +// +// static int foo[N], bar[N], baz[N]; +// +// for (i = 0; i < N; ++i) { +// foo[i] = bar[i] * baz[i]; +// } +// +// On ARM the addresses of the 3 arrays must be kept in registers, so this +// code has quite high register pressure (loop body): +// +// ldr r1, [r5], #4 +// ldr r2, [r6], #4 +// mul r1, r2, r1 +// str r1, [r0], #4 +// +// The pass converts the code to something like: +// +// static struct { +// int foo[N]; +// int bar[N]; +// int baz[N]; +// } merged; +// +// for (i = 0; i < N; ++i) { +// merged.foo[i] = merged.bar[i] * merged.baz[i]; +// } +// +// and in ARM code this becomes: +// +// ldr r0, [r5, #40] +// ldr r1, [r5, #80] +// mul r0, r1, r0 +// str r0, [r5], #4 +// +// Note that we saved 2 registers here almost "for free".
+// ===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "arm-global-merge" +#include "ARM.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Attributes.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLowering.h" +using namespace llvm; + +namespace { + class LLVM_LIBRARY_VISIBILITY ARMGlobalMerge : public FunctionPass { + /// TLI - Keep a pointer of a TargetLowering to consult for determining + /// target type sizes. + const TargetLowering *TLI; + + bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals, + Module &M, bool) const; + + public: + static char ID; // Pass identification, replacement for typeid. + explicit ARMGlobalMerge(const TargetLowering *tli) + : FunctionPass(ID), TLI(tli) {} + + virtual bool doInitialization(Module &M); + virtual bool runOnFunction(Function& F); + + const char *getPassName() const { + return "Merge internal globals"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + FunctionPass::getAnalysisUsage(AU); + } + + struct GlobalCmp { + const TargetData *TD; + + GlobalCmp(const TargetData *td): + TD(td) { } + + bool operator() (const GlobalVariable* GV1, + const GlobalVariable* GV2) { + const Type* Ty1 = cast<PointerType>(GV1->getType())->getElementType(); + const Type* Ty2 = cast<PointerType>(GV2->getType())->getElementType(); + + return (TD->getTypeAllocSize(Ty1) < TD->getTypeAllocSize(Ty2)); + } + }; + }; +} // end anonymous namespace + +char ARMGlobalMerge::ID = 0; + +bool ARMGlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, + Module &M, bool isConst) const { + const TargetData *TD = TLI->getTargetData(); + + // FIXME: Infer the maximum possible offset depending on the actual users + // (these max offsets are different for the users inside Thumb or ARM + // functions) + unsigned MaxOffset = TLI->getMaximalGlobalOffset(); + + // FIXME: Find better heuristics + std::stable_sort(Globals.begin(), Globals.end(), GlobalCmp(TD)); + + const Type *Int32Ty = Type::getInt32Ty(M.getContext()); + + for (size_t i = 0, e = Globals.size(); i != e; ) { + size_t j = 0; + uint64_t MergedSize = 0; + std::vector<const Type*> Tys; + std::vector<Constant*> Inits; + for (j = i; MergedSize < MaxOffset && j != e; ++j) { + const Type* Ty = Globals[j]->getType()->getElementType(); + Tys.push_back(Ty); + Inits.push_back(Globals[j]->getInitializer()); + MergedSize += TD->getTypeAllocSize(Ty); + } + + StructType* MergedTy = StructType::get(M.getContext(), Tys); + Constant* MergedInit = ConstantStruct::get(MergedTy, Inits); + GlobalVariable* MergedGV = new GlobalVariable(M, MergedTy, isConst, + GlobalValue::InternalLinkage, + MergedInit, "merged"); + for (size_t k = i; k < j; ++k) { + SmallVector<Constant*, 2> Idx; + Idx.push_back(ConstantInt::get(Int32Ty, 0)); + Idx.push_back(ConstantInt::get(Int32Ty, k-i)); + + Constant* GEP = + ConstantExpr::getInBoundsGetElementPtr(MergedGV, + &Idx[0], Idx.size()); + + Globals[k]->replaceAllUsesWith(GEP); + Globals[k]->eraseFromParent(); + } + i = j; + } + + return true; +} + + +bool ARMGlobalMerge::doInitialization(Module& M) { + SmallVector<GlobalVariable*, 16> Globals, ConstGlobals; + const TargetData *TD = TLI->getTargetData(); + unsigned MaxOffset = TLI->getMaximalGlobalOffset(); + bool Changed 
= false; + + // Grab all non-const globals. + for (Module::global_iterator I = M.global_begin(), + E = M.global_end(); I != E; ++I) { + // Merge is safe for "normal" internal globals only + if (!I->hasLocalLinkage() || I->isThreadLocal() || I->hasSection()) + continue; + + // Ignore fancy-aligned globals for now. + if (I->getAlignment() != 0) + continue; + + // Ignore all 'special' globals. + if (I->getName().startswith("llvm.") || + I->getName().startswith(".llvm.")) + continue; + + if (TD->getTypeAllocSize(I->getType()) < MaxOffset) { + if (I->isConstant()) + ConstGlobals.push_back(I); + else + Globals.push_back(I); + } + } + + if (Globals.size() > 1) + Changed |= doMerge(Globals, M, false); + // FIXME: This currently breaks the EH processing due to way how the + // typeinfo detection works. We might want to detect the TIs and ignore + // them in the future. + + // if (ConstGlobals.size() > 1) + // Changed |= doMerge(ConstGlobals, M, true); + + return Changed; +} + +bool ARMGlobalMerge::runOnFunction(Function& F) { + return false; +} + +FunctionPass *llvm::createARMGlobalMergePass(const TargetLowering *tli) { + return new ARMGlobalMerge(tli); +} diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index c84d3ff81324..51a30c158dd1 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -36,6 +36,11 @@ using namespace llvm; +static cl::opt<bool> +DisableShifterOp("disable-shifter-op", cl::Hidden, + cl::desc("Disable isel of shifter-op"), + cl::init(false)); + //===--------------------------------------------------------------------===// /// ARMDAGToDAGISel - ARM specific code to select ARM machine /// instructions for SelectionDAG operations. @@ -113,6 +118,16 @@ public: bool SelectT2AddrModeSoReg(SDNode *Op, SDValue N, SDValue &Base, SDValue &OffReg, SDValue &ShImm); + inline bool Pred_so_imm(SDNode *inN) const { + ConstantSDNode *N = cast<ConstantSDNode>(inN); + return ARM_AM::getSOImmVal(N->getZExtValue()) != -1; + } + + inline bool Pred_t2_so_imm(SDNode *inN) const { + ConstantSDNode *N = cast<ConstantSDNode>(inN); + return ARM_AM::getT2SOImmVal(N->getZExtValue()) != -1; + } + // Include the pieces autogenerated from the target description. #include "ARMGenDAGISel.inc" @@ -220,6 +235,9 @@ bool ARMDAGToDAGISel::SelectShifterOperandReg(SDNode *Op, SDValue &BaseReg, SDValue &ShReg, SDValue &Opc) { + if (DisableShifterOp) + return false; + ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N); // Don't match base register only case. That is matched to a separate @@ -463,7 +481,7 @@ bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N, bool ARMDAGToDAGISel::SelectAddrMode4(SDNode *Op, SDValue N, SDValue &Addr, SDValue &Mode) { Addr = N; - Mode = CurDAG->getTargetConstant(0, MVT::i32); + Mode = CurDAG->getTargetConstant(ARM_AM::getAM4ModeImm(ARM_AM::ia), MVT::i32); return true; } @@ -666,6 +684,9 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDNode *Op, SDValue N, bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDNode *Op, SDValue N, SDValue &BaseReg, SDValue &Opc) { + if (DisableShifterOp) + return false; + ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N); // Don't match base register only case. That is matched to a separate @@ -1090,110 +1111,79 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, break; } + EVT ResTy; + if (NumVecs == 1) + ResTy = VT; + else { + unsigned ResTyElts = (NumVecs == 3) ? 
4 : NumVecs; + if (!is64BitVector) + ResTyElts *= 2; + ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts); + } + SDValue Pred = getAL(CurDAG); SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); + SDValue SuperReg; if (is64BitVector) { unsigned Opc = DOpcodes[OpcodeIndex]; const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain }; - std::vector<EVT> ResTys(NumVecs, VT); - ResTys.push_back(MVT::Other); - SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5); - if (NumVecs < 2) + SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other, Ops, 5); + if (NumVecs == 1) return VLd; - SDValue RegSeq; - SDValue V0 = SDValue(VLd, 0); - SDValue V1 = SDValue(VLd, 1); - - // Form a REG_SEQUENCE to force register allocation. - if (NumVecs == 2) - RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); - else { - SDValue V2 = SDValue(VLd, 2); - // If it's a vld3, form a quad D-register but discard the last part. - SDValue V3 = (NumVecs == 3) - ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) - : SDValue(VLd, 3); - RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); - } - + SuperReg = SDValue(VLd, 0); assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { SDValue D = CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec, - dl, VT, RegSeq); + dl, VT, SuperReg); ReplaceUses(SDValue(N, Vec), D); } - ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, NumVecs)); + ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1)); return NULL; } - EVT RegVT = GetNEONSubregVT(VT); if (NumVecs <= 2) { // Quad registers are directly supported for VLD1 and VLD2, // loading pairs of D regs. unsigned Opc = QOpcodes0[OpcodeIndex]; const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain }; - std::vector<EVT> ResTys(2 * NumVecs, RegVT); - ResTys.push_back(MVT::Other); - SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5); - Chain = SDValue(VLd, 2 * NumVecs); + SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other, Ops, 5); + if (NumVecs == 1) + return VLd; + + SuperReg = SDValue(VLd, 0); + Chain = SDValue(VLd, 1); - // Combine the even and odd subregs to produce the result. - if (NumVecs == 1) { - SDNode *Q = PairDRegs(VT, SDValue(VLd, 0), SDValue(VLd, 1)); - ReplaceUses(SDValue(N, 0), SDValue(Q, 0)); - } else { - SDValue QQ = SDValue(QuadDRegs(MVT::v4i64, - SDValue(VLd, 0), SDValue(VLd, 1), - SDValue(VLd, 2), SDValue(VLd, 3)), 0); - SDValue Q0 = CurDAG->getTargetExtractSubreg(ARM::qsub_0, dl, VT, QQ); - SDValue Q1 = CurDAG->getTargetExtractSubreg(ARM::qsub_1, dl, VT, QQ); - ReplaceUses(SDValue(N, 0), Q0); - ReplaceUses(SDValue(N, 1), Q1); - } } else { // Otherwise, quad registers are loaded with two separate instructions, // where one loads the even registers and the other loads the odd registers. - - std::vector<EVT> ResTys(NumVecs, RegVT); - ResTys.push_back(MemAddr.getValueType()); - ResTys.push_back(MVT::Other); + EVT AddrTy = MemAddr.getValueType(); // Load the even subregs. unsigned Opc = QOpcodes0[OpcodeIndex]; - const SDValue OpsA[] = { MemAddr, Align, Reg0, Pred, Reg0, Chain }; - SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 6); - Chain = SDValue(VLdA, NumVecs+1); + SDValue ImplDef = + SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0); + const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain }; + SDNode *VLdA = + CurDAG->getMachineNode(Opc, dl, ResTy, AddrTy, MVT::Other, OpsA, 7); + Chain = SDValue(VLdA, 2); // Load the odd subregs. 
Opc = QOpcodes1[OpcodeIndex]; - const SDValue OpsB[] = { SDValue(VLdA, NumVecs), - Align, Reg0, Pred, Reg0, Chain }; - SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 6); - Chain = SDValue(VLdB, NumVecs+1); - - SDValue V0 = SDValue(VLdA, 0); - SDValue V1 = SDValue(VLdB, 0); - SDValue V2 = SDValue(VLdA, 1); - SDValue V3 = SDValue(VLdB, 1); - SDValue V4 = SDValue(VLdA, 2); - SDValue V5 = SDValue(VLdB, 2); - SDValue V6 = (NumVecs == 3) - ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), 0) - : SDValue(VLdA, 3); - SDValue V7 = (NumVecs == 3) - ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), 0) - : SDValue(VLdB, 3); - SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V0, V1, V2, V3, - V4, V5, V6, V7), 0); - - // Extract out the 3 / 4 Q registers. - assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering"); - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { - SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec, - dl, VT, RegSeq); - ReplaceUses(SDValue(N, Vec), Q); - } + const SDValue OpsB[] = { SDValue(VLdA, 1), Align, Reg0, SDValue(VLdA, 0), + Pred, Reg0, Chain }; + SDNode *VLdB = + CurDAG->getMachineNode(Opc, dl, ResTy, AddrTy, MVT::Other, OpsB, 7); + SuperReg = SDValue(VLdB, 0); + Chain = SDValue(VLdB, 2); + } + + // Extract out the Q registers. + assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering"); + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { + SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec, + dl, VT, SuperReg); + ReplaceUses(SDValue(N, Vec), Q); } ReplaceUses(SDValue(N, NumVecs), Chain); return NULL; @@ -1235,12 +1225,14 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, SDValue Pred = getAL(CurDAG); SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); - SmallVector<SDValue, 10> Ops; + SmallVector<SDValue, 7> Ops; Ops.push_back(MemAddr); Ops.push_back(Align); if (is64BitVector) { - if (NumVecs >= 2) { + if (NumVecs == 1) { + Ops.push_back(N->getOperand(3)); + } else { SDValue RegSeq; SDValue V0 = N->getOperand(0+3); SDValue V1 = N->getOperand(1+3); @@ -1257,111 +1249,61 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, : N->getOperand(3+3); RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); } - - // Now extract the D registers back out. - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT, - RegSeq)); - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT, - RegSeq)); - if (NumVecs > 2) - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT, - RegSeq)); - if (NumVecs > 3) - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT, - RegSeq)); - } else { - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops.push_back(N->getOperand(Vec+3)); + Ops.push_back(RegSeq); } Ops.push_back(Pred); Ops.push_back(Reg0); // predicate register Ops.push_back(Chain); unsigned Opc = DOpcodes[OpcodeIndex]; - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 6); } - EVT RegVT = GetNEONSubregVT(VT); if (NumVecs <= 2) { - // Quad registers are directly supported for VST1 and VST2, - // storing pairs of D regs. + // Quad registers are directly supported for VST1 and VST2. unsigned Opc = QOpcodes0[OpcodeIndex]; - if (NumVecs == 2) { - // First extract the pair of Q registers. + if (NumVecs == 1) { + Ops.push_back(N->getOperand(3)); + } else { + // Form a QQ register. 
SDValue Q0 = N->getOperand(3); SDValue Q1 = N->getOperand(4); - - // Form a QQ register. - SDValue QQ = SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0); - - // Now extract the D registers back out. - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT, - QQ)); - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT, - QQ)); - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, RegVT, - QQ)); - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, RegVT, - QQ)); - Ops.push_back(Pred); - Ops.push_back(Reg0); // predicate register - Ops.push_back(Chain); - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 5 + 4); - } else { - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT, - N->getOperand(Vec+3))); - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT, - N->getOperand(Vec+3))); - } - Ops.push_back(Pred); - Ops.push_back(Reg0); // predicate register - Ops.push_back(Chain); - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), - 5 + 2 * NumVecs); + Ops.push_back(SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0)); } + Ops.push_back(Pred); + Ops.push_back(Reg0); // predicate register + Ops.push_back(Chain); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 6); } // Otherwise, quad registers are stored with two separate instructions, // where one stores the even registers and the other stores the odd registers. // Form the QQQQ REG_SEQUENCE. - SDValue V[8]; - for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) { - V[i] = CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT, - N->getOperand(Vec+3)); - V[i+1] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT, - N->getOperand(Vec+3)); - } - if (NumVecs == 3) - V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, - dl, RegVT), 0); - - SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3], - V[4], V[5], V[6], V[7]), 0); + SDValue V0 = N->getOperand(0+3); + SDValue V1 = N->getOperand(1+3); + SDValue V2 = N->getOperand(2+3); + SDValue V3 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) + : N->getOperand(3+3); + SDValue RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0); // Store the even D registers. - assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); Ops.push_back(Reg0); // post-access address offset - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec*2, dl, - RegVT, RegSeq)); + Ops.push_back(RegSeq); Ops.push_back(Pred); Ops.push_back(Reg0); // predicate register Ops.push_back(Chain); unsigned Opc = QOpcodes0[OpcodeIndex]; SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+6); + MVT::Other, Ops.data(), 7); Chain = SDValue(VStA, 1); // Store the odd D registers. 
Ops[0] = SDValue(VStA, 0); // MemAddr - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1+Vec*2, dl, - RegVT, RegSeq); - Ops[NumVecs+5] = Chain; + Ops[6] = Chain; Opc = QOpcodes1[OpcodeIndex]; SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+6); + MVT::Other, Ops.data(), 7); Chain = SDValue(VStB, 1); ReplaceUses(SDValue(N, 0), Chain); return NULL; @@ -1675,7 +1617,7 @@ SelectT2CMOVSoImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, if (!T) return 0; - if (Predicate_t2_so_imm(TrueVal.getNode())) { + if (Pred_t2_so_imm(TrueVal.getNode())) { SDValue True = CurDAG->getTargetConstant(T->getZExtValue(), MVT::i32); SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag }; @@ -1692,7 +1634,7 @@ SelectARMCMOVSoImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, if (!T) return 0; - if (Predicate_so_imm(TrueVal.getNode())) { + if (Pred_so_imm(TrueVal.getNode())) { SDValue True = CurDAG->getTargetConstant(T->getZExtValue(), MVT::i32); SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag }; @@ -1740,7 +1682,7 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) { } // Pattern: (ARMcmov:i32 GPR:i32:$false, - // (imm:i32)<<P:Predicate_so_imm>>:$true, + // (imm:i32)<<P:Pred_so_imm>>:$true, // (imm:i32):$cc) // Emits: (MOVCCi:i32 GPR:i32:$false, // (so_imm:i32 (imm:i32):$true), (imm:i32):$cc) @@ -2013,43 +1955,6 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ResNode = SelectARMIndexedLoad(N); if (ResNode) return ResNode; - - // VLDMQ must be custom-selected for "v2f64 load" to set the AM5Opc value. - if (Subtarget->hasVFP2() && - N->getValueType(0).getSimpleVT().SimpleTy == MVT::v2f64) { - SDValue Chain = N->getOperand(0); - SDValue AM5Opc = - CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::ia, 4), MVT::i32); - SDValue Pred = getAL(CurDAG); - SDValue PredReg = CurDAG->getRegister(0, MVT::i32); - SDValue Ops[] = { N->getOperand(1), AM5Opc, Pred, PredReg, Chain }; - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = cast<MemSDNode>(N)->getMemOperand(); - SDNode *Ret = CurDAG->getMachineNode(ARM::VLDMQ, dl, - MVT::v2f64, MVT::Other, Ops, 5); - cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1); - return Ret; - } - // Other cases are autogenerated. - break; - } - case ISD::STORE: { - // VSTMQ must be custom-selected for "v2f64 store" to set the AM5Opc value. - if (Subtarget->hasVFP2() && - N->getOperand(1).getValueType().getSimpleVT().SimpleTy == MVT::v2f64) { - SDValue Chain = N->getOperand(0); - SDValue AM5Opc = - CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::ia, 4), MVT::i32); - SDValue Pred = getAL(CurDAG); - SDValue PredReg = CurDAG->getRegister(0, MVT::i32); - SDValue Ops[] = { N->getOperand(1), N->getOperand(2), - AM5Opc, Pred, PredReg, Chain }; - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = cast<MemSDNode>(N)->getMemOperand(); - SDNode *Ret = CurDAG->getMachineNode(ARM::VSTMQ, dl, MVT::Other, Ops, 6); - cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1); - return Ret; - } // Other cases are autogenerated. 
break; } @@ -2206,39 +2111,40 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case Intrinsic::arm_neon_vld1: { unsigned DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16, ARM::VLD1d32, ARM::VLD1d64 }; - unsigned QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, - ARM::VLD1q32, ARM::VLD1q64 }; + unsigned QOpcodes[] = { ARM::VLD1q8Pseudo, ARM::VLD1q16Pseudo, + ARM::VLD1q32Pseudo, ARM::VLD1q64Pseudo }; return SelectVLD(N, 1, DOpcodes, QOpcodes, 0); } case Intrinsic::arm_neon_vld2: { - unsigned DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16, - ARM::VLD2d32, ARM::VLD1q64 }; - unsigned QOpcodes[] = { ARM::VLD2q8, ARM::VLD2q16, ARM::VLD2q32 }; + unsigned DOpcodes[] = { ARM::VLD2d8Pseudo, ARM::VLD2d16Pseudo, + ARM::VLD2d32Pseudo, ARM::VLD1q64Pseudo }; + unsigned QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo, + ARM::VLD2q32Pseudo }; return SelectVLD(N, 2, DOpcodes, QOpcodes, 0); } case Intrinsic::arm_neon_vld3: { - unsigned DOpcodes[] = { ARM::VLD3d8, ARM::VLD3d16, - ARM::VLD3d32, ARM::VLD1d64T }; - unsigned QOpcodes0[] = { ARM::VLD3q8_UPD, - ARM::VLD3q16_UPD, - ARM::VLD3q32_UPD }; - unsigned QOpcodes1[] = { ARM::VLD3q8odd_UPD, - ARM::VLD3q16odd_UPD, - ARM::VLD3q32odd_UPD }; + unsigned DOpcodes[] = { ARM::VLD3d8Pseudo, ARM::VLD3d16Pseudo, + ARM::VLD3d32Pseudo, ARM::VLD1d64TPseudo }; + unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, + ARM::VLD3q16Pseudo_UPD, + ARM::VLD3q32Pseudo_UPD }; + unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD, + ARM::VLD3q16oddPseudo_UPD, + ARM::VLD3q32oddPseudo_UPD }; return SelectVLD(N, 3, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vld4: { - unsigned DOpcodes[] = { ARM::VLD4d8, ARM::VLD4d16, - ARM::VLD4d32, ARM::VLD1d64Q }; - unsigned QOpcodes0[] = { ARM::VLD4q8_UPD, - ARM::VLD4q16_UPD, - ARM::VLD4q32_UPD }; - unsigned QOpcodes1[] = { ARM::VLD4q8odd_UPD, - ARM::VLD4q16odd_UPD, - ARM::VLD4q32odd_UPD }; + unsigned DOpcodes[] = { ARM::VLD4d8Pseudo, ARM::VLD4d16Pseudo, + ARM::VLD4d32Pseudo, ARM::VLD1d64QPseudo }; + unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, + ARM::VLD4q16Pseudo_UPD, + ARM::VLD4q32Pseudo_UPD }; + unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD, + ARM::VLD4q16oddPseudo_UPD, + ARM::VLD4q32oddPseudo_UPD }; return SelectVLD(N, 4, DOpcodes, QOpcodes0, QOpcodes1); } @@ -2266,39 +2172,40 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case Intrinsic::arm_neon_vst1: { unsigned DOpcodes[] = { ARM::VST1d8, ARM::VST1d16, ARM::VST1d32, ARM::VST1d64 }; - unsigned QOpcodes[] = { ARM::VST1q8, ARM::VST1q16, - ARM::VST1q32, ARM::VST1q64 }; + unsigned QOpcodes[] = { ARM::VST1q8Pseudo, ARM::VST1q16Pseudo, + ARM::VST1q32Pseudo, ARM::VST1q64Pseudo }; return SelectVST(N, 1, DOpcodes, QOpcodes, 0); } case Intrinsic::arm_neon_vst2: { - unsigned DOpcodes[] = { ARM::VST2d8, ARM::VST2d16, - ARM::VST2d32, ARM::VST1q64 }; - unsigned QOpcodes[] = { ARM::VST2q8, ARM::VST2q16, ARM::VST2q32 }; + unsigned DOpcodes[] = { ARM::VST2d8Pseudo, ARM::VST2d16Pseudo, + ARM::VST2d32Pseudo, ARM::VST1q64Pseudo }; + unsigned QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo, + ARM::VST2q32Pseudo }; return SelectVST(N, 2, DOpcodes, QOpcodes, 0); } case Intrinsic::arm_neon_vst3: { - unsigned DOpcodes[] = { ARM::VST3d8, ARM::VST3d16, - ARM::VST3d32, ARM::VST1d64T }; - unsigned QOpcodes0[] = { ARM::VST3q8_UPD, - ARM::VST3q16_UPD, - ARM::VST3q32_UPD }; - unsigned QOpcodes1[] = { ARM::VST3q8odd_UPD, - ARM::VST3q16odd_UPD, - ARM::VST3q32odd_UPD }; + unsigned DOpcodes[] = { ARM::VST3d8Pseudo, ARM::VST3d16Pseudo, + ARM::VST3d32Pseudo, ARM::VST1d64TPseudo }; + unsigned QOpcodes0[] = { 
ARM::VST3q8Pseudo_UPD, + ARM::VST3q16Pseudo_UPD, + ARM::VST3q32Pseudo_UPD }; + unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD, + ARM::VST3q16oddPseudo_UPD, + ARM::VST3q32oddPseudo_UPD }; return SelectVST(N, 3, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vst4: { - unsigned DOpcodes[] = { ARM::VST4d8, ARM::VST4d16, - ARM::VST4d32, ARM::VST1d64Q }; - unsigned QOpcodes0[] = { ARM::VST4q8_UPD, - ARM::VST4q16_UPD, - ARM::VST4q32_UPD }; - unsigned QOpcodes1[] = { ARM::VST4q8odd_UPD, - ARM::VST4q16odd_UPD, - ARM::VST4q32odd_UPD }; + unsigned DOpcodes[] = { ARM::VST4d8Pseudo, ARM::VST4d16Pseudo, + ARM::VST4d32Pseudo, ARM::VST1d64QPseudo }; + unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD, + ARM::VST4q16Pseudo_UPD, + ARM::VST4q32Pseudo_UPD }; + unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD, + ARM::VST4q16oddPseudo_UPD, + ARM::VST4q32oddPseudo_UPD }; return SelectVST(N, 4, DOpcodes, QOpcodes0, QOpcodes1); } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 0091df753eb7..ce4a2c90689c 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -55,7 +55,14 @@ STATISTIC(NumTailCalls, "Number of tail calls"); static cl::opt<bool> EnableARMTailCalls("arm-tail-calls", cl::Hidden, cl::desc("Generate tail calls (TEMPORARY OPTION)."), - cl::init(true)); + cl::init(false)); + +// This option should go away when Machine LICM is smart enough to hoist a +// reg-to-reg VDUP. +static cl::opt<bool> +EnableARMVDUPsplat("arm-vdup-splat", cl::Hidden, + cl::desc("Generate VDUP for integer constant splats (TEMPORARY OPTION)."), + cl::init(false)); static cl::opt<bool> EnableARMLongCalls("arm-long-calls", cl::Hidden, @@ -122,7 +129,10 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom); setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom); setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom); + setLoadExtAction(ISD::SEXTLOAD, VT.getSimpleVT(), Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT.getSimpleVT(), Expand); } + setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand); // Promote all bit-wise operations. if (VT.isInteger() && VT != PromotedBitwiseVT) { @@ -166,6 +176,7 @@ static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) { ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) : TargetLowering(TM, createTLOF(TM)) { Subtarget = &TM.getSubtarget<ARMSubtarget>(); + RegInfo = TM.getRegisterInfo(); if (Subtarget->isTargetDarwin()) { // Uses VFP for Thumb libfuncs if available. @@ -264,7 +275,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) addRegisterClass(MVT::i32, ARM::GPRRegisterClass); if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { addRegisterClass(MVT::f32, ARM::SPRRegisterClass); - addRegisterClass(MVT::f64, ARM::DPRRegisterClass); + if (!Subtarget->isFPOnlySP()) + addRegisterClass(MVT::f64, ARM::DPRRegisterClass); setTruncStoreAction(MVT::f64, MVT::f32, Expand); } @@ -310,9 +322,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand); setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand); + setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand); + // Neon does not support some operations on v1i64 and v2i64 types. setOperationAction(ISD::MUL, MVT::v1i64, Expand); - setOperationAction(ISD::MUL, MVT::v2i64, Expand); + // Custom handling for some quad-vector types to detect VMULL. 
+ setOperationAction(ISD::MUL, MVT::v8i16, Custom); + setOperationAction(ISD::MUL, MVT::v4i32, Custom); + setOperationAction(ISD::MUL, MVT::v2i64, Custom); setOperationAction(ISD::VSETCC, MVT::v1i64, Expand); setOperationAction(ISD::VSETCC, MVT::v2i64, Expand); @@ -410,12 +427,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // doesn't yet know how to not do that for SjLj. setExceptionSelectorRegister(ARM::R0); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); - // Handle atomics directly for ARMv[67] (except for Thumb1), otherwise - // use the default expansion. - bool canHandleAtomics = - (Subtarget->hasV7Ops() || - (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only())); - if (canHandleAtomics) { + // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use + // the default expansion. + if (Subtarget->hasDataBarrier() || + (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only())) { // membarrier needs custom lowering; the rest are legal and handled // normally. setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); @@ -466,10 +481,12 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); - if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) + if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR // iff target supports vfp2. setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom); + setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); + } // We want to custom lower some of our intrinsics. setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); @@ -481,9 +498,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::SETCC, MVT::i32, Expand); setOperationAction(ISD::SETCC, MVT::f32, Expand); setOperationAction(ISD::SETCC, MVT::f64, Expand); - setOperationAction(ISD::SELECT, MVT::i32, Expand); - setOperationAction(ISD::SELECT, MVT::f32, Expand); - setOperationAction(ISD::SELECT, MVT::f64, Expand); + setOperationAction(ISD::SELECT, MVT::i32, Custom); + setOperationAction(ISD::SELECT, MVT::f32, Custom); + setOperationAction(ISD::SELECT, MVT::f64, Custom); setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); @@ -530,6 +547,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setTargetDAGCombine(ISD::SUB); setTargetDAGCombine(ISD::MUL); + if (Subtarget->hasV6T2Ops()) + setTargetDAGCombine(ISD::OR); + setStackPointerRegisterToSaveRestore(ARM::SP); if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2()) @@ -547,6 +567,37 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) benefitFromCodePlacementOpt = true; } +std::pair<const TargetRegisterClass*, uint8_t> +ARMTargetLowering::findRepresentativeClass(EVT VT) const{ + const TargetRegisterClass *RRC = 0; + uint8_t Cost = 1; + switch (VT.getSimpleVT().SimpleTy) { + default: + return TargetLowering::findRepresentativeClass(VT); + // Use DPR as representative register class for all floating point + // and vector types. Since there are 32 SPR registers and 32 DPR registers so + // the cost is 1 for both f32 and f64. 
+ case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16: + case MVT::v2i32: case MVT::v1i64: case MVT::v2f32: + RRC = ARM::DPRRegisterClass; + break; + case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64: + case MVT::v4f32: case MVT::v2f64: + RRC = ARM::DPRRegisterClass; + Cost = 2; + break; + case MVT::v4i64: + RRC = ARM::DPRRegisterClass; + Cost = 4; + break; + case MVT::v8i64: + RRC = ARM::DPRRegisterClass; + Cost = 8; + break; + } + return std::make_pair(RRC, Cost); +} + const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { default: return 0; @@ -561,6 +612,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::BR2_JT: return "ARMISD::BR2_JT"; case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG"; case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD"; + case ARMISD::AND: return "ARMISD::AND"; case ARMISD::CMP: return "ARMISD::CMP"; case ARMISD::CMPZ: return "ARMISD::CMPZ"; case ARMISD::CMPFP: return "ARMISD::CMPFP"; @@ -635,9 +687,12 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VZIP: return "ARMISD::VZIP"; case ARMISD::VUZP: return "ARMISD::VUZP"; case ARMISD::VTRN: return "ARMISD::VTRN"; + case ARMISD::VMULLs: return "ARMISD::VMULLs"; + case ARMISD::VMULLu: return "ARMISD::VMULLu"; case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; case ARMISD::FMAX: return "ARMISD::FMAX"; case ARMISD::FMIN: return "ARMISD::FMIN"; + case ARMISD::BFI: return "ARMISD::BFI"; } } @@ -656,11 +711,23 @@ TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const { return TargetLowering::getRegClassFor(VT); } +// Create a fast isel object. +FastISel * +ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const { + return ARM::createFastISel(funcInfo); +} + /// getFunctionAlignment - Return the Log2 alignment of this function. unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const { return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 1 : 2; } +/// getMaximalGlobalOffset - Returns the maximal possible offset which can +/// be used for loads / stores from the global. +unsigned ARMTargetLowering::getMaximalGlobalOffset() const { + return (Subtarget->isThumb1Only() ? 127 : 4095); +} + Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { unsigned NumVals = N->getNumValues(); if (!NumVals) @@ -688,6 +755,24 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { return Sched::RegPressure; } +unsigned +ARMTargetLowering::getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const { + switch (RC->getID()) { + default: + return 0; + case ARM::tGPRRegClassID: + return RegInfo->hasFP(MF) ? 4 : 5; + case ARM::GPRRegClassID: { + unsigned FP = RegInfo->hasFP(MF) ? 1 : 0; + return 10 - FP - (Subtarget->isR9Reserved() ? 1 : 0); + } + case ARM::SPRRegClassID: // Currently not used as 'rep' register class. 
+ case ARM::DPRRegClassID: + return 32 - 10; + } +} + //===----------------------------------------------------------------------===// // Lowering Code //===----------------------------------------------------------------------===// @@ -793,8 +878,9 @@ static bool f64AssignAAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT, CCState &State, bool CanFail) { static const unsigned HiRegList[] = { ARM::R0, ARM::R2 }; static const unsigned LoRegList[] = { ARM::R1, ARM::R3 }; + static const unsigned ShadowRegList[] = { ARM::R0, ARM::R1 }; - unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2); + unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2); if (Reg == 0) { // For the 2nd half of a v2f64, do not just fail. if (CanFail) @@ -812,6 +898,10 @@ static bool f64AssignAAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT, if (HiRegList[i] == Reg) break; + unsigned T = State.AllocateReg(LoRegList[i]); + (void)T; + assert(T == LoRegList[i] && "Could not allocate register"); + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i], LocVT, LocInfo)); @@ -1624,6 +1714,10 @@ static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res); } +unsigned ARMTargetLowering::getJumpTableEncoding() const { + return MachineJumpTableInfo::EK_Inline; +} + SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); @@ -1917,17 +2011,19 @@ static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG, DebugLoc dl = Op.getDebugLoc(); SDValue Op5 = Op.getOperand(5); unsigned isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue(); - // v6 and v7 can both handle barriers directly, but need handled a bit - // differently. Thumb1 and pre-v6 ARM mode use a libcall instead and should + // Some subtargets which have dmb and dsb instructions can handle barriers + // directly. Some ARMv6 cpus can support them with the help of mcr + // instruction. Thumb1 and pre-v6 ARM mode use a libcall instead and should // never get here. unsigned Opc = isDeviceBarrier ? ARMISD::SYNCBARRIER : ARMISD::MEMBARRIER; - if (Subtarget->hasV7Ops()) + if (Subtarget->hasDataBarrier()) return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0)); - else if (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only()) + else { + assert(Subtarget->hasV6Ops() && !Subtarget->isThumb1Only() && + "Unexpected ISD::MEMBARRIER encountered. Should be libcall!"); return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0), DAG.getConstant(0, MVT::i32)); - assert(0 && "Unexpected ISD::MEMBARRIER encountered. Should be libcall!"); - return SDValue(); + } } static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { @@ -1945,54 +2041,6 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { } SDValue -ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, - SelectionDAG &DAG) const { - SDNode *Node = Op.getNode(); - DebugLoc dl = Node->getDebugLoc(); - EVT VT = Node->getValueType(0); - SDValue Chain = Op.getOperand(0); - SDValue Size = Op.getOperand(1); - SDValue Align = Op.getOperand(2); - - // Chain the dynamic stack allocation so that it doesn't modify the stack - // pointer when other instructions are using the stack. 
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true)); - - unsigned AlignVal = cast<ConstantSDNode>(Align)->getZExtValue(); - unsigned StackAlign = getTargetMachine().getFrameInfo()->getStackAlignment(); - if (AlignVal > StackAlign) - // Do this now since selection pass cannot introduce new target - // independent node. - Align = DAG.getConstant(-(uint64_t)AlignVal, VT); - - // In Thumb1 mode, there isn't a "sub r, sp, r" instruction, we will end up - // using a "add r, sp, r" instead. Negate the size now so we don't have to - // do even more horrible hack later. - MachineFunction &MF = DAG.getMachineFunction(); - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - if (AFI->isThumb1OnlyFunction()) { - bool Negate = true; - ConstantSDNode *C = dyn_cast<ConstantSDNode>(Size); - if (C) { - uint32_t Val = C->getZExtValue(); - if (Val <= 508 && ((Val & 3) == 0)) - Negate = false; - } - if (Negate) - Size = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, VT), Size); - } - - SDVTList VTList = DAG.getVTList(VT, MVT::Other); - SDValue Ops1[] = { Chain, Size, Align }; - SDValue Res = DAG.getNode(ARMISD::DYN_ALLOC, dl, VTList, Ops1, 3); - Chain = Res.getValue(1); - Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true), - DAG.getIntPtrConstant(0, true), SDValue()); - SDValue Ops2[] = { Res, Chain }; - return DAG.getMergeValues(Ops2, 2, dl); -} - -SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, SDValue &Root, SelectionDAG &DAG, DebugLoc dl) const { @@ -2229,28 +2277,28 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, default: break; case ISD::SETLT: case ISD::SETGE: - if (isLegalICmpImmediate(C-1)) { + if (C != 0x80000000 && isLegalICmpImmediate(C-1)) { CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT; RHS = DAG.getConstant(C-1, MVT::i32); } break; case ISD::SETULT: case ISD::SETUGE: - if (C > 0 && isLegalICmpImmediate(C-1)) { + if (C != 0 && isLegalICmpImmediate(C-1)) { CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; RHS = DAG.getConstant(C-1, MVT::i32); } break; case ISD::SETLE: case ISD::SETGT: - if (isLegalICmpImmediate(C+1)) { + if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) { CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; RHS = DAG.getConstant(C+1, MVT::i32); } break; case ISD::SETULE: case ISD::SETUGT: - if (C < 0xffffffff && isLegalICmpImmediate(C+1)) { + if (C != 0xffffffff && isLegalICmpImmediate(C+1)) { CC = (CC == ISD::SETULE) ? 
ISD::SETULT : ISD::SETUGE; RHS = DAG.getConstant(C+1, MVT::i32); } @@ -2287,6 +2335,52 @@ ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp); } +SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { + SDValue Cond = Op.getOperand(0); + SDValue SelectTrue = Op.getOperand(1); + SDValue SelectFalse = Op.getOperand(2); + DebugLoc dl = Op.getDebugLoc(); + + // Convert: + // + // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond) + // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond) + // + if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) { + const ConstantSDNode *CMOVTrue = + dyn_cast<ConstantSDNode>(Cond.getOperand(0)); + const ConstantSDNode *CMOVFalse = + dyn_cast<ConstantSDNode>(Cond.getOperand(1)); + + if (CMOVTrue && CMOVFalse) { + unsigned CMOVTrueVal = CMOVTrue->getZExtValue(); + unsigned CMOVFalseVal = CMOVFalse->getZExtValue(); + + SDValue True; + SDValue False; + if (CMOVTrueVal == 1 && CMOVFalseVal == 0) { + True = SelectTrue; + False = SelectFalse; + } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) { + True = SelectFalse; + False = SelectTrue; + } + + if (True.getNode() && False.getNode()) { + EVT VT = Cond.getValueType(); + SDValue ARMcc = Cond.getOperand(2); + SDValue CCR = Cond.getOperand(3); + SDValue Cmp = Cond.getOperand(4); + return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp); + } + } + } + + return DAG.getSelectCC(dl, Cond, + DAG.getConstant(0, Cond.getValueType()), + SelectTrue, SelectFalse, ISD::SETNE); +} + SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDValue LHS = Op.getOperand(0); @@ -2403,8 +2497,9 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { bool SeenZero = false; if (canChangeToInt(LHS, SeenZero, Subtarget) && canChangeToInt(RHS, SeenZero, Subtarget) && - // If one of the operand is zero, it's safe to ignore the NaN case. - (FiniteOnlyFPMath() || SeenZero)) { + // If one of the operand is zero, it's safe to ignore the NaN case since + // we only care about equality comparisons. + (SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) { // If unsafe fp math optimization is enabled and there are no othter uses of // the CMP operands, and the condition code is EQ oe NE, we can optimize it // to an integer comparison. @@ -2587,7 +2682,7 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ } // Return LR, which contains the return address. Mark it an implicit live-in. - unsigned Reg = MF.addLiveIn(ARM::LR, ARM::GPRRegisterClass); + unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32)); return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); } @@ -2730,6 +2825,24 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, return DAG.getMergeValues(Ops, 2, dl); } +SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, + SelectionDAG &DAG) const { + // The rounding mode is in bits 23:22 of the FPSCR. + // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0 + // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3) + // so that the shift + and get folded into a bitfield extract. 
+ DebugLoc dl = Op.getDebugLoc(); + SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32, + DAG.getConstant(Intrinsic::arm_get_fpscr, + MVT::i32)); + SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR, + DAG.getConstant(1U << 22, MVT::i32)); + SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds, + DAG.getConstant(22, MVT::i32)); + return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE, + DAG.getConstant(3, MVT::i32)); +} + static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { EVT VT = N->getValueType(0); @@ -3046,6 +3159,11 @@ static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT, bool &ReverseVEXT, unsigned &Imm) { unsigned NumElts = VT.getVectorNumElements(); ReverseVEXT = false; + + // Assume that the first shuffle index is not UNDEF. Fail if it is. + if (M[0] < 0) + return false; + Imm = M[0]; // If this is a VEXT shuffle, the immediate value is the index of the first @@ -3061,6 +3179,7 @@ static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT, ReverseVEXT = true; } + if (M[i] < 0) continue; // ignore UNDEF indices if (ExpectedElt != static_cast<unsigned>(M[i])) return false; } @@ -3086,13 +3205,16 @@ static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT, unsigned NumElts = VT.getVectorNumElements(); unsigned BlockElts = M[0] + 1; + // If the first shuffle index is UNDEF, be optimistic. + if (M[0] < 0) + BlockElts = BlockSize / EltSz; if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) return false; for (unsigned i = 0; i < NumElts; ++i) { - if ((unsigned) M[i] != - (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts)) + if (M[i] < 0) continue; // ignore UNDEF indices + if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts)) return false; } @@ -3108,8 +3230,8 @@ static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT, unsigned NumElts = VT.getVectorNumElements(); WhichResult = (M[0] == 0 ? 0 : 1); for (unsigned i = 0; i < NumElts; i += 2) { - if ((unsigned) M[i] != i + WhichResult || - (unsigned) M[i+1] != i + NumElts + WhichResult) + if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) || + (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult)) return false; } return true; @@ -3127,8 +3249,8 @@ static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, unsigned NumElts = VT.getVectorNumElements(); WhichResult = (M[0] == 0 ? 0 : 1); for (unsigned i = 0; i < NumElts; i += 2) { - if ((unsigned) M[i] != i + WhichResult || - (unsigned) M[i+1] != i + WhichResult) + if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) || + (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult)) return false; } return true; @@ -3143,6 +3265,7 @@ static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT, unsigned NumElts = VT.getVectorNumElements(); WhichResult = (M[0] == 0 ? 0 : 1); for (unsigned i = 0; i != NumElts; ++i) { + if (M[i] < 0) continue; // ignore UNDEF indices if ((unsigned) M[i] != 2 * i + WhichResult) return false; } @@ -3168,7 +3291,8 @@ static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, for (unsigned j = 0; j != 2; ++j) { unsigned Idx = WhichResult; for (unsigned i = 0; i != Half; ++i) { - if ((unsigned) M[i + j * Half] != Idx) + int MIdx = M[i + j * Half]; + if (MIdx >= 0 && (unsigned) MIdx != Idx) return false; Idx += 2; } @@ -3191,8 +3315,8 @@ static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT, WhichResult = (M[0] == 0 ? 
0 : 1); unsigned Idx = WhichResult * NumElts / 2; for (unsigned i = 0; i != NumElts; i += 2) { - if ((unsigned) M[i] != Idx || - (unsigned) M[i+1] != Idx + NumElts) + if ((M[i] >= 0 && (unsigned) M[i] != Idx) || + (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts)) return false; Idx += 1; } @@ -3217,8 +3341,8 @@ static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, WhichResult = (M[0] == 0 ? 0 : 1); unsigned Idx = WhichResult * NumElts / 2; for (unsigned i = 0; i != NumElts; i += 2) { - if ((unsigned) M[i] != Idx || - (unsigned) M[i+1] != Idx) + if ((M[i] >= 0 && (unsigned) M[i] != Idx) || + (M[i+1] >= 0 && (unsigned) M[i+1] != Idx)) return false; Idx += 1; } @@ -3230,9 +3354,30 @@ static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, return true; } +// If N is an integer constant that can be moved into a register in one +// instruction, return an SDValue of such a constant (will become a MOV +// instruction). Otherwise return null. +static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, + const ARMSubtarget *ST, DebugLoc dl) { + uint64_t Val; + if (!isa<ConstantSDNode>(N)) + return SDValue(); + Val = cast<ConstantSDNode>(N)->getZExtValue(); + + if (ST->isThumb1Only()) { + if (Val <= 255 || ~Val <= 255) + return DAG.getConstant(Val, MVT::i32); + } else { + if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1) + return DAG.getConstant(Val, MVT::i32); + } + return SDValue(); +} + // If this is a case we can't handle, return null and let the default // expansion code take care of it. -static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { +static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) { BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode()); DebugLoc dl = Op.getDebugLoc(); EVT VT = Op.getValueType(); @@ -3292,15 +3437,41 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { if (isOnlyLowElement) return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value); - // If all elements are constants, fall back to the default expansion, which - // will generate a load from the constant pool. + unsigned EltSize = VT.getVectorElementType().getSizeInBits(); + + if (EnableARMVDUPsplat) { + // Use VDUP for non-constant splats. For f32 constant splats, reduce to + // i32 and try again. + if (usesOnlyOneValue && EltSize <= 32) { + if (!isConstant) + return DAG.getNode(ARMISD::VDUP, dl, VT, Value); + if (VT.getVectorElementType().isFloatingPoint()) { + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0; i < NumElts; ++i) + Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, + Op.getOperand(i))); + SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &Ops[0], + NumElts); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, + LowerBUILD_VECTOR(Val, DAG, ST)); + } + SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl); + if (Val.getNode()) + return DAG.getNode(ARMISD::VDUP, dl, VT, Val); + } + } + + // If all elements are constants and the case above didn't get hit, fall back + // to the default expansion, which will generate a load from the constant + // pool. if (isConstant) return SDValue(); - // Use VDUP for non-constant splats. - unsigned EltSize = VT.getVectorElementType().getSizeInBits(); - if (usesOnlyOneValue && EltSize <= 32) - return DAG.getNode(ARMISD::VDUP, dl, VT, Value); + if (!EnableARMVDUPsplat) { + // Use VDUP for non-constant splats. 
+ if (usesOnlyOneValue && EltSize <= 32) + return DAG.getNode(ARMISD::VDUP, dl, VT, Value); + } // Vectors with 32- or 64-bit elements can be built by directly assigning // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands @@ -3585,6 +3756,51 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Val); } +/// SkipExtension - For a node that is either a SIGN_EXTEND, ZERO_EXTEND, or +/// an extending load, return the unextended value. +static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) { + if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND) + return N->getOperand(0); + LoadSDNode *LD = cast<LoadSDNode>(N); + return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(), + LD->getBasePtr(), LD->getSrcValue(), + LD->getSrcValueOffset(), LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); +} + +static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { + // Multiplications are only custom-lowered for 128-bit vectors so that + // VMULL can be detected. Otherwise v2i64 multiplications are not legal. + EVT VT = Op.getValueType(); + assert(VT.is128BitVector() && "unexpected type for custom-lowering ISD::MUL"); + SDNode *N0 = Op.getOperand(0).getNode(); + SDNode *N1 = Op.getOperand(1).getNode(); + unsigned NewOpc = 0; + if ((N0->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N0)) && + (N1->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N1))) { + NewOpc = ARMISD::VMULLs; + } else if ((N0->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N0)) && + (N1->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N1))) { + NewOpc = ARMISD::VMULLu; + } else if (VT.getSimpleVT().SimpleTy == MVT::v2i64) { + // Fall through to expand this. It is not legal. + return SDValue(); + } else { + // Other vector multiplications are legal. + return Op; + } + + // Legalize to a VMULL instruction. + DebugLoc DL = Op.getDebugLoc(); + SDValue Op0 = SkipExtension(N0, DAG); + SDValue Op1 = SkipExtension(N1, DAG); + + assert(Op0.getValueType().is64BitVector() && + Op1.getValueType().is64BitVector() && + "unexpected types for extended operands to VMULL"); + return DAG.getNode(NewOpc, DL, VT, Op0, Op1); +} + SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Don't know how to custom lower this!"); @@ -3594,10 +3810,10 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return Subtarget->isTargetDarwin() ? 
LowerGlobalAddressDarwin(Op, DAG) : LowerGlobalAddressELF(Op, DAG); case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); + case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::BR_CC: return LowerBR_CC(Op, DAG); case ISD::BR_JT: return LowerBR_JT(Op, DAG); - case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget); case ISD::SINT_TO_FP: @@ -3621,10 +3837,12 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget); case ISD::VSETCC: return LowerVSETCC(Op, DAG); - case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); + case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); + case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); + case ISD::MUL: return LowerMUL(Op, DAG); } return SDValue(); } @@ -4002,78 +4220,6 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } - - case ARM::tANDsp: - case ARM::tADDspr_: - case ARM::tSUBspi_: - case ARM::t2SUBrSPi_: - case ARM::t2SUBrSPi12_: - case ARM::t2SUBrSPs_: { - MachineFunction *MF = BB->getParent(); - unsigned DstReg = MI->getOperand(0).getReg(); - unsigned SrcReg = MI->getOperand(1).getReg(); - bool DstIsDead = MI->getOperand(0).isDead(); - bool SrcIsKill = MI->getOperand(1).isKill(); - - if (SrcReg != ARM::SP) { - // Copy the source to SP from virtual register. - const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(SrcReg); - unsigned CopyOpc = (RC == ARM::tGPRRegisterClass) - ? ARM::tMOVtgpr2gpr : ARM::tMOVgpr2gpr; - BuildMI(*BB, MI, dl, TII->get(CopyOpc), ARM::SP) - .addReg(SrcReg, getKillRegState(SrcIsKill)); - } - - unsigned OpOpc = 0; - bool NeedPred = false, NeedCC = false, NeedOp3 = false; - switch (MI->getOpcode()) { - default: - llvm_unreachable("Unexpected pseudo instruction!"); - case ARM::tANDsp: - OpOpc = ARM::tAND; - NeedPred = true; - break; - case ARM::tADDspr_: - OpOpc = ARM::tADDspr; - break; - case ARM::tSUBspi_: - OpOpc = ARM::tSUBspi; - break; - case ARM::t2SUBrSPi_: - OpOpc = ARM::t2SUBrSPi; - NeedPred = true; NeedCC = true; - break; - case ARM::t2SUBrSPi12_: - OpOpc = ARM::t2SUBrSPi12; - NeedPred = true; - break; - case ARM::t2SUBrSPs_: - OpOpc = ARM::t2SUBrSPs; - NeedPred = true; NeedCC = true; NeedOp3 = true; - break; - } - MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(OpOpc), ARM::SP); - if (OpOpc == ARM::tAND) - AddDefaultT1CC(MIB); - MIB.addReg(ARM::SP); - MIB.addOperand(MI->getOperand(2)); - if (NeedOp3) - MIB.addOperand(MI->getOperand(3)); - if (NeedPred) - AddDefaultPred(MIB); - if (NeedCC) - AddDefaultCC(MIB); - - // Copy the result from SP to virtual register. - const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(DstReg); - unsigned CopyOpc = (RC == ARM::tGPRRegisterClass) - ? ARM::tMOVgpr2tgpr : ARM::tMOVgpr2gpr; - BuildMI(*BB, MI, dl, TII->get(CopyOpc)) - .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead)) - .addReg(ARM::SP); - MI->eraseFromParent(); // The pseudo instruction is gone now. 
- return BB; - } } } @@ -4141,30 +4287,42 @@ SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, return SDValue(); } -/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. -static SDValue PerformADDCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { - // added by evan in r37685 with no testcase. - SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); - +/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with +/// operands N0 and N1. This is a helper for PerformADDCombine that is +/// called with the default operands, and if that fails, with commuted +/// operands. +static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, + TargetLowering::DAGCombinerInfo &DCI) { // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) { SDValue Result = combineSelectAndUse(N, N0, N1, DCI); if (Result.getNode()) return Result; } - if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) { - SDValue Result = combineSelectAndUse(N, N1, N0, DCI); - if (Result.getNode()) return Result; - } - return SDValue(); } +/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. +/// +static SDValue PerformADDCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + // First try with the default operand order. + SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI); + if (Result.getNode()) + return Result; + + // If that didn't work, try again with the operands commuted. + return PerformADDCombineWithOperands(N, N1, N0, DCI); +} + /// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB. +/// static SDValue PerformSUBCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { - // added by evan in r37685 with no testcase. - SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) { @@ -4231,6 +4389,105 @@ static SDValue PerformMULCombine(SDNode *N, return SDValue(); } +/// PerformORCombine - Target-specific dag combine xforms for ISD::OR +static SDValue PerformORCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when + // reasonable. + + // BFI is only available on V6T2+ + if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops()) + return SDValue(); + + SelectionDAG &DAG = DCI.DAG; + SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); + DebugLoc DL = N->getDebugLoc(); + // 1) or (and A, mask), val => ARMbfi A, val, mask + // iff (val & mask) == val + // + // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask + // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2) + // && CountPopulation_32(mask) == CountPopulation_32(~mask2) + // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2) + // && CountPopulation_32(mask) == CountPopulation_32(~mask2) + // (i.e., copy a bitfield value into another bitfield of the same width) + if (N0.getOpcode() != ISD::AND) + return SDValue(); + + EVT VT = N->getValueType(0); + if (VT != MVT::i32) + return SDValue(); + + + // The value and the mask need to be constants so we can verify this is + // actually a bitfield set. 
If the mask is 0xffff, we can do better + // via a movt instruction, so don't use BFI in that case. + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (!C) + return SDValue(); + unsigned Mask = C->getZExtValue(); + if (Mask == 0xffff) + return SDValue(); + SDValue Res; + // Case (1): or (and A, mask), val => ARMbfi A, val, mask + if ((C = dyn_cast<ConstantSDNode>(N1))) { + unsigned Val = C->getZExtValue(); + if (!ARM::isBitFieldInvertedMask(Mask) || (Val & ~Mask) != Val) + return SDValue(); + Val >>= CountTrailingZeros_32(~Mask); + + Res = DAG.getNode(ARMISD::BFI, DL, VT, N0.getOperand(0), + DAG.getConstant(Val, MVT::i32), + DAG.getConstant(Mask, MVT::i32)); + + // Do not add new nodes to DAG combiner worklist. + DCI.CombineTo(N, Res, false); + } else if (N1.getOpcode() == ISD::AND) { + // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask + C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); + if (!C) + return SDValue(); + unsigned Mask2 = C->getZExtValue(); + + if (ARM::isBitFieldInvertedMask(Mask) && + ARM::isBitFieldInvertedMask(~Mask2) && + (CountPopulation_32(Mask) == CountPopulation_32(~Mask2))) { + // The pack halfword instruction works better for masks that fit it, + // so use that when it's available. + if (Subtarget->hasT2ExtractPack() && + (Mask == 0xffff || Mask == 0xffff0000)) + return SDValue(); + // 2a + unsigned lsb = CountTrailingZeros_32(Mask2); + Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0), + DAG.getConstant(lsb, MVT::i32)); + Res = DAG.getNode(ARMISD::BFI, DL, VT, N0.getOperand(0), Res, + DAG.getConstant(Mask, MVT::i32)); + // Do not add new nodes to DAG combiner worklist. + DCI.CombineTo(N, Res, false); + } else if (ARM::isBitFieldInvertedMask(~Mask) && + ARM::isBitFieldInvertedMask(Mask2) && + (CountPopulation_32(~Mask) == CountPopulation_32(Mask2))) { + // The pack halfword instruction works better for masks that fit it, + // so use that when it's available. + if (Subtarget->hasT2ExtractPack() && + (Mask2 == 0xffff || Mask2 == 0xffff0000)) + return SDValue(); + // 2b + unsigned lsb = CountTrailingZeros_32(Mask); + Res = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), + DAG.getConstant(lsb, MVT::i32)); + Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res, + DAG.getConstant(Mask2, MVT::i32)); + // Do not add new nodes to DAG combiner worklist. + DCI.CombineTo(N, Res, false); + } + } + + return SDValue(); +} + /// PerformVMOVRRDCombine - Target-specific dag combine xforms for /// ARMISD::VMOVRRD. static SDValue PerformVMOVRRDCombine(SDNode *N, @@ -4561,7 +4818,7 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { // If the target supports NEON, try to use vmax/vmin instructions for f32 - // selects like "x < y ? x : y". Unless the FiniteOnlyFPMath option is set, + // selects like "x < y ? x : y". Unless the NoNaNsFPMath option is set, // be careful about NaNs: NEON's vmax/vmin return NaN if either operand is // a NaN; only do the transformation when it matches that behavior. 
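A minimal standalone sketch of the mask tests behind case (1) of the PerformORCombine hunk above. Only the name isBitFieldInvertedMask corresponds to the helper this patch adds to ARMISelLowering.cpp; qualifiesForBFI and the sample values are illustrative, and the real combine additionally rejects Mask == 0xffff (where a movt is preferred).

#include <cstdint>
#include <cstdio>

// Editor's sketch, not part of the patch: a scalar model of the case (1)
// test "or (and A, Mask), Val -> BFI A, Val >> lsb, Mask".
static bool isBitFieldInvertedMask(uint32_t v) {
  uint32_t inv = ~v;                  // the bits of the field being written
  if (inv == 0) return false;         // v == 0xffffffff never qualifies
  // inv must be one contiguous run of ones (the bitfield hole in v).
  return ((inv + (inv & -inv)) & inv) == 0;
}

// Legal when Mask leaves one contiguous hole and Val lives entirely inside it.
static bool qualifiesForBFI(uint32_t Mask, uint32_t Val) {
  return isBitFieldInvertedMask(Mask) && (Val & ~Mask) == Val;
}

int main() {
  printf("%d\n", qualifiesForBFI(0xffff00ffu, 0x00002a00u)); // 1: hole is bits 8-15
  printf("%d\n", qualifiesForBFI(0xff00ff00u, 0x000000ffu)); // 0: two separate holes
  printf("%d\n", qualifiesForBFI(0xffff00ffu, 0x00012a00u)); // 0: Val spills outside the hole
}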
@@ -4648,6 +4905,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ISD::ADD: return PerformADDCombine(N, DCI); case ISD::SUB: return PerformSUBCombine(N, DCI); case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget); + case ISD::OR: return PerformORCombine(N, DCI, Subtarget); case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI); case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); @@ -5379,6 +5637,21 @@ int ARM::getVFPf64Imm(const APFloat &FPImm) { return ((int)Sign << 7) | (Exp << 4) | Mantissa; } +bool ARM::isBitFieldInvertedMask(unsigned v) { + if (v == 0xffffffff) + return 0; + // there can be 1's on either or both "outsides", all the "inside" + // bits must be 0's + unsigned int lsb = 0, msb = 31; + while (v & (1 << msb)) --msb; + while (v & (1 << lsb)) ++lsb; + for (unsigned int i = lsb; i <= msb; ++i) { + if (v & (1 << i)) + return 0; + } + return 1; +} + /// isFPImmLegal - Returns true if the target can instruction select the /// specified FP immediate natively. If false, the legalizer will /// materialize the FP immediate as a load from a constant pool. diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 128b72e1e743..ba9ea7f15e7b 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -17,6 +17,8 @@ #include "ARMSubtarget.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/CallingConvLower.h" #include <vector> @@ -45,6 +47,8 @@ namespace llvm { PIC_ADD, // Add with a PC operand and a PIC label. + AND, // ARM "and" instruction that sets the 's' flag in CPSR. + CMP, // ARM compare instructions. CMPZ, // ARM compare that sets only Z flag. CMPFP, // ARM VFP compare instruction, sets FPSCR. @@ -80,7 +84,7 @@ namespace llvm { MEMBARRIER, // Memory barrier SYNCBARRIER, // Memory sync barrier - + VCEQ, // Vector compare equal. VCGE, // Vector compare greater than or equal. VCGEU, // Vector compare unsigned greater than or equal. @@ -141,6 +145,10 @@ namespace llvm { VUZP, // unzip (deinterleave) VTRN, // transpose + // Vector multiply long: + VMULLs, // ...signed + VMULLu, // ...unsigned + // Operands of the standard BUILD_VECTOR node are not legalized, which // is fine if BUILD_VECTORs are always lowered to shuffles or other // operations, but for ARM some BUILD_VECTORs are legal as-is and their @@ -150,7 +158,10 @@ namespace llvm { // Floating-point max and min: FMAX, - FMIN + FMIN, + + // Bit-field insert + BFI }; } @@ -162,6 +173,7 @@ namespace llvm { /// returns -1. int getVFPf32Imm(const APFloat &FPImm); int getVFPf64Imm(const APFloat &FPImm); + bool isBitFieldInvertedMask(unsigned v); } //===--------------------------------------------------------------------===// @@ -171,6 +183,8 @@ namespace llvm { public: explicit ARMTargetLowering(TargetMachine &TM); + virtual unsigned getJumpTableEncoding(void) const; + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; /// ReplaceNodeResults - Replace the results of node with an illegal result @@ -255,8 +269,19 @@ namespace llvm { /// getFunctionAlignment - Return the Log2 alignment of this function. virtual unsigned getFunctionAlignment(const Function *F) const; + /// getMaximalGlobalOffset - Returns the maximal possible offset which can + /// be used for loads / stores from the global. 
+ virtual unsigned getMaximalGlobalOffset() const; + + /// createFastISel - This method returns a target specific FastISel object, + /// or null if the target does not support "fast" ISel. + virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const; + Sched::Preference getSchedulingPreference(SDNode *N) const; + unsigned getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const; + bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const; bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; @@ -265,11 +290,17 @@ namespace llvm { /// materialize the FP immediate as a load from a constant pool. virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const; + protected: + std::pair<const TargetRegisterClass*, uint8_t> + findRepresentativeClass(EVT VT) const; + private: /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can /// make the right decision when generating code for different targets. const ARMSubtarget *Subtarget; + const TargetRegisterInfo *RegInfo; + /// ARMPCLabelIndex - Keep track of the number of ARM PC labels created. /// unsigned ARMPCLabelIndex; @@ -310,14 +341,15 @@ namespace llvm { SelectionDAG &DAG) const; SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, @@ -377,6 +409,10 @@ namespace llvm { unsigned BinOpcode) const; }; + + namespace ARM { + FastISel *createFastISel(FunctionLoweringInfo &funcInfo); + } } #endif // ARMISELLOWERING_H diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index ac568e75ccc4..113cfffe61f9 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -36,37 +36,38 @@ def LdStMulFrm : Format<10>; def LdStExFrm : Format<11>; def ArithMiscFrm : Format<12>; -def ExtFrm : Format<13>; - -def VFPUnaryFrm : Format<14>; -def VFPBinaryFrm : Format<15>; -def VFPConv1Frm : Format<16>; -def VFPConv2Frm : Format<17>; -def VFPConv3Frm : Format<18>; -def VFPConv4Frm : Format<19>; -def VFPConv5Frm : Format<20>; -def VFPLdStFrm : Format<21>; -def VFPLdStMulFrm : Format<22>; -def VFPMiscFrm : Format<23>; - -def ThumbFrm : Format<24>; -def MiscFrm : Format<25>; - -def NGetLnFrm : Format<26>; -def NSetLnFrm : Format<27>; -def NDupFrm : Format<28>; -def NLdStFrm : Format<29>; -def N1RegModImmFrm: Format<30>; -def N2RegFrm : Format<31>; -def NVCVTFrm : Format<32>; -def NVDupLnFrm : Format<33>; -def N2RegVShLFrm : Format<34>; -def N2RegVShRFrm : Format<35>; -def N3RegFrm : Format<36>; -def N3RegVShFrm : Format<37>; -def NVExtFrm : Format<38>; -def NVMulSLFrm : Format<39>; -def NVTBLFrm : Format<40>; +def SatFrm : Format<13>; +def ExtFrm : Format<14>; + +def VFPUnaryFrm : Format<15>; +def VFPBinaryFrm : Format<16>; +def VFPConv1Frm : Format<17>; 
+def VFPConv2Frm : Format<18>; +def VFPConv3Frm : Format<19>; +def VFPConv4Frm : Format<20>; +def VFPConv5Frm : Format<21>; +def VFPLdStFrm : Format<22>; +def VFPLdStMulFrm : Format<23>; +def VFPMiscFrm : Format<24>; + +def ThumbFrm : Format<25>; +def MiscFrm : Format<26>; + +def NGetLnFrm : Format<27>; +def NSetLnFrm : Format<28>; +def NDupFrm : Format<29>; +def NLdStFrm : Format<30>; +def N1RegModImmFrm: Format<31>; +def N2RegFrm : Format<32>; +def NVCVTFrm : Format<33>; +def NVDupLnFrm : Format<34>; +def N2RegVShLFrm : Format<35>; +def N2RegVShRFrm : Format<36>; +def N3RegFrm : Format<37>; +def N3RegVShFrm : Format<38>; +def NVExtFrm : Format<39>; +def NVMulSLFrm : Format<40>; +def NVTBLFrm : Format<41>; // Misc flags. @@ -87,21 +88,21 @@ class Xform16Bit { bit canXformTo16Bit = 1; } class AddrMode<bits<4> val> { bits<4> Value = val; } -def AddrModeNone : AddrMode<0>; -def AddrMode1 : AddrMode<1>; -def AddrMode2 : AddrMode<2>; -def AddrMode3 : AddrMode<3>; -def AddrMode4 : AddrMode<4>; -def AddrMode5 : AddrMode<5>; -def AddrMode6 : AddrMode<6>; -def AddrModeT1_1 : AddrMode<7>; -def AddrModeT1_2 : AddrMode<8>; -def AddrModeT1_4 : AddrMode<9>; -def AddrModeT1_s : AddrMode<10>; -def AddrModeT2_i12: AddrMode<11>; -def AddrModeT2_i8 : AddrMode<12>; -def AddrModeT2_so : AddrMode<13>; -def AddrModeT2_pc : AddrMode<14>; +def AddrModeNone : AddrMode<0>; +def AddrMode1 : AddrMode<1>; +def AddrMode2 : AddrMode<2>; +def AddrMode3 : AddrMode<3>; +def AddrMode4 : AddrMode<4>; +def AddrMode5 : AddrMode<5>; +def AddrMode6 : AddrMode<6>; +def AddrModeT1_1 : AddrMode<7>; +def AddrModeT1_2 : AddrMode<8>; +def AddrModeT1_4 : AddrMode<9>; +def AddrModeT1_s : AddrMode<10>; +def AddrModeT2_i12 : AddrMode<11>; +def AddrModeT2_i8 : AddrMode<12>; +def AddrModeT2_so : AddrMode<13>; +def AddrModeT2_pc : AddrMode<14>; def AddrModeT2_i8s4 : AddrMode<15>; // Instruction size. @@ -137,11 +138,17 @@ def VFPNeonDomain : Domain<3>; // Instructions in both VFP & Neon domains // ARM special operands. // +def CondCodeOperand : AsmOperandClass { + let Name = "CondCode"; + let SuperClasses = []; +} + // ARM Predicate operand. Default to 14 = always (AL). Second part is CC // register whose default is 0 (no register). def pred : PredicateOperand<OtherVT, (ops i32imm, CCR), (ops (i32 14), (i32 zero_reg))> { let PrintMethod = "printPredicateOperand"; + let ParserMatchClass = CondCodeOperand; } // Conditional code result for instructions whose 's' bit is set, e.g. subs. @@ -240,6 +247,7 @@ class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz, let Pattern = pattern; list<Predicate> Predicates = [IsARM]; } + // A few are not predicable class InoP<dag oops, dag iops, AddrMode am, SizeFlagVal sz, IndexMode im, Format f, InstrItinClass itin, @@ -254,9 +262,9 @@ class InoP<dag oops, dag iops, AddrMode am, SizeFlagVal sz, list<Predicate> Predicates = [IsARM]; } -// Same as I except it can optionally modify CPSR. Note it's modeled as -// an input operand since by default it's a zero register. It will -// become an implicit def once it's "flipped". +// Same as I except it can optionally modify CPSR. Note it's modeled as an input +// operand since by default it's a zero register. It will become an implicit def +// once it's "flipped". 
class sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, IndexMode im, Format f, InstrItinClass itin, string opc, string asm, string cstr, @@ -313,7 +321,7 @@ class ABXI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin, } class ABXIx2<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern> - : XI<oops, iops, AddrModeNone, Size8Bytes, IndexModeNone, BrMiscFrm, itin, + : XI<oops, iops, AddrModeNone, Size8Bytes, IndexModeNone, Pseudo, itin, asm, "", pattern>; // BR_JT instructions @@ -322,16 +330,14 @@ class JTI<dag oops, dag iops, InstrItinClass itin, : XI<oops, iops, AddrModeNone, SizeSpecial, IndexModeNone, BrMiscFrm, itin, asm, "", pattern>; - // Atomic load/store instructions - class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, LdStExFrm, itin, opc, asm, "", pattern> { let Inst{27-23} = 0b00011; let Inst{22-21} = opcod; - let Inst{20} = 1; + let Inst{20} = 1; let Inst{11-0} = 0b111110011111; } class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, @@ -340,7 +346,7 @@ class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, opc, asm, "", pattern> { let Inst{27-23} = 0b00011; let Inst{22-21} = opcod; - let Inst{20} = 0; + let Inst{20} = 0; let Inst{11-4} = 0b11111001; } @@ -350,21 +356,21 @@ class AI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin, : I<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin, opc, asm, "", pattern> { let Inst{24-21} = opcod; - let Inst{27-26} = {0,0}; + let Inst{27-26} = 0b00; } class AsI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, list<dag> pattern> : sI<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin, opc, asm, "", pattern> { let Inst{24-21} = opcod; - let Inst{27-26} = {0,0}; + let Inst{27-26} = 0b00; } class AXI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin, string asm, list<dag> pattern> : XI<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin, asm, "", pattern> { let Inst{24-21} = opcod; - let Inst{27-26} = {0,0}; + let Inst{27-26} = 0b00; } class AI1x2<dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, list<dag> pattern> @@ -377,7 +383,7 @@ class AI2<dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, list<dag> pattern> : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin, opc, asm, "", pattern> { - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } // loads @@ -389,7 +395,7 @@ class AI2ldw<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 0; // W bit let Inst{22} = 0; // B bit let Inst{24} = 1; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } class AXI2ldw<dag oops, dag iops, Format f, InstrItinClass itin, string asm, list<dag> pattern> @@ -399,7 +405,7 @@ class AXI2ldw<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 0; // W bit let Inst{22} = 0; // B bit let Inst{24} = 1; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } class AI2ldb<dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, list<dag> pattern> @@ -409,7 +415,7 @@ class AI2ldb<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 0; // W bit let Inst{22} = 1; // B bit let Inst{24} = 1; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } class AXI2ldb<dag oops, dag iops, Format f, InstrItinClass itin, string asm, list<dag> pattern> @@ -419,7 +425,7 @@ class 
AXI2ldb<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 0; // W bit let Inst{22} = 1; // B bit let Inst{24} = 1; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } // stores @@ -431,7 +437,7 @@ class AI2stw<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 0; // W bit let Inst{22} = 0; // B bit let Inst{24} = 1; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } class AXI2stw<dag oops, dag iops, Format f, InstrItinClass itin, string asm, list<dag> pattern> @@ -441,7 +447,7 @@ class AXI2stw<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 0; // W bit let Inst{22} = 0; // B bit let Inst{24} = 1; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } class AI2stb<dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, list<dag> pattern> @@ -451,7 +457,7 @@ class AI2stb<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 0; // W bit let Inst{22} = 1; // B bit let Inst{24} = 1; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } class AXI2stb<dag oops, dag iops, Format f, InstrItinClass itin, string asm, list<dag> pattern> @@ -461,7 +467,7 @@ class AXI2stb<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 0; // W bit let Inst{22} = 1; // B bit let Inst{24} = 1; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } // Pre-indexed loads @@ -473,7 +479,7 @@ class AI2ldwpr<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 1; // W bit let Inst{22} = 0; // B bit let Inst{24} = 1; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } class AI2ldbpr<dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> @@ -483,7 +489,7 @@ class AI2ldbpr<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 1; // W bit let Inst{22} = 1; // B bit let Inst{24} = 1; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } // Pre-indexed stores @@ -495,7 +501,7 @@ class AI2stwpr<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 1; // W bit let Inst{22} = 0; // B bit let Inst{24} = 1; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } class AI2stbpr<dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> @@ -505,7 +511,7 @@ class AI2stbpr<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 1; // W bit let Inst{22} = 1; // B bit let Inst{24} = 1; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } // Post-indexed loads @@ -517,7 +523,7 @@ class AI2ldwpo<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 0; // W bit let Inst{22} = 0; // B bit let Inst{24} = 0; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } class AI2ldbpo<dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> @@ -527,7 +533,7 @@ class AI2ldbpo<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 0; // W bit let Inst{22} = 1; // B bit let Inst{24} = 0; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } // Post-indexed stores @@ -539,7 +545,7 @@ class AI2stwpo<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 0; // W bit let Inst{22} = 0; // B bit let Inst{24} = 0; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } class AI2stbpo<dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> @@ -549,7 +555,7 @@ class AI2stbpo<dag 
oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 0; // W bit let Inst{22} = 1; // B bit let Inst{24} = 0; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } // addrmode3 instructions @@ -977,7 +983,7 @@ class TIx2<bits<5> opcod1, bits<2> opcod2, bit opcod3, Encoding { let Inst{31-27} = opcod1; let Inst{15-14} = opcod2; - let Inst{12} = opcod3; + let Inst{12} = opcod3; } // BR_JT instructions @@ -1099,13 +1105,13 @@ class T1Special<bits<4> opcode> : Encoding16 { // A6.2.4 Load/store single data item encoding. class T1LoadStore<bits<4> opA, bits<3> opB> : Encoding16 { let Inst{15-12} = opA; - let Inst{11-9} = opB; + let Inst{11-9} = opB; } -class T1LdSt<bits<3> opB> : T1LoadStore<0b0101, opB>; +class T1LdSt<bits<3> opB> : T1LoadStore<0b0101, opB>; class T1LdSt4Imm<bits<3> opB> : T1LoadStore<0b0110, opB>; // Immediate, 4 bytes class T1LdSt1Imm<bits<3> opB> : T1LoadStore<0b0111, opB>; // Immediate, 1 byte class T1LdSt2Imm<bits<3> opB> : T1LoadStore<0b1000, opB>; // Immediate, 2 bytes -class T1LdStSP<bits<3> opB> : T1LoadStore<0b1001, opB>; // SP relative +class T1LdStSP<bits<3> opB> : T1LoadStore<0b1001, opB>; // SP relative // A6.2.5 Miscellaneous 16-bit instructions encoding. class T1Misc<bits<7> opcode> : Encoding16 { @@ -1125,9 +1131,10 @@ class Thumb2I<dag oops, dag iops, AddrMode am, SizeFlagVal sz, list<Predicate> Predicates = [IsThumb2]; } -// Same as Thumb2I except it can optionally modify CPSR. Note it's modeled as -// an input operand since by default it's a zero register. It will -// become an implicit def once it's "flipped". +// Same as Thumb2I except it can optionally modify CPSR. Note it's modeled as an +// input operand since by default it's a zero register. It will become an +// implicit def once it's "flipped". +// // FIXME: This uses unified syntax so {s} comes before {p}. We should make it // more consistent. class Thumb2sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, @@ -1185,11 +1192,11 @@ class T2Ii8s4<bit P, bit W, bit load, dag oops, dag iops, InstrItinClass itin, pattern> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b00; - let Inst{24} = P; - let Inst{23} = ?; // The U bit. - let Inst{22} = 1; - let Inst{21} = W; - let Inst{20} = load; + let Inst{24} = P; + let Inst{23} = ?; // The U bit. + let Inst{22} = 1; + let Inst{21} = W; + let Inst{20} = load; } class T2sI<dag oops, dag iops, InstrItinClass itin, @@ -1225,14 +1232,14 @@ class T2Iidxldst<bit signed, bits<2> opcod, bit load, bit pre, list<Predicate> Predicates = [IsThumb2]; let Inst{31-27} = 0b11111; let Inst{26-25} = 0b00; - let Inst{24} = signed; - let Inst{23} = 0; + let Inst{24} = signed; + let Inst{23} = 0; let Inst{22-21} = opcod; - let Inst{20} = load; - let Inst{11} = 1; + let Inst{20} = load; + let Inst{11} = 1; // (P, W) = (1, 1) Pre-indexed or (0, 1) Post-indexed - let Inst{10} = pre; // The P bit. - let Inst{8} = 1; // The W bit. + let Inst{10} = pre; // The P bit. + let Inst{8} = 1; // The W bit. } // Helper class for disassembly only @@ -1243,9 +1250,9 @@ class T2I_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops, dag iops, : T2I<oops, iops, itin, opc, asm, pattern> { let Inst{31-27} = 0b11111; let Inst{26-24} = 0b011; - let Inst{23} = long; + let Inst{23} = long; let Inst{22-20} = op22_20; - let Inst{7-4} = op7_4; + let Inst{7-4} = op7_4; } // Tv5Pat - Same as Pat<>, but requires V5T Thumb mode. 
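The AddrMode2 load/store classes in the preceding hunks keep pinning Inst{27-26} to 0b01 and toggling the P, W, and B bits. A hedged sketch of where those fields sit in a 32-bit single-data-transfer encoding; the layout is the standard one from the ARM ARM, and the function name and example are illustrative, not taken from this patch.

#include <cstdint>

// Editor's sketch: the encoding the AI2* classes above are filling in,
// shown for the immediate-offset form (I bit = 0), cond = AL.
static uint32_t encodeLdrStrImm(bool L /*load*/, bool P /*pre-index*/,
                                bool U /*add offset*/, bool B /*byte*/,
                                bool W /*writeback*/, unsigned Rn, unsigned Rd,
                                unsigned imm12) {
  uint32_t Inst = 0;
  Inst |= 0xEu << 28;          // cond = AL
  Inst |= 0x1u << 26;          // Inst{27-26} = 0b01, the class-level opcode
  Inst |= (uint32_t)P << 24;   // P bit: pre (1) vs post (0) indexed
  Inst |= (uint32_t)U << 23;   // U bit: offset added vs subtracted
  Inst |= (uint32_t)B << 22;   // B bit: byte vs word access
  Inst |= (uint32_t)W << 21;   // W bit: base register writeback
  Inst |= (uint32_t)L << 20;   // L bit: load vs store
  Inst |= (Rn & 0xF) << 16;    // base register
  Inst |= (Rd & 0xF) << 12;    // transfer register
  Inst |= imm12 & 0xFFF;       // addrmode2 immediate offset
  return Inst;
}

// e.g. "ldr r0, [r1, #8]" -> encodeLdrStrImm(true, true, true, false, false, 1, 0, 8)
// yields 0xE5910008.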
@@ -1325,9 +1332,9 @@ class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops, } // Load / store multiple -class AXDI5<dag oops, dag iops, IndexMode im, InstrItinClass itin, +class AXDI4<dag oops, dag iops, IndexMode im, InstrItinClass itin, string asm, string cstr, list<dag> pattern> - : VFPXI<oops, iops, AddrMode5, Size4Bytes, im, + : VFPXI<oops, iops, AddrMode4, Size4Bytes, im, VFPLdStMulFrm, itin, asm, cstr, pattern> { // TODO: Mark the instructions with the appropriate subtarget info. let Inst{27-25} = 0b110; @@ -1337,9 +1344,9 @@ class AXDI5<dag oops, dag iops, IndexMode im, InstrItinClass itin, let D = VFPNeonDomain; } -class AXSI5<dag oops, dag iops, IndexMode im, InstrItinClass itin, +class AXSI4<dag oops, dag iops, IndexMode im, InstrItinClass itin, string asm, string cstr, list<dag> pattern> - : VFPXI<oops, iops, AddrMode5, Size4Bytes, im, + : VFPXI<oops, iops, AddrMode4, Size4Bytes, im, VFPLdStMulFrm, itin, asm, cstr, pattern> { // TODO: Mark the instructions with the appropriate subtarget info. let Inst{27-25} = 0b110; @@ -1367,8 +1374,8 @@ class ADbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, let Inst{27-23} = opcod1; let Inst{21-20} = opcod2; let Inst{11-8} = 0b1011; - let Inst{6} = op6; - let Inst{4} = op4; + let Inst{6} = op6; + let Inst{4} = op4; } // Double precision, binary, VML[AS] (for additional predicate) @@ -1379,12 +1386,11 @@ class ADbI_vmlX<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, let Inst{27-23} = opcod1; let Inst{21-20} = opcod2; let Inst{11-8} = 0b1011; - let Inst{6} = op6; - let Inst{4} = op4; + let Inst{6} = op6; + let Inst{4} = op4; list<Predicate> Predicates = [HasVFP2, UseVMLx]; } - // Single precision, unary class ASuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, @@ -1415,8 +1421,8 @@ class ASbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops, let Inst{27-23} = opcod1; let Inst{21-20} = opcod2; let Inst{11-8} = 0b1010; - let Inst{6} = op6; - let Inst{4} = op4; + let Inst{6} = op6; + let Inst{4} = op4; } // Single precision binary, if no NEON @@ -1521,10 +1527,18 @@ class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4, : NeonI<oops, iops, AddrMode6, IndexModeNone, NLdStFrm, itin, opc, dt, asm, cstr, pattern> { let Inst{31-24} = 0b11110100; - let Inst{23} = op23; + let Inst{23} = op23; let Inst{21-20} = op21_20; - let Inst{11-8} = op11_8; - let Inst{7-4} = op7_4; + let Inst{11-8} = op11_8; + let Inst{7-4} = op7_4; +} + +class PseudoNLdSt<dag oops, dag iops, InstrItinClass itin, string cstr> + : InstARM<AddrMode6, Size4Bytes, IndexModeNone, Pseudo, NeonDomain, cstr, + itin> { + let OutOperandList = oops; + let InOperandList = !con(iops, (ins pred:$p)); + list<Predicate> Predicates = [HasNEON]; } class NDataI<dag oops, dag iops, Format f, InstrItinClass itin, @@ -1548,13 +1562,13 @@ class N1ModImm<bit op23, bits<3> op21_19, bits<4> op11_8, bit op7, bit op6, string opc, string dt, string asm, string cstr, list<dag> pattern> : NDataI<oops, iops, N1RegModImmFrm, itin, opc, dt, asm, cstr, pattern> { - let Inst{23} = op23; + let Inst{23} = op23; let Inst{21-19} = op21_19; - let Inst{11-8} = op11_8; - let Inst{7} = op7; - let Inst{6} = op6; - let Inst{5} = op5; - let Inst{4} = op4; + let Inst{11-8} = op11_8; + let Inst{7} = op7; + let Inst{6} = op6; + let Inst{5} = op5; + let Inst{4} = op4; } // NEON 2 vector register format. 
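The AXDI4/AXSI4 classes in the hunk above move the VFP load/store-multiple forms from AddrMode5 to AddrMode4. A hedged sketch of what AddrMode4's four submodes mean for the addresses touched; this is standard load/store-multiple behaviour, and none of the names below come from the patch.

#include <cstdint>
#include <cstdio>

// Editor's sketch: lowest address touched and written-back base for an
// n-word load/store-multiple starting at 'base'.  Illustrative only.
enum class AMode4 { IA, IB, DA, DB };

static uint32_t lsmLowestAddr(AMode4 m, uint32_t base, unsigned n, uint32_t *wb) {
  switch (m) {
  case AMode4::IA: *wb = base + 4 * n; return base;               // increment after
  case AMode4::IB: *wb = base + 4 * n; return base + 4;           // increment before
  case AMode4::DA: *wb = base - 4 * n; return base - 4 * (n - 1); // decrement after
  case AMode4::DB: *wb = base - 4 * n; return base - 4 * n;       // decrement before
  }
  return base;
}

int main() {
  uint32_t wb;
  // A "vldmia r0!, {d0-d3}" style transfer moves 8 words upward from r0.
  printf("lowest=0x%08x writeback=0x%08x\n",
         lsmLowestAddr(AMode4::IA, 0x1000, 8, &wb), wb);
}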
@@ -1567,9 +1581,9 @@ class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, let Inst{21-20} = op21_20; let Inst{19-18} = op19_18; let Inst{17-16} = op17_16; - let Inst{11-7} = op11_7; - let Inst{6} = op6; - let Inst{4} = op4; + let Inst{11-7} = op11_7; + let Inst{6} = op6; + let Inst{4} = op4; } // Same as N2V except it doesn't have a datatype suffix. @@ -1582,9 +1596,9 @@ class N2VX<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, let Inst{21-20} = op21_20; let Inst{19-18} = op19_18; let Inst{17-16} = op17_16; - let Inst{11-7} = op11_7; - let Inst{6} = op6; - let Inst{4} = op4; + let Inst{11-7} = op11_7; + let Inst{6} = op6; + let Inst{4} = op4; } // NEON 2 vector register with immediate. @@ -1592,12 +1606,12 @@ class N2VImm<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, dag oops, dag iops, Format f, InstrItinClass itin, string opc, string dt, string asm, string cstr, list<dag> pattern> : NDataI<oops, iops, f, itin, opc, dt, asm, cstr, pattern> { - let Inst{24} = op24; - let Inst{23} = op23; + let Inst{24} = op24; + let Inst{23} = op23; let Inst{11-8} = op11_8; - let Inst{7} = op7; - let Inst{6} = op6; - let Inst{4} = op4; + let Inst{7} = op7; + let Inst{6} = op6; + let Inst{4} = op4; } // NEON 3 vector register format. @@ -1605,12 +1619,12 @@ class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4, dag oops, dag iops, Format f, InstrItinClass itin, string opc, string dt, string asm, string cstr, list<dag> pattern> : NDataI<oops, iops, f, itin, opc, dt, asm, cstr, pattern> { - let Inst{24} = op24; - let Inst{23} = op23; + let Inst{24} = op24; + let Inst{23} = op23; let Inst{21-20} = op21_20; - let Inst{11-8} = op11_8; - let Inst{6} = op6; - let Inst{4} = op4; + let Inst{11-8} = op11_8; + let Inst{6} = op6; + let Inst{4} = op4; } // Same as N3V except it doesn't have a data type suffix. @@ -1619,12 +1633,12 @@ class N3VX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> : NDataXI<oops, iops, f, itin, opc, asm, cstr, pattern> { - let Inst{24} = op24; - let Inst{23} = op23; + let Inst{24} = op24; + let Inst{23} = op23; let Inst{21-20} = op21_20; - let Inst{11-8} = op11_8; - let Inst{6} = op6; - let Inst{4} = op4; + let Inst{11-8} = op11_8; + let Inst{6} = op6; + let Inst{4} = op4; } // NEON VMOVs between scalar and core registers. 
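The N2V/N2VX/N2VImm/N3V classes above are essentially lists of "let Inst{hi-lo} = op..." assignments. A hedged C++ equivalent of that TableGen idiom, purely to show the operation being described; setBits and the trailing pseudo-expansion are illustrative, not part of the patch.

#include <cstdint>
#include <cassert>

// Editor's sketch: what a "let Inst{hi-lo} = val" assignment does to the
// 32-bit encoding word.
static uint32_t setBits(uint32_t inst, unsigned hi, unsigned lo, uint32_t val) {
  assert(hi >= lo && hi < 32);
  unsigned width = hi - lo + 1;
  uint32_t mask = (width == 32) ? 0xffffffffu : ((1u << width) - 1u);
  assert((val & ~mask) == 0 && "field value too wide for the bit range");
  return (inst & ~(mask << lo)) | (val << lo);
}

// The body of the N3V class above corresponds roughly to:
//   inst = setBits(inst, 24, 24, op24);
//   inst = setBits(inst, 23, 23, op23);
//   inst = setBits(inst, 21, 20, op21_20);
//   inst = setBits(inst, 11,  8, op11_8);
//   inst = setBits(inst,  6,  6, op6);
//   inst = setBits(inst,  4,  4, op4);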
@@ -1634,9 +1648,9 @@ class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3, : InstARM<AddrModeNone, Size4Bytes, IndexModeNone, f, GenericDomain, "", itin> { let Inst{27-20} = opcod1; - let Inst{11-8} = opcod2; - let Inst{6-5} = opcod3; - let Inst{4} = 1; + let Inst{11-8} = opcod2; + let Inst{6-5} = opcod3; + let Inst{4} = 1; let OutOperandList = oops; let InOperandList = !con(iops, (ins pred:$p)); @@ -1670,9 +1684,9 @@ class NVDupLane<bits<4> op19_16, bit op6, dag oops, dag iops, let Inst{24-23} = 0b11; let Inst{21-20} = 0b11; let Inst{19-16} = op19_16; - let Inst{11-7} = 0b11000; - let Inst{6} = op6; - let Inst{4} = 0; + let Inst{11-7} = 0b11000; + let Inst{6} = op6; + let Inst{4} = 0; } // NEONFPPat - Same as Pat<>, but requires that the compiler be using NEON diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 51fc1522485f..e66f9b9ad0ac 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -44,6 +44,10 @@ def SDT_ARMBCC_i64 : SDTypeProfile<0, 6, SDTCisVT<3, i32>, SDTCisVT<4, i32>, SDTCisVT<5, OtherVT>]>; +def SDT_ARMAnd : SDTypeProfile<1, 2, + [SDTCisVT<0, i32>, SDTCisVT<1, i32>, + SDTCisVT<2, i32>]>; + def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>; def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, @@ -54,13 +58,16 @@ def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>]>; def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>; -def SDT_ARMMEMBARRIERV7 : SDTypeProfile<0, 0, []>; -def SDT_ARMSYNCBARRIERV7 : SDTypeProfile<0, 0, []>; -def SDT_ARMMEMBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>; -def SDT_ARMSYNCBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +def SDT_ARMMEMBARRIER : SDTypeProfile<0, 0, []>; +def SDT_ARMSYNCBARRIER : SDTypeProfile<0, 0, []>; +def SDT_ARMMEMBARRIERMCR : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +def SDT_ARMSYNCBARRIERMCR : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; +def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, + SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; + // Node definitions. 
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>; def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>; @@ -99,11 +106,14 @@ def ARMbr2jt : SDNode<"ARMISD::BR2_JT", SDT_ARMBr2JT, def ARMBcci64 : SDNode<"ARMISD::BCC_i64", SDT_ARMBCC_i64, [SDNPHasChain]>; +def ARMand : SDNode<"ARMISD::AND", SDT_ARMAnd, + [SDNPOutFlag]>; + def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp, [SDNPOutFlag]>; def ARMcmpZ : SDNode<"ARMISD::CMPZ", SDT_ARMCmp, - [SDNPOutFlag,SDNPCommutative]>; + [SDNPOutFlag, SDNPCommutative]>; def ARMpic_add : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>; @@ -117,51 +127,54 @@ def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP", SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain]>; -def ARMMemBarrierV7 : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERV7, - [SDNPHasChain]>; -def ARMSyncBarrierV7 : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERV7, - [SDNPHasChain]>; -def ARMMemBarrierV6 : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERV6, - [SDNPHasChain]>; -def ARMSyncBarrierV6 : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERV6, - [SDNPHasChain]>; +def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER, + [SDNPHasChain]>; +def ARMSyncBarrier : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIER, + [SDNPHasChain]>; +def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERMCR, + [SDNPHasChain]>; +def ARMSyncBarrierMCR : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERMCR, + [SDNPHasChain]>; def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>; def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET, [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>; + +def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>; + //===----------------------------------------------------------------------===// // ARM Instruction Predicate Definitions. 
// -def HasV4T : Predicate<"Subtarget->hasV4TOps()">; -def NoV4T : Predicate<"!Subtarget->hasV4TOps()">; -def HasV5T : Predicate<"Subtarget->hasV5TOps()">; -def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">; -def HasV6 : Predicate<"Subtarget->hasV6Ops()">; -def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">; -def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">; -def HasV7 : Predicate<"Subtarget->hasV7Ops()">; -def NoVFP : Predicate<"!Subtarget->hasVFP2()">; -def HasVFP2 : Predicate<"Subtarget->hasVFP2()">; -def HasVFP3 : Predicate<"Subtarget->hasVFP3()">; -def HasNEON : Predicate<"Subtarget->hasNEON()">; -def HasDivide : Predicate<"Subtarget->hasDivide()">; +def HasV4T : Predicate<"Subtarget->hasV4TOps()">; +def NoV4T : Predicate<"!Subtarget->hasV4TOps()">; +def HasV5T : Predicate<"Subtarget->hasV5TOps()">; +def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">; +def HasV6 : Predicate<"Subtarget->hasV6Ops()">; +def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">; +def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">; +def HasV7 : Predicate<"Subtarget->hasV7Ops()">; +def NoVFP : Predicate<"!Subtarget->hasVFP2()">; +def HasVFP2 : Predicate<"Subtarget->hasVFP2()">; +def HasVFP3 : Predicate<"Subtarget->hasVFP3()">; +def HasNEON : Predicate<"Subtarget->hasNEON()">; +def HasDivide : Predicate<"Subtarget->hasDivide()">; def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">; -def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">; +def HasDB : Predicate<"Subtarget->hasDataBarrier()">; +def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">; def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">; -def IsThumb : Predicate<"Subtarget->isThumb()">; -def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">; -def IsThumb2 : Predicate<"Subtarget->isThumb2()">; -def IsARM : Predicate<"!Subtarget->isThumb()">; -def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">; -def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">; +def IsThumb : Predicate<"Subtarget->isThumb()">; +def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">; +def IsThumb2 : Predicate<"Subtarget->isThumb2()">; +def IsARM : Predicate<"!Subtarget->isThumb()">; +def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">; +def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">; // FIXME: Eventually this will be just "hasV6T2Ops". -def UseMovt : Predicate<"Subtarget->useMovt()">; -def DontUseMovt : Predicate<"!Subtarget->useMovt()">; - -def UseVMLx : Predicate<"Subtarget->useVMLx()">; +def UseMovt : Predicate<"Subtarget->useMovt()">; +def DontUseMovt : Predicate<"!Subtarget->useMovt()">; +def UseVMLx : Predicate<"Subtarget->useVMLx()">; //===----------------------------------------------------------------------===// // ARM Flag Definitions. @@ -221,29 +234,12 @@ def sext_16_node : PatLeaf<(i32 GPR:$a), [{ /// e.g., 0xf000ffff def bf_inv_mask_imm : Operand<i32>, PatLeaf<(imm), [{ - uint32_t v = (uint32_t)N->getZExtValue(); - if (v == 0xffffffff) - return 0; - // there can be 1's on either or both "outsides", all the "inside" - // bits must be 0's - unsigned int lsb = 0, msb = 31; - while (v & (1 << msb)) --msb; - while (v & (1 << lsb)) ++lsb; - for (unsigned int i = lsb; i <= msb; ++i) { - if (v & (1 << i)) - return 0; - } - return 1; + return ARM::isBitFieldInvertedMask(N->getZExtValue()); }] > { let PrintMethod = "printBitfieldInvMaskImmOperand"; } /// Split a 32-bit immediate into two 16 bit parts. 
-def lo16 : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant((uint32_t)N->getZExtValue() & 0xffff, - MVT::i32); -}]>; - def hi16 : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant((uint32_t)N->getZExtValue() >> 16, MVT::i32); }]>; @@ -306,6 +302,13 @@ def pclabel : Operand<i32> { let PrintMethod = "printPCLabel"; } +// shift_imm: An integer that encodes a shift amount and the type of shift +// (currently either asr or lsl) using the same encoding used for the +// immediates in so_reg operands. +def shift_imm : Operand<i32> { + let PrintMethod = "printShiftImmOperand"; +} + // shifter_operand operands: so_reg and so_imm. def so_reg : Operand<i32>, // reg reg imm ComplexPattern<i32, 3, "SelectShifterOperandReg", @@ -319,10 +322,7 @@ def so_reg : Operand<i32>, // reg reg imm // represented in the imm field in the same 12-bit form that they are encoded // into so_imm instructions: the 8-bit immediate is the least significant bits // [bits 0-7], the 4-bit shift amount is the next 4 bits [bits 8-11]. -def so_imm : Operand<i32>, - PatLeaf<(imm), [{ - return ARM_AM::getSOImmVal(N->getZExtValue()) != -1; - }]> { +def so_imm : Operand<i32>, PatLeaf<(imm), [{ return Pred_so_imm(N); }]> { let PrintMethod = "printSOImmOperand"; } @@ -452,11 +452,15 @@ include "ARMInstrFormats.td" /// binop that produces a value. multiclass AsI1_bin_irs<bits<4> opcod, string opc, PatFrag opnode, bit Commutable = 0> { + // The register-immediate version is re-materializable. This is useful + // in particular for taking the address of a local. + let isReMaterializable = 1 in { def ri : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, IIC_iALUi, opc, "\t$dst, $a, $b", [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]> { let Inst{25} = 1; } + } def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, IIC_iALUr, opc, "\t$dst, $a, $b", [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]> { @@ -502,7 +506,7 @@ multiclass AI1_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode, /// AI1_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test /// patterns. Similar to AsI1_bin_irs except the instruction does not produce /// a explicit result, only implicitly set CPSR. 
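The so_imm operand above only accepts constants that ARM_AM::getSOImmVal can encode: an 8-bit value rotated right by an even amount. A hedged standalone predicate for that rule; only so_imm, Pred_so_imm, and getSOImmVal are names from the patch, the rest is illustrative.

#include <cstdint>
#include <cstdio>

// Editor's sketch of the shifter-operand immediate rule behind so_imm.
static bool isSOImm(uint32_t v) {
  for (unsigned rot = 0; rot < 32; rot += 2) {
    // Undo a rotate-right by 'rot': rotate left and see if 8 bits remain.
    uint32_t undone = (v << rot) | (v >> ((32 - rot) & 31));
    if (undone <= 0xFF)
      return true;
  }
  return false;
}

int main() {
  printf("%d\n", isSOImm(0x000000ff)); // 1
  printf("%d\n", isSOImm(0x0000ff00)); // 1: 0xff rotated right by 24
  printf("%d\n", isSOImm(0xff000000)); // 1: 0xff rotated right by 8
  printf("%d\n", isSOImm(0x000001fe)); // 0: would need an odd rotation
  printf("%d\n", isSOImm(0x00102030)); // 0: more than 8 significant bits
}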
-let Defs = [CPSR] in { +let isCompare = 1, Defs = [CPSR] in { multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode, bit Commutable = 0> { def ri : AI1<opcod, (outs), (ins GPR:$a, so_imm:$b), DPFrm, IIC_iCMPi, @@ -1117,7 +1121,7 @@ let isBranch = 1, isTerminator = 1 in { let isNotDuplicable = 1, isIndirectBranch = 1 in { def BR_JTr : JTI<(outs), (ins GPR:$target, jtblock_operand:$jt, i32imm:$id), - IIC_Br, "mov\tpc, $target \n$jt", + IIC_Br, "mov\tpc, $target$jt", [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]> { let Inst{11-4} = 0b00000000; let Inst{15-12} = 0b1111; @@ -1127,7 +1131,7 @@ let isBranch = 1, isTerminator = 1 in { } def BR_JTm : JTI<(outs), (ins addrmode2:$target, jtblock_operand:$jt, i32imm:$id), - IIC_Br, "ldr\tpc, $target \n$jt", + IIC_Br, "ldr\tpc, $target$jt", [(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt, imm:$id)]> { let Inst{15-12} = 0b1111; @@ -1139,7 +1143,7 @@ let isBranch = 1, isTerminator = 1 in { } def BR_JTadd : JTI<(outs), (ins GPR:$target, GPR:$idx, jtblock_operand:$jt, i32imm:$id), - IIC_Br, "add\tpc, $target, $idx \n$jt", + IIC_Br, "add\tpc, $target, $idx$jt", [(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt, imm:$id)]> { let Inst{15-12} = 0b1111; @@ -1573,8 +1577,12 @@ defm UXTH : AI_unary_rrot<0b01101111, defm UXTB16 : AI_unary_rrot<0b01101100, "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>; -def : ARMV6Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF), - (UXTB16r_rot GPR:$Src, 24)>; +// FIXME: This pattern incorrectly assumes the shl operator is a rotate. +// The transformation should probably be done as a combiner action +// instead so we can include a check for masking back in the upper +// eight bits of the source into the lower eight bits of the result. +//def : ARMV6Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF), +// (UXTB16r_rot GPR:$Src, 24)>; def : ARMV6Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF), (UXTB16r_rot GPR:$Src, 8)>; @@ -1631,16 +1639,24 @@ defm ADCS : AI1_adde_sube_s_irs<0b0101, "adcs", defm SBCS : AI1_adde_sube_s_irs<0b0110, "sbcs", BinOpFrag<(sube_live_carry node:$LHS, node:$RHS) >>; -// These don't define reg/reg forms, because they are handled above. def RSBri : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, - IIC_iALUi, "rsb", "\t$dst, $a, $b", - [(set GPR:$dst, (sub so_imm:$b, GPR:$a))]> { + IIC_iALUi, "rsb", "\t$dst, $a, $b", + [(set GPR:$dst, (sub so_imm:$b, GPR:$a))]> { let Inst{25} = 1; } +// The reg/reg form is only defined for the disassembler; for codegen it is +// equivalent to SUBrr. +def RSBrr : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, + IIC_iALUr, "rsb", "\t$dst, $a, $b", + [/* For disassembly only; pattern left blank */]> { + let Inst{25} = 0; + let Inst{11-4} = 0b00000000; +} + def RSBrs : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, - IIC_iALUsr, "rsb", "\t$dst, $a, $b", - [(set GPR:$dst, (sub so_reg:$b, GPR:$a))]> { + IIC_iALUsr, "rsb", "\t$dst, $a, $b", + [(set GPR:$dst, (sub so_reg:$b, GPR:$a))]> { let Inst{25} = 0; } @@ -1667,6 +1683,14 @@ def RSCri : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), Requires<[IsARM]> { let Inst{25} = 1; } +// The reg/reg form is only defined for the disassembler; for codegen it is +// equivalent to SUBrr. 
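QADD and QSUB above gain int_arm_qadd / int_arm_qsub selection patterns. A hedged scalar model of the signed-saturating arithmetic those intrinsics describe; the real instructions also set the Q (sticky saturation) flag, which this sketch omits, and the function names are illustrative.

#include <cstdint>
#include <limits>

// Editor's sketch of 32-bit signed saturating add/subtract.
static int32_t qadd32(int32_t a, int32_t b) {
  int64_t s = (int64_t)a + (int64_t)b;
  if (s > std::numeric_limits<int32_t>::max()) return std::numeric_limits<int32_t>::max();
  if (s < std::numeric_limits<int32_t>::min()) return std::numeric_limits<int32_t>::min();
  return (int32_t)s;
}

static int32_t qsub32(int32_t a, int32_t b) {
  int64_t s = (int64_t)a - (int64_t)b;
  if (s > std::numeric_limits<int32_t>::max()) return std::numeric_limits<int32_t>::max();
  if (s < std::numeric_limits<int32_t>::min()) return std::numeric_limits<int32_t>::min();
  return (int32_t)s;
}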
+def RSCrr : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + DPFrm, IIC_iALUr, "rsc", "\t$dst, $a, $b", + [/* For disassembly only; pattern left blank */]> { + let Inst{25} = 0; + let Inst{11-4} = 0b00000000; +} def RSCrs : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iALUsr, "rsc", "\t$dst, $a, $b", [(set GPR:$dst, (sube_dead_carry so_reg:$b, GPR:$a))]>, @@ -1716,24 +1740,26 @@ def : ARMPat<(adde GPR:$src, so_imm_not:$imm), // ARM Arithmetic Instruction -- for disassembly only // GPR:$dst = GPR:$a op GPR:$b -class AAI<bits<8> op27_20, bits<4> op7_4, string opc> +class AAI<bits<8> op27_20, bits<4> op7_4, string opc, + list<dag> pattern = [/* For disassembly only; pattern left blank */]> : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, IIC_iALUr, - opc, "\t$dst, $a, $b", - [/* For disassembly only; pattern left blank */]> { + opc, "\t$dst, $a, $b", pattern> { let Inst{27-20} = op27_20; let Inst{7-4} = op7_4; } // Saturating add/subtract -- for disassembly only -def QADD : AAI<0b00010000, 0b0101, "qadd">; +def QADD : AAI<0b00010000, 0b0101, "qadd", + [(set GPR:$dst, (int_arm_qadd GPR:$a, GPR:$b))]>; def QADD16 : AAI<0b01100010, 0b0001, "qadd16">; def QADD8 : AAI<0b01100010, 0b1001, "qadd8">; def QASX : AAI<0b01100010, 0b0011, "qasx">; def QDADD : AAI<0b00010100, 0b0101, "qdadd">; def QDSUB : AAI<0b00010110, 0b0101, "qdsub">; def QSAX : AAI<0b01100010, 0b0101, "qsax">; -def QSUB : AAI<0b00010010, 0b0101, "qsub">; +def QSUB : AAI<0b00010010, 0b0101, "qsub", + [(set GPR:$dst, (int_arm_qsub GPR:$a, GPR:$b))]>; def QSUB16 : AAI<0b01100010, 0b0111, "qsub16">; def QSUB8 : AAI<0b01100010, 0b1111, "qsub8">; def UQADD16 : AAI<0b01100110, 0b0001, "uqadd16">; @@ -1793,54 +1819,45 @@ def USADA8 : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), // Signed/Unsigned saturate -- for disassembly only -def SSATlsl : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt), - DPFrm, NoItinerary, "ssat", "\t$dst, $bit_pos, $a, lsl $shamt", - [/* For disassembly only; pattern left blank */]> { - let Inst{27-21} = 0b0110101; - let Inst{6-4} = 0b001; -} - -def SSATasr : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt), - DPFrm, NoItinerary, "ssat", "\t$dst, $bit_pos, $a, asr $shamt", - [/* For disassembly only; pattern left blank */]> { +def SSAT : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, shift_imm:$sh), + SatFrm, NoItinerary, "ssat", "\t$dst, $bit_pos, $a$sh", + [/* For disassembly only; pattern left blank */]> { let Inst{27-21} = 0b0110101; - let Inst{6-4} = 0b101; + let Inst{5-4} = 0b01; } -def SSAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), DPFrm, +def SSAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), SatFrm, NoItinerary, "ssat16", "\t$dst, $bit_pos, $a", [/* For disassembly only; pattern left blank */]> { let Inst{27-20} = 0b01101010; let Inst{7-4} = 0b0011; } -def USATlsl : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt), - DPFrm, NoItinerary, "usat", "\t$dst, $bit_pos, $a, lsl $shamt", - [/* For disassembly only; pattern left blank */]> { - let Inst{27-21} = 0b0110111; - let Inst{6-4} = 0b001; -} - -def USATasr : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt), - DPFrm, NoItinerary, "usat", "\t$dst, $bit_pos, $a, asr $shamt", - [/* For disassembly only; pattern left blank */]> { +def USAT : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, shift_imm:$sh), + SatFrm, NoItinerary, "usat", "\t$dst, $bit_pos, $a$sh", + [/* For disassembly only; pattern left blank */]> { let Inst{27-21} = 
0b0110111; - let Inst{6-4} = 0b101; + let Inst{5-4} = 0b01; } -def USAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), DPFrm, +def USAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), SatFrm, NoItinerary, "usat16", "\t$dst, $bit_pos, $a", [/* For disassembly only; pattern left blank */]> { let Inst{27-20} = 0b01101110; let Inst{7-4} = 0b0011; } +def : ARMV6Pat<(int_arm_ssat GPR:$a, imm:$pos), (SSAT imm:$pos, GPR:$a, 0)>; +def : ARMV6Pat<(int_arm_usat GPR:$a, imm:$pos), (USAT imm:$pos, GPR:$a, 0)>; + //===----------------------------------------------------------------------===// // Bitwise Instructions. // defm AND : AsI1_bin_irs<0b0000, "and", BinOpFrag<(and node:$LHS, node:$RHS)>, 1>; +defm ANDS : AI1_bin_s_irs<0b0000, "and", + BinOpFrag<(ARMand node:$LHS, node:$RHS)>, 1>; defm ORR : AsI1_bin_irs<0b1100, "orr", BinOpFrag<(or node:$LHS, node:$RHS)>, 1>; defm EOR : AsI1_bin_irs<0b0001, "eor", @@ -1858,11 +1875,11 @@ def BFC : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm), } // A8.6.18 BFI - Bitfield insert (Encoding A1) -// Added for disassembler with the pattern field purposely left blank. -def BFI : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm), +def BFI : I<(outs GPR:$dst), (ins GPR:$src, GPR:$val, bf_inv_mask_imm:$imm), AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi, - "bfi", "\t$dst, $src, $imm", "", - [/* For disassembly only; pattern left blank */]>, + "bfi", "\t$dst, $val, $imm", "$src = $dst", + [(set GPR:$dst, (ARMbfi GPR:$src, GPR:$val, + bf_inv_mask_imm:$imm))]>, Requires<[IsARM, HasV6T2]> { let Inst{27-21} = 0b0111110; let Inst{6-4} = 0b001; // Rn: Inst{3-0} != 15 @@ -2232,11 +2249,20 @@ def REVSH : AMiscA1I<0b01101111, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, let Inst{19-16} = 0b1111; } +def lsl_shift_imm : SDNodeXForm<imm, [{ + unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::lsl, N->getZExtValue()); + return CurDAG->getTargetConstant(Sh, MVT::i32); +}]>; + +def lsl_amt : PatLeaf<(i32 imm), [{ + return (N->getZExtValue() < 32); +}], lsl_shift_imm>; + def PKHBT : AMiscA1I<0b01101000, (outs GPR:$dst), - (ins GPR:$src1, GPR:$src2, i32imm:$shamt), - IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, lsl $shamt", + (ins GPR:$src1, GPR:$src2, shift_imm:$sh), + IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2$sh", [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF), - (and (shl GPR:$src2, (i32 imm:$shamt)), + (and (shl GPR:$src2, lsl_amt:$sh), 0xFFFF0000)))]>, Requires<[IsARM, HasV6]> { let Inst{6-4} = 0b001; @@ -2245,26 +2271,37 @@ def PKHBT : AMiscA1I<0b01101000, (outs GPR:$dst), // Alternate cases for PKHBT where identities eliminate some nodes. def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (and GPR:$src2, 0xFFFF0000)), (PKHBT GPR:$src1, GPR:$src2, 0)>; -def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)), - (PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>; +def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$sh)), + (PKHBT GPR:$src1, GPR:$src2, (lsl_shift_imm imm16_31:$sh))>; + +def asr_shift_imm : SDNodeXForm<imm, [{ + unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::asr, N->getZExtValue()); + return CurDAG->getTargetConstant(Sh, MVT::i32); +}]>; +def asr_amt : PatLeaf<(i32 imm), [{ + return (N->getZExtValue() <= 32); +}], asr_shift_imm>; +// Note: Shifts of 1-15 bits will be transformed to srl instead of sra and +// will match the pattern below. 
def PKHTB : AMiscA1I<0b01101000, (outs GPR:$dst), - (ins GPR:$src1, GPR:$src2, i32imm:$shamt), - IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, asr $shamt", + (ins GPR:$src1, GPR:$src2, shift_imm:$sh), + IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2$sh", [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000), - (and (sra GPR:$src2, imm16_31:$shamt), - 0xFFFF)))]>, Requires<[IsARM, HasV6]> { + (and (sra GPR:$src2, asr_amt:$sh), + 0xFFFF)))]>, + Requires<[IsARM, HasV6]> { let Inst{6-4} = 0b101; } // Alternate cases for PKHTB where identities eliminate some nodes. Note that // a shift amount of 0 is *not legal* here, it is PKHBT instead. -def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, (i32 16))), - (PKHTB GPR:$src1, GPR:$src2, 16)>; +def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, imm16_31:$sh)), + (PKHTB GPR:$src1, GPR:$src2, (asr_shift_imm imm16_31:$sh))>; def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), - (and (srl GPR:$src2, imm1_15:$shamt), 0xFFFF)), - (PKHTB GPR:$src1, GPR:$src2, imm1_15:$shamt)>; + (and (srl GPR:$src2, imm1_15:$sh), 0xFFFF)), + (PKHTB GPR:$src1, GPR:$src2, (asr_shift_imm imm1_15:$sh))>; //===----------------------------------------------------------------------===// // Comparison Instructions... @@ -2272,8 +2309,52 @@ def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), defm CMP : AI1_cmp_irs<0b1010, "cmp", BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>; -//FIXME: Disable CMN, as CCodes are backwards from compare expectations -// Compare-to-zero still works out, just not the relationals + +// FIXME: There seems to be a (potential) hardware bug with the CMN instruction +// and comparison with 0. These two pieces of code should give identical +// results: +// +// rsbs r1, r1, 0 +// cmp r0, r1 +// mov r0, #0 +// it ls +// mov r0, #1 +// +// and: +// +// cmn r0, r1 +// mov r0, #0 +// it ls +// mov r0, #1 +// +// However, the CMN gives the *opposite* result when r1 is 0. This is because +// the carry flag is set in the CMP case but not in the CMN case. In short, the +// CMP instruction doesn't perform a truncate of the (logical) NOT of 0 plus the +// value of r0 and the carry bit (because the "carry bit" parameter to +// AddWithCarry is defined as 1 in this case, the carry flag will always be set +// when r0 >= 0). The CMN instruction doesn't perform a NOT of 0 so there is +// never a "carry" when this AddWithCarry is performed (because the "carry bit" +// parameter to AddWithCarry is defined as 0). +// +// The AddWithCarry in the CMP case seems to be relying upon the identity: +// +// ~x + 1 = -x +// +// However when x is 0 and unsigned, this doesn't hold: +// +// x = 0 +// ~x = 0xFFFF FFFF +// ~x + 1 = 0x1 0000 0000 +// (-x = 0) != (0x1 0000 0000 = ~x + 1) +// +// Therefore, we should disable *all* versions of CMN, especially when comparing +// against zero, until we can limit when the CMN instruction is used (when we +// know that the RHS is not 0) or when we have a hardware fix for this. +// +// (See the ARM docs for the "AddWithCarry" pseudo-code.) +// +// This is related to <rdar://problem/7569620>. 
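The long CMN comment above argues that "cmn r0, r1" and the "rsbs/cmp" sequence disagree when r1 is 0 because the carry out of AddWithCarry differs. A small hedged model of that pseudo-code makes the claim concrete; carryOut and the sample registers are illustrative only.

#include <cstdint>
#include <cstdio>

// Editor's sketch of ARM AddWithCarry, reduced to the carry-out computation.
static unsigned carryOut(uint32_t x, uint32_t y, unsigned carryIn) {
  uint64_t sum = (uint64_t)x + (uint64_t)y + carryIn;
  return (unsigned)(sum >> 32);          // 1 if the 32-bit add carried
}

int main() {
  uint32_t r0 = 5, r1 = 0;
  // cmp r0, r1  computes  r0 + ~r1 + 1
  unsigned cCmp = carryOut(r0, ~r1, 1);  // 1 whenever r1 == 0
  // cmn r0, r1  computes  r0 + r1 + 0
  unsigned cCmn = carryOut(r0, r1, 0);   // 0 whenever r1 == 0
  printf("cmp r0,#0 -> C=%u   cmn r0,#0 -> C=%u\n", cCmp, cCmn);
  // With r0 != 0 the "ls" condition (C clear or Z set) therefore flips
  // between the two sequences, which is exactly the mismatch described above.
}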
+// //defm CMN : AI1_cmp_irs<0b1011, "cmn", // BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>; @@ -2298,8 +2379,8 @@ def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm), let usesCustomInserter = 1, isBranch = 1, isTerminator = 1, Defs = [CPSR] in { def BCCi64 : PseudoInst<(outs), - (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst), - IIC_Br, + (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst), + IIC_Br, "${:comment} B\t$dst GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, imm:$cc", [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, bb:$dst)]>; @@ -2346,102 +2427,63 @@ def MOVCCi : AI1<0b1101, (outs GPR:$dst), // memory barriers protect the atomic sequences let hasSideEffects = 1 in { -def Int_MemBarrierV7 : AInoP<(outs), (ins), - Pseudo, NoItinerary, - "dmb", "", - [(ARMMemBarrierV7)]>, - Requires<[IsARM, HasV7]> { +def DMBsy : AInoP<(outs), (ins), MiscFrm, NoItinerary, "dmb", "", + [(ARMMemBarrier)]>, Requires<[IsARM, HasDB]> { let Inst{31-4} = 0xf57ff05; // FIXME: add support for options other than a full system DMB // See DMB disassembly-only variants below. let Inst{3-0} = 0b1111; } -def Int_SyncBarrierV7 : AInoP<(outs), (ins), - Pseudo, NoItinerary, - "dsb", "", - [(ARMSyncBarrierV7)]>, - Requires<[IsARM, HasV7]> { +def DSBsy : AInoP<(outs), (ins), MiscFrm, NoItinerary, "dsb", "", + [(ARMSyncBarrier)]>, Requires<[IsARM, HasDB]> { let Inst{31-4} = 0xf57ff04; // FIXME: add support for options other than a full system DSB // See DSB disassembly-only variants below. let Inst{3-0} = 0b1111; } -def Int_MemBarrierV6 : AInoP<(outs), (ins GPR:$zero), - Pseudo, NoItinerary, +def DMB_MCR : AInoP<(outs), (ins GPR:$zero), MiscFrm, NoItinerary, "mcr", "\tp15, 0, $zero, c7, c10, 5", - [(ARMMemBarrierV6 GPR:$zero)]>, + [(ARMMemBarrierMCR GPR:$zero)]>, Requires<[IsARM, HasV6]> { // FIXME: add support for options other than a full system DMB // FIXME: add encoding } -def Int_SyncBarrierV6 : AInoP<(outs), (ins GPR:$zero), - Pseudo, NoItinerary, +def DSB_MCR : AInoP<(outs), (ins GPR:$zero), MiscFrm, NoItinerary, "mcr", "\tp15, 0, $zero, c7, c10, 4", - [(ARMSyncBarrierV6 GPR:$zero)]>, + [(ARMSyncBarrierMCR GPR:$zero)]>, Requires<[IsARM, HasV6]> { // FIXME: add support for options other than a full system DSB // FIXME: add encoding } } -// Helper class for multiclass MemB -- for disassembly only -class AMBI<string opc, string asm> - : AInoP<(outs), (ins), MiscFrm, NoItinerary, opc, asm, - [/* For disassembly only; pattern left blank */]>, - Requires<[IsARM, HasV7]> { - let Inst{31-20} = 0xf57; -} - -multiclass MemB<bits<4> op7_4, string opc> { - - def st : AMBI<opc, "\tst"> { - let Inst{7-4} = op7_4; - let Inst{3-0} = 0b1110; - } - - def ish : AMBI<opc, "\tish"> { - let Inst{7-4} = op7_4; - let Inst{3-0} = 0b1011; - } - - def ishst : AMBI<opc, "\tishst"> { - let Inst{7-4} = op7_4; - let Inst{3-0} = 0b1010; - } - - def nsh : AMBI<opc, "\tnsh"> { - let Inst{7-4} = op7_4; - let Inst{3-0} = 0b0111; - } - - def nshst : AMBI<opc, "\tnshst"> { - let Inst{7-4} = op7_4; - let Inst{3-0} = 0b0110; - } +// Memory Barrier Operations Variants -- for disassembly only - def osh : AMBI<opc, "\tosh"> { - let Inst{7-4} = op7_4; - let Inst{3-0} = 0b0011; - } +def memb_opt : Operand<i32> { + let PrintMethod = "printMemBOption"; +} - def oshst : AMBI<opc, "\toshst"> { - let Inst{7-4} = op7_4; - let Inst{3-0} = 0b0010; - } +class AMBI<bits<4> op7_4, string opc> + : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, opc, "\t$opt", + [/* For disassembly only; pattern 
left blank */]>, + Requires<[IsARM, HasDB]> { + let Inst{31-8} = 0xf57ff0; + let Inst{7-4} = op7_4; } // These DMB variants are for disassembly only. -defm DMB : MemB<0b0101, "dmb">; +def DMBvar : AMBI<0b0101, "dmb">; // These DSB variants are for disassembly only. -defm DSB : MemB<0b0100, "dsb">; +def DSBvar : AMBI<0b0100, "dsb">; // ISB has only full system option -- for disassembly only -def ISBsy : AMBI<"isb", ""> { - let Inst{7-4} = 0b0110; +def ISBsy : AInoP<(outs), (ins), MiscFrm, NoItinerary, "isb", "", []>, + Requires<[IsARM, HasDB]> { + let Inst{31-4} = 0xf57ff06; let Inst{3-0} = 0b1111; } diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 7f7eb980abe8..4d2f1169061f 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -93,6 +93,11 @@ def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>; def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>; def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>; +def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, + SDTCisSameAs<1, 2>]>; +def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>; +def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>; + def SDTARMFMAX : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>]>; def NEONfmax : SDNode<"ARMISD::FMAX", SDTARMFMAX>; @@ -100,14 +105,14 @@ def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>; def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{ ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0)); - unsigned EltBits; + unsigned EltBits = 0; uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits); return (EltBits == 32 && EltVal == 0); }]>; def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{ ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0)); - unsigned EltBits; + unsigned EltBits = 0; uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits); return (EltBits == 8 && EltVal == 0xff); }]>; @@ -124,15 +129,16 @@ def nModImm : Operand<i32> { // NEON load / store instructions //===----------------------------------------------------------------------===// -let mayLoad = 1, neverHasSideEffects = 1 in { // Use vldmia to load a Q register as a D register pair. // This is equivalent to VLDMD except that it has a Q register operand // instead of a pair of D registers. def VLDMQ - : AXDI5<(outs QPR:$dst), (ins addrmode5:$addr, pred:$p), + : AXDI4<(outs QPR:$dst), (ins addrmode4:$addr, pred:$p), IndexModeNone, IIC_fpLoadm, - "vldm${addr:submode}${p}\t${addr:base}, ${dst:dregpair}", "", []>; + "vldm${addr:submode}${p}\t$addr, ${dst:dregpair}", "", + [(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]>; +let mayLoad = 1, neverHasSideEffects = 1 in { // Use vld1 to load a Q register as a D register pair. // This alternative to VLDMQ allows an alignment to be specified. // This is equivalent to VLD1q64 except that it has a Q register operand. @@ -141,15 +147,16 @@ def VLD1q IIC_VLD1, "vld1", "64", "${dst:dregpair}, $addr", "", []>; } // mayLoad = 1, neverHasSideEffects = 1 -let mayStore = 1, neverHasSideEffects = 1 in { // Use vstmia to store a Q register as a D register pair. // This is equivalent to VSTMD except that it has a Q register operand // instead of a pair of D registers. 
def VSTMQ - : AXDI5<(outs), (ins QPR:$src, addrmode5:$addr, pred:$p), + : AXDI4<(outs), (ins QPR:$src, addrmode4:$addr, pred:$p), IndexModeNone, IIC_fpStorem, - "vstm${addr:submode}${p}\t${addr:base}, ${src:dregpair}", "", []>; + "vstm${addr:submode}${p}\t$addr, ${src:dregpair}", "", + [(store (v2f64 QPR:$src), addrmode4:$addr)]>; +let mayStore = 1, neverHasSideEffects = 1 in { // Use vst1 to store a Q register as a D register pair. // This alternative to VSTMQ allows an alignment to be specified. // This is equivalent to VST1q64 except that it has a Q register operand. @@ -160,6 +167,25 @@ def VST1q let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { +// Classes for VLD* pseudo-instructions with multi-register operands. +// These are expanded to real instructions after register allocation. +class VLDQPseudo + : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), IIC_VST, "">; +class VLDQWBPseudo + : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VST, + "$addr.addr = $wb">; +class VLDQQPseudo + : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), IIC_VST, "">; +class VLDQQWBPseudo + : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VST, + "$addr.addr = $wb">; +class VLDQQQQWBPseudo + : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), IIC_VST, + "$addr.addr = $wb, $src = $dst">; + // VLD1 : Vector Load (multiple single elements) class VLD1D<bits<4> op7_4, string Dt> : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst), @@ -180,6 +206,11 @@ def VLD1q16 : VLD1Q<0b0100, "16">; def VLD1q32 : VLD1Q<0b1000, "32">; def VLD1q64 : VLD1Q<0b1100, "64">; +def VLD1q8Pseudo : VLDQPseudo; +def VLD1q16Pseudo : VLDQPseudo; +def VLD1q32Pseudo : VLDQPseudo; +def VLD1q64Pseudo : VLDQPseudo; + // ...with address register writeback: class VLD1DWB<bits<4> op7_4, string Dt> : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst, GPR:$wb), @@ -202,6 +233,11 @@ def VLD1q16_UPD : VLD1QWB<0b0100, "16">; def VLD1q32_UPD : VLD1QWB<0b1000, "32">; def VLD1q64_UPD : VLD1QWB<0b1100, "64">; +def VLD1q8Pseudo_UPD : VLDQWBPseudo; +def VLD1q16Pseudo_UPD : VLDQWBPseudo; +def VLD1q32Pseudo_UPD : VLDQWBPseudo; +def VLD1q64Pseudo_UPD : VLDQWBPseudo; + // ...with 3 registers (some of these are only for the disassembler): class VLD1D3<bits<4> op7_4, string Dt> : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), @@ -222,6 +258,9 @@ def VLD1d16T_UPD : VLD1D3WB<0b0100, "16">; def VLD1d32T_UPD : VLD1D3WB<0b1000, "32">; def VLD1d64T_UPD : VLD1D3WB<0b1100, "64">; +def VLD1d64TPseudo : VLDQQPseudo; +def VLD1d64TPseudo_UPD : VLDQQWBPseudo; + // ...with 4 registers (some of these are only for the disassembler): class VLD1D4<bits<4> op7_4, string Dt> : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), @@ -244,6 +283,9 @@ def VLD1d16Q_UPD : VLD1D4WB<0b0100, "16">; def VLD1d32Q_UPD : VLD1D4WB<0b1000, "32">; def VLD1d64Q_UPD : VLD1D4WB<0b1100, "64">; +def VLD1d64QPseudo : VLDQQPseudo; +def VLD1d64QPseudo_UPD : VLDQQWBPseudo; + // VLD2 : Vector Load (multiple 2-element structures) class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2), @@ -263,6 +305,14 @@ def VLD2q8 : VLD2Q<0b0000, "8">; def VLD2q16 : VLD2Q<0b0100, "16">; def VLD2q32 : VLD2Q<0b1000, "32">; +def VLD2d8Pseudo : VLDQPseudo; +def VLD2d16Pseudo : VLDQPseudo; +def VLD2d32Pseudo : VLDQPseudo; + +def VLD2q8Pseudo : VLDQQPseudo; +def VLD2q16Pseudo : 
VLDQQPseudo; +def VLD2q32Pseudo : VLDQQPseudo; + // ...with address register writeback: class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb), @@ -284,6 +334,14 @@ def VLD2q8_UPD : VLD2QWB<0b0000, "8">; def VLD2q16_UPD : VLD2QWB<0b0100, "16">; def VLD2q32_UPD : VLD2QWB<0b1000, "32">; +def VLD2d8Pseudo_UPD : VLDQWBPseudo; +def VLD2d16Pseudo_UPD : VLDQWBPseudo; +def VLD2d32Pseudo_UPD : VLDQWBPseudo; + +def VLD2q8Pseudo_UPD : VLDQQWBPseudo; +def VLD2q16Pseudo_UPD : VLDQQWBPseudo; +def VLD2q32Pseudo_UPD : VLDQQWBPseudo; + // ...with double-spaced registers (for disassembly only): def VLD2b8 : VLD2D<0b1001, 0b0000, "8">; def VLD2b16 : VLD2D<0b1001, 0b0100, "16">; @@ -302,6 +360,10 @@ def VLD3d8 : VLD3D<0b0100, 0b0000, "8">; def VLD3d16 : VLD3D<0b0100, 0b0100, "16">; def VLD3d32 : VLD3D<0b0100, 0b1000, "32">; +def VLD3d8Pseudo : VLDQQPseudo; +def VLD3d16Pseudo : VLDQQPseudo; +def VLD3d32Pseudo : VLDQQPseudo; + // ...with address register writeback: class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b10, op11_8, op7_4, @@ -314,6 +376,10 @@ def VLD3d8_UPD : VLD3DWB<0b0100, 0b0000, "8">; def VLD3d16_UPD : VLD3DWB<0b0100, 0b0100, "16">; def VLD3d32_UPD : VLD3DWB<0b0100, 0b1000, "32">; +def VLD3d8Pseudo_UPD : VLDQQWBPseudo; +def VLD3d16Pseudo_UPD : VLDQQWBPseudo; +def VLD3d32Pseudo_UPD : VLDQQWBPseudo; + // ...with double-spaced registers (non-updating versions for disassembly only): def VLD3q8 : VLD3D<0b0101, 0b0000, "8">; def VLD3q16 : VLD3D<0b0101, 0b0100, "16">; @@ -322,10 +388,14 @@ def VLD3q8_UPD : VLD3DWB<0b0101, 0b0000, "8">; def VLD3q16_UPD : VLD3DWB<0b0101, 0b0100, "16">; def VLD3q32_UPD : VLD3DWB<0b0101, 0b1000, "32">; +def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo; +def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo; +def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo; + // ...alternate versions to be allocated odd register numbers: -def VLD3q8odd_UPD : VLD3DWB<0b0101, 0b0000, "8">; -def VLD3q16odd_UPD : VLD3DWB<0b0101, 0b0100, "16">; -def VLD3q32odd_UPD : VLD3DWB<0b0101, 0b1000, "32">; +def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo; +def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo; +def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo; // VLD4 : Vector Load (multiple 4-element structures) class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt> @@ -338,6 +408,10 @@ def VLD4d8 : VLD4D<0b0000, 0b0000, "8">; def VLD4d16 : VLD4D<0b0000, 0b0100, "16">; def VLD4d32 : VLD4D<0b0000, 0b1000, "32">; +def VLD4d8Pseudo : VLDQQPseudo; +def VLD4d16Pseudo : VLDQQPseudo; +def VLD4d32Pseudo : VLDQQPseudo; + // ...with address register writeback: class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b10, op11_8, op7_4, @@ -350,6 +424,10 @@ def VLD4d8_UPD : VLD4DWB<0b0000, 0b0000, "8">; def VLD4d16_UPD : VLD4DWB<0b0000, 0b0100, "16">; def VLD4d32_UPD : VLD4DWB<0b0000, 0b1000, "32">; +def VLD4d8Pseudo_UPD : VLDQQWBPseudo; +def VLD4d16Pseudo_UPD : VLDQQWBPseudo; +def VLD4d32Pseudo_UPD : VLDQQWBPseudo; + // ...with double-spaced registers (non-updating versions for disassembly only): def VLD4q8 : VLD4D<0b0001, 0b0000, "8">; def VLD4q16 : VLD4D<0b0001, 0b0100, "16">; @@ -358,10 +436,14 @@ def VLD4q8_UPD : VLD4DWB<0b0001, 0b0000, "8">; def VLD4q16_UPD : VLD4DWB<0b0001, 0b0100, "16">; def VLD4q32_UPD : VLD4DWB<0b0001, 0b1000, "32">; +def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo; +def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo; +def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo; + // ...alternate versions to be allocated odd register numbers: -def VLD4q8odd_UPD : VLD4DWB<0b0001, 
0b0000, "8">; -def VLD4q16odd_UPD : VLD4DWB<0b0001, 0b0100, "16">; -def VLD4q32odd_UPD : VLD4DWB<0b0001, 0b1000, "32">; +def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo; +def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo; +def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo; // VLD1LN : Vector Load (single element to one lane) // FIXME: Not yet implemented. @@ -486,6 +568,25 @@ def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32">; let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { +// Classes for VST* pseudo-instructions with multi-register operands. +// These are expanded to real instructions after register allocation. +class VSTQPseudo + : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), IIC_VST, "">; +class VSTQWBPseudo + : PseudoNLdSt<(outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, QPR:$src), IIC_VST, + "$addr.addr = $wb">; +class VSTQQPseudo + : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), IIC_VST, "">; +class VSTQQWBPseudo + : PseudoNLdSt<(outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), IIC_VST, + "$addr.addr = $wb">; +class VSTQQQQWBPseudo + : PseudoNLdSt<(outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), IIC_VST, + "$addr.addr = $wb">; + // VST1 : Vector Store (multiple single elements) class VST1D<bits<4> op7_4, string Dt> : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST, @@ -505,6 +606,11 @@ def VST1q16 : VST1Q<0b0100, "16">; def VST1q32 : VST1Q<0b1000, "32">; def VST1q64 : VST1Q<0b1100, "64">; +def VST1q8Pseudo : VSTQPseudo; +def VST1q16Pseudo : VSTQPseudo; +def VST1q32Pseudo : VSTQPseudo; +def VST1q64Pseudo : VSTQPseudo; + // ...with address register writeback: class VST1DWB<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb), @@ -525,6 +631,11 @@ def VST1q16_UPD : VST1QWB<0b0100, "16">; def VST1q32_UPD : VST1QWB<0b1000, "32">; def VST1q64_UPD : VST1QWB<0b1100, "64">; +def VST1q8Pseudo_UPD : VSTQWBPseudo; +def VST1q16Pseudo_UPD : VSTQWBPseudo; +def VST1q32Pseudo_UPD : VSTQWBPseudo; +def VST1q64Pseudo_UPD : VSTQWBPseudo; + // ...with 3 registers (some of these are only for the disassembler): class VST1D3<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0110, op7_4, (outs), @@ -547,6 +658,9 @@ def VST1d16T_UPD : VST1D3WB<0b0100, "16">; def VST1d32T_UPD : VST1D3WB<0b1000, "32">; def VST1d64T_UPD : VST1D3WB<0b1100, "64">; +def VST1d64TPseudo : VSTQQPseudo; +def VST1d64TPseudo_UPD : VSTQQWBPseudo; + // ...with 4 registers (some of these are only for the disassembler): class VST1D4<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0010, op7_4, (outs), @@ -570,6 +684,9 @@ def VST1d16Q_UPD : VST1D4WB<0b0100, "16">; def VST1d32Q_UPD : VST1D4WB<0b1000, "32">; def VST1d64Q_UPD : VST1D4WB<0b1100, "64">; +def VST1d64QPseudo : VSTQQPseudo; +def VST1d64QPseudo_UPD : VSTQQWBPseudo; + // VST2 : Vector Store (multiple 2-element structures) class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b00, op11_8, op7_4, (outs), @@ -589,6 +706,14 @@ def VST2q8 : VST2Q<0b0000, "8">; def VST2q16 : VST2Q<0b0100, "16">; def VST2q32 : VST2Q<0b1000, "32">; +def VST2d8Pseudo : VSTQPseudo; +def VST2d16Pseudo : VSTQPseudo; +def VST2d32Pseudo : VSTQPseudo; + +def VST2q8Pseudo : VSTQQPseudo; +def VST2q16Pseudo : VSTQQPseudo; +def VST2q32Pseudo : VSTQQPseudo; + // ...with address register writeback: class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), @@ -610,6 +735,14 @@ def VST2q8_UPD : VST2QWB<0b0000, "8">; def VST2q16_UPD : VST2QWB<0b0100, 
"16">; def VST2q32_UPD : VST2QWB<0b1000, "32">; +def VST2d8Pseudo_UPD : VSTQWBPseudo; +def VST2d16Pseudo_UPD : VSTQWBPseudo; +def VST2d32Pseudo_UPD : VSTQWBPseudo; + +def VST2q8Pseudo_UPD : VSTQQWBPseudo; +def VST2q16Pseudo_UPD : VSTQQWBPseudo; +def VST2q32Pseudo_UPD : VSTQQWBPseudo; + // ...with double-spaced registers (for disassembly only): def VST2b8 : VST2D<0b1001, 0b0000, "8">; def VST2b16 : VST2D<0b1001, 0b0100, "16">; @@ -628,6 +761,10 @@ def VST3d8 : VST3D<0b0100, 0b0000, "8">; def VST3d16 : VST3D<0b0100, 0b0100, "16">; def VST3d32 : VST3D<0b0100, 0b1000, "32">; +def VST3d8Pseudo : VSTQQPseudo; +def VST3d16Pseudo : VSTQQPseudo; +def VST3d32Pseudo : VSTQQPseudo; + // ...with address register writeback: class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), @@ -640,6 +777,10 @@ def VST3d8_UPD : VST3DWB<0b0100, 0b0000, "8">; def VST3d16_UPD : VST3DWB<0b0100, 0b0100, "16">; def VST3d32_UPD : VST3DWB<0b0100, 0b1000, "32">; +def VST3d8Pseudo_UPD : VSTQQWBPseudo; +def VST3d16Pseudo_UPD : VSTQQWBPseudo; +def VST3d32Pseudo_UPD : VSTQQWBPseudo; + // ...with double-spaced registers (non-updating versions for disassembly only): def VST3q8 : VST3D<0b0101, 0b0000, "8">; def VST3q16 : VST3D<0b0101, 0b0100, "16">; @@ -648,10 +789,14 @@ def VST3q8_UPD : VST3DWB<0b0101, 0b0000, "8">; def VST3q16_UPD : VST3DWB<0b0101, 0b0100, "16">; def VST3q32_UPD : VST3DWB<0b0101, 0b1000, "32">; +def VST3q8Pseudo_UPD : VSTQQQQWBPseudo; +def VST3q16Pseudo_UPD : VSTQQQQWBPseudo; +def VST3q32Pseudo_UPD : VSTQQQQWBPseudo; + // ...alternate versions to be allocated odd register numbers: -def VST3q8odd_UPD : VST3DWB<0b0101, 0b0000, "8">; -def VST3q16odd_UPD : VST3DWB<0b0101, 0b0100, "16">; -def VST3q32odd_UPD : VST3DWB<0b0101, 0b1000, "32">; +def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo; +def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo; +def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo; // VST4 : Vector Store (multiple 4-element structures) class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt> @@ -664,6 +809,10 @@ def VST4d8 : VST4D<0b0000, 0b0000, "8">; def VST4d16 : VST4D<0b0000, 0b0100, "16">; def VST4d32 : VST4D<0b0000, 0b1000, "32">; +def VST4d8Pseudo : VSTQQPseudo; +def VST4d16Pseudo : VSTQQPseudo; +def VST4d32Pseudo : VSTQQPseudo; + // ...with address register writeback: class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), @@ -676,6 +825,10 @@ def VST4d8_UPD : VST4DWB<0b0000, 0b0000, "8">; def VST4d16_UPD : VST4DWB<0b0000, 0b0100, "16">; def VST4d32_UPD : VST4DWB<0b0000, 0b1000, "32">; +def VST4d8Pseudo_UPD : VSTQQWBPseudo; +def VST4d16Pseudo_UPD : VSTQQWBPseudo; +def VST4d32Pseudo_UPD : VSTQQWBPseudo; + // ...with double-spaced registers (non-updating versions for disassembly only): def VST4q8 : VST4D<0b0001, 0b0000, "8">; def VST4q16 : VST4D<0b0001, 0b0100, "16">; @@ -684,10 +837,14 @@ def VST4q8_UPD : VST4DWB<0b0001, 0b0000, "8">; def VST4q16_UPD : VST4DWB<0b0001, 0b0100, "16">; def VST4q32_UPD : VST4DWB<0b0001, 0b1000, "32">; +def VST4q8Pseudo_UPD : VSTQQQQWBPseudo; +def VST4q16Pseudo_UPD : VSTQQQQWBPseudo; +def VST4q32Pseudo_UPD : VSTQQQQWBPseudo; + // ...alternate versions to be allocated odd register numbers: -def VST4q8odd_UPD : VST4DWB<0b0001, 0b0000, "8">; -def VST4q16odd_UPD : VST4DWB<0b0001, 0b0100, "16">; -def VST4q32odd_UPD : VST4DWB<0b0001, 0b1000, "32">; +def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo; +def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo; +def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo; // VST1LN : Vector Store 
(single element from one lane) // FIXME: Not yet implemented. @@ -879,6 +1036,15 @@ class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "", [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>; +// Narrow 2-register operations. +class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, + bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyD, ValueType TyQ, SDNode OpNode> + : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$dst), + (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "", + [(set DPR:$dst, (TyD (OpNode (TyQ QPR:$src))))]>; + // Narrow 2-register intrinsics. class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, @@ -888,14 +1054,14 @@ class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "", [(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src))))]>; -// Long 2-register intrinsics (currently only used for VMOVL). -class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType TyQ, ValueType TyD, Intrinsic IntOp> +// Long 2-register operations (currently only used for VMOVL). +class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, + bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, SDNode OpNode> : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$dst), (ins DPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "", - [(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src))))]>; + [(set QPR:$dst, (TyQ (OpNode (TyD DPR:$src))))]>; // 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register. class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt> @@ -1150,6 +1316,24 @@ class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, (ResTy (NEONvduplane (OpTy DPR_8:$src3), imm:$lane)))))))]>; +// Neon Intrinsic-Op instructions (VABA): double- and quad-register. +class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType Ty, Intrinsic IntOp, SDNode OpNode> + : N3V<op24, op23, op21_20, op11_8, 0, op4, + (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin, + OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", + [(set DPR:$dst, (Ty (OpNode DPR:$src1, + (Ty (IntOp (Ty DPR:$src2), (Ty DPR:$src3))))))]>; +class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType Ty, Intrinsic IntOp, SDNode OpNode> + : N3V<op24, op23, op21_20, op11_8, 1, op4, + (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), N3RegFrm, itin, + OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", + [(set QPR:$dst, (Ty (OpNode QPR:$src1, + (Ty (IntOp (Ty QPR:$src2), (Ty QPR:$src3))))))]>; + // Neon 3-argument intrinsics, both double- and quad-register. // The destination register is also used as the first source operand register. class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, @@ -1169,6 +1353,53 @@ class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1), (OpTy QPR:$src2), (OpTy QPR:$src3))))]>; +// Long Multiply-Add/Sub operations. 
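The N3VLMulOp classes defined next let VMLAL/VMLSL be selected from an ordinary add or sub whose operand is a separate NEONvmulls/NEONvmullu widening multiply, rather than from the fused vmlal/vmlsl intrinsics. In source terms the computation being matched is a plain widening multiply-accumulate; the sketch below shows that shape (widening_mla is a hypothetical function, and whether a given pipeline actually produces the matched DAG for this loop is not guaranteed).

#include <cstdint>
#include <cstddef>

// Widening multiply-accumulate: acc[i] += (int32_t)a[i] * (int32_t)b[i].
// In DAG terms this is add(acc, mul(sext(a), sext(b))); once the multiply is
// represented as a NEONvmulls node, the surrounding add matches VMLAL.s16.
void widening_mla(int32_t *acc, const int16_t *a, const int16_t *b, size_t n) {
  for (size_t i = 0; i < n; ++i)
    acc[i] += (int32_t)a[i] * (int32_t)b[i];
}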
+class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode> + : N3V<op24, op23, op21_20, op11_8, 0, op4, + (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin, + OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", + [(set QPR:$dst, (OpNode (TyQ QPR:$src1), + (TyQ (MulOp (TyD DPR:$src2), + (TyD DPR:$src3)))))]>; +class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode> + : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), + (ins QPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), + NVMulSLFrm, itin, + OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst", + [(set QPR:$dst, + (OpNode (TyQ QPR:$src1), + (TyQ (MulOp (TyD DPR:$src2), + (TyD (NEONvduplane (TyD DPR_VFP2:$src3), + imm:$lane))))))]>; +class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode> + : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), + (ins QPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane), + NVMulSLFrm, itin, + OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst", + [(set QPR:$dst, + (OpNode (TyQ QPR:$src1), + (TyQ (MulOp (TyD DPR:$src2), + (TyD (NEONvduplane (TyD DPR_8:$src3), + imm:$lane))))))]>; + +// Long Intrinsic-Op vector operations with explicit extend (VABAL). +class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp, + SDNode OpNode> + : N3V<op24, op23, op21_20, op11_8, 0, op4, + (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin, + OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", + [(set QPR:$dst, (OpNode (TyQ QPR:$src1), + (TyQ (ExtOp (TyD (IntOp (TyD DPR:$src2), + (TyD DPR:$src3)))))))]>; + // Neon Long 3-argument intrinsic. The destination register is // a quad-register and is also used as the first source operand register. class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, @@ -1217,6 +1448,61 @@ class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, let isCommutable = Commutable; } +// Long 3-register operations. 
+class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable> + : N3V<op24, op23, op21_20, op11_8, 0, op4, + (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin, + OpcodeStr, Dt, "$dst, $src1, $src2", "", + [(set QPR:$dst, (TyQ (OpNode (TyD DPR:$src1), (TyD DPR:$src2))))]> { + let isCommutable = Commutable; +} +class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, SDNode OpNode> + : N3V<op24, 1, op21_20, op11_8, 1, 0, + (outs QPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", + [(set QPR:$dst, + (TyQ (OpNode (TyD DPR:$src1), + (TyD (NEONvduplane (TyD DPR_VFP2:$src2),imm:$lane)))))]>; +class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, SDNode OpNode> + : N3V<op24, 1, op21_20, op11_8, 1, 0, + (outs QPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", + [(set QPR:$dst, + (TyQ (OpNode (TyD DPR:$src1), + (TyD (NEONvduplane (TyD DPR_8:$src2), imm:$lane)))))]>; + +// Long 3-register operations with explicitly extended operands. +class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp, + bit Commutable> + : N3V<op24, op23, op21_20, op11_8, 0, op4, + (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin, + OpcodeStr, Dt, "$dst, $src1, $src2", "", + [(set QPR:$dst, (OpNode (TyQ (ExtOp (TyD DPR:$src1))), + (TyQ (ExtOp (TyD DPR:$src2)))))]> { + let isCommutable = Commutable; +} + +// Long 3-register intrinsics with explicit extend (VABDL). +class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp, + bit Commutable> + : N3V<op24, op23, op21_20, op11_8, 0, op4, + (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin, + OpcodeStr, Dt, "$dst, $src1, $src2", "", + [(set QPR:$dst, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$src1), + (TyD DPR:$src2))))))]> { + let isCommutable = Commutable; +} + // Long 3-register intrinsics. class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, @@ -1248,14 +1534,15 @@ class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8, (OpTy (NEONvduplane (OpTy DPR_8:$src2), imm:$lane)))))]>; -// Wide 3-register intrinsics. -class N3VWInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, - string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, - Intrinsic IntOp, bit Commutable> +// Wide 3-register operations. 
+class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, + SDNode OpNode, SDNode ExtOp, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 0, op4, (outs QPR:$dst), (ins QPR:$src1, DPR:$src2), N3RegFrm, IIC_VSUBiD, OpcodeStr, Dt, "$dst, $src1, $src2", "", - [(set QPR:$dst, (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2))))]> { + [(set QPR:$dst, (OpNode (TyQ QPR:$src1), + (TyQ (ExtOp (TyD DPR:$src2)))))]> { let isCommutable = Commutable; } @@ -1488,6 +1775,23 @@ multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, } +// Neon Narrowing 2-register vector operations, +// source operand element sizes of 16, 32 and 64 bits: +multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, + bits<5> op11_7, bit op6, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + SDNode OpNode> { + def v8i8 : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4, + itin, OpcodeStr, !strconcat(Dt, "16"), + v8i8, v8i16, OpNode>; + def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4, + itin, OpcodeStr, !strconcat(Dt, "32"), + v4i16, v4i32, OpNode>; + def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4, + itin, OpcodeStr, !strconcat(Dt, "64"), + v2i32, v2i64, OpNode>; +} + // Neon Narrowing 2-register vector intrinsics, // source operand element sizes of 16, 32 and 64 bits: multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, @@ -1508,14 +1812,14 @@ multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, // Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL). // source operand element sizes of 16, 32 and 64 bits: -multiclass N2VLInt_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4, - string OpcodeStr, string Dt, Intrinsic IntOp> { - def v8i16 : N2VLInt<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD, - OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>; - def v4i32 : N2VLInt<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD, - OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>; - def v2i64 : N2VLInt<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD, - OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>; +multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4, + string OpcodeStr, string Dt, SDNode OpNode> { + def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD, + OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>; + def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD, + OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>; + def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD, + OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>; } @@ -1607,6 +1911,47 @@ multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4, } +// Neon Long 3-register vector operations. 
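The N3VL/N3VLExt/N3VW classes above, and the multiclasses that follow, replace the vaddl/vaddw/vsubl/vsubw intrinsics with patterns built from plain add/sub combined with sext/zext, so the long and wide forms can be selected from generic widening arithmetic. The sketch below shows the shape of computation the new patterns describe; the function names are hypothetical and it is only a semantic illustration, not a codegen guarantee.

#include <cstdint>
#include <cstddef>

// VADDL-shaped: Q = sext(D) + sext(D)   -> the new VADDLs pattern.
void add_long(int32_t *dst, const int16_t *a, const int16_t *b, size_t n) {
  for (size_t i = 0; i < n; ++i)
    dst[i] = (int32_t)a[i] + (int32_t)b[i];
}

// VADDW-shaped: Q = Q + zext(D)         -> the new VADDWu pattern.
void add_wide(uint32_t *acc, const uint16_t *b, size_t n) {
  for (size_t i = 0; i < n; ++i)
    acc[i] += (uint32_t)b[i];
}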
+ +multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itin16, InstrItinClass itin32, + string OpcodeStr, string Dt, + SDNode OpNode, bit Commutable = 0> { + def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16, + OpcodeStr, !strconcat(Dt, "8"), + v8i16, v8i8, OpNode, Commutable>; + def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16, + OpcodeStr, !strconcat(Dt, "16"), + v4i32, v4i16, OpNode, Commutable>; + def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32, + OpcodeStr, !strconcat(Dt, "32"), + v2i64, v2i32, OpNode, Commutable>; +} + +multiclass N3VLSL_HS<bit op24, bits<4> op11_8, + InstrItinClass itin, string OpcodeStr, string Dt, + SDNode OpNode> { + def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr, + !strconcat(Dt, "16"), v4i32, v4i16, OpNode>; + def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr, + !strconcat(Dt, "32"), v2i64, v2i32, OpNode>; +} + +multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itin16, InstrItinClass itin32, + string OpcodeStr, string Dt, + SDNode OpNode, SDNode ExtOp, bit Commutable = 0> { + def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16, + OpcodeStr, !strconcat(Dt, "8"), + v8i16, v8i8, OpNode, ExtOp, Commutable>; + def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16, + OpcodeStr, !strconcat(Dt, "16"), + v4i32, v4i16, OpNode, ExtOp, Commutable>; + def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32, + OpcodeStr, !strconcat(Dt, "32"), + v2i64, v2i32, OpNode, ExtOp, Commutable>; +} + // Neon Long 3-register vector intrinsics. // First with only element sizes of 16 and 32 bits: @@ -1643,21 +1988,36 @@ multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, v8i16, v8i8, IntOp, Commutable>; } +// ....with explicit extend (VABDL). 
+multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + Intrinsic IntOp, SDNode ExtOp, bit Commutable = 0> { + def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin, + OpcodeStr, !strconcat(Dt, "8"), + v8i16, v8i8, IntOp, ExtOp, Commutable>; + def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin, + OpcodeStr, !strconcat(Dt, "16"), + v4i32, v4i16, IntOp, ExtOp, Commutable>; + def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin, + OpcodeStr, !strconcat(Dt, "32"), + v2i64, v2i32, IntOp, ExtOp, Commutable>; +} + // Neon Wide 3-register vector intrinsics, // source operand element sizes of 8, 16 and 32 bits: -multiclass N3VWInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, - string OpcodeStr, string Dt, - Intrinsic IntOp, bit Commutable = 0> { - def v8i16 : N3VWInt<op24, op23, 0b00, op11_8, op4, - OpcodeStr, !strconcat(Dt, "8"), - v8i16, v8i8, IntOp, Commutable>; - def v4i32 : N3VWInt<op24, op23, 0b01, op11_8, op4, - OpcodeStr, !strconcat(Dt, "16"), - v4i32, v4i16, IntOp, Commutable>; - def v2i64 : N3VWInt<op24, op23, 0b10, op11_8, op4, - OpcodeStr, !strconcat(Dt, "32"), - v2i64, v2i32, IntOp, Commutable>; +multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + string OpcodeStr, string Dt, + SDNode OpNode, SDNode ExtOp, bit Commutable = 0> { + def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4, + OpcodeStr, !strconcat(Dt, "8"), + v8i16, v8i8, OpNode, ExtOp, Commutable>; + def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4, + OpcodeStr, !strconcat(Dt, "16"), + v4i32, v4i16, OpNode, ExtOp, Commutable>; + def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4, + OpcodeStr, !strconcat(Dt, "32"), + v2i64, v2i32, OpNode, ExtOp, Commutable>; } @@ -1700,6 +2060,29 @@ multiclass N3VMulOpSL_HS<bits<4> op11_8, mul, ShOp>; } +// Neon Intrinsic-Op vector operations, +// element sizes of 8, 16 and 32 bits: +multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itinD, InstrItinClass itinQ, + string OpcodeStr, string Dt, Intrinsic IntOp, + SDNode OpNode> { + // 64-bit vector types. + def v8i8 : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD, + OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>; + def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD, + OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>; + def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD, + OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>; + + // 128-bit vector types. 
+ def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ, + OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>; + def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ, + OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>; + def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ, + OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>; +} + // Neon 3-argument intrinsics, // element sizes of 8, 16 and 32 bits: multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, @@ -1723,6 +2106,29 @@ multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, } +// Neon Long Multiply-Op vector operations, +// element sizes of 8, 16 and 32 bits: +multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itin16, InstrItinClass itin32, + string OpcodeStr, string Dt, SDNode MulOp, + SDNode OpNode> { + def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr, + !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>; + def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr, + !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>; + def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr, + !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>; +} + +multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr, + string Dt, SDNode MulOp, SDNode OpNode> { + def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr, + !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>; + def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr, + !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>; +} + + // Neon Long 3-argument intrinsics. // First with only element sizes of 16 and 32 bits: @@ -1752,6 +2158,21 @@ multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>; } +// ....with explicit extend (VABAL). 
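With the N3VIntOp_QHS multiclass above and the N3VLIntExtOp_QHS multiclass just below, VABA is matched as an add wrapped around the vabd intrinsic, and VABAL as an add wrapped around a zero-extended vabd, instead of needing dedicated vaba/vabal intrinsics. The computation being described is a (widening) sum of absolute differences; a hedged scalar sketch (sad_accumulate is a hypothetical name, and the vabd part itself still comes from the NEON intrinsic in IR):

#include <cstdint>
#include <cstddef>

// VABA-shaped:  acc[i] += |a[i] - b[i]|            (same element width)
// VABAL-shaped: acc32[i] += zext(|a8[i] - b8[i]|)  (widening accumulate)
// The new patterns express this as add(acc, vabd(a, b)) and
// add(acc, zext(vabd(a, b))).
void sad_accumulate(uint32_t *acc, const uint8_t *a, const uint8_t *b,
                    size_t n) {
  for (size_t i = 0; i < n; ++i) {
    uint8_t d = a[i] > b[i] ? a[i] - b[i] : b[i] - a[i];
    acc[i] += d;                      // widening add of the absolute difference
  }
}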
+multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + Intrinsic IntOp, SDNode ExtOp, SDNode OpNode> { + def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin, + OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, + IntOp, ExtOp, OpNode>; + def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin, + OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, + IntOp, ExtOp, OpNode>; + def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin, + OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, + IntOp, ExtOp, OpNode>; +} + // Neon 2-register vector intrinsics, // element sizes of 8, 16 and 32 bits: @@ -1996,13 +2417,13 @@ def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32", def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32", v4f32, v4f32, fadd, 1>; // VADDL : Vector Add Long (Q = D + D) -defm VADDLs : N3VLInt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, - "vaddl", "s", int_arm_neon_vaddls, 1>; -defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, - "vaddl", "u", int_arm_neon_vaddlu, 1>; +defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, + "vaddl", "s", add, sext, 1>; +defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, + "vaddl", "u", add, zext, 1>; // VADDW : Vector Add Wide (Q = Q + D) -defm VADDWs : N3VWInt_QHS<0,1,0b0001,0, "vaddw", "s", int_arm_neon_vaddws, 0>; -defm VADDWu : N3VWInt_QHS<1,1,0b0001,0, "vaddw", "u", int_arm_neon_vaddwu, 0>; +defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>; +defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>; // VHADD : Vector Halving Add defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, @@ -2113,16 +2534,14 @@ def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D) -defm VMULLs : N3VLInt_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, - "vmull", "s", int_arm_neon_vmulls, 1>; -defm VMULLu : N3VLInt_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, - "vmull", "u", int_arm_neon_vmullu, 1>; +defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, + "vmull", "s", NEONvmulls, 1>; +defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, + "vmull", "u", NEONvmullu, 1>; def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8", v8i16, v8i8, int_arm_neon_vmullp, 1>; -defm VMULLsls : N3VLIntSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", - int_arm_neon_vmulls>; -defm VMULLslu : N3VLIntSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", - int_arm_neon_vmullu>; +defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>; +defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>; // VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D) defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D, @@ -2172,13 +2591,13 @@ def : Pat<(v4f32 (fadd (v4f32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VMLAL : Vector Multiply Accumulate Long (Q += D * D) -defm VMLALs : N3VLInt3_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, - "vmlal", "s", int_arm_neon_vmlals>; -defm VMLALu : N3VLInt3_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, - "vmlal", "u", int_arm_neon_vmlalu>; +defm VMLALs : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, + "vmlal", "s", NEONvmulls, add>; +defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, + "vmlal", "u", NEONvmullu, add>; -defm VMLALsls : 
N3VLInt3SL_HS<0, 0b0010, "vmlal", "s", int_arm_neon_vmlals>; -defm VMLALslu : N3VLInt3SL_HS<1, 0b0010, "vmlal", "u", int_arm_neon_vmlalu>; +defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>; +defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>; // VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D) defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, @@ -2224,13 +2643,13 @@ def : Pat<(v4f32 (fsub (v4f32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VMLSL : Vector Multiply Subtract Long (Q -= D * D) -defm VMLSLs : N3VLInt3_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, - "vmlsl", "s", int_arm_neon_vmlsls>; -defm VMLSLu : N3VLInt3_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, - "vmlsl", "u", int_arm_neon_vmlslu>; +defm VMLSLs : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, + "vmlsl", "s", NEONvmulls, sub>; +defm VMLSLu : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, + "vmlsl", "u", NEONvmullu, sub>; -defm VMLSLsls : N3VLInt3SL_HS<0, 0b0110, "vmlsl", "s", int_arm_neon_vmlsls>; -defm VMLSLslu : N3VLInt3SL_HS<1, 0b0110, "vmlsl", "u", int_arm_neon_vmlslu>; +defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>; +defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>; // VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D) defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D, @@ -2247,13 +2666,13 @@ def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32", def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32", v4f32, v4f32, fsub, 0>; // VSUBL : Vector Subtract Long (Q = D - D) -defm VSUBLs : N3VLInt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, - "vsubl", "s", int_arm_neon_vsubls, 1>; -defm VSUBLu : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, - "vsubl", "u", int_arm_neon_vsublu, 1>; +defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, + "vsubl", "s", sub, sext, 0>; +defm VSUBLu : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, + "vsubl", "u", sub, zext, 0>; // VSUBW : Vector Subtract Wide (Q = Q - D) -defm VSUBWs : N3VWInt_QHS<0,1,0b0011,0, "vsubw", "s", int_arm_neon_vsubws, 0>; -defm VSUBWu : N3VWInt_QHS<1,1,0b0011,0, "vsubw", "u", int_arm_neon_vsubwu, 0>; +defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>; +defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>; // VHSUB : Vector Halving Subtract defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, @@ -2469,32 +2888,32 @@ def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1, // VABD : Vector Absolute Difference defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, - "vabd", "s", int_arm_neon_vabds, 0>; + "vabd", "s", int_arm_neon_vabds, 1>; defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, - "vabd", "u", int_arm_neon_vabdu, 0>; + "vabd", "u", int_arm_neon_vabdu, 1>; def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND, - "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 0>; + "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>; def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ, - "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 0>; + "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>; // VABDL : Vector Absolute Difference Long (Q = | D - D |) -defm VABDLs : N3VLInt_QHS<0,1,0b0111,0, IIC_VSUBi4Q, IIC_VSUBi4Q, - "vabdl", "s", 
int_arm_neon_vabdls, 0>; -defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, IIC_VSUBi4Q, IIC_VSUBi4Q, - "vabdl", "u", int_arm_neon_vabdlu, 0>; +defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q, + "vabdl", "s", int_arm_neon_vabds, zext, 1>; +defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q, + "vabdl", "u", int_arm_neon_vabdu, zext, 1>; // VABA : Vector Absolute Difference and Accumulate -defm VABAs : N3VInt3_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ, - "vaba", "s", int_arm_neon_vabas>; -defm VABAu : N3VInt3_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ, - "vaba", "u", int_arm_neon_vabau>; +defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ, + "vaba", "s", int_arm_neon_vabds, add>; +defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ, + "vaba", "u", int_arm_neon_vabdu, add>; // VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |) -defm VABALs : N3VLInt3_QHS<0,1,0b0101,0, IIC_VABAD, IIC_VABAD, - "vabal", "s", int_arm_neon_vabals>; -defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, IIC_VABAD, IIC_VABAD, - "vabal", "u", int_arm_neon_vabalu>; +defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD, + "vabal", "s", int_arm_neon_vabds, zext, add>; +defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD, + "vabal", "u", int_arm_neon_vabdu, zext, add>; // Vector Maximum and Minimum. @@ -3113,8 +3532,8 @@ def VDUPfqf : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 1, 0, [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>; // VMOVN : Vector Narrowing Move -defm VMOVN : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD, - "vmovn", "i", int_arm_neon_vmovn>; +defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD, + "vmovn", "i", trunc>; // VQMOVN : Vector Saturating Narrowing Move defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD, "vqmovn", "s", int_arm_neon_vqmovns>; @@ -3123,10 +3542,8 @@ defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD, defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD, "vqmovun", "s", int_arm_neon_vqmovnsu>; // VMOVL : Vector Lengthening Move -defm VMOVLs : N2VLInt_QHS<0b01,0b10100,0,1, "vmovl", "s", - int_arm_neon_vmovls>; -defm VMOVLu : N2VLInt_QHS<0b11,0b10100,0,1, "vmovl", "u", - int_arm_neon_vmovlu>; +defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>; +defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>; // Vector Conversions. diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index bc0790dccbb5..a13ff1232749 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -221,9 +221,13 @@ def tADDrPCi : T1I<(outs tGPR:$dst), (ins t_imm_s4:$rhs), IIC_iALUi, T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10 // ADD rd, sp, #imm8 +// This is rematerializable, which is particularly useful for taking the +// address of locals. +let isReMaterializable = 1 in { def tADDrSPi : T1I<(outs tGPR:$dst), (ins GPR:$sp, t_imm_s4:$rhs), IIC_iALUi, "add\t$dst, $sp, $rhs", []>, T1Encoding<{1,0,1,0,1,?}>; // A6.2 & A8.6.8 +} // ADD sp, sp, #imm7 def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), IIC_iALUi, @@ -251,19 +255,6 @@ def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, let Inst{2-0} = 0b101; } -// Pseudo instruction that will expand into a tSUBspi + a copy. -let usesCustomInserter = 1 in { // Expanded after instruction selection. 
-def tSUBspi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), - NoItinerary, "${:comment} sub\t$dst, $rhs", []>; - -def tADDspr_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), - NoItinerary, "${:comment} add\t$dst, $rhs", []>; - -let Defs = [CPSR] in -def tANDsp : PseudoInst<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), - NoItinerary, "${:comment} and\t$dst, $rhs", []>; -} // usesCustomInserter - //===----------------------------------------------------------------------===// // Control Flow Instructions. // @@ -378,7 +369,7 @@ let isBranch = 1, isTerminator = 1 in { def tBR_JTr : T1JTI<(outs), (ins tGPR:$target, jtblock_operand:$jt, i32imm:$id), - IIC_Br, "mov\tpc, $target\n\t.align\t2\n$jt", + IIC_Br, "mov\tpc, $target\n\t.align\t2$jt", [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]>, Encoding16 { let Inst{15-7} = 0b010001101; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index bbe675e81ab1..6ba0a44be470 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -32,7 +32,7 @@ def t2_so_reg : Operand<i32>, // reg imm ComplexPattern<i32, 2, "SelectT2ShifterOperandReg", [shl,srl,sra,rotr]> { let PrintMethod = "printT2SOOperand"; - let MIOperandInfo = (ops GPR, i32imm); + let MIOperandInfo = (ops rGPR, i32imm); } // t2_so_imm_not_XFORM - Return the complement of a t2_so_imm value @@ -51,10 +51,7 @@ def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{ // represented in the imm field in the same 12-bit form that they are encoded // into t2_so_imm instructions: the 8-bit immediate is the least significant // bits [bits 0-7], the 4-bit shift/splat amount is the next 4 bits [bits 8-11]. -def t2_so_imm : Operand<i32>, - PatLeaf<(imm), [{ - return ARM_AM::getT2SOImmVal((uint32_t)N->getZExtValue()) != -1; -}]>; +def t2_so_imm : Operand<i32>, PatLeaf<(imm), [{ return Pred_t2_so_imm(N); }]>; // t2_so_imm_not - Match an immediate that is a complement // of a t2_so_imm. 
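The t2_so_imm comment above describes the internal 12-bit form (8-bit payload in bits 0-7, shift/splat selector above it) that ARM_AM::getT2SOImmVal validates. For reference, the architectural expansion of the corresponding Thumb-2 modified immediate (ThumbExpandImm in the ARM ARM) looks roughly like the sketch below; this follows the architecture's rule and is not a quote of LLVM's helper, whose internal field split may differ.

#include <cstdint>

// Sketch of the architectural ThumbExpandImm rule for a Thumb-2 modified
// immediate.  Zero-payload splat encodings are UNPREDICTABLE architecturally
// and are not special-cased here.
static uint32_t thumbExpandImm(uint32_t imm12) {
  uint32_t imm8 = imm12 & 0xFF;
  switch ((imm12 >> 8) & 0xF) {
  case 0:  return imm8;                        // 00000000 00000000 00000000 abcdefgh
  case 1:  return (imm8 << 16) | imm8;         // 00000000 abcdefgh 00000000 abcdefgh
  case 2:  return (imm8 << 24) | (imm8 << 8);  // abcdefgh 00000000 abcdefgh 00000000
  case 3:  return imm8 * 0x01010101u;          // abcdefgh abcdefgh abcdefgh abcdefgh
  default: {                                   // rotate '1':imm12<6:0> right by imm12<11:7>
    uint32_t unrot = 0x80 | (imm12 & 0x7F);
    unsigned rot = (imm12 >> 7) & 0x1F;        // always >= 8 in this branch
    return (unrot >> rot) | (unrot << (32 - rot));
  }
  }
}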
@@ -162,7 +159,7 @@ def t2am_imm8s4_offset : Operand<i32> { def t2addrmode_so_reg : Operand<i32>, ComplexPattern<i32, 3, "SelectT2AddrModeSoReg", []> { let PrintMethod = "printT2AddrModeSoRegOperand"; - let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm); + let MIOperandInfo = (ops GPR:$base, rGPR:$offsreg, i32imm:$offsimm); } @@ -176,9 +173,9 @@ def t2addrmode_so_reg : Operand<i32>, multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode, bit Cheap = 0, bit ReMat = 0> { // shifted imm - def i : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi, + def i : T2sI<(outs rGPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi, opc, "\t$dst, $src", - [(set GPR:$dst, (opnode t2_so_imm:$src))]> { + [(set rGPR:$dst, (opnode t2_so_imm:$src))]> { let isAsCheapAsAMove = Cheap; let isReMaterializable = ReMat; let Inst{31-27} = 0b11110; @@ -189,9 +186,9 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{15} = 0; } // register - def r : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr, + def r : T2sI<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVr, opc, ".w\t$dst, $src", - [(set GPR:$dst, (opnode GPR:$src))]> { + [(set rGPR:$dst, (opnode rGPR:$src))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -202,9 +199,9 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{5-4} = 0b00; // type } // shifted register - def s : T2sI<(outs GPR:$dst), (ins t2_so_reg:$src), IIC_iMOVsi, + def s : T2sI<(outs rGPR:$dst), (ins t2_so_reg:$src), IIC_iMOVsi, opc, ".w\t$dst, $src", - [(set GPR:$dst, (opnode t2_so_reg:$src))]> { + [(set rGPR:$dst, (opnode t2_so_reg:$src))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -217,11 +214,11 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode, /// binary operation that produces a value. These are predicable and can be /// changed to modify CPSR. 
multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode, - bit Commutable = 0, string wide =""> { + bit Commutable = 0, string wide = ""> { // shifted imm - def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, + def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, opc, "\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> { + [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_imm:$rhs))]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = opcod; @@ -229,9 +226,9 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{15} = 0; } // register - def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, + def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), IIC_iALUr, opc, !strconcat(wide, "\t$dst, $lhs, $rhs"), - [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> { + [(set rGPR:$dst, (opnode rGPR:$lhs, rGPR:$rhs))]> { let isCommutable = Commutable; let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -242,9 +239,9 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{5-4} = 0b00; // type } // shifted register - def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, + def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, opc, !strconcat(wide, "\t$dst, $lhs, $rhs"), - [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> { + [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_reg:$rhs))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -259,23 +256,35 @@ multiclass T2I_bin_w_irs<bits<4> opcod, string opc, PatFrag opnode, T2I_bin_irs<opcod, opc, opnode, Commutable, ".w">; /// T2I_rbin_is - Same as T2I_bin_irs except the order of operands are -/// reversed. It doesn't define the 'rr' form since it's handled by its -/// T2I_bin_irs counterpart. -multiclass T2I_rbin_is<bits<4> opcod, string opc, PatFrag opnode> { +/// reversed. The 'rr' form is only defined for the disassembler; for codegen +/// it is equivalent to the T2I_bin_irs counterpart. +multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> { // shifted imm - def ri : T2sI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), IIC_iALUi, + def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_imm:$lhs), IIC_iALUi, opc, ".w\t$dst, $rhs, $lhs", - [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]> { + [(set rGPR:$dst, (opnode t2_so_imm:$lhs, rGPR:$rhs))]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = opcod; let Inst{20} = ?; // The S bit. let Inst{15} = 0; } + // register + def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$rhs, rGPR:$lhs), IIC_iALUr, + opc, "\t$dst, $rhs, $lhs", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b01; + let Inst{24-21} = opcod; + let Inst{20} = ?; // The S bit. 
+ let Inst{14-12} = 0b000; // imm3 + let Inst{7-6} = 0b00; // imm2 + let Inst{5-4} = 0b00; // type + } // shifted register - def rs : T2sI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi, + def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi, opc, "\t$dst, $rhs, $lhs", - [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]> { + [(set rGPR:$dst, (opnode t2_so_reg:$lhs, rGPR:$rhs))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -289,9 +298,9 @@ let Defs = [CPSR] in { multiclass T2I_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode, bit Commutable = 0> { // shifted imm - def ri : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, + def ri : T2I<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, !strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> { + [(set rGPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = opcod; @@ -299,9 +308,9 @@ multiclass T2I_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{15} = 0; } // register - def rr : T2I<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, + def rr : T2I<(outs rGPR:$dst), (ins GPR:$lhs, rGPR:$rhs), IIC_iALUr, !strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> { + [(set rGPR:$dst, (opnode GPR:$lhs, rGPR:$rhs))]> { let isCommutable = Commutable; let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -312,9 +321,9 @@ multiclass T2I_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{5-4} = 0b00; // type } // shifted register - def rs : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, + def rs : T2I<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, !strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> { + [(set rGPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -328,9 +337,12 @@ multiclass T2I_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode, multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, bit Commutable = 0> { // shifted imm - def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, + // The register-immediate version is re-materializable. This is useful + // in particular for taking the address of a local. + let isReMaterializable = 1 in { + def ri : T2sI<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, opc, ".w\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> { + [(set rGPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24} = 1; @@ -338,10 +350,11 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, let Inst{20} = 0; // The S bit. 
let Inst{15} = 0; } + } // 12-bit imm - def ri12 : T2I<(outs GPR:$dst), (ins GPR:$lhs, imm0_4095:$rhs), IIC_iALUi, + def ri12 : T2I<(outs rGPR:$dst), (ins GPR:$lhs, imm0_4095:$rhs), IIC_iALUi, !strconcat(opc, "w"), "\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, imm0_4095:$rhs))]> { + [(set rGPR:$dst, (opnode GPR:$lhs, imm0_4095:$rhs))]> { let Inst{31-27} = 0b11110; let Inst{25} = 1; let Inst{24} = 0; @@ -350,9 +363,9 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, let Inst{15} = 0; } // register - def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, + def rr : T2sI<(outs rGPR:$dst), (ins GPR:$lhs, rGPR:$rhs), IIC_iALUr, opc, ".w\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> { + [(set rGPR:$dst, (opnode GPR:$lhs, rGPR:$rhs))]> { let isCommutable = Commutable; let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -364,9 +377,9 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, let Inst{5-4} = 0b00; // type } // shifted register - def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, + def rs : T2sI<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, opc, ".w\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> { + [(set rGPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24} = 1; @@ -382,9 +395,9 @@ let Uses = [CPSR] in { multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, bit Commutable = 0> { // shifted imm - def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, + def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, opc, "\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>, + [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_imm:$rhs))]>, Requires<[IsThumb2]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; @@ -393,9 +406,9 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{15} = 0; } // register - def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, + def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), IIC_iALUr, opc, ".w\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>, + [(set rGPR:$dst, (opnode rGPR:$lhs, rGPR:$rhs))]>, Requires<[IsThumb2]> { let isCommutable = Commutable; let Inst{31-27} = 0b11101; @@ -407,9 +420,9 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{5-4} = 0b00; // type } // shifted register - def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, + def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, opc, ".w\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>, + [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_reg:$rhs))]>, Requires<[IsThumb2]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -423,9 +436,9 @@ let Defs = [CPSR] in { multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode, bit Commutable = 0> { // shifted imm - def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, + def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, opc, "\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>, + [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_imm:$rhs))]>, Requires<[IsThumb2]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; @@ -434,9 +447,9 @@ multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{15} = 0; } // 
register - def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, + def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), IIC_iALUr, opc, ".w\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>, + [(set rGPR:$dst, (opnode rGPR:$lhs, rGPR:$rhs))]>, Requires<[IsThumb2]> { let isCommutable = Commutable; let Inst{31-27} = 0b11101; @@ -448,9 +461,9 @@ multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{5-4} = 0b00; // type } // shifted register - def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, + def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, opc, ".w\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>, + [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_reg:$rhs))]>, Requires<[IsThumb2]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -461,13 +474,14 @@ multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode, } } -/// T2I_rbin_s_is - Same as T2I_rbin_is except sets 's' bit. +/// T2I_rbin_s_is - Same as T2I_rbin_irs except sets 's' bit and the register +/// version is not needed since this is only for codegen. let Defs = [CPSR] in { multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> { // shifted imm - def ri : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), IIC_iALUi, + def ri : T2I<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_imm:$lhs), IIC_iALUi, !strconcat(opc, "s"), ".w\t$dst, $rhs, $lhs", - [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]> { + [(set rGPR:$dst, (opnode t2_so_imm:$lhs, rGPR:$rhs))]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = opcod; @@ -475,9 +489,9 @@ multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> { let Inst{15} = 0; } // shifted register - def rs : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi, + def rs : T2I<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi, !strconcat(opc, "s"), "\t$dst, $rhs, $lhs", - [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]> { + [(set rGPR:$dst, (opnode t2_so_reg:$lhs, rGPR:$rhs))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -490,18 +504,18 @@ multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> { // rotate operation that produces a value. multiclass T2I_sh_ir<bits<2> opcod, string opc, PatFrag opnode> { // 5-bit imm - def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iMOVsi, + def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, i32imm:$rhs), IIC_iMOVsi, opc, ".w\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, imm1_31:$rhs))]> { + [(set rGPR:$dst, (opnode rGPR:$lhs, imm1_31:$rhs))]> { let Inst{31-27} = 0b11101; let Inst{26-21} = 0b010010; let Inst{19-16} = 0b1111; // Rn let Inst{5-4} = opcod; } // register - def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iMOVsr, + def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), IIC_iMOVsr, opc, ".w\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> { + [(set rGPR:$dst, (opnode rGPR:$lhs, rGPR:$rhs))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; let Inst{22-21} = opcod; @@ -513,7 +527,7 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, PatFrag opnode> { /// T2I_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test /// patterns. Similar to T2I_bin_irs except the instruction does not produce /// a explicit result, only implicitly set CPSR. 
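As a rough C sketch of what the compare/test multiclass documented above (T2I_cmp_irs) describes: a CMP-style node computes only the NZCV flags of lhs - rhs and discards the arithmetic result, which is why these instructions have no (outs) register operand. This is an illustration, not code from this patch; the helper name cmp_nzcv is invented here.

#include <stdint.h>

/* Illustration only: flags of lhs - rhs, no destination register. */
static uint32_t cmp_nzcv(uint32_t lhs, uint32_t rhs) {
    uint32_t diff = lhs - rhs;
    uint32_t n = diff >> 31;                           /* negative */
    uint32_t z = (diff == 0);                          /* zero */
    uint32_t c = (lhs >= rhs);                         /* carry = no borrow */
    uint32_t v = ((lhs ^ rhs) & (lhs ^ diff)) >> 31;   /* signed overflow */
    return (n << 3) | (z << 2) | (c << 1) | v;         /* NZCV nibble */
}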
-let Defs = [CPSR] in { +let isCompare = 1, Defs = [CPSR] in { multiclass T2I_cmp_irs<bits<4> opcod, string opc, PatFrag opnode> { // shifted imm def ri : T2I<(outs), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iCMPi, @@ -527,9 +541,9 @@ multiclass T2I_cmp_irs<bits<4> opcod, string opc, PatFrag opnode> { let Inst{11-8} = 0b1111; // Rd } // register - def rr : T2I<(outs), (ins GPR:$lhs, GPR:$rhs), IIC_iCMPr, + def rr : T2I<(outs), (ins GPR:$lhs, rGPR:$rhs), IIC_iCMPr, opc, ".w\t$lhs, $rhs", - [(opnode GPR:$lhs, GPR:$rhs)]> { + [(opnode GPR:$lhs, rGPR:$rhs)]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -639,9 +653,9 @@ multiclass T2I_st<bits<2> opcod, string opc, PatFrag opnode> { /// T2I_unary_rrot - A unary operation with two forms: one whose operand is a /// register and one whose operand is a register rotated by 8/16/24. multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> { - def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, + def r : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr, opc, ".w\t$dst, $src", - [(set GPR:$dst, (opnode GPR:$src))]> { + [(set rGPR:$dst, (opnode rGPR:$src))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; let Inst{22-20} = opcod; @@ -650,9 +664,9 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> { let Inst{7} = 1; let Inst{5-4} = 0b00; // rotate } - def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi, + def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iUNAsi, opc, ".w\t$dst, $src, ror $rot", - [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]> { + [(set rGPR:$dst, (opnode (rotr rGPR:$src, rot_imm:$rot)))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; let Inst{22-20} = opcod; @@ -665,9 +679,9 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> { // UXTB16 - Requres T2ExtractPack, does not need the .w qualifier. multiclass T2I_unary_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> { - def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, + def r : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr, opc, "\t$dst, $src", - [(set GPR:$dst, (opnode GPR:$src))]>, + [(set rGPR:$dst, (opnode rGPR:$src))]>, Requires<[HasT2ExtractPack]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; @@ -677,9 +691,9 @@ multiclass T2I_unary_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> { let Inst{7} = 1; let Inst{5-4} = 0b00; // rotate } - def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi, + def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iUNAsi, opc, "\t$dst, $src, ror $rot", - [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]>, + [(set rGPR:$dst, (opnode (rotr rGPR:$src, rot_imm:$rot)))]>, Requires<[HasT2ExtractPack]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; @@ -694,7 +708,7 @@ multiclass T2I_unary_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> { // SXTB16 - Requres T2ExtractPack, does not need the .w qualifier, no pattern // supported yet. 
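Since the SXTB16 forms noted above still have no selection pattern, here is a rough C sketch of the architectural behaviour they encode (illustration only, not code from this patch; the helper name sxtb16 is invented, and the byte narrowing assumes the usual two's-complement conversion):

#include <stdint.h>

/* Illustration only: sxtb16 sign-extends bytes 0 and 2 of the (optionally
   rotated) source into halfwords 0 and 1 of the result. rot is 0/8/16/24. */
static uint32_t sxtb16(uint32_t rm, unsigned rot) {
    uint32_t r = rot ? (rm >> rot) | (rm << (32 - rot)) : rm;   /* rotate right */
    uint32_t lo = (uint32_t)(int8_t)(r & 0xFF) & 0xFFFFu;
    uint32_t hi = (uint32_t)(int8_t)((r >> 16) & 0xFF) & 0xFFFFu;
    return lo | (hi << 16);
}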
multiclass T2I_unary_rrot_sxtb16<bits<3> opcod, string opc> { - def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, + def r : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr, opc, "\t$dst, $src", []> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; @@ -704,7 +718,7 @@ multiclass T2I_unary_rrot_sxtb16<bits<3> opcod, string opc> { let Inst{7} = 1; let Inst{5-4} = 0b00; // rotate } - def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi, + def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iUNAsi, opc, "\t$dst, $src, ror $rot", []> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; @@ -719,9 +733,9 @@ multiclass T2I_unary_rrot_sxtb16<bits<3> opcod, string opc> { /// T2I_bin_rrot - A binary operation with two forms: one whose operand is a /// register and one whose operand is a register rotated by 8/16/24. multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> { - def rr : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS), IIC_iALUr, + def rr : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS), IIC_iALUr, opc, "\t$dst, $LHS, $RHS", - [(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]>, + [(set rGPR:$dst, (opnode rGPR:$LHS, rGPR:$RHS))]>, Requires<[HasT2ExtractPack]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; @@ -730,10 +744,10 @@ multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> { let Inst{7} = 1; let Inst{5-4} = 0b00; // rotate } - def rr_rot : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot), + def rr_rot : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS, i32imm:$rot), IIC_iALUsr, opc, "\t$dst, $LHS, $RHS, ror $rot", - [(set GPR:$dst, (opnode GPR:$LHS, - (rotr GPR:$RHS, rot_imm:$rot)))]>, + [(set rGPR:$dst, (opnode rGPR:$LHS, + (rotr rGPR:$RHS, rot_imm:$rot)))]>, Requires<[HasT2ExtractPack]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; @@ -747,7 +761,7 @@ multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> { // DO variant - disassembly only, no pattern multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> { - def rr : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS), IIC_iALUr, + def rr : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS), IIC_iALUr, opc, "\t$dst, $LHS, $RHS", []> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; @@ -756,7 +770,7 @@ multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> { let Inst{7} = 1; let Inst{5-4} = 0b00; // rotate } - def rr_rot : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot), + def rr_rot : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS, i32imm:$rot), IIC_iALUsr, opc, "\t$dst, $LHS, $RHS, ror $rot", []> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; @@ -779,8 +793,8 @@ multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> { // assembler. 
let neverHasSideEffects = 1 in { let isReMaterializable = 1 in -def t2LEApcrel : T2XI<(outs GPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi, - "adr$p.w\t$dst, #$label", []> { +def t2LEApcrel : T2XI<(outs rGPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi, + "adr${p}.w\t$dst, #$label", []> { let Inst{31-27} = 0b11110; let Inst{25-24} = 0b10; // Inst{23:21} = '11' (add = FALSE) or '00' (add = TRUE) @@ -790,9 +804,9 @@ def t2LEApcrel : T2XI<(outs GPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi, let Inst{15} = 0; } } // neverHasSideEffects -def t2LEApcrelJT : T2XI<(outs GPR:$dst), +def t2LEApcrelJT : T2XI<(outs rGPR:$dst), (ins i32imm:$label, nohash_imm:$id, pred:$p), IIC_iALUi, - "adr$p.w\t$dst, #${label}_${id}", []> { + "adr${p}.w\t$dst, #${label}_${id}", []> { let Inst{31-27} = 0b11110; let Inst{25-24} = 0b10; // Inst{23:21} = '11' (add = FALSE) or '00' (add = TRUE) @@ -866,9 +880,9 @@ def t2SUBrSPs : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), } // Signed and unsigned division on v7-M -def t2SDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi, +def t2SDIV : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iALUi, "sdiv", "\t$dst, $a, $b", - [(set GPR:$dst, (sdiv GPR:$a, GPR:$b))]>, + [(set rGPR:$dst, (sdiv rGPR:$a, rGPR:$b))]>, Requires<[HasDivide]> { let Inst{31-27} = 0b11111; let Inst{26-21} = 0b011100; @@ -877,9 +891,9 @@ def t2SDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi, let Inst{7-4} = 0b1111; } -def t2UDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi, +def t2UDIV : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iALUi, "udiv", "\t$dst, $a, $b", - [(set GPR:$dst, (udiv GPR:$a, GPR:$b))]>, + [(set rGPR:$dst, (udiv rGPR:$a, rGPR:$b))]>, Requires<[HasDivide]> { let Inst{31-27} = 0b11111; let Inst{26-21} = 0b011101; @@ -888,17 +902,6 @@ def t2UDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi, let Inst{7-4} = 0b1111; } -// Pseudo instruction that will expand into a t2SUBrSPi + a copy. -let usesCustomInserter = 1 in { // Expanded after instruction selection. -def t2SUBrSPi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), - NoItinerary, "${:comment} sub.w\t$dst, $sp, $imm", []>; -def t2SUBrSPi12_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm), - NoItinerary, "${:comment} subw\t$dst, $sp, $imm", []>; -def t2SUBrSPs_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), - NoItinerary, "${:comment} sub\t$dst, $sp, $rhs", []>; -} // usesCustomInserter - - //===----------------------------------------------------------------------===// // Load / store Instructions. // @@ -917,10 +920,10 @@ defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", UnOpFrag<(sextloadi8 node:$Src)>>; let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // Load doubleword -def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs GPR:$dst1, GPR:$dst2), +def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$dst1, rGPR:$dst2), (ins t2addrmode_imm8s4:$addr), IIC_iLoadi, "ldrd", "\t$dst1, $addr", []>; -def t2LDRDpci : T2Ii8s4<1, 0, 1, (outs GPR:$dst1, GPR:$dst2), +def t2LDRDpci : T2Ii8s4<1, 0, 1, (outs rGPR:$dst1, rGPR:$dst2), (ins i32imm:$addr), IIC_iLoadi, "ldrd", "\t$dst1, $addr", []> { let Inst{19-16} = 0b1111; // Rn @@ -967,6 +970,11 @@ def : T2Pat<(extloadi16 t2addrmode_so_reg:$addr), def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)), (t2LDRHpci tconstpool:$addr)>; +// FIXME: The destination register of the loads and stores can't be PC, but +// can be SP. We need another regclass (similar to rGPR) to represent +// that. 
Not a pressing issue since these are selected manually, +// not via pattern. + // Indexed loads let mayLoad = 1, neverHasSideEffects = 1 in { def t2LDR_PRE : T2Iidxldst<0, 0b10, 1, 1, (outs GPR:$dst, GPR:$base_wb), @@ -1286,9 +1294,9 @@ def t2MOVr : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr, // AddedComplexity to ensure isel tries t2MOVi before t2MOVi16. let isReMaterializable = 1, isAsCheapAsAMove = 1, AddedComplexity = 1 in -def t2MOVi : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi, +def t2MOVi : T2sI<(outs rGPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi, "mov", ".w\t$dst, $src", - [(set GPR:$dst, t2_so_imm:$src)]> { + [(set rGPR:$dst, t2_so_imm:$src)]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = 0b0010; @@ -1298,9 +1306,9 @@ def t2MOVi : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi, } let isReMaterializable = 1, isAsCheapAsAMove = 1 in -def t2MOVi16 : T2I<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi, +def t2MOVi16 : T2I<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVi, "movw", "\t$dst, $src", - [(set GPR:$dst, imm0_65535:$src)]> { + [(set rGPR:$dst, imm0_65535:$src)]> { let Inst{31-27} = 0b11110; let Inst{25} = 1; let Inst{24-21} = 0b0010; @@ -1309,10 +1317,10 @@ def t2MOVi16 : T2I<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi, } let Constraints = "$src = $dst" in -def t2MOVTi16 : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm), IIC_iMOVi, +def t2MOVTi16 : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$imm), IIC_iMOVi, "movt", "\t$dst, $imm", - [(set GPR:$dst, - (or (and GPR:$src, 0xffff), lo16AllZero:$imm))]> { + [(set rGPR:$dst, + (or (and rGPR:$src, 0xffff), lo16AllZero:$imm))]> { let Inst{31-27} = 0b11110; let Inst{25} = 1; let Inst{24-21} = 0b0110; @@ -1320,7 +1328,7 @@ def t2MOVTi16 : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm), IIC_iMOVi, let Inst{15} = 0; } -def : T2Pat<(or GPR:$src, 0xffff0000), (t2MOVTi16 GPR:$src, 0xffff)>; +def : T2Pat<(or rGPR:$src, 0xffff0000), (t2MOVTi16 rGPR:$src, 0xffff)>; //===----------------------------------------------------------------------===// // Extend Instructions. @@ -1352,10 +1360,14 @@ defm t2UXTH : T2I_unary_rrot<0b001, "uxth", defm t2UXTB16 : T2I_unary_rrot_uxtb16<0b011, "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>; -def : T2Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF), - (t2UXTB16r_rot GPR:$Src, 24)>, Requires<[HasT2ExtractPack]>; -def : T2Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF), - (t2UXTB16r_rot GPR:$Src, 8)>, Requires<[HasT2ExtractPack]>; +// FIXME: This pattern incorrectly assumes the shl operator is a rotate. +// The transformation should probably be done as a combiner action +// instead so we can include a check for masking back in the upper +// eight bits of the source into the lower eight bits of the result. 
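To make the FIXME above concrete: uxtb16 with ror #24 rotates, so the byte pushed out of the top of the source wraps into byte 0 of the result, whereas a plain shl discards it; that is why the first pattern is commented out below. A rough C sketch, illustration only (the helper name uxtb16_ror is invented here):

#include <stdint.h>

/* Illustration only: rotate right by 8/16/24, then keep bytes 0 and 2. */
static uint32_t uxtb16_ror(uint32_t rm, unsigned rot) {
    uint32_t r = (rm >> rot) | (rm << (32 - rot));
    return r & 0x00FF00FFu;
}
/* Example of the mismatch: rm = 0xAA000000
     (rm << 8) & 0x00FF00FF == 0x00000000
     uxtb16_ror(rm, 24)     == 0x000000AA  -- the removed pattern would
   have selected the second form for the first computation. */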
+//def : T2Pat<(and (shl rGPR:$Src, (i32 8)), 0xFF00FF), +// (t2UXTB16r_rot rGPR:$Src, 24)>, Requires<[HasT2ExtractPack]>; +def : T2Pat<(and (srl rGPR:$Src, (i32 8)), 0xFF00FF), + (t2UXTB16r_rot rGPR:$Src, 8)>, Requires<[HasT2ExtractPack]>; defm t2UXTAB : T2I_bin_rrot<0b101, "uxtab", BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>; @@ -1389,7 +1401,7 @@ defm t2SBCS : T2I_adde_sube_s_irs<0b1011, "sbc", BinOpFrag<(sube_live_carry node:$LHS, node:$RHS)>>; // RSB -defm t2RSB : T2I_rbin_is <0b1110, "rsb", +defm t2RSB : T2I_rbin_irs <0b1110, "rsb", BinOpFrag<(sub node:$LHS, node:$RHS)>>; defm t2RSBS : T2I_rbin_s_is <0b1110, "rsb", BinOpFrag<(subc node:$LHS, node:$RHS)>>; @@ -1409,18 +1421,18 @@ def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm), def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm), (t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>; let AddedComplexity = 1 in -def : T2Pat<(addc GPR:$src, imm0_255_neg:$imm), - (t2SUBSri GPR:$src, imm0_255_neg:$imm)>; -def : T2Pat<(addc GPR:$src, t2_so_imm_neg:$imm), - (t2SUBSri GPR:$src, t2_so_imm_neg:$imm)>; +def : T2Pat<(addc rGPR:$src, imm0_255_neg:$imm), + (t2SUBSri rGPR:$src, imm0_255_neg:$imm)>; +def : T2Pat<(addc rGPR:$src, t2_so_imm_neg:$imm), + (t2SUBSri rGPR:$src, t2_so_imm_neg:$imm)>; // The with-carry-in form matches bitwise not instead of the negation. // Effectively, the inverse interpretation of the carry flag already accounts // for part of the negation. let AddedComplexity = 1 in -def : T2Pat<(adde GPR:$src, imm0_255_not:$imm), - (t2SBCSri GPR:$src, imm0_255_not:$imm)>; -def : T2Pat<(adde GPR:$src, t2_so_imm_not:$imm), - (t2SBCSri GPR:$src, t2_so_imm_not:$imm)>; +def : T2Pat<(adde rGPR:$src, imm0_255_not:$imm), + (t2SBCSri rGPR:$src, imm0_255_not:$imm)>; +def : T2Pat<(adde rGPR:$src, t2_so_imm_not:$imm), + (t2SBCSri rGPR:$src, t2_so_imm_not:$imm)>; // Select Bytes -- for disassembly only @@ -1437,9 +1449,10 @@ def t2SEL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), NoItinerary, "sel", // A6.3.13, A6.3.14, A6.3.15 Parallel addition and subtraction (signed/unsigned) // And Miscellaneous operations -- for disassembly only -class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc> - : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), NoItinerary, opc, - "\t$dst, $a, $b", [/* For disassembly only; pattern left blank */]> { +class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc, + list<dag> pat = [/* For disassembly only; pattern left blank */]> + : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), NoItinerary, opc, + "\t$dst, $a, $b", pat> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0101; let Inst{22-20} = op22_20; @@ -1449,14 +1462,16 @@ class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc> // Saturating add/subtract -- for disassembly only -def t2QADD : T2I_pam<0b000, 0b1000, "qadd">; +def t2QADD : T2I_pam<0b000, 0b1000, "qadd", + [(set rGPR:$dst, (int_arm_qadd rGPR:$a, rGPR:$b))]>; def t2QADD16 : T2I_pam<0b001, 0b0001, "qadd16">; def t2QADD8 : T2I_pam<0b000, 0b0001, "qadd8">; def t2QASX : T2I_pam<0b010, 0b0001, "qasx">; def t2QDADD : T2I_pam<0b000, 0b1001, "qdadd">; def t2QDSUB : T2I_pam<0b000, 0b1011, "qdsub">; def t2QSAX : T2I_pam<0b110, 0b0001, "qsax">; -def t2QSUB : T2I_pam<0b000, 0b1010, "qsub">; +def t2QSUB : T2I_pam<0b000, 0b1010, "qsub", + [(set rGPR:$dst, (int_arm_qsub rGPR:$a, rGPR:$b))]>; def t2QSUB16 : T2I_pam<0b101, 0b0001, "qsub16">; def t2QSUB8 : T2I_pam<0b100, 0b0001, "qsub8">; def t2UQADD16 : T2I_pam<0b001, 0b0101, "uqadd16">; @@ -1498,37 +1513,27 @@ def t2UHSUB8 : T2I_pam<0b100, 0b0110, "uhsub8">; // Unsigned Sum of Absolute 
Differences [and Accumulate] -- for disassembly only -def t2USAD8 : T2I_mac<0, 0b111, 0b0000, (outs GPR:$dst), (ins GPR:$a, GPR:$b), +def t2USAD8 : T2I_mac<0, 0b111, 0b0000, (outs rGPR:$dst), + (ins rGPR:$a, rGPR:$b), NoItinerary, "usad8", "\t$dst, $a, $b", []> { let Inst{15-12} = 0b1111; } -def t2USADA8 : T2I_mac<0, 0b111, 0b0000, (outs GPR:$dst), - (ins GPR:$a, GPR:$b, GPR:$acc), NoItinerary, "usada8", +def t2USADA8 : T2I_mac<0, 0b111, 0b0000, (outs rGPR:$dst), + (ins rGPR:$a, rGPR:$b, rGPR:$acc), NoItinerary, "usada8", "\t$dst, $a, $b, $acc", []>; // Signed/Unsigned saturate -- for disassembly only -def t2SSATlsl : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt), - NoItinerary, "ssat", "\t$dst, $bit_pos, $a, lsl $shamt", - [/* For disassembly only; pattern left blank */]> { - let Inst{31-27} = 0b11110; - let Inst{25-22} = 0b1100; - let Inst{20} = 0; - let Inst{15} = 0; - let Inst{21} = 0; // sh = '0' -} - -def t2SSATasr : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt), - NoItinerary, "ssat", "\t$dst, $bit_pos, $a, asr $shamt", - [/* For disassembly only; pattern left blank */]> { +def t2SSAT: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a, shift_imm:$sh), + NoItinerary, "ssat", "\t$dst, $bit_pos, $a$sh", + [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11110; let Inst{25-22} = 0b1100; let Inst{20} = 0; let Inst{15} = 0; - let Inst{21} = 1; // sh = '1' } -def t2SSAT16 : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), NoItinerary, +def t2SSAT16: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a), NoItinerary, "ssat16", "\t$dst, $bit_pos, $a", [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11110; @@ -1540,27 +1545,16 @@ def t2SSAT16 : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), NoItinerary, let Inst{7-6} = 0b00; // imm2 = '00' } -def t2USATlsl : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt), - NoItinerary, "usat", "\t$dst, $bit_pos, $a, lsl $shamt", - [/* For disassembly only; pattern left blank */]> { - let Inst{31-27} = 0b11110; - let Inst{25-22} = 0b1110; - let Inst{20} = 0; - let Inst{15} = 0; - let Inst{21} = 0; // sh = '0' -} - -def t2USATasr : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt), - NoItinerary, "usat", "\t$dst, $bit_pos, $a, asr $shamt", - [/* For disassembly only; pattern left blank */]> { +def t2USAT: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a, shift_imm:$sh), + NoItinerary, "usat", "\t$dst, $bit_pos, $a$sh", + [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11110; let Inst{25-22} = 0b1110; let Inst{20} = 0; let Inst{15} = 0; - let Inst{21} = 1; // sh = '1' } -def t2USAT16 : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), NoItinerary, +def t2USAT16: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a), NoItinerary, "usat16", "\t$dst, $bit_pos, $a", [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11110; @@ -1572,6 +1566,9 @@ def t2USAT16 : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), NoItinerary, let Inst{7-6} = 0b00; // imm2 = '00' } +def : T2Pat<(int_arm_ssat GPR:$a, imm:$pos), (t2SSAT imm:$pos, GPR:$a, 0)>; +def : T2Pat<(int_arm_usat GPR:$a, imm:$pos), (t2USAT imm:$pos, GPR:$a, 0)>; + //===----------------------------------------------------------------------===// // Shift and rotate Instructions. 
// @@ -1582,9 +1579,9 @@ defm t2ASR : T2I_sh_ir<0b10, "asr", BinOpFrag<(sra node:$LHS, node:$RHS)>>; defm t2ROR : T2I_sh_ir<0b11, "ror", BinOpFrag<(rotr node:$LHS, node:$RHS)>>; let Uses = [CPSR] in { -def t2MOVrx : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, +def t2MOVrx : T2sI<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVsi, "rrx", "\t$dst, $src", - [(set GPR:$dst, (ARMrrx GPR:$src))]> { + [(set rGPR:$dst, (ARMrrx rGPR:$src))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = 0b0010; @@ -1596,9 +1593,9 @@ def t2MOVrx : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, } let Defs = [CPSR] in { -def t2MOVsrl_flag : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, +def t2MOVsrl_flag : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVsi, "lsrs", ".w\t$dst, $src, #1", - [(set GPR:$dst, (ARMsrl_flag GPR:$src))]> { + [(set rGPR:$dst, (ARMsrl_flag rGPR:$src))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = 0b0010; @@ -1609,9 +1606,9 @@ def t2MOVsrl_flag : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, let Inst{14-12} = 0b000; let Inst{7-6} = 0b01; } -def t2MOVsra_flag : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, +def t2MOVsra_flag : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVsi, "asrs", ".w\t$dst, $src, #1", - [(set GPR:$dst, (ARMsra_flag GPR:$src))]> { + [(set rGPR:$dst, (ARMsra_flag rGPR:$src))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = 0b0010; @@ -1638,10 +1635,13 @@ defm t2EOR : T2I_bin_w_irs<0b0100, "eor", defm t2BIC : T2I_bin_w_irs<0b0001, "bic", BinOpFrag<(and node:$LHS, (not node:$RHS))>>; +defm t2ANDS : T2I_bin_s_irs<0b0000, "and", + BinOpFrag<(ARMand node:$LHS, node:$RHS)>, 1>; + let Constraints = "$src = $dst" in -def t2BFC : T2I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm), +def t2BFC : T2I<(outs rGPR:$dst), (ins rGPR:$src, bf_inv_mask_imm:$imm), IIC_iUNAsi, "bfc", "\t$dst, $imm", - [(set GPR:$dst, (and GPR:$src, bf_inv_mask_imm:$imm))]> { + [(set rGPR:$dst, (and rGPR:$src, bf_inv_mask_imm:$imm))]> { let Inst{31-27} = 0b11110; let Inst{25} = 1; let Inst{24-20} = 0b10110; @@ -1649,7 +1649,7 @@ def t2BFC : T2I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm), let Inst{15} = 0; } -def t2SBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width), +def t2SBFX: T2I<(outs rGPR:$dst), (ins rGPR:$src, imm0_31:$lsb, imm0_31:$width), IIC_iALUi, "sbfx", "\t$dst, $src, $lsb, $width", []> { let Inst{31-27} = 0b11110; let Inst{25} = 1; @@ -1657,7 +1657,7 @@ def t2SBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width), let Inst{15} = 0; } -def t2UBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width), +def t2UBFX: T2I<(outs rGPR:$dst), (ins rGPR:$src, imm0_31:$lsb, imm0_31:$width), IIC_iALUi, "ubfx", "\t$dst, $src, $lsb, $width", []> { let Inst{31-27} = 0b11110; let Inst{25} = 1; @@ -1666,10 +1666,12 @@ def t2UBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width), } // A8.6.18 BFI - Bitfield insert (Encoding T1) -// Added for disassembler with the pattern field purposely left blank. -// FIXME: Utilize this instruction in codgen. 
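The t2BFI change just below gives the instruction a real ARMbfi selection pattern using an inverted-mask operand. As a rough C sketch of what bitfield insert computes (illustration only, not code from this patch; the helper name bfi is invented, and bf_inv_mask_imm in the pattern carries the complement of the field mask shown here):

#include <stdint.h>

/* Illustration only: insert the low 'width' bits of rn into rd at 'lsb',
   leaving the remaining bits of rd unchanged. */
static uint32_t bfi(uint32_t rd, uint32_t rn, unsigned lsb, unsigned width) {
    uint32_t mask = (width >= 32 ? 0xFFFFFFFFu : ((1u << width) - 1u)) << lsb;
    return (rd & ~mask) | ((rn << lsb) & mask);
}
/* Example: bfi(0xFFFFFFFF, 0x5, 4, 4) == 0xFFFFFF5F */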
-def t2BFI : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width), - IIC_iALUi, "bfi", "\t$dst, $src, $lsb, $width", []> { +let Constraints = "$src = $dst" in +def t2BFI : T2I<(outs rGPR:$dst), + (ins rGPR:$src, rGPR:$val, bf_inv_mask_imm:$imm), + IIC_iALUi, "bfi", "\t$dst, $val, $imm", + [(set rGPR:$dst, (ARMbfi rGPR:$src, rGPR:$val, + bf_inv_mask_imm:$imm))]> { let Inst{31-27} = 0b11110; let Inst{25} = 1; let Inst{24-20} = 0b10110; @@ -1677,19 +1679,20 @@ def t2BFI : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width), } defm t2ORN : T2I_bin_irs<0b0011, "orn", BinOpFrag<(or node:$LHS, - (not node:$RHS))>>; + (not node:$RHS))>, 0, "">; // Prefer over of t2EORri ra, rb, -1 because mvn has 16-bit version let AddedComplexity = 1 in defm t2MVN : T2I_un_irs <0b0011, "mvn", UnOpFrag<(not node:$Src)>, 1, 1>; -def : T2Pat<(and GPR:$src, t2_so_imm_not:$imm), - (t2BICri GPR:$src, t2_so_imm_not:$imm)>; +let AddedComplexity = 1 in +def : T2Pat<(and rGPR:$src, t2_so_imm_not:$imm), + (t2BICri rGPR:$src, t2_so_imm_not:$imm)>; // FIXME: Disable this pattern on Darwin to workaround an assembler bug. -def : T2Pat<(or GPR:$src, t2_so_imm_not:$imm), - (t2ORNri GPR:$src, t2_so_imm_not:$imm)>, +def : T2Pat<(or rGPR:$src, t2_so_imm_not:$imm), + (t2ORNri rGPR:$src, t2_so_imm_not:$imm)>, Requires<[IsThumb2]>; def : T2Pat<(t2_so_imm_not:$src), @@ -1699,9 +1702,9 @@ def : T2Pat<(t2_so_imm_not:$src), // Multiply Instructions. // let isCommutable = 1 in -def t2MUL: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, +def t2MUL: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32, "mul", "\t$dst, $a, $b", - [(set GPR:$dst, (mul GPR:$a, GPR:$b))]> { + [(set rGPR:$dst, (mul rGPR:$a, rGPR:$b))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b000; @@ -1709,9 +1712,9 @@ def t2MUL: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, let Inst{7-4} = 0b0000; // Multiply } -def t2MLA: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, +def t2MLA: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32, "mla", "\t$dst, $a, $b, $c", - [(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]> { + [(set rGPR:$dst, (add (mul rGPR:$a, rGPR:$b), rGPR:$c))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b000; @@ -1719,9 +1722,9 @@ def t2MLA: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, let Inst{7-4} = 0b0000; // Multiply } -def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, +def t2MLS: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32, "mls", "\t$dst, $a, $b, $c", - [(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]> { + [(set rGPR:$dst, (sub rGPR:$c, (mul rGPR:$a, rGPR:$b)))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b000; @@ -1732,7 +1735,8 @@ def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, // Extra precision multiplies with low / high results let neverHasSideEffects = 1 in { let isCommutable = 1 in { -def t2SMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMUL64, +def t2SMULL : T2I<(outs rGPR:$ldst, rGPR:$hdst), + (ins rGPR:$a, rGPR:$b), IIC_iMUL64, "smull", "\t$ldst, $hdst, $a, $b", []> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0111; @@ -1740,7 +1744,8 @@ def t2SMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMUL64, let Inst{7-4} = 0b0000; } -def t2UMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMUL64, +def t2UMULL : T2I<(outs rGPR:$ldst, 
rGPR:$hdst), + (ins rGPR:$a, rGPR:$b), IIC_iMUL64, "umull", "\t$ldst, $hdst, $a, $b", []> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0111; @@ -1750,7 +1755,8 @@ def t2UMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMUL64, } // isCommutable // Multiply + accumulate -def t2SMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64, +def t2SMLAL : T2I<(outs rGPR:$ldst, rGPR:$hdst), + (ins rGPR:$a, rGPR:$b), IIC_iMAC64, "smlal", "\t$ldst, $hdst, $a, $b", []>{ let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0111; @@ -1758,7 +1764,8 @@ def t2SMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64, let Inst{7-4} = 0b0000; } -def t2UMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64, +def t2UMLAL : T2I<(outs rGPR:$ldst, rGPR:$hdst), + (ins rGPR:$a, rGPR:$b), IIC_iMAC64, "umlal", "\t$ldst, $hdst, $a, $b", []>{ let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0111; @@ -1766,7 +1773,8 @@ def t2UMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64, let Inst{7-4} = 0b0000; } -def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64, +def t2UMAAL : T2I<(outs rGPR:$ldst, rGPR:$hdst), + (ins rGPR:$a, rGPR:$b), IIC_iMAC64, "umaal", "\t$ldst, $hdst, $a, $b", []>{ let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0111; @@ -1778,9 +1786,9 @@ def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64, // Rounding variants of the below included for disassembly only // Most significant word multiply -def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, +def t2SMMUL : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32, "smmul", "\t$dst, $a, $b", - [(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]> { + [(set rGPR:$dst, (mulhs rGPR:$a, rGPR:$b))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b101; @@ -1788,7 +1796,7 @@ def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0) } -def t2SMMULR : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, +def t2SMMULR : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32, "smmulr", "\t$dst, $a, $b", []> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; @@ -1797,9 +1805,9 @@ def t2SMMULR : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1) } -def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, +def t2SMMLA : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32, "smmla", "\t$dst, $a, $b, $c", - [(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]> { + [(set rGPR:$dst, (add (mulhs rGPR:$a, rGPR:$b), rGPR:$c))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b101; @@ -1807,7 +1815,7 @@ def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0) } -def t2SMMLAR : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, +def t2SMMLAR: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32, "smmlar", "\t$dst, $a, $b, $c", []> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; @@ -1816,9 +1824,9 @@ def t2SMMLAR : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1) } -def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, +def t2SMMLS: T2I <(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32, "smmls", "\t$dst, $a, $b, $c", - [(set 
GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]> { + [(set rGPR:$dst, (sub rGPR:$c, (mulhs rGPR:$a, rGPR:$b)))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b110; @@ -1826,7 +1834,7 @@ def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0) } -def t2SMMLSR : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, +def t2SMMLSR:T2I <(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32, "smmlsr", "\t$dst, $a, $b, $c", []> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; @@ -1836,10 +1844,10 @@ def t2SMMLSR : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, } multiclass T2I_smul<string opc, PatFrag opnode> { - def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, + def BB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32, !strconcat(opc, "bb"), "\t$dst, $a, $b", - [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), - (sext_inreg GPR:$b, i16)))]> { + [(set rGPR:$dst, (opnode (sext_inreg rGPR:$a, i16), + (sext_inreg rGPR:$b, i16)))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -1848,10 +1856,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> { let Inst{5-4} = 0b00; } - def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, + def BT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32, !strconcat(opc, "bt"), "\t$dst, $a, $b", - [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), - (sra GPR:$b, (i32 16))))]> { + [(set rGPR:$dst, (opnode (sext_inreg rGPR:$a, i16), + (sra rGPR:$b, (i32 16))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -1860,10 +1868,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> { let Inst{5-4} = 0b01; } - def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, + def TB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32, !strconcat(opc, "tb"), "\t$dst, $a, $b", - [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)), - (sext_inreg GPR:$b, i16)))]> { + [(set rGPR:$dst, (opnode (sra rGPR:$a, (i32 16)), + (sext_inreg rGPR:$b, i16)))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -1872,10 +1880,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> { let Inst{5-4} = 0b10; } - def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, + def TT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32, !strconcat(opc, "tt"), "\t$dst, $a, $b", - [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)), - (sra GPR:$b, (i32 16))))]> { + [(set rGPR:$dst, (opnode (sra rGPR:$a, (i32 16)), + (sra rGPR:$b, (i32 16))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -1884,10 +1892,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> { let Inst{5-4} = 0b11; } - def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL16, + def WB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL16, !strconcat(opc, "wb"), "\t$dst, $a, $b", - [(set GPR:$dst, (sra (opnode GPR:$a, - (sext_inreg GPR:$b, i16)), (i32 16)))]> { + [(set rGPR:$dst, (sra (opnode rGPR:$a, + (sext_inreg rGPR:$b, i16)), (i32 16)))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; @@ -1896,10 +1904,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> { let Inst{5-4} = 0b00; } - def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL16, + def WT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL16, !strconcat(opc, "wt"), "\t$dst, $a, $b", - [(set 
GPR:$dst, (sra (opnode GPR:$a, - (sra GPR:$b, (i32 16))), (i32 16)))]> { + [(set rGPR:$dst, (sra (opnode rGPR:$a, + (sra rGPR:$b, (i32 16))), (i32 16)))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; @@ -1911,11 +1919,11 @@ multiclass T2I_smul<string opc, PatFrag opnode> { multiclass T2I_smla<string opc, PatFrag opnode> { - def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, + def BB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16, !strconcat(opc, "bb"), "\t$dst, $a, $b, $acc", - [(set GPR:$dst, (add GPR:$acc, - (opnode (sext_inreg GPR:$a, i16), - (sext_inreg GPR:$b, i16))))]> { + [(set rGPR:$dst, (add rGPR:$acc, + (opnode (sext_inreg rGPR:$a, i16), + (sext_inreg rGPR:$b, i16))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -1924,10 +1932,10 @@ multiclass T2I_smla<string opc, PatFrag opnode> { let Inst{5-4} = 0b00; } - def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, + def BT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16, !strconcat(opc, "bt"), "\t$dst, $a, $b, $acc", - [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16), - (sra GPR:$b, (i32 16)))))]> { + [(set rGPR:$dst, (add rGPR:$acc, (opnode (sext_inreg rGPR:$a, i16), + (sra rGPR:$b, (i32 16)))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -1936,10 +1944,10 @@ multiclass T2I_smla<string opc, PatFrag opnode> { let Inst{5-4} = 0b01; } - def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, + def TB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16, !strconcat(opc, "tb"), "\t$dst, $a, $b, $acc", - [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)), - (sext_inreg GPR:$b, i16))))]> { + [(set rGPR:$dst, (add rGPR:$acc, (opnode (sra rGPR:$a, (i32 16)), + (sext_inreg rGPR:$b, i16))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -1948,10 +1956,10 @@ multiclass T2I_smla<string opc, PatFrag opnode> { let Inst{5-4} = 0b10; } - def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, + def TT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16, !strconcat(opc, "tt"), "\t$dst, $a, $b, $acc", - [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)), - (sra GPR:$b, (i32 16)))))]> { + [(set rGPR:$dst, (add rGPR:$acc, (opnode (sra rGPR:$a, (i32 16)), + (sra rGPR:$b, (i32 16)))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -1960,10 +1968,10 @@ multiclass T2I_smla<string opc, PatFrag opnode> { let Inst{5-4} = 0b11; } - def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, + def WB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16, !strconcat(opc, "wb"), "\t$dst, $a, $b, $acc", - [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, - (sext_inreg GPR:$b, i16)), (i32 16))))]> { + [(set rGPR:$dst, (add rGPR:$acc, (sra (opnode rGPR:$a, + (sext_inreg rGPR:$b, i16)), (i32 16))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; @@ -1972,10 +1980,10 @@ multiclass T2I_smla<string opc, PatFrag opnode> { let Inst{5-4} = 0b00; } - def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, + def WT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16, !strconcat(opc, "wt"), "\t$dst, $a, $b, $acc", - [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, - (sra GPR:$b, (i32 16))), 
(i32 16))))]> { + [(set rGPR:$dst, (add rGPR:$acc, (sra (opnode rGPR:$a, + (sra rGPR:$b, (i32 16))), (i32 16))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; @@ -1989,61 +1997,61 @@ defm t2SMUL : T2I_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>; defm t2SMLA : T2I_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>; // Halfword multiple accumulate long: SMLAL<x><y> -- for disassembly only -def t2SMLALBB : T2I_mac<1, 0b100, 0b1000, (outs GPR:$ldst,GPR:$hdst), - (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlalbb", "\t$ldst, $hdst, $a, $b", +def t2SMLALBB : T2I_mac<1, 0b100, 0b1000, (outs rGPR:$ldst,rGPR:$hdst), + (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlalbb", "\t$ldst, $hdst, $a, $b", [/* For disassembly only; pattern left blank */]>; -def t2SMLALBT : T2I_mac<1, 0b100, 0b1001, (outs GPR:$ldst,GPR:$hdst), - (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlalbt", "\t$ldst, $hdst, $a, $b", +def t2SMLALBT : T2I_mac<1, 0b100, 0b1001, (outs rGPR:$ldst,rGPR:$hdst), + (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlalbt", "\t$ldst, $hdst, $a, $b", [/* For disassembly only; pattern left blank */]>; -def t2SMLALTB : T2I_mac<1, 0b100, 0b1010, (outs GPR:$ldst,GPR:$hdst), - (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlaltb", "\t$ldst, $hdst, $a, $b", +def t2SMLALTB : T2I_mac<1, 0b100, 0b1010, (outs rGPR:$ldst,rGPR:$hdst), + (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlaltb", "\t$ldst, $hdst, $a, $b", [/* For disassembly only; pattern left blank */]>; -def t2SMLALTT : T2I_mac<1, 0b100, 0b1011, (outs GPR:$ldst,GPR:$hdst), - (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlaltt", "\t$ldst, $hdst, $a, $b", +def t2SMLALTT : T2I_mac<1, 0b100, 0b1011, (outs rGPR:$ldst,rGPR:$hdst), + (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlaltt", "\t$ldst, $hdst, $a, $b", [/* For disassembly only; pattern left blank */]>; // Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD // These are for disassembly only. 
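The dual halfword multiplies listed in the comment above (and defined below with empty patterns) treat each register as a pair of signed 16-bit lanes. A rough C sketch of smuad, illustration only and ignoring the Q-flag overflow case (the helper name smuad is invented here):

#include <stdint.h>

/* Illustration only: multiply corresponding signed halfword lanes and add
   the products; smusd subtracts them, and smlad/smlsd add a third
   accumulator register on top. */
static int32_t smuad(uint32_t rn, uint32_t rm) {
    int32_t p0 = (int16_t)(rn & 0xFFFF) * (int16_t)(rm & 0xFFFF);  /* bottom lanes */
    int32_t p1 = (int16_t)(rn >> 16)    * (int16_t)(rm >> 16);     /* top lanes */
    return p0 + p1;   /* smusd would return p0 - p1 */
}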
-def t2SMUAD : T2I_mac<0, 0b010, 0b0000, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iMAC32, "smuad", "\t$dst, $a, $b", []> { +def t2SMUAD: T2I_mac<0, 0b010, 0b0000, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), + IIC_iMAC32, "smuad", "\t$dst, $a, $b", []> { let Inst{15-12} = 0b1111; } -def t2SMUADX : T2I_mac<0, 0b010, 0b0001, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iMAC32, "smuadx", "\t$dst, $a, $b", []> { +def t2SMUADX:T2I_mac<0, 0b010, 0b0001, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), + IIC_iMAC32, "smuadx", "\t$dst, $a, $b", []> { let Inst{15-12} = 0b1111; } -def t2SMUSD : T2I_mac<0, 0b100, 0b0000, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iMAC32, "smusd", "\t$dst, $a, $b", []> { +def t2SMUSD: T2I_mac<0, 0b100, 0b0000, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), + IIC_iMAC32, "smusd", "\t$dst, $a, $b", []> { let Inst{15-12} = 0b1111; } -def t2SMUSDX : T2I_mac<0, 0b100, 0b0001, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iMAC32, "smusdx", "\t$dst, $a, $b", []> { +def t2SMUSDX:T2I_mac<0, 0b100, 0b0001, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), + IIC_iMAC32, "smusdx", "\t$dst, $a, $b", []> { let Inst{15-12} = 0b1111; } -def t2SMLAD : T2I_mac<0, 0b010, 0b0000, (outs GPR:$dst), - (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smlad", +def t2SMLAD : T2I_mac<0, 0b010, 0b0000, (outs rGPR:$dst), + (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC32, "smlad", "\t$dst, $a, $b, $acc", []>; -def t2SMLADX : T2I_mac<0, 0b010, 0b0001, (outs GPR:$dst), - (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smladx", +def t2SMLADX : T2I_mac<0, 0b010, 0b0001, (outs rGPR:$dst), + (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC32, "smladx", "\t$dst, $a, $b, $acc", []>; -def t2SMLSD : T2I_mac<0, 0b100, 0b0000, (outs GPR:$dst), - (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smlsd", +def t2SMLSD : T2I_mac<0, 0b100, 0b0000, (outs rGPR:$dst), + (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC32, "smlsd", "\t$dst, $a, $b, $acc", []>; -def t2SMLSDX : T2I_mac<0, 0b100, 0b0001, (outs GPR:$dst), - (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smlsdx", +def t2SMLSDX : T2I_mac<0, 0b100, 0b0001, (outs rGPR:$dst), + (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC32, "smlsdx", "\t$dst, $a, $b, $acc", []>; -def t2SMLALD : T2I_mac<1, 0b100, 0b1100, (outs GPR:$ldst,GPR:$hdst), - (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlald", +def t2SMLALD : T2I_mac<1, 0b100, 0b1100, (outs rGPR:$ldst,rGPR:$hdst), + (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlald", "\t$ldst, $hdst, $a, $b", []>; -def t2SMLALDX : T2I_mac<1, 0b100, 0b1101, (outs GPR:$ldst,GPR:$hdst), - (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlaldx", +def t2SMLALDX : T2I_mac<1, 0b100, 0b1101, (outs rGPR:$ldst,rGPR:$hdst), + (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlaldx", "\t$ldst, $hdst, $a, $b", []>; -def t2SMLSLD : T2I_mac<1, 0b101, 0b1100, (outs GPR:$ldst,GPR:$hdst), - (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlsld", +def t2SMLSLD : T2I_mac<1, 0b101, 0b1100, (outs rGPR:$ldst,rGPR:$hdst), + (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlsld", "\t$ldst, $hdst, $a, $b", []>; -def t2SMLSLDX : T2I_mac<1, 0b101, 0b1101, (outs GPR:$ldst,GPR:$hdst), - (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlsldx", +def t2SMLSLDX : T2I_mac<1, 0b101, 0b1101, (outs rGPR:$ldst,rGPR:$hdst), + (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlsldx", "\t$ldst, $hdst, $a, $b", []>; //===----------------------------------------------------------------------===// @@ -2061,35 +2069,35 @@ class T2I_misc<bits<2> op1, bits<2> op2, dag oops, dag iops, let Inst{5-4} = op2; } -def t2CLZ : T2I_misc<0b11, 0b00, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, - "clz", "\t$dst, $src", [(set 
GPR:$dst, (ctlz GPR:$src))]>; +def t2CLZ : T2I_misc<0b11, 0b00, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr, + "clz", "\t$dst, $src", [(set rGPR:$dst, (ctlz rGPR:$src))]>; -def t2RBIT : T2I_misc<0b01, 0b10, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, +def t2RBIT : T2I_misc<0b01, 0b10, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr, "rbit", "\t$dst, $src", - [(set GPR:$dst, (ARMrbit GPR:$src))]>; + [(set rGPR:$dst, (ARMrbit rGPR:$src))]>; -def t2REV : T2I_misc<0b01, 0b00, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, - "rev", ".w\t$dst, $src", [(set GPR:$dst, (bswap GPR:$src))]>; +def t2REV : T2I_misc<0b01, 0b00, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr, + "rev", ".w\t$dst, $src", [(set rGPR:$dst, (bswap rGPR:$src))]>; -def t2REV16 : T2I_misc<0b01, 0b01, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, +def t2REV16 : T2I_misc<0b01, 0b01, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr, "rev16", ".w\t$dst, $src", - [(set GPR:$dst, - (or (and (srl GPR:$src, (i32 8)), 0xFF), - (or (and (shl GPR:$src, (i32 8)), 0xFF00), - (or (and (srl GPR:$src, (i32 8)), 0xFF0000), - (and (shl GPR:$src, (i32 8)), 0xFF000000)))))]>; + [(set rGPR:$dst, + (or (and (srl rGPR:$src, (i32 8)), 0xFF), + (or (and (shl rGPR:$src, (i32 8)), 0xFF00), + (or (and (srl rGPR:$src, (i32 8)), 0xFF0000), + (and (shl rGPR:$src, (i32 8)), 0xFF000000)))))]>; -def t2REVSH : T2I_misc<0b01, 0b11, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, +def t2REVSH : T2I_misc<0b01, 0b11, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr, "revsh", ".w\t$dst, $src", - [(set GPR:$dst, + [(set rGPR:$dst, (sext_inreg - (or (srl (and GPR:$src, 0xFF00), (i32 8)), - (shl GPR:$src, (i32 8))), i16))]>; + (or (srl (and rGPR:$src, 0xFF00), (i32 8)), + (shl rGPR:$src, (i32 8))), i16))]>; -def t2PKHBT : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), - IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, lsl $shamt", - [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF), - (and (shl GPR:$src2, (i32 imm:$shamt)), +def t2PKHBT : T2I<(outs rGPR:$dst), (ins rGPR:$src1, rGPR:$src2, shift_imm:$sh), + IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2$sh", + [(set rGPR:$dst, (or (and rGPR:$src1, 0xFFFF), + (and (shl rGPR:$src2, lsl_amt:$sh), 0xFFFF0000)))]>, Requires<[HasT2ExtractPack]> { let Inst{31-27} = 0b11101; @@ -2100,18 +2108,20 @@ def t2PKHBT : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), } // Alternate cases for PKHBT where identities eliminate some nodes. -def : T2Pat<(or (and GPR:$src1, 0xFFFF), (and GPR:$src2, 0xFFFF0000)), - (t2PKHBT GPR:$src1, GPR:$src2, 0)>, +def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (and rGPR:$src2, 0xFFFF0000)), + (t2PKHBT rGPR:$src1, rGPR:$src2, 0)>, Requires<[HasT2ExtractPack]>; -def : T2Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)), - (t2PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>, +def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (shl rGPR:$src2, imm16_31:$sh)), + (t2PKHBT rGPR:$src1, rGPR:$src2, (lsl_shift_imm imm16_31:$sh))>, Requires<[HasT2ExtractPack]>; -def t2PKHTB : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), - IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, asr $shamt", - [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000), - (and (sra GPR:$src2, imm16_31:$shamt), - 0xFFFF)))]>, +// Note: Shifts of 1-15 bits will be transformed to srl instead of sra and +// will match the pattern below. 
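The note above about 1-15 bit shifts becoming srl can be seen from the pack semantics: pkhtb takes the low 16 bits of an arithmetic right shift, and for shift amounts of 1-15 the bits that distinguish sra from srl are masked away, so the DAG canonicalizes to srl and the extra T2Pat below matches that form. A rough C sketch, illustration only (the helper name pkhtb is invented, and the signed shift assumes the usual arithmetic-shift behaviour):

#include <stdint.h>

/* Illustration only:
     pkhbt rd, rn, rm, lsl #sh : rd = (rn & 0xFFFF)     | ((rm << sh) & 0xFFFF0000)
     pkhtb rd, rn, rm, asr #sh : rd = (rn & 0xFFFF0000) | ((rm >> sh) & 0x0000FFFF) */
static uint32_t pkhtb(uint32_t rn, uint32_t rm, unsigned sh) {   /* sh in 1..31 */
    uint32_t lo = (uint32_t)((int32_t)rm >> sh) & 0xFFFFu;       /* asr, then mask */
    return (rn & 0xFFFF0000u) | lo;                              /* for sh <= 15 an
                                                                    srl gives the same lo */
}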
+def t2PKHTB : T2I<(outs rGPR:$dst), (ins rGPR:$src1, rGPR:$src2, shift_imm:$sh), + IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2$sh", + [(set rGPR:$dst, (or (and rGPR:$src1, 0xFFFF0000), + (and (sra rGPR:$src2, asr_amt:$sh), + 0xFFFF)))]>, Requires<[HasT2ExtractPack]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -2122,18 +2132,17 @@ def t2PKHTB : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), // Alternate cases for PKHTB where identities eliminate some nodes. Note that // a shift amount of 0 is *not legal* here, it is PKHBT instead. -def : T2Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, (i32 16))), - (t2PKHTB GPR:$src1, GPR:$src2, 16)>, +def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (srl rGPR:$src2, imm16_31:$sh)), + (t2PKHTB rGPR:$src1, rGPR:$src2, (asr_shift_imm imm16_31:$sh))>, Requires<[HasT2ExtractPack]>; -def : T2Pat<(or (and GPR:$src1, 0xFFFF0000), - (and (srl GPR:$src2, imm1_15:$shamt), 0xFFFF)), - (t2PKHTB GPR:$src1, GPR:$src2, imm1_15:$shamt)>, +def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), + (and (srl rGPR:$src2, imm1_15:$sh), 0xFFFF)), + (t2PKHTB rGPR:$src1, rGPR:$src2, (asr_shift_imm imm1_15:$sh))>, Requires<[HasT2ExtractPack]>; //===----------------------------------------------------------------------===// // Comparison Instructions... // - defm t2CMP : T2I_cmp_irs<0b1101, "cmp", BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>; defm t2CMPz : T2I_cmp_irs<0b1101, "cmp", @@ -2157,18 +2166,13 @@ defm t2TST : T2I_cmp_irs<0b0000, "tst", defm t2TEQ : T2I_cmp_irs<0b0100, "teq", BinOpFrag<(ARMcmpZ (xor node:$LHS, node:$RHS), 0)>>; -// A8.6.27 CBNZ, CBZ - Compare and branch on (non)zero. -// Short range conditional branch. Looks awesome for loops. Need to figure -// out how to use this one. - - // Conditional moves // FIXME: should be able to write a pattern for ARMcmov, but can't use // a two-value operand where a dag node expects two operands. :( let neverHasSideEffects = 1 in { -def t2MOVCCr : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true), IIC_iCMOVr, +def t2MOVCCr : T2I<(outs rGPR:$dst), (ins rGPR:$false, rGPR:$true), IIC_iCMOVr, "mov", ".w\t$dst, $true", - [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>, + [/*(set rGPR:$dst, (ARMcmov rGPR:$false, rGPR:$true, imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $dst"> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -2179,9 +2183,9 @@ def t2MOVCCr : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true), IIC_iCMOVr, let Inst{7-4} = 0b0000; } -def t2MOVCCi : T2I<(outs GPR:$dst), (ins GPR:$false, t2_so_imm:$true), +def t2MOVCCi : T2I<(outs rGPR:$dst), (ins rGPR:$false, t2_so_imm:$true), IIC_iCMOVi, "mov", ".w\t$dst, $true", -[/*(set GPR:$dst, (ARMcmov GPR:$false, t2_so_imm:$true, imm:$cc, CCR:$ccr))*/]>, +[/*(set rGPR:$dst,(ARMcmov rGPR:$false,t2_so_imm:$true, imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $dst"> { let Inst{31-27} = 0b11110; let Inst{25} = 0; @@ -2201,20 +2205,20 @@ class T2I_movcc_sh<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, let Inst{19-16} = 0b1111; // Rn let Inst{5-4} = opcod; // Shift type. 
} -def t2MOVCClsl : T2I_movcc_sh<0b00, (outs GPR:$dst), - (ins GPR:$false, GPR:$true, i32imm:$rhs), +def t2MOVCClsl : T2I_movcc_sh<0b00, (outs rGPR:$dst), + (ins rGPR:$false, rGPR:$true, i32imm:$rhs), IIC_iCMOVsi, "lsl", ".w\t$dst, $true, $rhs", []>, RegConstraint<"$false = $dst">; -def t2MOVCClsr : T2I_movcc_sh<0b01, (outs GPR:$dst), - (ins GPR:$false, GPR:$true, i32imm:$rhs), +def t2MOVCClsr : T2I_movcc_sh<0b01, (outs rGPR:$dst), + (ins rGPR:$false, rGPR:$true, i32imm:$rhs), IIC_iCMOVsi, "lsr", ".w\t$dst, $true, $rhs", []>, RegConstraint<"$false = $dst">; -def t2MOVCCasr : T2I_movcc_sh<0b10, (outs GPR:$dst), - (ins GPR:$false, GPR:$true, i32imm:$rhs), +def t2MOVCCasr : T2I_movcc_sh<0b10, (outs rGPR:$dst), + (ins rGPR:$false, rGPR:$true, i32imm:$rhs), IIC_iCMOVsi, "asr", ".w\t$dst, $true, $rhs", []>, RegConstraint<"$false = $dst">; -def t2MOVCCror : T2I_movcc_sh<0b11, (outs GPR:$dst), - (ins GPR:$false, GPR:$true, i32imm:$rhs), +def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$dst), + (ins rGPR:$false, rGPR:$true, i32imm:$rhs), IIC_iCMOVsi, "ror", ".w\t$dst, $true, $rhs", []>, RegConstraint<"$false = $dst">; } // neverHasSideEffects @@ -2225,21 +2229,15 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs GPR:$dst), // memory barriers protect the atomic sequences let hasSideEffects = 1 in { -def t2Int_MemBarrierV7 : AInoP<(outs), (ins), - ThumbFrm, NoItinerary, - "dmb", "", - [(ARMMemBarrierV7)]>, - Requires<[IsThumb2]> { +def t2DMBsy : AInoP<(outs), (ins), ThumbFrm, NoItinerary, "dmb", "", + [(ARMMemBarrier)]>, Requires<[IsThumb, HasDB]> { let Inst{31-4} = 0xF3BF8F5; // FIXME: add support for options other than a full system DMB let Inst{3-0} = 0b1111; } -def t2Int_SyncBarrierV7 : AInoP<(outs), (ins), - ThumbFrm, NoItinerary, - "dsb", "", - [(ARMSyncBarrierV7)]>, - Requires<[IsThumb2]> { +def t2DSBsy : AInoP<(outs), (ins), ThumbFrm, NoItinerary, "dsb", "", + [(ARMSyncBarrier)]>, Requires<[IsThumb, HasDB]> { let Inst{31-4} = 0xF3BF8F4; // FIXME: add support for options other than a full system DSB let Inst{3-0} = 0b1111; @@ -2329,13 +2327,13 @@ class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz, } let mayLoad = 1 in { -def t2LDREXB : T2I_ldrex<0b00, (outs GPR:$dest), (ins GPR:$ptr), AddrModeNone, +def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$dest), (ins rGPR:$ptr), AddrModeNone, Size4Bytes, NoItinerary, "ldrexb", "\t$dest, [$ptr]", "", []>; -def t2LDREXH : T2I_ldrex<0b01, (outs GPR:$dest), (ins GPR:$ptr), AddrModeNone, +def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$dest), (ins rGPR:$ptr), AddrModeNone, Size4Bytes, NoItinerary, "ldrexh", "\t$dest, [$ptr]", "", []>; -def t2LDREX : Thumb2I<(outs GPR:$dest), (ins GPR:$ptr), AddrModeNone, +def t2LDREX : Thumb2I<(outs rGPR:$dest), (ins rGPR:$ptr), AddrModeNone, Size4Bytes, NoItinerary, "ldrex", "\t$dest, [$ptr]", "", []> { @@ -2344,20 +2342,20 @@ def t2LDREX : Thumb2I<(outs GPR:$dest), (ins GPR:$ptr), AddrModeNone, let Inst{11-8} = 0b1111; let Inst{7-0} = 0b00000000; // imm8 = 0 } -def t2LDREXD : T2I_ldrex<0b11, (outs GPR:$dest, GPR:$dest2), (ins GPR:$ptr), +def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$dest, rGPR:$dest2), (ins rGPR:$ptr), AddrModeNone, Size4Bytes, NoItinerary, "ldrexd", "\t$dest, $dest2, [$ptr]", "", [], {?, ?, ?, ?}>; } let mayStore = 1, Constraints = "@earlyclobber $success" in { -def t2STREXB : T2I_strex<0b00, (outs GPR:$success), (ins GPR:$src, GPR:$ptr), +def t2STREXB : T2I_strex<0b00, (outs rGPR:$success), (ins rGPR:$src, rGPR:$ptr), AddrModeNone, Size4Bytes, NoItinerary, "strexb", "\t$success, $src, [$ptr]", 
"", []>; -def t2STREXH : T2I_strex<0b01, (outs GPR:$success), (ins GPR:$src, GPR:$ptr), +def t2STREXH : T2I_strex<0b01, (outs rGPR:$success), (ins rGPR:$src, rGPR:$ptr), AddrModeNone, Size4Bytes, NoItinerary, "strexh", "\t$success, $src, [$ptr]", "", []>; -def t2STREX : Thumb2I<(outs GPR:$success), (ins GPR:$src, GPR:$ptr), +def t2STREX : Thumb2I<(outs rGPR:$success), (ins rGPR:$src, rGPR:$ptr), AddrModeNone, Size4Bytes, NoItinerary, "strex", "\t$success, $src, [$ptr]", "", []> { @@ -2365,8 +2363,8 @@ def t2STREX : Thumb2I<(outs GPR:$success), (ins GPR:$src, GPR:$ptr), let Inst{26-20} = 0b0000100; let Inst{7-0} = 0b00000000; // imm8 = 0 } -def t2STREXD : T2I_strex<0b11, (outs GPR:$success), - (ins GPR:$src, GPR:$src2, GPR:$ptr), +def t2STREXD : T2I_strex<0b11, (outs rGPR:$success), + (ins rGPR:$src, rGPR:$src2, rGPR:$ptr), AddrModeNone, Size4Bytes, NoItinerary, "strexd", "\t$success, $src, $src2, [$ptr]", "", [], {?, ?, ?, ?}>; @@ -2416,7 +2414,7 @@ let Defs = D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15, D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30, D31 ], hasSideEffects = 1, isBarrier = 1 in { - def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val), + def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val), AddrModeNone, SizeSpecial, NoItinerary, "mov\t$val, pc\t${:comment} begin eh.setjmp\n\t" "adds\t$val, #7\n\t" @@ -2425,14 +2423,14 @@ let Defs = "b\t1f\n\t" "movs\tr0, #1\t${:comment} end eh.setjmp\n\t" "1:", "", - [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>, + [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>, Requires<[IsThumb2, HasVFP2]>; } let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ], hasSideEffects = 1, isBarrier = 1 in { - def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val), + def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val), AddrModeNone, SizeSpecial, NoItinerary, "mov\t$val, pc\t${:comment} begin eh.setjmp\n\t" "adds\t$val, #7\n\t" @@ -2441,7 +2439,7 @@ let Defs = "b\t1f\n\t" "movs\tr0, #1\t${:comment} end eh.setjmp\n\t" "1:", "", - [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>, + [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>, Requires<[IsThumb2, NoVFP]>; } @@ -2482,7 +2480,7 @@ let isNotDuplicable = 1, isIndirectBranch = 1 in { def t2BR_JT : T2JTI<(outs), (ins GPR:$target, GPR:$index, jt2block_operand:$jt, i32imm:$id), - IIC_Br, "mov\tpc, $target\n$jt", + IIC_Br, "mov\tpc, $target$jt", [(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt, imm:$id)]> { let Inst{31-27} = 0b11101; let Inst{26-20} = 0b0100100; @@ -2496,7 +2494,7 @@ def t2BR_JT : def t2TBB : T2JTI<(outs), (ins tb_addrmode:$index, jt2block_operand:$jt, i32imm:$id), - IIC_Br, "tbb\t$index\n$jt", []> { + IIC_Br, "tbb\t$index$jt", []> { let Inst{31-27} = 0b11101; let Inst{26-20} = 0b0001101; let Inst{19-16} = 0b1111; // Rn = pc (table follows this instruction) @@ -2507,7 +2505,7 @@ def t2TBB : def t2TBH : T2JTI<(outs), (ins tb_addrmode:$index, jt2block_operand:$jt, i32imm:$id), - IIC_Br, "tbh\t$index\n$jt", []> { + IIC_Br, "tbh\t$index$jt", []> { let Inst{31-27} = 0b11101; let Inst{26-20} = 0b0001101; let Inst{19-16} = 0b1111; // Rn = pc (table follows this instruction) @@ -2560,7 +2558,7 @@ def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask), // Branch and Exchange Jazelle -- for disassembly only // Rm = Inst{19-16} -def t2BXJ : T2I<(outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func", +def t2BXJ : T2I<(outs), (ins 
rGPR:$func), NoItinerary, "bxj", "\t$func", [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11110; let Inst{26} = 0; @@ -2647,25 +2645,25 @@ def t2SRSIA : T2I<(outs), (ins i32imm:$mode),NoItinerary,"srsia","\tsp, $mode", } // Return From Exception is a system instruction -- for disassembly only -def t2RFEDBW : T2I<(outs), (ins GPR:$base), NoItinerary, "rfedb", "\t$base!", +def t2RFEDBW : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfedb", "\t$base!", [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11101; let Inst{26-20} = 0b0000011; // W = 1 } -def t2RFEDB : T2I<(outs), (ins GPR:$base), NoItinerary, "rfeab", "\t$base", +def t2RFEDB : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfeab", "\t$base", [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11101; let Inst{26-20} = 0b0000001; // W = 0 } -def t2RFEIAW : T2I<(outs), (ins GPR:$base), NoItinerary, "rfeia", "\t$base!", +def t2RFEIAW : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfeia", "\t$base!", [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11101; let Inst{26-20} = 0b0011011; // W = 1 } -def t2RFEIA : T2I<(outs), (ins GPR:$base), NoItinerary, "rfeia", "\t$base", +def t2RFEIA : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfeia", "\t$base", [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11101; let Inst{26-20} = 0b0011001; // W = 0 @@ -2676,26 +2674,26 @@ def t2RFEIA : T2I<(outs), (ins GPR:$base), NoItinerary, "rfeia", "\t$base", // // Two piece so_imms. -def : T2Pat<(or GPR:$LHS, t2_so_imm2part:$RHS), - (t2ORRri (t2ORRri GPR:$LHS, (t2_so_imm2part_1 imm:$RHS)), +def : T2Pat<(or rGPR:$LHS, t2_so_imm2part:$RHS), + (t2ORRri (t2ORRri rGPR:$LHS, (t2_so_imm2part_1 imm:$RHS)), (t2_so_imm2part_2 imm:$RHS))>; -def : T2Pat<(xor GPR:$LHS, t2_so_imm2part:$RHS), - (t2EORri (t2EORri GPR:$LHS, (t2_so_imm2part_1 imm:$RHS)), +def : T2Pat<(xor rGPR:$LHS, t2_so_imm2part:$RHS), + (t2EORri (t2EORri rGPR:$LHS, (t2_so_imm2part_1 imm:$RHS)), (t2_so_imm2part_2 imm:$RHS))>; -def : T2Pat<(add GPR:$LHS, t2_so_imm2part:$RHS), - (t2ADDri (t2ADDri GPR:$LHS, (t2_so_imm2part_1 imm:$RHS)), +def : T2Pat<(add rGPR:$LHS, t2_so_imm2part:$RHS), + (t2ADDri (t2ADDri rGPR:$LHS, (t2_so_imm2part_1 imm:$RHS)), (t2_so_imm2part_2 imm:$RHS))>; -def : T2Pat<(add GPR:$LHS, t2_so_neg_imm2part:$RHS), - (t2SUBri (t2SUBri GPR:$LHS, (t2_so_neg_imm2part_1 imm:$RHS)), +def : T2Pat<(add rGPR:$LHS, t2_so_neg_imm2part:$RHS), + (t2SUBri (t2SUBri rGPR:$LHS, (t2_so_neg_imm2part_1 imm:$RHS)), (t2_so_neg_imm2part_2 imm:$RHS))>; // 32-bit immediate using movw + movt. // This is a single pseudo instruction to make it re-materializable. Remove // when we can do generalized remat. 
let isReMaterializable = 1 in -def t2MOVi32imm : T2Ix2<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi, +def t2MOVi32imm : T2Ix2<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVi, "movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}", - [(set GPR:$dst, (i32 imm:$src))]>; + [(set rGPR:$dst, (i32 imm:$src))]>; // ConstantPool, GlobalAddress, and JumpTable def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2LEApcrel tglobaladdr :$dst)>, @@ -2723,7 +2721,7 @@ def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp), // // Rd = Instr{11-8} -def t2MRS : T2I<(outs GPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, cpsr", +def t2MRS : T2I<(outs rGPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, cpsr", [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11110; let Inst{26} = 0; @@ -2734,7 +2732,7 @@ def t2MRS : T2I<(outs GPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, cpsr", } // Rd = Instr{11-8} -def t2MRSsys : T2I<(outs GPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, spsr", +def t2MRSsys : T2I<(outs rGPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, spsr", [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11110; let Inst{26} = 0; @@ -2745,7 +2743,7 @@ def t2MRSsys : T2I<(outs GPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, spsr", } // Rn = Inst{19-16} -def t2MSR : T2I<(outs), (ins GPR:$src, msr_mask:$mask), NoItinerary, "msr", +def t2MSR : T2I<(outs), (ins rGPR:$src, msr_mask:$mask), NoItinerary, "msr", "\tcpsr$mask, $src", [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11110; @@ -2757,7 +2755,7 @@ def t2MSR : T2I<(outs), (ins GPR:$src, msr_mask:$mask), NoItinerary, "msr", } // Rn = Inst{19-16} -def t2MSRsys : T2I<(outs), (ins GPR:$src, msr_mask:$mask), NoItinerary, "msr", +def t2MSRsys : T2I<(outs), (ins rGPR:$src, msr_mask:$mask), NoItinerary, "msr", "\tspsr$mask, $src", [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11110; diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 84c23e1a784c..c29e09606bd4 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -77,61 +77,61 @@ def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr), // let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { -def VLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dsts, +def VLDMD : AXDI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), IndexModeNone, IIC_fpLoadm, - "vldm${addr:submode}${p}\t${addr:base}, $dsts", "", []> { + "vldm${addr:submode}${p}\t$addr, $dsts", "", []> { let Inst{20} = 1; } -def VLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dsts, +def VLDMS : AXSI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), IndexModeNone, IIC_fpLoadm, - "vldm${addr:submode}${p}\t${addr:base}, $dsts", "", []> { + "vldm${addr:submode}${p}\t$addr, $dsts", "", []> { let Inst{20} = 1; } -def VLDMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, +def VLDMD_UPD : AXDI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), IndexModeUpd, IIC_fpLoadm, - "vldm${addr:submode}${p}\t${addr:base}!, $dsts", - "$addr.base = $wb", []> { + "vldm${addr:submode}${p}\t$addr!, $dsts", + "$addr.addr = $wb", []> { let Inst{20} = 1; } -def VLDMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, +def VLDMS_UPD : AXSI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), IndexModeUpd, IIC_fpLoadm, - 
"vldm${addr:submode}${p}\t${addr:base}!, $dsts", - "$addr.base = $wb", []> { + "vldm${addr:submode}${p}\t$addr!, $dsts", + "$addr.addr = $wb", []> { let Inst{20} = 1; } } // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { -def VSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$srcs, +def VSTMD : AXDI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops), IndexModeNone, IIC_fpStorem, - "vstm${addr:submode}${p}\t${addr:base}, $srcs", "", []> { + "vstm${addr:submode}${p}\t$addr, $srcs", "", []> { let Inst{20} = 0; } -def VSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$srcs, +def VSTMS : AXSI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops), IndexModeNone, IIC_fpStorem, - "vstm${addr:submode}${p}\t${addr:base}, $srcs", "", []> { + "vstm${addr:submode}${p}\t$addr, $srcs", "", []> { let Inst{20} = 0; } -def VSTMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, +def VSTMD_UPD : AXDI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops), IndexModeUpd, IIC_fpStorem, - "vstm${addr:submode}${p}\t${addr:base}!, $srcs", - "$addr.base = $wb", []> { + "vstm${addr:submode}${p}\t$addr!, $srcs", + "$addr.addr = $wb", []> { let Inst{20} = 0; } -def VSTMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, +def VSTMS_UPD : AXSI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops), IndexModeUpd, IIC_fpStorem, - "vstm${addr:submode}${p}\t${addr:base}!, $srcs", - "$addr.base = $wb", []> { + "vstm${addr:submode}${p}\t$addr!, $srcs", + "$addr.addr = $wb", []> { let Inst{20} = 0; } } // mayStore, neverHasSideEffects, hasExtraSrcRegAllocReq @@ -420,34 +420,35 @@ def VTOUIZS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010, // And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR. // For disassembly only. 
- +let Uses = [FPSCR] in { def VTOSIRD : AVConv1I<0b11101, 0b11, 0b1101, 0b1011, (outs SPR:$dst), (ins DPR:$a), IIC_fpCVTDI, "vcvtr", ".s32.f64\t$dst, $a", - [/* For disassembly only; pattern left blank */]> { + [(set SPR:$dst, (int_arm_vcvtr (f64 DPR:$a)))]> { let Inst{7} = 0; // Z bit } def VTOSIRS : AVConv1In<0b11101, 0b11, 0b1101, 0b1010, (outs SPR:$dst), (ins SPR:$a), IIC_fpCVTSI, "vcvtr", ".s32.f32\t$dst, $a", - [/* For disassembly only; pattern left blank */]> { + [(set SPR:$dst, (int_arm_vcvtr SPR:$a))]> { let Inst{7} = 0; // Z bit } def VTOUIRD : AVConv1I<0b11101, 0b11, 0b1100, 0b1011, (outs SPR:$dst), (ins DPR:$a), IIC_fpCVTDI, "vcvtr", ".u32.f64\t$dst, $a", - [/* For disassembly only; pattern left blank */]> { + [(set SPR:$dst, (int_arm_vcvtru (f64 DPR:$a)))]> { let Inst{7} = 0; // Z bit } def VTOUIRS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010, (outs SPR:$dst), (ins SPR:$a), IIC_fpCVTSI, "vcvtr", ".u32.f32\t$dst, $a", - [/* For disassembly only; pattern left blank */]> { + [(set SPR:$dst, (int_arm_vcvtru SPR:$a))]> { let Inst{7} = 0; // Z bit } +} // Convert between floating-point and fixed-point // Data type for fixed-point naming convention: @@ -460,6 +461,7 @@ let Constraints = "$a = $dst" in { // FP to Fixed-Point: +let isCodeGenOnly = 1 in { def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", @@ -499,9 +501,11 @@ def VTOULD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 1, (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", [/* For disassembly only; pattern left blank */]>; +} // Fixed-Point to FP: +let isCodeGenOnly = 1 in { def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", @@ -541,6 +545,7 @@ def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1, (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", [/* For disassembly only; pattern left blank */]>; +} } // End of 'let Constraints = "$src = $dst" in' @@ -654,32 +659,27 @@ def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs", } // FPSCR <-> GPR (for disassembly only) - -let neverHasSideEffects = 1 in { -let Uses = [FPSCR] in { -def VMRS : VFPAI<(outs GPR:$dst), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs", - "\t$dst, fpscr", - [/* For disassembly only; pattern left blank */]> { +let hasSideEffects = 1, Uses = [FPSCR] in +def VMRS : VFPAI<(outs GPR:$dst), (ins), VFPMiscFrm, IIC_fpSTAT, + "vmrs", "\t$dst, fpscr", + [(set GPR:$dst, (int_arm_get_fpscr))]> { let Inst{27-20} = 0b11101111; let Inst{19-16} = 0b0001; let Inst{11-8} = 0b1010; let Inst{7} = 0; let Inst{4} = 1; } -} -let Defs = [FPSCR] in { -def VMSR : VFPAI<(outs), (ins GPR:$src), VFPMiscFrm, IIC_fpSTAT, "vmsr", - "\tfpscr, $src", - [/* For disassembly only; pattern left blank */]> { +let Defs = [FPSCR] in +def VMSR : VFPAI<(outs), (ins GPR:$src), VFPMiscFrm, IIC_fpSTAT, + "vmsr", "\tfpscr, $src", + [(int_arm_set_fpscr GPR:$src)]> { let Inst{27-20} = 0b11101110; let Inst{19-16} = 0b0001; let Inst{11-8} = 0b1010; let Inst{7} = 0; let Inst{4} = 1; } -} -} // neverHasSideEffects // Materialize FP immediates. VFP3 only. 
let isReMaterializable = 1 in { diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index f80e316d23e8..2b7645a42119 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -57,7 +57,7 @@ STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's"); namespace { struct ARMLoadStoreOpt : public MachineFunctionPass { static char ID; - ARMLoadStoreOpt() : MachineFunctionPass(&ID) {} + ARMLoadStoreOpt() : MachineFunctionPass(ID) {} const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; @@ -193,20 +193,17 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, return false; ARM_AM::AMSubMode Mode = ARM_AM::ia; - bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode); - if (isAM4 && Offset == 4) { - if (isThumb2) - // Thumb2 does not support ldmib / stmib. - return false; + // VFP and Thumb2 do not support IB or DA modes. + bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); + bool haveIBAndDA = isNotVFP && !isThumb2; + if (Offset == 4 && haveIBAndDA) Mode = ARM_AM::ib; - } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) { - if (isThumb2) - // Thumb2 does not support ldmda / stmda. - return false; + else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) Mode = ARM_AM::da; - } else if (isAM4 && Offset == -4 * (int)NumRegs) { + else if (Offset == -4 * (int)NumRegs && isNotVFP) + // VLDM/VSTM do not support DB mode without also updating the base reg. Mode = ARM_AM::db; - } else if (Offset != 0) { + else if (Offset != 0) { // If starting offset isn't zero, insert a MI to materialize a new base. // But only do so if it is cost effective, i.e. merging more than two // loads / stores. @@ -246,18 +243,12 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, BaseKill = true; // New base is always killed right its use. } - bool isDPR = (Opcode == ARM::VLDRD || Opcode == ARM::VSTRD); bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD); Opcode = getLoadStoreMultipleOpcode(Opcode); - MachineInstrBuilder MIB = (isAM4) - ? BuildMI(MBB, MBBI, dl, TII->get(Opcode)) - .addReg(Base, getKillRegState(BaseKill)) - .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg) - : BuildMI(MBB, MBBI, dl, TII->get(Opcode)) - .addReg(Base, getKillRegState(BaseKill)) - .addImm(ARM_AM::getAM5Opc(Mode, isDPR ? 
NumRegs<<1 : NumRegs)) - .addImm(Pred).addReg(PredReg); + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)) + .addReg(Base, getKillRegState(BaseKill)) + .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg); for (unsigned i = 0; i != NumRegs; ++i) MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef) | getKillRegState(Regs[i].second)); @@ -333,6 +324,7 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, if (KilledRegs.count(Reg)) { unsigned j = Killer[Reg]; memOps[j].MBBI->getOperand(0).setIsKill(false); + memOps[j].isKill = false; } } MBB.erase(memOps[i].MBBI); @@ -348,7 +340,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, MemOpQueue &MemOps, SmallVector<MachineBasicBlock::iterator, 4> &Merges) { - bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode); + bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); int Offset = MemOps[SIndex].Offset; int SOffset = Offset; unsigned insertAfter = SIndex; @@ -366,12 +358,12 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Reg = MO.getReg(); unsigned RegNum = MO.isUndef() ? UINT_MAX : ARMRegisterInfo::getRegisterNumbering(Reg); - // AM4 - register numbers in ascending order. - // AM5 - consecutive register numbers in ascending order. - // Can only do up to 16 double-word registers per insn. + // Register numbers must be in ascending order. For VFP, the registers + // must also be consecutive and there is a limit of 16 double-word + // registers per instruction. if (Reg != ARM::SP && NewOffset == Offset + (int)Size && - ((isAM4 && RegNum > PRegNum) + ((isNotVFP && RegNum > PRegNum) || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) { Offset += Size; PRegNum = RegNum; @@ -409,7 +401,7 @@ static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base, return false; // Make sure the offset fits in 8 bits. - if (Bytes <= 0 || (Limit && Bytes >= Limit)) + if (Bytes == 0 || (Limit && Bytes >= Limit)) return false; unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME @@ -433,7 +425,7 @@ static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base, MI->getOpcode() != ARM::ADDri) return false; - if (Bytes <= 0 || (Limit && Bytes >= Limit)) + if (Bytes == 0 || (Limit && Bytes >= Limit)) // Make sure the offset fits in 8 bits. return false; @@ -464,12 +456,12 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { case ARM::STM: case ARM::t2LDM: case ARM::t2STM: - return (MI->getNumOperands() - 4) * 4; case ARM::VLDMS: case ARM::VSTMS: + return (MI->getNumOperands() - 4) * 4; case ARM::VLDMD: case ARM::VSTMD: - return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4; + return (MI->getNumOperands() - 4) * 8; } } @@ -512,26 +504,17 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg); int Opcode = MI->getOpcode(); DebugLoc dl = MI->getDebugLoc(); - bool isAM4 = (Opcode == ARM::LDM || Opcode == ARM::t2LDM || - Opcode == ARM::STM || Opcode == ARM::t2STM); bool DoMerge = false; ARM_AM::AMSubMode Mode = ARM_AM::ia; - unsigned Offset = 0; - if (isAM4) { - // Can't use an updating ld/st if the base register is also a dest - // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined. 
- for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) { - if (MI->getOperand(i).getReg() == Base) - return false; - } - Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm()); - } else { - // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops. - Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm()); - Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm()); + // Can't use an updating ld/st if the base register is also a dest + // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined. + for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) { + if (MI->getOperand(i).getReg() == Base) + return false; } + Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm()); // Try merging with the previous instruction. MachineBasicBlock::iterator BeginMBBI = MBB.begin(); @@ -539,22 +522,14 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, MachineBasicBlock::iterator PrevMBBI = prior(MBBI); while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) --PrevMBBI; - if (isAM4) { - if (Mode == ARM_AM::ia && - isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { - DoMerge = true; - Mode = ARM_AM::db; - } else if (isAM4 && Mode == ARM_AM::ib && - isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { - DoMerge = true; - Mode = ARM_AM::da; - } - } else { - if (Mode == ARM_AM::ia && - isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { - Mode = ARM_AM::db; - DoMerge = true; - } + if (Mode == ARM_AM::ia && + isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { + Mode = ARM_AM::db; + DoMerge = true; + } else if (Mode == ARM_AM::ib && + isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { + Mode = ARM_AM::da; + DoMerge = true; } if (DoMerge) MBB.erase(PrevMBBI); @@ -566,19 +541,12 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI); while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) ++NextMBBI; - if (isAM4) { - if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) && - isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { - DoMerge = true; - } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) && - isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { - DoMerge = true; - } - } else { - if (Mode == ARM_AM::ia && - isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { - DoMerge = true; - } + if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) && + isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { + DoMerge = true; + } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) && + isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { + DoMerge = true; } if (DoMerge) { if (NextMBBI == I) { @@ -595,16 +563,9 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode); MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) .addReg(Base, getDefRegState(true)) // WB base register - .addReg(Base, getKillRegState(BaseKill)); - if (isAM4) { - // [t2]LDM_UPD, [t2]STM_UPD - MIB.addImm(ARM_AM::getAM4ModeImm(Mode)) - .addImm(Pred).addReg(PredReg); - } else { - // VLDM[SD}_UPD, VSTM[SD]_UPD - MIB.addImm(ARM_AM::getAM5Opc(Mode, Offset)) - .addImm(Pred).addReg(PredReg); - } + .addReg(Base, getKillRegState(BaseKill)) + .addImm(ARM_AM::getAM4ModeImm(Mode)) + .addImm(Pred).addReg(PredReg); // Transfer the rest of operands. 
for (unsigned OpNum = 4, e = MI->getNumOperands(); OpNum != e; ++OpNum) MIB.addOperand(MI->getOperand(OpNum)); @@ -736,11 +697,10 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, if (!DoMerge) return false; - bool isDPR = NewOpc == ARM::VLDMD || NewOpc == ARM::VSTMD; unsigned Offset = 0; if (isAM5) - Offset = ARM_AM::getAM5Opc(AddSub == ARM_AM::sub ? ARM_AM::db : ARM_AM::ia, - (isDPR ? 2 : 1)); + Offset = ARM_AM::getAM4ModeImm(AddSub == ARM_AM::sub ? + ARM_AM::db : ARM_AM::ia); else if (isAM2) Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); else @@ -748,6 +708,9 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, if (isAM5) { // VLDM[SD}_UPD, VSTM[SD]_UPD + // (There are no base-updating versions of VLDR/VSTR instructions, but the + // updating load/store-multiple instructions can be used with only one + // register.) MachineOperand &MO = MI->getOperand(0); BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) .addReg(Base, getDefRegState(true)) // WB base register @@ -1268,7 +1231,7 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { namespace { struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{ static char ID; - ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {} + ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {} const TargetData *TD; const TargetInstrInfo *TII; diff --git a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp index ab2b06b60783..ab2b06b60783 100644 --- a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp +++ b/lib/Target/ARM/ARMMCInstLower.cpp diff --git a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h b/lib/Target/ARM/ARMMCInstLower.h index b81a30690ce2..b81a30690ce2 100644 --- a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h +++ b/lib/Target/ARM/ARMMCInstLower.h diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h index 7e57a1ca5576..514c26b4daf0 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -43,6 +43,10 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// processFunctionBeforeCalleeSavedScan(). bool HasStackFrame; + /// RestoreSPFromFP - True if epilogue should restore SP from FP. Set by + /// emitPrologue. + bool RestoreSPFromFP; + /// LRSpilledForFarJump - True if the LR register has been for spilled to /// enable far jump. 
bool LRSpilledForFarJump; @@ -95,7 +99,7 @@ public: ARMFunctionInfo() : isThumb(false), hasThumb2(false), - VarArgsRegSaveSize(0), HasStackFrame(false), + VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false), LRSpilledForFarJump(false), FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), @@ -106,7 +110,7 @@ public: explicit ARMFunctionInfo(MachineFunction &MF) : isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()), hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()), - VarArgsRegSaveSize(0), HasStackFrame(false), + VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false), LRSpilledForFarJump(false), FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), @@ -125,6 +129,9 @@ public: bool hasStackFrame() const { return HasStackFrame; } void setHasStackFrame(bool s) { HasStackFrame = s; } + bool shouldRestoreSPFromFP() const { return RestoreSPFromFP; } + void setShouldRestoreSPFromFP(bool s) { RestoreSPFromFP = s; } + bool isLRSpilledForFarJump() const { return LRSpilledForFarJump; } void setLRIsSpilledForFarJump(bool s) { LRSpilledForFarJump = s; } diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index d020f3c74bde..305b232e6a99 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -1,4 +1,4 @@ -//===- ARMRegisterInfo.td - ARM Register defs -------------------*- C++ -*-===// +//===- ARMRegisterInfo.td - ARM Register defs --------------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -220,41 +220,11 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, iterator allocation_order_end(const MachineFunction &MF) const; }]; let MethodBodies = [{ - // FP is R11, R9 is available. - static const unsigned ARM_GPR_AO_1[] = { + static const unsigned ARM_GPR_AO[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R12,ARM::LR, ARM::R4, ARM::R5, ARM::R6, ARM::R7, - ARM::R8, ARM::R9, ARM::R10, - ARM::R11 }; - // FP is R11, R9 is not available. - static const unsigned ARM_GPR_AO_2[] = { - ARM::R0, ARM::R1, ARM::R2, ARM::R3, - ARM::R12,ARM::LR, - ARM::R4, ARM::R5, ARM::R6, ARM::R7, - ARM::R8, ARM::R10, - ARM::R11 }; - // FP is R7, R9 is available as non-callee-saved register. - // This is used by Darwin. - static const unsigned ARM_GPR_AO_3[] = { - ARM::R0, ARM::R1, ARM::R2, ARM::R3, - ARM::R9, ARM::R12,ARM::LR, - ARM::R4, ARM::R5, ARM::R6, - ARM::R8, ARM::R10,ARM::R11,ARM::R7 }; - // FP is R7, R9 is not available. - static const unsigned ARM_GPR_AO_4[] = { - ARM::R0, ARM::R1, ARM::R2, ARM::R3, - ARM::R12,ARM::LR, - ARM::R4, ARM::R5, ARM::R6, - ARM::R8, ARM::R10,ARM::R11, - ARM::R7 }; - // FP is R7, R9 is available as callee-saved register. - // This is used by non-Darwin platform in Thumb mode. - static const unsigned ARM_GPR_AO_5[] = { - ARM::R0, ARM::R1, ARM::R2, ARM::R3, - ARM::R12,ARM::LR, - ARM::R4, ARM::R5, ARM::R6, - ARM::R8, ARM::R9, ARM::R10,ARM::R11,ARM::R7 }; + ARM::R8, ARM::R9, ARM::R10, ARM::R11 }; // For Thumb1 mode, we don't want to allocate hi regs at all, as we // don't know how to spill them. 
If we make our prologue/epilogue code @@ -270,85 +240,71 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); if (Subtarget.isThumb1Only()) return THUMB_GPR_AO; - if (Subtarget.isTargetDarwin()) { - if (Subtarget.isR9Reserved()) - return ARM_GPR_AO_4; - else - return ARM_GPR_AO_3; - } else { - if (Subtarget.isR9Reserved()) - return ARM_GPR_AO_2; - else if (Subtarget.isThumb()) - return ARM_GPR_AO_5; - else - return ARM_GPR_AO_1; - } + return ARM_GPR_AO; } GPRClass::iterator GPRClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); - GPRClass::iterator I; - - if (Subtarget.isThumb1Only()) { - I = THUMB_GPR_AO + (sizeof(THUMB_GPR_AO)/sizeof(unsigned)); - // Mac OS X requires FP not to be clobbered for backtracing purpose. - return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I; - } - - if (Subtarget.isTargetDarwin()) { - if (Subtarget.isR9Reserved()) - I = ARM_GPR_AO_4 + (sizeof(ARM_GPR_AO_4)/sizeof(unsigned)); - else - I = ARM_GPR_AO_3 + (sizeof(ARM_GPR_AO_3)/sizeof(unsigned)); - } else { - if (Subtarget.isR9Reserved()) - I = ARM_GPR_AO_2 + (sizeof(ARM_GPR_AO_2)/sizeof(unsigned)); - else if (Subtarget.isThumb()) - I = ARM_GPR_AO_5 + (sizeof(ARM_GPR_AO_5)/sizeof(unsigned)); - else - I = ARM_GPR_AO_1 + (sizeof(ARM_GPR_AO_1)/sizeof(unsigned)); - } - - // Mac OS X requires FP not to be clobbered for backtracing purpose. - return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I; + if (Subtarget.isThumb1Only()) + return THUMB_GPR_AO + (sizeof(THUMB_GPR_AO)/sizeof(unsigned)); + return ARM_GPR_AO + (sizeof(ARM_GPR_AO)/sizeof(unsigned)); } }]; } -// Thumb registers are R0-R7 normally. Some instructions can still use -// the general GPR register class above (MOV, e.g.) -def tGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> { +// restricted GPR register class. Many Thumb2 instructions allow the full +// register range for operands, but have undefined behaviours when PC +// or SP (R13 or R15) are used. The ARM ARM refers to these operands +// via the BadReg() pseudo-code description. +def rGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, + R7, R8, R9, R10, R11, R12, LR]> { let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; }]; let MethodBodies = [{ - static const unsigned THUMB_tGPR_AO[] = { + static const unsigned ARM_rGPR_AO[] = { + ARM::R0, ARM::R1, ARM::R2, ARM::R3, + ARM::R12,ARM::LR, + ARM::R4, ARM::R5, ARM::R6, ARM::R7, + ARM::R8, ARM::R9, ARM::R10, + ARM::R11 }; + + // For Thumb1 mode, we don't want to allocate hi regs at all, as we + // don't know how to spill them. If we make our prologue/epilogue code + // smarter at some point, we can go back to using the above allocation + // orders for the Thumb1 instructions that know how to use hi regs. + static const unsigned THUMB_rGPR_AO[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4, ARM::R5, ARM::R6, ARM::R7 }; - // FP is R7, only low registers available. 
- tGPRClass::iterator - tGPRClass::allocation_order_begin(const MachineFunction &MF) const { - return THUMB_tGPR_AO; + rGPRClass::iterator + rGPRClass::allocation_order_begin(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); + if (Subtarget.isThumb1Only()) + return THUMB_rGPR_AO; + return ARM_rGPR_AO; } - tGPRClass::iterator - tGPRClass::allocation_order_end(const MachineFunction &MF) const { + rGPRClass::iterator + rGPRClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); - tGPRClass::iterator I = - THUMB_tGPR_AO + (sizeof(THUMB_tGPR_AO)/sizeof(unsigned)); - // Mac OS X requires FP not to be clobbered for backtracing purpose. - return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I; + + if (Subtarget.isThumb1Only()) + return THUMB_rGPR_AO + (sizeof(THUMB_rGPR_AO)/sizeof(unsigned)); + return ARM_rGPR_AO + (sizeof(ARM_rGPR_AO)/sizeof(unsigned)); } }]; } +// Thumb registers are R0-R7 normally. Some instructions can still use +// the general GPR register class above (MOV, e.g.) +def tGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> {} + // For tail calls, we can't use callee-saved registers, as they are restored // to the saved value before the tail call, which would clobber a call address. // Note, getMinimalPhysRegClass(R0) returns tGPR because of the names of @@ -381,36 +337,20 @@ def tcGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R9, R12]> { const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); if (Subtarget.isThumb1Only()) return THUMB_GPR_AO_TC; - if (Subtarget.isTargetDarwin()) { - if (Subtarget.isR9Reserved()) - return ARM_GPR_NOR9_TC; - else - return ARM_GPR_R9_TC; - } else - // R9 is either callee-saved or reserved; can't use it. - return ARM_GPR_NOR9_TC; + return Subtarget.isTargetDarwin() ? ARM_GPR_R9_TC : ARM_GPR_NOR9_TC; } tcGPRClass::iterator tcGPRClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); - GPRClass::iterator I; - - if (Subtarget.isThumb1Only()) { - I = THUMB_GPR_AO_TC + (sizeof(THUMB_GPR_AO_TC)/sizeof(unsigned)); - return I; - } - - if (Subtarget.isTargetDarwin()) { - if (Subtarget.isR9Reserved()) - I = ARM_GPR_NOR9_TC + (sizeof(ARM_GPR_NOR9_TC)/sizeof(unsigned)); - else - I = ARM_GPR_R9_TC + (sizeof(ARM_GPR_R9_TC)/sizeof(unsigned)); - } else - // R9 is either callee-saved or reserved; can't use it. - I = ARM_GPR_NOR9_TC + (sizeof(ARM_GPR_NOR9_TC)/sizeof(unsigned)); - return I; + + if (Subtarget.isThumb1Only()) + return THUMB_GPR_AO_TC + (sizeof(THUMB_GPR_AO_TC)/sizeof(unsigned)); + + return Subtarget.isTargetDarwin() ? 
+ ARM_GPR_R9_TC + (sizeof(ARM_GPR_R9_TC)/sizeof(unsigned)) : + ARM_GPR_NOR9_TC + (sizeof(ARM_GPR_NOR9_TC)/sizeof(unsigned)); } }]; } diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 10fd257055fb..cb539f4c01ec 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -33,14 +33,19 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, , ARMFPUType(None) , UseNEONForSinglePrecisionFP(false) , SlowVMLx(false) + , SlowFPBrcc(false) , IsThumb(isT) , ThumbMode(Thumb1) + , NoARM(false) , PostRAScheduler(false) , IsR9Reserved(ReserveR9) , UseMovt(UseMOVT) , HasFP16(false) , HasHardwareDivide(false) , HasT2ExtractPack(false) + , HasDataBarrier(false) + , Pref32BitThumb(false) + , FPOnlySP(false) , stackAlignment(4) , CPUString("generic") , TargetType(isELF) // Default to ELF unless otherwise specified. diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index e7d92ede9b98..67e58038ee77 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -26,7 +26,7 @@ class GlobalValue; class ARMSubtarget : public TargetSubtarget { protected: enum ARMArchEnum { - V4, V4T, V5T, V5TE, V6, V6T2, V7A, V7M + V4, V4T, V5T, V5TE, V6, V6M, V6T2, V7A, V7M }; enum ARMFPEnum { @@ -63,6 +63,9 @@ protected: /// ThumbMode - Indicates supported Thumb version. ThumbTypeEnum ThumbMode; + /// NoARM - True if subtarget does not support ARM mode execution. + bool NoARM; + /// PostRAScheduler - True if using post-register-allocation scheduler. bool PostRAScheduler; @@ -84,6 +87,18 @@ protected: /// instructions. bool HasT2ExtractPack; + /// HasDataBarrier - True if the subtarget supports DMB / DSB data barrier + /// instructions. + bool HasDataBarrier; + + /// Pref32BitThumb - If true, codegen would prefer 32-bit Thumb instructions + /// over 16-bit ones. + bool Pref32BitThumb; + + /// FPOnlySP - If true, the floating point unit only supports single + /// precision. + bool FPOnlySP; + /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned stackAlignment; @@ -128,6 +143,8 @@ protected: bool hasV6T2Ops() const { return ARMArchVersion >= V6T2; } bool hasV7Ops() const { return ARMArchVersion >= V7A; } + bool hasARMOps() const { return !NoARM; } + bool hasVFP2() const { return ARMFPUType >= VFPv2; } bool hasVFP3() const { return ARMFPUType >= VFPv3; } bool hasNEON() const { return ARMFPUType >= NEON; } @@ -135,8 +152,11 @@ protected: return hasNEON() && UseNEONForSinglePrecisionFP; } bool hasDivide() const { return HasHardwareDivide; } bool hasT2ExtractPack() const { return HasT2ExtractPack; } + bool hasDataBarrier() const { return HasDataBarrier; } bool useVMLx() const {return hasVFP2() && !SlowVMLx; } bool isFPBrccSlow() const { return SlowFPBrcc; } + bool isFPOnlySP() const { return FPOnlySP; } + bool prefers32BitThumb() const { return Pref32BitThumb; } bool hasFP16() const { return HasFP16; } diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 09203f9304df..30ff8276cdaa 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -31,7 +31,6 @@ static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { } } - extern "C" void LLVMInitializeARMTarget() { // Register the target. 
RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget); @@ -66,6 +65,9 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT, "v128:64:128-v64:64:64-n32")), TLInfo(*this), TSInfo(*this) { + if (!Subtarget.hasARMOps()) + report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not " + "support ARM mode execution!"); } ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT, @@ -85,9 +87,15 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT, TSInfo(*this) { } +// Pass Pipeline Configuration +bool ARMBaseTargetMachine::addPreISel(PassManagerBase &PM, + CodeGenOpt::Level OptLevel) { + if (OptLevel != CodeGenOpt::None) + PM.add(createARMGlobalMergePass(getTargetLowering())); + return false; +} -// Pass Pipeline Configuration bool ARMBaseTargetMachine::addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { PM.add(createARMISelDag(*this, OptLevel)); @@ -132,7 +140,7 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM, bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { - if (Subtarget.isThumb2()) + if (Subtarget.isThumb2() && !Subtarget.prefers32BitThumb()) PM.add(createThumb2SizeReductionPass()); PM.add(createARMConstantIslandPass()); diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index a222e57b13ff..17e5425a9d37 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -50,6 +50,7 @@ public: } // Pass Pipeline Configuration + virtual bool addPreISel(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPreSched2(PassManagerBase &PM, CodeGenOpt::Level OptLevel); diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 4b083244b241..75e2a739bf1f 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "ARM.h" +#include "ARMSubtarget.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" @@ -18,8 +19,10 @@ #include "llvm/Target/TargetAsmParser.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" using namespace llvm; @@ -37,6 +40,7 @@ enum ShiftType { class ARMAsmParser : public TargetAsmParser { MCAsmParser &Parser; + TargetMachine &TM; private: MCAsmParser &getParser() const { return Parser; } @@ -76,26 +80,33 @@ private: bool ParseDirectiveSyntax(SMLoc L); - // TODO - For now hacked versions of the next two are in here in this file to - // allow some parser testing until the table gen versions are implemented. + bool MatchInstruction(SMLoc IDLoc, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands, + MCInst &Inst) { + if (!MatchInstructionImpl(Operands, Inst)) + return false; + + // FIXME: We should give nicer diagnostics about the exact failure. 
+ Error(IDLoc, "unrecognized instruction"); + + return true; + } /// @name Auto-generated Match Functions /// { - bool MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCInst &Inst); - /// MatchRegisterName - Match the given string to a register name and return - /// its register number, or -1 if there is no match. To allow return values - /// to be used directly in register lists, arm registers have values between - /// 0 and 15. - int MatchRegisterName(StringRef Name); + unsigned ComputeAvailableFeatures(const ARMSubtarget *Subtarget) const; + + bool MatchInstructionImpl(const SmallVectorImpl<MCParsedAsmOperand*> + &Operands, + MCInst &Inst); /// } public: - ARMAsmParser(const Target &T, MCAsmParser &_Parser) - : TargetAsmParser(T), Parser(_Parser) {} + ARMAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &_TM) + : TargetAsmParser(T), Parser(_Parser), TM(_TM) {} virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands); @@ -110,16 +121,21 @@ private: ARMOperand() {} public: enum KindTy { - Token, - Register, + CondCode, Immediate, - Memory + Memory, + Register, + Token } Kind; SMLoc StartLoc, EndLoc; union { struct { + ARMCC::CondCodes Val; + } CC; + + struct { const char *Data; unsigned Length; } Tok; @@ -151,16 +167,19 @@ public: }; - ARMOperand(KindTy K, SMLoc S, SMLoc E) - : Kind(K), StartLoc(S), EndLoc(E) {} + //ARMOperand(KindTy K, SMLoc S, SMLoc E) + // : Kind(K), StartLoc(S), EndLoc(E) {} ARMOperand(const ARMOperand &o) : MCParsedAsmOperand() { Kind = o.Kind; StartLoc = o.StartLoc; EndLoc = o.EndLoc; switch (Kind) { + case CondCode: + CC = o.CC; + break; case Token: - Tok = o.Tok; + Tok = o.Tok; break; case Register: Reg = o.Reg; @@ -179,6 +198,11 @@ public: /// getEndLoc - Get the location of the last token of this operand. SMLoc getEndLoc() const { return EndLoc; } + ARMCC::CondCodes getCondCode() const { + assert(Kind == CondCode && "Invalid access!"); + return CC.Val; + } + StringRef getToken() const { assert(Kind == Token && "Invalid access!"); return StringRef(Tok.Data, Tok.Length); @@ -194,15 +218,50 @@ public: return Imm.Val; } - bool isToken() const {return Kind == Token; } + bool isCondCode() const { return Kind == CondCode; } + + bool isImm() const { return Kind == Immediate; } bool isReg() const { return Kind == Register; } + bool isToken() const {return Kind == Token; } + + void addExpr(MCInst &Inst, const MCExpr *Expr) const { + // Add as immediates when possible. + if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) + Inst.addOperand(MCOperand::CreateImm(CE->getValue())); + else + Inst.addOperand(MCOperand::CreateExpr(Expr)); + } + + void addCondCodeOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(unsigned(getCondCode()))); + // FIXME: What belongs here? 
+ Inst.addOperand(MCOperand::CreateReg(0)); + } + void addRegOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateReg(getReg())); } + void addImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + addExpr(Inst, getImm()); + } + + virtual void dump(raw_ostream &OS) const; + + static void CreateCondCode(OwningPtr<ARMOperand> &Op, ARMCC::CondCodes CC, + SMLoc S) { + Op.reset(new ARMOperand); + Op->Kind = CondCode; + Op->CC.Val = CC; + Op->StartLoc = S; + Op->EndLoc = S; + } + static void CreateToken(OwningPtr<ARMOperand> &Op, StringRef Str, SMLoc S) { Op.reset(new ARMOperand); @@ -262,6 +321,33 @@ public: } // end anonymous namespace. +void ARMOperand::dump(raw_ostream &OS) const { + switch (Kind) { + case CondCode: + OS << ARMCondCodeToString(getCondCode()); + break; + case Immediate: + getImm()->print(OS); + break; + case Memory: + OS << "<memory>"; + break; + case Register: + OS << "<register " << getReg() << ">"; + break; + case Token: + OS << "'" << getToken() << "'"; + break; + } +} + +/// @name Auto-generated Match Functions +/// { + +static unsigned MatchRegisterName(StringRef Name); + +/// } + /// Try to parse a register name. The token must be an Identifier when called, /// and if it is a register name a Reg operand is created, the token is eaten /// and false is returned. Else true is returned and no token is eaten. @@ -548,77 +634,6 @@ bool ARMAsmParser::ParseShift(ShiftType &St, return false; } -/// A hack to allow some testing, to be replaced by a real table gen version. -int ARMAsmParser::MatchRegisterName(StringRef Name) { - if (Name == "r0" || Name == "R0") - return 0; - else if (Name == "r1" || Name == "R1") - return 1; - else if (Name == "r2" || Name == "R2") - return 2; - else if (Name == "r3" || Name == "R3") - return 3; - else if (Name == "r3" || Name == "R3") - return 3; - else if (Name == "r4" || Name == "R4") - return 4; - else if (Name == "r5" || Name == "R5") - return 5; - else if (Name == "r6" || Name == "R6") - return 6; - else if (Name == "r7" || Name == "R7") - return 7; - else if (Name == "r8" || Name == "R8") - return 8; - else if (Name == "r9" || Name == "R9") - return 9; - else if (Name == "r10" || Name == "R10") - return 10; - else if (Name == "r11" || Name == "R11" || Name == "fp") - return 11; - else if (Name == "r12" || Name == "R12" || Name == "ip") - return 12; - else if (Name == "r13" || Name == "R13" || Name == "sp") - return 13; - else if (Name == "r14" || Name == "R14" || Name == "lr") - return 14; - else if (Name == "r15" || Name == "R15" || Name == "pc") - return 15; - return -1; -} - -/// A hack to allow some testing, to be replaced by a real table gen version. -bool ARMAsmParser:: -MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCInst &Inst) { - ARMOperand &Op0 = *(ARMOperand*)Operands[0]; - assert(Op0.Kind == ARMOperand::Token && "First operand not a Token"); - StringRef Mnemonic = Op0.getToken(); - if (Mnemonic == "add" || - Mnemonic == "stmfd" || - Mnemonic == "str" || - Mnemonic == "ldmfd" || - Mnemonic == "ldr" || - Mnemonic == "mov" || - Mnemonic == "sub" || - Mnemonic == "bl" || - Mnemonic == "push" || - Mnemonic == "blx" || - Mnemonic == "pop") { - // Hard-coded to a valid instruction, till we have a real matcher. 
- Inst = MCInst(); - Inst.setOpcode(ARM::MOVr); - Inst.addOperand(MCOperand::CreateReg(2)); - Inst.addOperand(MCOperand::CreateReg(2)); - Inst.addOperand(MCOperand::CreateImm(0)); - Inst.addOperand(MCOperand::CreateImm(0)); - Inst.addOperand(MCOperand::CreateReg(0)); - return false; - } - - return true; -} - /// Parse a arm instruction operand. For now this parses the operand regardless /// of the mnemonic. bool ARMAsmParser::ParseOperand(OwningPtr<ARMOperand> &Op) { @@ -661,12 +676,56 @@ bool ARMAsmParser::ParseOperand(OwningPtr<ARMOperand> &Op) { bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands) { OwningPtr<ARMOperand> Op; - ARMOperand::CreateToken(Op, Name, NameLoc); - + + // Create the leading tokens for the mnemonic, split by '.' characters. + size_t Start = 0, Next = Name.find('.'); + StringRef Head = Name.slice(Start, Next); + + // Determine the predicate, if any. + // + // FIXME: We need a way to check whether a prefix supports predication, + // otherwise we will end up with an ambiguity for instructions that happen to + // end with a predicate name. + unsigned CC = StringSwitch<unsigned>(Head.substr(Head.size()-2)) + .Case("eq", ARMCC::EQ) + .Case("ne", ARMCC::NE) + .Case("hs", ARMCC::HS) + .Case("lo", ARMCC::LO) + .Case("mi", ARMCC::MI) + .Case("pl", ARMCC::PL) + .Case("vs", ARMCC::VS) + .Case("vc", ARMCC::VC) + .Case("hi", ARMCC::HI) + .Case("ls", ARMCC::LS) + .Case("ge", ARMCC::GE) + .Case("lt", ARMCC::LT) + .Case("gt", ARMCC::GT) + .Case("le", ARMCC::LE) + .Case("al", ARMCC::AL) + .Default(~0U); + if (CC != ~0U) { + Head = Head.slice(0, Head.size() - 2); + } else + CC = ARMCC::AL; + + ARMOperand::CreateToken(Op, Head, NameLoc); Operands.push_back(Op.take()); - if (getLexer().isNot(AsmToken::EndOfStatement)) { + ARMOperand::CreateCondCode(Op, ARMCC::CondCodes(CC), NameLoc); + Operands.push_back(Op.take()); + + // Add the remaining tokens in the mnemonic. + while (Next != StringRef::npos) { + Start = Next; + Next = Name.find('.', Start + 1); + Head = Name.slice(Start, Next); + ARMOperand::CreateToken(Op, Head, NameLoc); + Operands.push_back(Op.take()); + } + + // Read the remaining operands. + if (getLexer().isNot(AsmToken::EndOfStatement)) { // Read the first operand. 
OwningPtr<ARMOperand> Op; if (ParseOperand(Op)) return true; @@ -809,3 +868,5 @@ extern "C" void LLVMInitializeARMAsmParser() { RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget); LLVMInitializeARMAsmLexer(); } + +#include "ARMGenAsmMatcher.inc" diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp index edc934549b28..8026e7718ca9 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp @@ -158,7 +158,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) { if ((MI->getOpcode() == ARM::VSTMS_UPD || MI->getOpcode() ==ARM::VSTMD_UPD) && MI->getOperand(0).getReg() == ARM::SP) { const MCOperand &MO1 = MI->getOperand(2); - if (ARM_AM::getAM5SubMode(MO1.getImm()) == ARM_AM::db) { + if (ARM_AM::getAM4SubMode(MO1.getImm()) == ARM_AM::db) { O << '\t' << "vpush"; printPredicateOperand(MI, 3, O); O << '\t'; @@ -171,7 +171,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) { if ((MI->getOpcode() == ARM::VLDMS_UPD || MI->getOpcode() ==ARM::VLDMD_UPD) && MI->getOperand(0).getReg() == ARM::SP) { const MCOperand &MO1 = MI->getOperand(2); - if (ARM_AM::getAM5SubMode(MO1.getImm()) == ARM_AM::ia) { + if (ARM_AM::getAM4SubMode(MO1.getImm()) == ARM_AM::ia) { O << '\t' << "vpop"; printPredicateOperand(MI, 3, O); O << '\t'; @@ -278,15 +278,13 @@ void ARMInstPrinter::printSORegOperand(const MCInst *MI, unsigned OpNum, O << getRegisterName(MO1.getReg()); // Print the shift opc. - O << ", " - << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm())) - << ' '; - + ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO3.getImm()); + O << ", " << ARM_AM::getShiftOpcStr(ShOpc); if (MO2.getReg()) { - O << getRegisterName(MO2.getReg()); + O << ' ' << getRegisterName(MO2.getReg()); assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0); - } else { - O << "#" << ARM_AM::getSORegOffset(MO3.getImm()); + } else if (ShOpc != ARM_AM::rrx) { + O << " #" << ARM_AM::getSORegOffset(MO3.getImm()); } } @@ -414,16 +412,6 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, return; } - if (Modifier && strcmp(Modifier, "submode") == 0) { - ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MO2.getImm()); - O << ARM_AM::getAMSubModeStr(Mode); - return; - } else if (Modifier && strcmp(Modifier, "base") == 0) { - // Used for FSTM{D|S} and LSTM{D|S} operations. 
- O << getRegisterName(MO1.getReg()); - return; - } - O << "[" << getRegisterName(MO1.getReg()); if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) { @@ -463,9 +451,9 @@ void ARMInstPrinter::printAddrModePCOperand(const MCInst *MI, unsigned OpNum, assert(0 && "FIXME: Implement printAddrModePCOperand"); } -void ARMInstPrinter::printBitfieldInvMaskImmOperand (const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { +void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); uint32_t v = ~MO.getImm(); int32_t lsb = CountTrailingZeros_32(v); @@ -474,6 +462,31 @@ void ARMInstPrinter::printBitfieldInvMaskImmOperand (const MCInst *MI, O << '#' << lsb << ", #" << width; } +void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + unsigned val = MI->getOperand(OpNum).getImm(); + O << ARM_MB::MemBOptToString(val); +} + +void ARMInstPrinter::printShiftImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + unsigned ShiftOp = MI->getOperand(OpNum).getImm(); + ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp); + switch (Opc) { + case ARM_AM::no_shift: + return; + case ARM_AM::lsl: + O << ", lsl #"; + break; + case ARM_AM::asr: + O << ", asr #"; + break; + default: + assert(0 && "unexpected shift opcode for shift immediate operand"); + } + O << ARM_AM::getSORegOffset(ShiftOp); +} + void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum, raw_ostream &O) { O << "{"; @@ -669,12 +682,11 @@ void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum, O << getRegisterName(Reg); // Print the shift opc. - O << ", " - << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm())) - << " "; - assert(MO2.isImm() && "Not a valid t2_so_reg value!"); - O << "#" << ARM_AM::getSORegOffset(MO2.getImm()); + ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm()); + O << ", " << ARM_AM::getShiftOpcStr(ShOpc); + if (ShOpc != ARM_AM::rrx) + O << " #" << ARM_AM::getSORegOffset(MO2.getImm()); } void ARMInstPrinter::printT2AddrModeImm12Operand(const MCInst *MI, diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h index ddf5047793d2..e5ad0d07e9ba 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h @@ -57,6 +57,8 @@ public: void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printMemBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printShiftImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printThumbITMask(const MCInst *MI, unsigned OpNum, raw_ostream &O); diff --git a/lib/Target/ARM/AsmPrinter/CMakeLists.txt b/lib/Target/ARM/AsmPrinter/CMakeLists.txt index 4e299f86ecb6..18645c0864a3 100644 --- a/lib/Target/ARM/AsmPrinter/CMakeLists.txt +++ b/lib/Target/ARM/AsmPrinter/CMakeLists.txt @@ -1,8 +1,6 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) add_llvm_library(LLVMARMAsmPrinter - ARMAsmPrinter.cpp ARMInstPrinter.cpp - ARMMCInstLower.cpp ) add_dependencies(LLVMARMAsmPrinter ARMCodeGenTable_gen) diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index 0df34666b959..6b4dee5965d2 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -7,25 +7,32 @@ tablegen(ARMGenInstrNames.inc -gen-instr-enums) tablegen(ARMGenInstrInfo.inc -gen-instr-desc) tablegen(ARMGenCodeEmitter.inc -gen-emitter) tablegen(ARMGenAsmWriter.inc -gen-asm-writer) +tablegen(ARMGenAsmMatcher.inc -gen-asm-matcher) tablegen(ARMGenDAGISel.inc -gen-dag-isel) +tablegen(ARMGenFastISel.inc -gen-fast-isel) tablegen(ARMGenCallingConv.inc -gen-callingconv) tablegen(ARMGenSubtarget.inc -gen-subtarget) tablegen(ARMGenEDInfo.inc -gen-enhanced-disassembly-info) add_llvm_target(ARMCodeGen + ARMAsmPrinter.cpp ARMBaseInstrInfo.cpp ARMBaseRegisterInfo.cpp ARMCodeEmitter.cpp ARMConstantIslandPass.cpp ARMConstantPoolValue.cpp ARMExpandPseudoInsts.cpp + ARMFastISel.cpp + ARMGlobalMerge.cpp ARMISelDAGToDAG.cpp ARMISelLowering.cpp ARMInstrInfo.cpp ARMJITInfo.cpp ARMLoadStoreOptimizer.cpp ARMMCAsmInfo.cpp + ARMMCInstLower.cpp ARMRegisterInfo.cpp + ARMSelectionDAGInfo.cpp ARMSubtarget.cpp ARMTargetMachine.cpp ARMTargetObjectFile.cpp @@ -38,7 +45,6 @@ add_llvm_target(ARMCodeGen Thumb2InstrInfo.cpp Thumb2RegisterInfo.cpp Thumb2SizeReduction.cpp - ARMSelectionDAGInfo.cpp ) -target_link_libraries (LLVMARMCodeGen LLVMSelectionDAG) +target_link_libraries (LLVMARMCodeGen LLVMARMAsmPrinter LLVMSelectionDAG) diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 4de697e8bf67..e22028985b46 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -26,6 +26,8 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +//#define DEBUG(X) do { X; } while (0) + /// ARMGenDecoderTables.inc - ARMDecoderTables.inc is tblgen'ed from /// ARMDecoderEmitter.cpp TableGen backend. It contains: /// @@ -87,6 +89,11 @@ static unsigned decodeARMInstruction(uint32_t &insn) { return ARM::BFI; } + // Ditto for STRBT, which is a super-instruction for A8.6.199 Encoding A1 & A2. + // As a result, the decoder fails to deocode USAT properly. + if (slice(insn, 27, 21) == 0x37 && slice(insn, 5, 4) == 1) + return ARM::USAT; + // Ditto for ADDSrs, which is a super-instruction for A8.6.7 & A8.6.8. // As a result, the decoder fails to decode UMULL properly. if (slice(insn, 27, 21) == 0x04 && slice(insn, 7, 4) == 9) { @@ -106,7 +113,7 @@ static unsigned decodeARMInstruction(uint32_t &insn) { // Ditto for STRT, which is a super-instruction for A8.6.210 Encoding A1 & A2. // As a result, the decoder fails to deocode SSAT properly. if (slice(insn, 27, 21) == 0x35 && slice(insn, 5, 4) == 1) - return slice(insn, 6, 6) == 0 ? ARM::SSATlsl : ARM::SSATasr; + return ARM::SSAT; // Ditto for RSCrs, which is a super-instruction for A8.6.146 & A8.6.147. // As a result, the decoder fails to decode STRHT/LDRHT/LDRSHT/LDRSBT. @@ -291,7 +298,7 @@ static unsigned T2Morph2LoadLiteral(unsigned Opcode) { /// decodeInstruction(insn) is invoked on the original insn. /// /// Otherwise, decodeThumbInstruction is called with the original insn. 
-static unsigned decodeThumbSideEffect(bool IsThumb2, uint32_t &insn) { +static unsigned decodeThumbSideEffect(bool IsThumb2, unsigned &insn) { if (IsThumb2) { uint16_t op1 = slice(insn, 28, 27); uint16_t op2 = slice(insn, 26, 20); @@ -429,7 +436,7 @@ bool ThumbDisassembler::getInstruction(MCInst &MI, // passed to decodeThumbInstruction(). For 16-bit Thumb instruction, the top // halfword of insn is 0x00 0x00; otherwise, the first halfword is moved to // the top half followed by the second halfword. - uint32_t insn = 0; + unsigned insn = 0; // Possible second halfword. uint16_t insn1 = 0; diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp index a07ff2832aa7..9f493b9aee02 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp @@ -20,6 +20,8 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +//#define DEBUG(X) do { X; } while (0) + /// ARMGenInstrInfo.inc - ARMGenInstrInfo.inc contains the static const /// TargetInstrDesc ARMInsts[] definition and the TargetOperandInfo[]'s /// describing the operand info for each ARMInsts[i]. @@ -93,6 +95,9 @@ static unsigned getRegisterEnum(BO B, unsigned RegClassID, unsigned RawRegister, RegClassID = ARM::DPRRegClassID; } + // For this purpose, we can treat rGPR as if it were GPR. + if (RegClassID == ARM::rGPRRegClassID) RegClassID = ARM::GPRRegClassID; + // See also decodeNEONRd(), decodeNEONRn(), decodeNEONRm(). unsigned RegNum = RegClassID == ARM::QPRRegClassID ? RawRegister >> 1 : RawRegister; @@ -451,12 +456,23 @@ static inline ARM_AM::ShiftOpc getShiftOpcForBits(unsigned bits) { // // A8-11: DecodeImmShift() static inline void getImmShiftSE(ARM_AM::ShiftOpc &ShOp, unsigned &ShImm) { - // If type == 0b11 and imm5 == 0, we have an rrx, instead. - if (ShOp == ARM_AM::ror && ShImm == 0) - ShOp = ARM_AM::rrx; - // If (lsr or asr) and imm5 == 0, shift amount is 32. 
- if ((ShOp == ARM_AM::lsr || ShOp == ARM_AM::asr) && ShImm == 0) + if (ShImm != 0) + return; + switch (ShOp) { + case ARM_AM::no_shift: + case ARM_AM::rrx: + break; + case ARM_AM::lsl: + ShOp = ARM_AM::no_shift; + break; + case ARM_AM::lsr: + case ARM_AM::asr: ShImm = 32; + break; + case ARM_AM::ror: + ShOp = ARM_AM::rrx; + break; + } } // getAMSubModeForBits - getAMSubModeForBits translates from the ARM encoding @@ -490,9 +506,6 @@ static inline ARM_AM::AMSubMode getAMSubModeForBits(unsigned bits) { static bool DisassemblePseudo(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO) { - if (Opcode == ARM::Int_MemBarrierV7 || Opcode == ARM::Int_SyncBarrierV7) - return true; - assert(0 && "Unexpected pseudo instruction!"); return false; } @@ -887,7 +900,6 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, return true; } - assert(0 && "Unexpected BrMiscFrm Opcode"); return false; } @@ -906,34 +918,6 @@ static inline bool getBFCInvMask(uint32_t insn, uint32_t &mask) { return true; } -static inline bool SaturateOpcode(unsigned Opcode) { - switch (Opcode) { - case ARM::SSATlsl: case ARM::SSATasr: case ARM::SSAT16: - case ARM::USATlsl: case ARM::USATasr: case ARM::USAT16: - return true; - default: - return false; - } -} - -static inline unsigned decodeSaturatePos(unsigned Opcode, uint32_t insn) { - switch (Opcode) { - case ARM::SSATlsl: - case ARM::SSATasr: - return slice(insn, 20, 16) + 1; - case ARM::SSAT16: - return slice(insn, 19, 16) + 1; - case ARM::USATlsl: - case ARM::USATasr: - return slice(insn, 20, 16); - case ARM::USAT16: - return slice(insn, 19, 16); - default: - assert(0 && "Invalid opcode passed in"); - return 0; - } -} - // A major complication is the fact that some of the saturating add/subtract // operations have Rd Rm Rn, instead of the "normal" Rd Rn Rm. // They are QADD, QDADD, QDSUB, and QSUB. @@ -959,40 +943,14 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (OpIdx >= NumOps) return false; - // SSAT/SSAT16/USAT/USAT16 has imm operand after Rd. - if (SaturateOpcode(Opcode)) { - MI.addOperand(MCOperand::CreateImm(decodeSaturatePos(Opcode, insn))); - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRm(insn)))); - - if (Opcode == ARM::SSAT16 || Opcode == ARM::USAT16) { - OpIdx += 2; - return true; - } - - // For SSAT operand reg (Rm) has been disassembled above. - // Now disassemble the shift amount. - - // Inst{11-7} encodes the imm5 shift amount. - unsigned ShAmt = slice(insn, 11, 7); - - // A8.6.183. Possible ASR shift amount of 32... - if (Opcode == ARM::SSATasr && ShAmt == 0) - ShAmt = 32; - - MI.addOperand(MCOperand::CreateImm(ShAmt)); - - OpIdx += 3; - return true; - } - // Special-case handling of BFC/BFI/SBFX/UBFX. if (Opcode == ARM::BFC || Opcode == ARM::BFI) { - // TIED_TO operand skipped for BFC and Inst{3-0} (Reg) for BFI. - MI.addOperand(MCOperand::CreateReg(Opcode == ARM::BFC ? 0 - : getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(0)); + if (Opcode == ARM::BFI) { + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); + ++OpIdx; + } uint32_t mask = 0; if (!getBFCInvMask(insn, mask)) return false; @@ -1498,13 +1456,55 @@ static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { // Extract the 5-bit immediate field Inst{11-7}. 
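// The rewritten getImmShiftSE() above follows the ARM ARM A8-11
// DecodeImmShift() rule for an imm5 field of zero. A standalone sketch of
// the same mapping, using a local enum so it compiles on its own; it is
// illustrative only, since the in-tree code operates on ARM_AM::ShiftOpc.
enum ShiftKind { NoShift, LSL, LSR, ASR, ROR, RRX };

static void decodeImmShiftZeroCase(ShiftKind &Op, unsigned &Imm) {
  if (Imm != 0)
    return;                        // a non-zero imm5 is taken literally
  switch (Op) {
  case LSL: Op = NoShift; break;   // "lsl #0" means no shift at all
  case LSR:                        // "lsr #0" and "asr #0" both encode
  case ASR: Imm = 32;     break;   //   a shift amount of 32
  case ROR: Op = RRX;     break;   // "ror #0" is really rrx
  default:  break;
  }
}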
unsigned ShiftAmt = (insn >> ARMII::ShiftShift) & 0x1F; - MI.addOperand(MCOperand::CreateImm(ShiftAmt)); + ARM_AM::ShiftOpc Opc = ARM_AM::no_shift; + if (Opcode == ARM::PKHBT) + Opc = ARM_AM::lsl; + else if (Opcode == ARM::PKHBT) + Opc = ARM_AM::asr; + getImmShiftSE(Opc, ShiftAmt); + MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShiftAmt))); ++OpIdx; } return true; } +/// DisassembleSatFrm - Disassemble saturate instructions: +/// SSAT, SSAT16, USAT, and USAT16. +static bool DisassembleSatFrm(MCInst &MI, unsigned Opcode, uint32_t insn, + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { + + const TargetInstrDesc &TID = ARMInsts[Opcode]; + NumOpsAdded = TID.getNumOperands() - 2; // ignore predicate operands + + // Disassemble register def. + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + decodeRd(insn)))); + + unsigned Pos = slice(insn, 20, 16); + if (Opcode == ARM::SSAT || Opcode == ARM::SSAT16) + Pos += 1; + MI.addOperand(MCOperand::CreateImm(Pos)); + + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + decodeRm(insn)))); + + if (NumOpsAdded == 4) { + ARM_AM::ShiftOpc Opc = (slice(insn, 6, 6) != 0 ? ARM_AM::asr : ARM_AM::lsl); + // Inst{11-7} encodes the imm5 shift amount. + unsigned ShAmt = slice(insn, 11, 7); + if (ShAmt == 0) { + // A8.6.183. Possible ASR shift amount of 32... + if (Opc == ARM_AM::asr) + ShAmt = 32; + else + Opc = ARM_AM::no_shift; + } + MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShAmt))); + } + return true; +} + // Extend instructions. // SXT* and UXT*: Rd [Rn] Rm [rot_imm]. // The 2nd operand register is Rn and the 3rd operand regsiter is Rm for the @@ -1863,7 +1863,7 @@ static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, assert(NumOps >= 3 && "VFPLdStFrm expects NumOps >= 3"); - bool isSPVFP = (Opcode == ARM::VLDRS || Opcode == ARM::VSTRS) ? true : false; + bool isSPVFP = (Opcode == ARM::VLDRS || Opcode == ARM::VSTRS); unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID; // Extract Dd/Sd for operand 0. @@ -1886,7 +1886,7 @@ static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // VFP Load/Store Multiple Instructions. // This is similar to the algorithm for LDM/STM in that operand 0 (the base) and -// operand 1 (the AM5 mode imm) is followed by two predicate operands. It is +// operand 1 (the AM4 mode imm) is followed by two predicate operands. It is // followed by a reglist of either DPR(s) or SPR(s). // // VLDMD[_UPD], VLDMS[_UPD], VSTMD[_UPD], VSTMS[_UPD] @@ -1910,16 +1910,14 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MCOperand::CreateReg(Base)); - // Next comes the AM5 Opcode. + // Next comes the AM4 Opcode. ARM_AM::AMSubMode SubMode = getAMSubModeForBits(getPUBits(insn)); // Must be either "ia" or "db" submode. if (SubMode != ARM_AM::ia && SubMode != ARM_AM::db) { - DEBUG(errs() << "Illegal addressing mode 5 sub-mode!\n"); + DEBUG(errs() << "Illegal addressing mode 4 sub-mode!\n"); return false; } - - unsigned char Imm8 = insn & 0xFF; - MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(SubMode, Imm8))); + MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(SubMode))); // Handling the two predicate operands before the reglist. 
int64_t CondVal = insn >> ARMII::CondShift; @@ -1929,13 +1927,14 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, OpIdx += 4; bool isSPVFP = (Opcode == ARM::VLDMS || Opcode == ARM::VLDMS_UPD || - Opcode == ARM::VSTMS || Opcode == ARM::VSTMS_UPD) ? true : false; + Opcode == ARM::VSTMS || Opcode == ARM::VSTMS_UPD); unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID; // Extract Dd/Sd. unsigned RegD = decodeVFPRd(insn, isSPVFP); // Fill the variadic part of reglist. + unsigned char Imm8 = insn & 0xFF; unsigned Regs = isSPVFP ? Imm8 : Imm8/2; for (unsigned i = 0; i < Regs; ++i) { MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, @@ -2244,9 +2243,10 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, // We have homogeneous NEON registers for Load/Store. unsigned RegClass = 0; + bool DRegPair = UseDRegPair(Opcode); // Double-spaced registers have increments of 2. - unsigned Inc = DblSpaced ? 2 : 1; + unsigned Inc = (DblSpaced || DRegPair) ? 2 : 1; unsigned Rn = decodeRn(insn); unsigned Rm = decodeRm(insn); @@ -2292,8 +2292,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, RegClass = OpInfo[OpIdx].RegClass; while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) { MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, RegClass, Rd, - UseDRegPair(Opcode)))); + getRegisterEnum(B, RegClass, Rd, DRegPair))); Rd += Inc; ++OpIdx; } @@ -2312,8 +2311,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) { MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, RegClass, Rd, - UseDRegPair(Opcode)))); + getRegisterEnum(B, RegClass, Rd, DRegPair))); Rd += Inc; ++OpIdx; } @@ -2351,6 +2349,11 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, } } + // Accessing registers past the end of the NEON register file is not + // defined. + if (Rd > 32) + return false; + return true; } @@ -2423,10 +2426,14 @@ static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode, break; case ARM::VMOVv4i16: case ARM::VMOVv8i16: + case ARM::VMVNv4i16: + case ARM::VMVNv8i16: esize = ESize16; break; case ARM::VMOVv2i32: case ARM::VMOVv4i32: + case ARM::VMVNv2i32: + case ARM::VMVNv4i32: esize = ESize32; break; case ARM::VMOVv1i64: @@ -2944,7 +2951,7 @@ static bool DisassembleNDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // A8.6.49 ISB static inline bool MemBarrierInstr(uint32_t insn) { unsigned op7_4 = slice(insn, 7, 4); - if (slice(insn, 31, 20) == 0xf57 && (op7_4 >= 4 && op7_4 <= 6)) + if (slice(insn, 31, 8) == 0xf57ff0 && (op7_4 >= 4 && op7_4 <= 6)) return true; return false; @@ -3001,8 +3008,15 @@ static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - if (MemBarrierInstr(insn)) + if (MemBarrierInstr(insn)) { + // DMBsy, DSBsy, and ISBsy instructions have zero operand and are taken care + // of within the generic ARMBasicMCBuilder::BuildIt() method. + // + // Inst{3-0} encodes the memory barrier option for the variants. 
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 3, 0))); + NumOpsAdded = 1; return true; + } switch (Opcode) { case ARM::CLREX: @@ -3073,6 +3087,7 @@ static const DisassembleFP FuncPtrs[] = { &DisassembleLdStMulFrm, &DisassembleLdStExFrm, &DisassembleArithMiscFrm, + &DisassembleSatFrm, &DisassembleExtFrm, &DisassembleVFPUnaryFrm, &DisassembleVFPBinaryFrm, diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h index 7d21256a14f9..9c30d332d1f2 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h +++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h @@ -23,7 +23,8 @@ #include "llvm/MC/MCInst.h" #include "llvm/Target/TargetInstrInfo.h" -#include "ARMInstrInfo.h" +#include "ARMBaseInstrInfo.h" +#include "ARMRegisterInfo.h" #include "ARMDisassembler.h" namespace llvm { @@ -53,36 +54,35 @@ public: ENTRY(ARM_FORMAT_LDSTMULFRM, 10) \ ENTRY(ARM_FORMAT_LDSTEXFRM, 11) \ ENTRY(ARM_FORMAT_ARITHMISCFRM, 12) \ - ENTRY(ARM_FORMAT_EXTFRM, 13) \ - ENTRY(ARM_FORMAT_VFPUNARYFRM, 14) \ - ENTRY(ARM_FORMAT_VFPBINARYFRM, 15) \ - ENTRY(ARM_FORMAT_VFPCONV1FRM, 16) \ - ENTRY(ARM_FORMAT_VFPCONV2FRM, 17) \ - ENTRY(ARM_FORMAT_VFPCONV3FRM, 18) \ - ENTRY(ARM_FORMAT_VFPCONV4FRM, 19) \ - ENTRY(ARM_FORMAT_VFPCONV5FRM, 20) \ - ENTRY(ARM_FORMAT_VFPLDSTFRM, 21) \ - ENTRY(ARM_FORMAT_VFPLDSTMULFRM, 22) \ - ENTRY(ARM_FORMAT_VFPMISCFRM, 23) \ - ENTRY(ARM_FORMAT_THUMBFRM, 24) \ - ENTRY(ARM_FORMAT_NEONFRM, 25) \ - ENTRY(ARM_FORMAT_NEONGETLNFRM, 26) \ - ENTRY(ARM_FORMAT_NEONSETLNFRM, 27) \ - ENTRY(ARM_FORMAT_NEONDUPFRM, 28) \ - ENTRY(ARM_FORMAT_MISCFRM, 29) \ - ENTRY(ARM_FORMAT_THUMBMISCFRM, 30) \ - ENTRY(ARM_FORMAT_NLdSt, 31) \ - ENTRY(ARM_FORMAT_N1RegModImm, 32) \ - ENTRY(ARM_FORMAT_N2Reg, 33) \ - ENTRY(ARM_FORMAT_NVCVT, 34) \ - ENTRY(ARM_FORMAT_NVecDupLn, 35) \ - ENTRY(ARM_FORMAT_N2RegVecShL, 36) \ - ENTRY(ARM_FORMAT_N2RegVecShR, 37) \ - ENTRY(ARM_FORMAT_N3Reg, 38) \ - ENTRY(ARM_FORMAT_N3RegVecSh, 39) \ - ENTRY(ARM_FORMAT_NVecExtract, 40) \ - ENTRY(ARM_FORMAT_NVecMulScalar, 41) \ - ENTRY(ARM_FORMAT_NVTBL, 42) + ENTRY(ARM_FORMAT_SATFRM, 13) \ + ENTRY(ARM_FORMAT_EXTFRM, 14) \ + ENTRY(ARM_FORMAT_VFPUNARYFRM, 15) \ + ENTRY(ARM_FORMAT_VFPBINARYFRM, 16) \ + ENTRY(ARM_FORMAT_VFPCONV1FRM, 17) \ + ENTRY(ARM_FORMAT_VFPCONV2FRM, 18) \ + ENTRY(ARM_FORMAT_VFPCONV3FRM, 19) \ + ENTRY(ARM_FORMAT_VFPCONV4FRM, 20) \ + ENTRY(ARM_FORMAT_VFPCONV5FRM, 21) \ + ENTRY(ARM_FORMAT_VFPLDSTFRM, 22) \ + ENTRY(ARM_FORMAT_VFPLDSTMULFRM, 23) \ + ENTRY(ARM_FORMAT_VFPMISCFRM, 24) \ + ENTRY(ARM_FORMAT_THUMBFRM, 25) \ + ENTRY(ARM_FORMAT_MISCFRM, 26) \ + ENTRY(ARM_FORMAT_NEONGETLNFRM, 27) \ + ENTRY(ARM_FORMAT_NEONSETLNFRM, 28) \ + ENTRY(ARM_FORMAT_NEONDUPFRM, 29) \ + ENTRY(ARM_FORMAT_NLdSt, 30) \ + ENTRY(ARM_FORMAT_N1RegModImm, 31) \ + ENTRY(ARM_FORMAT_N2Reg, 32) \ + ENTRY(ARM_FORMAT_NVCVT, 33) \ + ENTRY(ARM_FORMAT_NVecDupLn, 34) \ + ENTRY(ARM_FORMAT_N2RegVecShL, 35) \ + ENTRY(ARM_FORMAT_N2RegVecShR, 36) \ + ENTRY(ARM_FORMAT_N3Reg, 37) \ + ENTRY(ARM_FORMAT_N3RegVecSh, 38) \ + ENTRY(ARM_FORMAT_NVecExtract, 39) \ + ENTRY(ARM_FORMAT_NVecMulScalar, 40) \ + ENTRY(ARM_FORMAT_NVTBL, 41) // ARM instruction format specifies the encoding used by the instruction. #define ENTRY(n, v) n = v, @@ -126,8 +126,8 @@ static inline unsigned slice(uint32_t Bits, unsigned From, unsigned To) { } /// Utility function for setting [From, To] bits to Val for a uint32_t. 
-static inline void setSlice(uint32_t &Bits, unsigned From, unsigned To, - uint32_t Val) { +static inline void setSlice(unsigned &Bits, unsigned From, unsigned To, + unsigned Val) { assert(From < 32 && To < 32 && From >= To); uint32_t Mask = ((1 << (From - To + 1)) - 1); Bits &= ~(Mask << To); diff --git a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h index 4b7a0bf6fdb9..112817b13cf9 100644 --- a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h +++ b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h @@ -103,7 +103,7 @@ static inline unsigned getT1Cond(uint32_t insn) { } static inline bool IsGPR(unsigned RegClass) { - return RegClass == ARM::GPRRegClassID; + return RegClass == ARM::GPRRegClassID || RegClass == ARM::rGPRRegClassID; } // Utilities for 32-bit Thumb instructions. @@ -220,7 +220,7 @@ static inline unsigned decodeImmShift(unsigned bits2, unsigned imm5, switch (bits2) { default: assert(0 && "No such value"); case 0: - ShOp = ARM_AM::lsl; + ShOp = (imm5 == 0 ? ARM_AM::no_shift : ARM_AM::lsl); return imm5; case 1: ShOp = ARM_AM::lsr; @@ -1324,7 +1324,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, && OpInfo[1].RegClass == ARM::GPRRegClassID && OpInfo[2].RegClass < 0 && OpInfo[3].RegClass < 0 - && "Exactlt 4 operands expect and first two as reg operands"); + && "Exactly 4 operands expect and first two as reg operands"); // Only need to populate the src reg operand. MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); @@ -1338,17 +1338,20 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, OpIdx = 0; assert(NumOps >= 2 - && OpInfo[0].RegClass == ARM::GPRRegClassID - && OpInfo[1].RegClass == ARM::GPRRegClassID + && (OpInfo[0].RegClass == ARM::GPRRegClassID || + OpInfo[0].RegClass == ARM::rGPRRegClassID) + && (OpInfo[1].RegClass == ARM::GPRRegClassID || + OpInfo[1].RegClass == ARM::rGPRRegClassID) && "Expect >= 2 operands and first two as reg operands"); - bool ThreeReg = (NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID); + bool ThreeReg = (NumOps > 2 && (OpInfo[2].RegClass == ARM::GPRRegClassID || + OpInfo[2].RegClass == ARM::rGPRRegClassID)); bool NoDstReg = (decodeRs(insn) == 0xF); // Build the register operands, followed by the constant shift specifier. MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, ARM::GPRRegClassID, + getRegisterEnum(B, OpInfo[0].RegClass, NoDstReg ? 
decodeRn(insn) : decodeRs(insn)))); ++OpIdx; @@ -1359,7 +1362,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MI.getOperand(Idx)); ++OpIdx; } else if (!NoDstReg) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[1].RegClass, decodeRn(insn)))); ++OpIdx; } else { @@ -1368,7 +1371,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, } } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, decodeRm(insn)))); ++OpIdx; @@ -1386,14 +1389,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned imm5 = getShiftAmtBits(insn); ARM_AM::ShiftOpc ShOp = ARM_AM::no_shift; unsigned ShAmt = decodeImmShift(bits2, imm5, ShOp); - - // PKHBT/PKHTB are special in that we need the decodeImmShift() call to - // decode the shift amount from raw imm5 and bits2, but we DO NOT need - // to encode the ShOp, as it's in the asm string already. - if (Opcode == ARM::t2PKHBT || Opcode == ARM::t2PKHTB) - MI.addOperand(MCOperand::CreateImm(ShAmt)); - else - MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(ShOp, ShAmt))); + MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(ShOp, ShAmt))); } ++OpIdx; } @@ -1416,16 +1412,20 @@ static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode, OpIdx = 0; - assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::GPRRegClassID + unsigned RdRegClassID = OpInfo[0].RegClass; + assert(NumOps >= 2 && (RdRegClassID == ARM::GPRRegClassID || + RdRegClassID == ARM::rGPRRegClassID) && "Expect >= 2 operands and first one as reg operand"); - bool TwoReg = (OpInfo[1].RegClass == ARM::GPRRegClassID); + unsigned RnRegClassID = OpInfo[1].RegClass; + bool TwoReg = (RnRegClassID == ARM::GPRRegClassID + || RnRegClassID == ARM::rGPRRegClassID); bool NoDstReg = (decodeRs(insn) == 0xF); // Build the register operands, followed by the modified immediate. MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, ARM::GPRRegClassID, + getRegisterEnum(B, RdRegClassID, NoDstReg ? 
decodeRn(insn) : decodeRs(insn)))); ++OpIdx; @@ -1434,7 +1434,7 @@ static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode, DEBUG(errs()<<"Thumb2 encoding error: d==15 for DPModImm 2-reg instr.\n"); return false; } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID, decodeRn(insn)))); ++OpIdx; } @@ -1455,30 +1455,48 @@ static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode, static inline bool Thumb2SaturateOpcode(unsigned Opcode) { switch (Opcode) { - case ARM::t2SSATlsl: case ARM::t2SSATasr: case ARM::t2SSAT16: - case ARM::t2USATlsl: case ARM::t2USATasr: case ARM::t2USAT16: + case ARM::t2SSAT: case ARM::t2SSAT16: + case ARM::t2USAT: case ARM::t2USAT16: return true; default: return false; } } -static inline unsigned decodeThumb2SaturatePos(unsigned Opcode, uint32_t insn) { - switch (Opcode) { - case ARM::t2SSATlsl: - case ARM::t2SSATasr: - return slice(insn, 4, 0) + 1; - case ARM::t2SSAT16: - return slice(insn, 3, 0) + 1; - case ARM::t2USATlsl: - case ARM::t2USATasr: - return slice(insn, 4, 0); - case ARM::t2USAT16: - return slice(insn, 3, 0); - default: - assert(0 && "Unexpected opcode"); - return 0; +/// DisassembleThumb2Sat - Disassemble Thumb2 saturate instructions: +/// o t2SSAT, t2USAT: Rs sat_pos Rn shamt +/// o t2SSAT16, t2USAT16: Rs sat_pos Rn +static bool DisassembleThumb2Sat(MCInst &MI, unsigned Opcode, uint32_t insn, + unsigned &NumOpsAdded, BO B) { + const TargetInstrDesc &TID = ARMInsts[Opcode]; + NumOpsAdded = TID.getNumOperands() - 2; // ignore predicate operands + + // Disassemble the register def. + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, + decodeRs(insn)))); + + unsigned Pos = slice(insn, 4, 0); + if (Opcode == ARM::t2SSAT || Opcode == ARM::t2SSAT16) + Pos += 1; + MI.addOperand(MCOperand::CreateImm(Pos)); + + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, + decodeRn(insn)))); + + if (NumOpsAdded == 4) { + ARM_AM::ShiftOpc Opc = (slice(insn, 21, 21) != 0 ? + ARM_AM::asr : ARM_AM::lsl); + // Inst{14-12:7-6} encodes the imm5 shift amount. 
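// The lines that follow reassemble the split imm5 shift amount from
// Inst{14-12} (imm3) and Inst{7-6} (imm2) and then normalise the zero case:
// ASR with an encoded 0 means a shift of 32 (A8.6.183), while LSL with 0
// means no shift at all. A self-contained sketch of that computation; the
// field() helper and the enum are illustrative stand-ins for the in-tree
// slice() and ARM_AM::ShiftOpc.
#include <cstdint>

enum SatShift { SatNone, SatLSL, SatASR };

static inline unsigned field(uint32_t v, unsigned hi, unsigned lo) {
  return (v >> lo) & ((1u << (hi - lo + 1)) - 1u);
}

static inline unsigned thumb2SatShift(uint32_t insn, SatShift &Op) {
  Op = field(insn, 21, 21) ? SatASR : SatLSL;     // the sh bit selects ASR
  unsigned Amt = (field(insn, 14, 12) << 2) | field(insn, 7, 6); // imm3:imm2
  if (Amt == 0) {
    if (Op == SatASR)
      Amt = 32;
    else
      Op = SatNone;
  }
  return Amt;
}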
+ unsigned ShAmt = slice(insn, 14, 12) << 2 | slice(insn, 7, 6); + if (ShAmt == 0) { + if (Opc == ARM_AM::asr) + ShAmt = 32; + else + Opc = ARM_AM::no_shift; + } + MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShAmt))); } + return true; } // A6.3.3 Data-processing (plain binary immediate) @@ -1492,11 +1510,6 @@ static inline unsigned decodeThumb2SaturatePos(unsigned Opcode, uint32_t insn) { // o t2SBFX (SBFX): Rs Rn lsb width // o t2UBFX (UBFX): Rs Rn lsb width // o t2BFI (BFI): Rs Rn lsb width -// -// [Signed|Unsigned] Saturate [16] -// -// o t2SSAT[lsl|asr], t2USAT[lsl|asr]: Rs sat_pos Rn shamt -// o t2SSAT16, t2USAT16: Rs sat_pos Rn static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { @@ -1506,41 +1519,21 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, OpIdx = 0; - assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::GPRRegClassID + unsigned RdRegClassID = OpInfo[0].RegClass; + assert(NumOps >= 2 && (RdRegClassID == ARM::GPRRegClassID || + RdRegClassID == ARM::rGPRRegClassID) && "Expect >= 2 operands and first one as reg operand"); - bool TwoReg = (OpInfo[1].RegClass == ARM::GPRRegClassID); + unsigned RnRegClassID = OpInfo[1].RegClass; + bool TwoReg = (RnRegClassID == ARM::GPRRegClassID + || RnRegClassID == ARM::rGPRRegClassID); // Build the register operand(s), followed by the immediate(s). - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RdRegClassID, decodeRs(insn)))); ++OpIdx; - // t2SSAT/t2SSAT16/t2USAT/t2USAT16 has imm operand after Rd. - if (Thumb2SaturateOpcode(Opcode)) { - MI.addOperand(MCOperand::CreateImm(decodeThumb2SaturatePos(Opcode, insn))); - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); - - if (Opcode == ARM::t2SSAT16 || Opcode == ARM::t2USAT16) { - OpIdx += 2; - return true; - } - - // For SSAT operand reg (Rn) has been disassembled above. - // Now disassemble the shift amount. - - // Inst{14-12:7-6} encodes the imm5 shift amount. - unsigned ShAmt = slice(insn, 14, 12) << 2 | slice(insn, 7, 6); - - MI.addOperand(MCOperand::CreateImm(ShAmt)); - - OpIdx += 3; - return true; - } - if (TwoReg) { assert(NumOps >= 3 && "Expect >= 3 operands"); int Idx; @@ -1549,12 +1542,19 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, MI.addOperand(MI.getOperand(Idx)); } else { // Add src reg operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID, decodeRn(insn)))); } ++OpIdx; } + if (Opcode == ARM::t2BFI) { + // Add val reg operand. 
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID, + decodeRn(insn)))); + ++OpIdx; + } + assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef() && "Pure imm operand expected"); @@ -1567,7 +1567,7 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, MI.addOperand(MCOperand::CreateImm(getIImm3Imm8(insn))); else if (Opcode == ARM::t2MOVi16 || Opcode == ARM::t2MOVTi16) MI.addOperand(MCOperand::CreateImm(getImm16(insn))); - else if (Opcode == ARM::t2BFC) { + else if (Opcode == ARM::t2BFC || Opcode == ARM::t2BFI) { uint32_t mask = 0; if (getBitfieldInvMask(insn, mask)) MI.addOperand(MCOperand::CreateImm(mask)); @@ -1575,17 +1575,10 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, return false; } else { // Handle the case of: lsb width - assert((Opcode == ARM::t2SBFX || Opcode == ARM::t2UBFX || - Opcode == ARM::t2BFI) && "Unexpected opcode"); + assert((Opcode == ARM::t2SBFX || Opcode == ARM::t2UBFX) + && "Unexpected opcode"); MI.addOperand(MCOperand::CreateImm(getLsb(insn))); - if (Opcode == ARM::t2BFI) { - if (getMsb(insn) < getLsb(insn)) { - DEBUG(errs() << "Encoding error: msb < lsb\n"); - return false; - } - MI.addOperand(MCOperand::CreateImm(getMsb(insn) - getLsb(insn) + 1)); - } else - MI.addOperand(MCOperand::CreateImm(getWidthMinus1(insn) + 1)); + MI.addOperand(MCOperand::CreateImm(getWidthMinus1(insn) + 1)); ++OpIdx; } @@ -1618,8 +1611,8 @@ static inline bool t2MiscCtrlInstr(uint32_t insn) { // A8.6.26 // t2BXJ -> Rn // -// Miscellaneous control: t2Int_MemBarrierV7 (and its t2DMB variants), -// t2Int_SyncBarrierV7 (and its t2DSB varianst), t2ISBsy, t2CLREX +// Miscellaneous control: t2DMBsy (and its t2DMB variants), +// t2DSBsy (and its t2DSB varianst), t2ISBsy, t2CLREX // -> no operand (except pred-imm pred-ccr for CLREX, memory barrier variants) // // Hint: t2NOP, t2YIELD, t2WFE, t2WFI, t2SEV @@ -1959,25 +1952,25 @@ static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn, OpIdx = 0; assert(NumOps >= 2 && - OpInfo[0].RegClass == ARM::GPRRegClassID && - OpInfo[1].RegClass == ARM::GPRRegClassID && + OpInfo[0].RegClass == ARM::rGPRRegClassID && + OpInfo[1].RegClass == ARM::rGPRRegClassID && "Expect >= 2 operands and first two as reg operands"); // Build the register operands, followed by the optional rotation amount. 
- bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID; + bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::rGPRRegClassID; - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, decodeRs(insn)))); ++OpIdx; if (ThreeReg) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, decodeRn(insn)))); ++OpIdx; } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, decodeRm(insn)))); ++OpIdx; @@ -2009,26 +2002,26 @@ static bool DisassembleThumb2Mul(MCInst &MI, unsigned Opcode, uint32_t insn, const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; assert(NumOps >= 3 && - OpInfo[0].RegClass == ARM::GPRRegClassID && - OpInfo[1].RegClass == ARM::GPRRegClassID && - OpInfo[2].RegClass == ARM::GPRRegClassID && + OpInfo[0].RegClass == ARM::rGPRRegClassID && + OpInfo[1].RegClass == ARM::rGPRRegClassID && + OpInfo[2].RegClass == ARM::rGPRRegClassID && "Expect >= 3 operands and first three as reg operands"); // Build the register operands. - bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::GPRRegClassID; + bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::rGPRRegClassID; - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, decodeRs(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, decodeRn(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, decodeRm(insn)))); if (FourReg) - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, decodeRd(insn)))); NumOpsAdded = FourReg ? 4 : 3; @@ -2054,26 +2047,26 @@ static bool DisassembleThumb2LongMul(MCInst &MI, unsigned Opcode, uint32_t insn, const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; assert(NumOps >= 3 && - OpInfo[0].RegClass == ARM::GPRRegClassID && - OpInfo[1].RegClass == ARM::GPRRegClassID && - OpInfo[2].RegClass == ARM::GPRRegClassID && + OpInfo[0].RegClass == ARM::rGPRRegClassID && + OpInfo[1].RegClass == ARM::rGPRRegClassID && + OpInfo[2].RegClass == ARM::rGPRRegClassID && "Expect >= 3 operands and first three as reg operands"); - bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::GPRRegClassID; + bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::rGPRRegClassID; // Build the register operands. 
if (FourReg) - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, decodeRd(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, decodeRs(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, decodeRn(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, decodeRm(insn)))); if (FourReg) @@ -2152,22 +2145,20 @@ static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op, break; case 2: if (op == 0) { - if (slice(op2, 5, 5) == 0) { + if (slice(op2, 5, 5) == 0) // Data-processing (modified immediate) return DisassembleThumb2DPModImm(MI, Opcode, insn, NumOps, NumOpsAdded, B); - } else { - // Data-processing (plain binary immediate) - return DisassembleThumb2DPBinImm(MI, Opcode, insn, NumOps, NumOpsAdded, - B); - } - } else { - // Branches and miscellaneous control on page A6-20. - return DisassembleThumb2BrMiscCtrl(MI, Opcode, insn, NumOps, NumOpsAdded, - B); - } + if (Thumb2SaturateOpcode(Opcode)) + return DisassembleThumb2Sat(MI, Opcode, insn, NumOpsAdded, B); - break; + // Data-processing (plain binary immediate) + return DisassembleThumb2DPBinImm(MI, Opcode, insn, NumOps, NumOpsAdded, + B); + } + // Branches and miscellaneous control on page A6-20. + return DisassembleThumb2BrMiscCtrl(MI, Opcode, insn, NumOps, NumOpsAdded, + B); case 3: switch (slice(op2, 6, 5)) { case 0: diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile index 9e3ff29e07c4..b3fcfaf6bda7 100644 --- a/lib/Target/ARM/Makefile +++ b/lib/Target/ARM/Makefile @@ -14,10 +14,11 @@ TARGET = ARM # Make sure that tblgen is run, first thing. 
BUILT_SOURCES = ARMGenRegisterInfo.h.inc ARMGenRegisterNames.inc \ ARMGenRegisterInfo.inc ARMGenInstrNames.inc \ - ARMGenInstrInfo.inc ARMGenAsmWriter.inc \ + ARMGenInstrInfo.inc ARMGenAsmWriter.inc ARMGenAsmMatcher.inc \ ARMGenDAGISel.inc ARMGenSubtarget.inc \ ARMGenCodeEmitter.inc ARMGenCallingConv.inc \ - ARMGenDecoderTables.inc ARMGenEDInfo.inc + ARMGenDecoderTables.inc ARMGenEDInfo.inc \ + ARMGenFastISel.inc DIRS = AsmPrinter AsmParser Disassembler TargetInfo diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp index bbdd3c7f7c3e..97e54bfaed9e 100644 --- a/lib/Target/ARM/NEONMoveFix.cpp +++ b/lib/Target/ARM/NEONMoveFix.cpp @@ -24,7 +24,7 @@ STATISTIC(NumVMovs, "Number of reg-reg moves converted"); namespace { struct NEONMoveFixPass : public MachineFunctionPass { static char ID; - NEONMoveFixPass() : MachineFunctionPass(&ID) {} + NEONMoveFixPass() : MachineFunctionPass(ID) {} virtual bool runOnMachineFunction(MachineFunction &Fn); diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp index f67717cdd56f..3407ac6fe08e 100644 --- a/lib/Target/ARM/NEONPreAllocPass.cpp +++ b/lib/Target/ARM/NEONPreAllocPass.cpp @@ -23,7 +23,7 @@ namespace { public: static char ID; - NEONPreAllocPass() : MachineFunctionPass(&ID) {} + NEONPreAllocPass() : MachineFunctionPass(ID) {} virtual bool runOnMachineFunction(MachineFunction &MF); @@ -51,13 +51,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, default: break; - case ARM::VLD1q8: - case ARM::VLD1q16: - case ARM::VLD1q32: - case ARM::VLD1q64: - case ARM::VLD2d8: - case ARM::VLD2d16: - case ARM::VLD2d32: case ARM::VLD2LNd8: case ARM::VLD2LNd16: case ARM::VLD2LNd32: @@ -65,13 +58,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, NumRegs = 2; return true; - case ARM::VLD2q8: - case ARM::VLD2q16: - case ARM::VLD2q32: - FirstOpnd = 0; - NumRegs = 4; - return true; - case ARM::VLD2LNq16: case ARM::VLD2LNq32: FirstOpnd = 0; @@ -88,10 +74,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, Stride = 2; return true; - case ARM::VLD3d8: - case ARM::VLD3d16: - case ARM::VLD3d32: - case ARM::VLD1d64T: case ARM::VLD3LNd8: case ARM::VLD3LNd16: case ARM::VLD3LNd32: @@ -99,24 +81,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, NumRegs = 3; return true; - case ARM::VLD3q8_UPD: - case ARM::VLD3q16_UPD: - case ARM::VLD3q32_UPD: - FirstOpnd = 0; - NumRegs = 3; - Offset = 0; - Stride = 2; - return true; - - case ARM::VLD3q8odd_UPD: - case ARM::VLD3q16odd_UPD: - case ARM::VLD3q32odd_UPD: - FirstOpnd = 0; - NumRegs = 3; - Offset = 1; - Stride = 2; - return true; - case ARM::VLD3LNq16: case ARM::VLD3LNq32: FirstOpnd = 0; @@ -133,10 +97,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, Stride = 2; return true; - case ARM::VLD4d8: - case ARM::VLD4d16: - case ARM::VLD4d32: - case ARM::VLD1d64Q: case ARM::VLD4LNd8: case ARM::VLD4LNd16: case ARM::VLD4LNd32: @@ -144,24 +104,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, NumRegs = 4; return true; - case ARM::VLD4q8_UPD: - case ARM::VLD4q16_UPD: - case ARM::VLD4q32_UPD: - FirstOpnd = 0; - NumRegs = 4; - Offset = 0; - Stride = 2; - return true; - - case ARM::VLD4q8odd_UPD: - case ARM::VLD4q16odd_UPD: - case ARM::VLD4q32odd_UPD: - FirstOpnd = 0; - NumRegs = 4; - Offset = 1; - Stride = 2; - return true; - case ARM::VLD4LNq16: case ARM::VLD4LNq32: FirstOpnd = 0; @@ 
-178,13 +120,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, Stride = 2; return true; - case ARM::VST1q8: - case ARM::VST1q16: - case ARM::VST1q32: - case ARM::VST1q64: - case ARM::VST2d8: - case ARM::VST2d16: - case ARM::VST2d32: case ARM::VST2LNd8: case ARM::VST2LNd16: case ARM::VST2LNd32: @@ -192,13 +127,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, NumRegs = 2; return true; - case ARM::VST2q8: - case ARM::VST2q16: - case ARM::VST2q32: - FirstOpnd = 2; - NumRegs = 4; - return true; - case ARM::VST2LNq16: case ARM::VST2LNq32: FirstOpnd = 2; @@ -215,10 +143,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, Stride = 2; return true; - case ARM::VST3d8: - case ARM::VST3d16: - case ARM::VST3d32: - case ARM::VST1d64T: case ARM::VST3LNd8: case ARM::VST3LNd16: case ARM::VST3LNd32: @@ -226,24 +150,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, NumRegs = 3; return true; - case ARM::VST3q8_UPD: - case ARM::VST3q16_UPD: - case ARM::VST3q32_UPD: - FirstOpnd = 4; - NumRegs = 3; - Offset = 0; - Stride = 2; - return true; - - case ARM::VST3q8odd_UPD: - case ARM::VST3q16odd_UPD: - case ARM::VST3q32odd_UPD: - FirstOpnd = 4; - NumRegs = 3; - Offset = 1; - Stride = 2; - return true; - case ARM::VST3LNq16: case ARM::VST3LNq32: FirstOpnd = 2; @@ -260,10 +166,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, Stride = 2; return true; - case ARM::VST4d8: - case ARM::VST4d16: - case ARM::VST4d32: - case ARM::VST1d64Q: case ARM::VST4LNd8: case ARM::VST4LNd16: case ARM::VST4LNd32: @@ -271,24 +173,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, NumRegs = 4; return true; - case ARM::VST4q8_UPD: - case ARM::VST4q16_UPD: - case ARM::VST4q32_UPD: - FirstOpnd = 4; - NumRegs = 4; - Offset = 0; - Stride = 2; - return true; - - case ARM::VST4q8odd_UPD: - case ARM::VST4q16odd_UPD: - case ARM::VST4q32odd_UPD: - FirstOpnd = 4; - NumRegs = 4; - Offset = 1; - Stride = 2; - return true; - case ARM::VST4LNq16: case ARM::VST4LNq32: FirstOpnd = 2; @@ -468,7 +352,34 @@ bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) { continue; if (FormsRegSequence(MI, FirstOpnd, NumRegs, Offset, Stride)) continue; - llvm_unreachable("expected a REG_SEQUENCE"); + + MachineBasicBlock::iterator NextI = llvm::next(MBBI); + for (unsigned R = 0; R < NumRegs; ++R) { + MachineOperand &MO = MI->getOperand(FirstOpnd + R); + assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand"); + unsigned VirtReg = MO.getReg(); + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "expected a virtual register"); + + // For now, just assign a fixed set of adjacent registers. + // This leaves plenty of room for future improvements. + static const unsigned NEONDRegs[] = { + ARM::D0, ARM::D1, ARM::D2, ARM::D3, + ARM::D4, ARM::D5, ARM::D6, ARM::D7 + }; + MO.setReg(NEONDRegs[Offset + R * Stride]); + + if (MO.isUse()) { + // Insert a copy from VirtReg. + BuildMI(MBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),MO.getReg()) + .addReg(VirtReg, getKillRegState(MO.isKill())); + MO.setIsKill(); + } else if (MO.isDef() && !MO.isDead()) { + // Add a copy to VirtReg. 
+ BuildMI(MBB, NextI, DebugLoc(), TII->get(TargetOpcode::COPY), VirtReg) + .addReg(MO.getReg()); + } + } } return Modified; diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt index 0cb8ff01181d..9fc3fb92cb2c 100644 --- a/lib/Target/ARM/README.txt +++ b/lib/Target/ARM/README.txt @@ -611,27 +611,6 @@ constant which was already loaded). Not sure what's necessary to do that. //===---------------------------------------------------------------------===// -Given the following on ARMv7: -int test1(int A, int B) { - return (A&-8388481)|(B&8388480); -} - -We currently generate: - bfc r0, #7, #16 - movw r2, #:lower16:8388480 - movt r2, #:upper16:8388480 - and r1, r1, r2 - orr r0, r1, r0 - bx lr - -The following is much shorter: - lsr r1, r1, #7 - bfi r0, r1, #7, #16 - bx lr - - -//===---------------------------------------------------------------------===// - The code generated for bswap on armv4/5 (CPUs without rev) is less than ideal: int a(int x) { return __builtin_bswap32(x); } @@ -657,3 +636,24 @@ A custom Thumb version would also be a slight improvement over the generic version. //===---------------------------------------------------------------------===// + +Consider the following simple C code: + +void foo(unsigned char *a, unsigned char *b, int *c) { + if ((*a | *b) == 0) *c = 0; +} + +currently llvm-gcc generates something like this (nice branchless code I'd say): + + ldrb r0, [r0] + ldrb r1, [r1] + orr r0, r1, r0 + tst r0, #255 + moveq r0, #0 + streq r0, [r2] + bx lr + +Note that both "tst" and "moveq" are redundant. + +//===---------------------------------------------------------------------===// + diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index 39b70b43b23f..a21a3da10bda 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -68,7 +68,7 @@ void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, .addConstantPoolIndex(Idx).addImm(Pred).addReg(PredReg); } -bool Thumb1RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const { +bool Thumb1RegisterInfo::hasReservedCallFrame(const MachineFunction &MF) const { const MachineFrameInfo *FFI = MF.getFrameInfo(); unsigned CFSize = FFI->getMaxCallFrameSize(); // It's not always a good idea to include the call frame as part of the @@ -363,107 +363,19 @@ static void removeOperands(MachineInstr &MI, unsigned i) { MI.RemoveOperand(Op); } -int Thumb1RegisterInfo:: -rewriteFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, - unsigned FrameReg, int Offset, - unsigned MOVOpc, unsigned ADDriOpc, unsigned SUBriOpc) const -{ - // if/when eliminateFrameIndex() conforms with ARMBaseRegisterInfo - // version then can pull out Thumb1 specific parts here - return 0; -} - -/// saveScavengerRegister - Spill the register so it can be used by the -/// register scavenger. Return true. -bool -Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - MachineBasicBlock::iterator &UseMI, - const TargetRegisterClass *RC, - unsigned Reg) const { - // Thumb1 can't use the emergency spill slot on the stack because - // ldr/str immediate offsets must be positive, and if we're referencing - // off the frame pointer (if, for example, there are alloca() calls in - // the function, the offset will be negative. Use R12 instead since that's - // a call clobbered register that we know won't be used in Thumb1 mode. - DebugLoc DL; - BuildMI(MBB, I, DL, TII.get(ARM::tMOVtgpr2gpr)). 
- addReg(ARM::R12, RegState::Define).addReg(Reg, RegState::Kill); - - // The UseMI is where we would like to restore the register. If there's - // interference with R12 before then, however, we'll need to restore it - // before that instead and adjust the UseMI. - bool done = false; - for (MachineBasicBlock::iterator II = I; !done && II != UseMI ; ++II) { - if (II->isDebugValue()) - continue; - // If this instruction affects R12, adjust our restore point. - for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = II->getOperand(i); - if (!MO.isReg() || MO.isUndef() || !MO.getReg() || - TargetRegisterInfo::isVirtualRegister(MO.getReg())) - continue; - if (MO.getReg() == ARM::R12) { - UseMI = II; - done = true; - break; - } - } - } - // Restore the register from R12 - BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVgpr2tgpr)). - addReg(Reg, RegState::Define).addReg(ARM::R12, RegState::Kill); - - return true; -} - -unsigned -Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value, - RegScavenger *RS) const{ - unsigned VReg = 0; - unsigned i = 0; +bool Thumb1RegisterInfo:: +rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, + unsigned FrameReg, int &Offset, + const ARMBaseInstrInfo &TII) const { MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); - MachineFunction &MF = *MBB.getParent(); - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); DebugLoc dl = MI.getDebugLoc(); - - while (!MI.getOperand(i).isFI()) { - ++i; - assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); - } - - unsigned FrameReg = ARM::SP; - int FrameIndex = MI.getOperand(i).getIndex(); - int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + - MF.getFrameInfo()->getStackSize() + SPAdj; - - if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex)) - Offset -= AFI->getGPRCalleeSavedArea1Offset(); - else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex)) - Offset -= AFI->getGPRCalleeSavedArea2Offset(); - else if (MF.getFrameInfo()->hasVarSizedObjects()) { - assert(SPAdj == 0 && hasFP(MF) && "Unexpected"); - // There are alloca()'s in this function, must reference off the frame - // pointer instead. - FrameReg = getFrameRegister(MF); - Offset -= AFI->getFramePtrSpillOffset(); - } - - // Special handling of dbg_value instructions. - if (MI.isDebugValue()) { - MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/); - MI.getOperand(i+1).ChangeToImmediate(Offset); - return 0; - } - unsigned Opcode = MI.getOpcode(); const TargetInstrDesc &Desc = MI.getDesc(); unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); if (Opcode == ARM::tADDrSPi) { - Offset += MI.getOperand(i+1).getImm(); + Offset += MI.getOperand(FrameRegIdx+1).getImm(); // Can't use tADDrSPi if it's based off the frame pointer. unsigned NumBits = 0; @@ -483,12 +395,13 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (Offset == 0 && getInstrPredicate(&MI, PredReg) == ARMCC::AL) { // Turn it into a move. MI.setDesc(TII.get(ARM::tMOVgpr2tgpr)); - MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); // Remove offset and remaining explicit predicate operands. 
- do MI.RemoveOperand(i+1); - while (MI.getNumOperands() > i+1 && - (!MI.getOperand(i+1).isReg() || !MI.getOperand(i+1).isImm())); - return 0; + do MI.RemoveOperand(FrameRegIdx+1); + while (MI.getNumOperands() > FrameRegIdx+1 && + (!MI.getOperand(FrameRegIdx+1).isReg() || + !MI.getOperand(FrameRegIdx+1).isImm())); + return true; } // Common case: small offset, fits into instruction. @@ -496,15 +409,15 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (((Offset / Scale) & ~Mask) == 0) { // Replace the FrameIndex with sp / fp if (Opcode == ARM::tADDi3) { - removeOperands(MI, i); + removeOperands(MI, FrameRegIdx); MachineInstrBuilder MIB(&MI); AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg) .addImm(Offset / Scale)); } else { - MI.getOperand(i).ChangeToRegister(FrameReg, false); - MI.getOperand(i+1).ChangeToImmediate(Offset / Scale); + MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); + MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset / Scale); } - return 0; + return true; } unsigned DestReg = MI.getOperand(0).getReg(); @@ -516,7 +429,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, emitThumbRegPlusImmediate(MBB, II, DestReg, FrameReg, Offset, TII, *this, dl); MBB.erase(II); - return 0; + return true; } if (Offset > 0) { @@ -524,12 +437,12 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // r0 = add sp, 255*4 // r0 = add r0, (imm - 255*4) if (Opcode == ARM::tADDi3) { - removeOperands(MI, i); + removeOperands(MI, FrameRegIdx); MachineInstrBuilder MIB(&MI); AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg).addImm(Mask)); } else { - MI.getOperand(i).ChangeToRegister(FrameReg, false); - MI.getOperand(i+1).ChangeToImmediate(Mask); + MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); + MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Mask); } Offset = (Offset - Mask * Scale); MachineBasicBlock::iterator NII = llvm::next(II); @@ -542,14 +455,14 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl); MI.setDesc(TII.get(ARM::tADDhirr)); - MI.getOperand(i).ChangeToRegister(DestReg, false, false, true); - MI.getOperand(i+1).ChangeToRegister(FrameReg, false); + MI.getOperand(FrameRegIdx).ChangeToRegister(DestReg, false, false, true); + MI.getOperand(FrameRegIdx+1).ChangeToRegister(FrameReg, false); if (Opcode == ARM::tADDi3) { MachineInstrBuilder MIB(&MI); AddDefaultPred(MIB); } } - return 0; + return true; } else { unsigned ImmIdx = 0; int InstrOffs = 0; @@ -557,7 +470,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned Scale = 1; switch (AddrMode) { case ARMII::AddrModeT1_s: { - ImmIdx = i+1; + ImmIdx = FrameRegIdx+1; InstrOffs = MI.getOperand(ImmIdx).getImm(); NumBits = (FrameReg == ARM::SP) ? 
8 : 5; Scale = 4; @@ -577,9 +490,9 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned Mask = (1 << NumBits) - 1; if ((unsigned)Offset <= Mask * Scale) { // Replace the FrameIndex with sp - MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); ImmOp.ChangeToImmediate(ImmedOffset); - return 0; + return true; } bool isThumSpillRestore = Opcode == ARM::tRestore || Opcode == ARM::tSpill; @@ -600,12 +513,126 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Offset &= ~(Mask*Scale); } } + return Offset == 0; +} + +void +Thumb1RegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I, + unsigned BaseReg, int64_t Offset) const { + MachineInstr &MI = *I; + int Off = Offset; // ARM doesn't need the general 64-bit offsets + unsigned i = 0; + + while (!MI.getOperand(i).isFI()) { + ++i; + assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); + } + bool Done = false; + Done = rewriteFrameIndex(MI, i, BaseReg, Off, TII); + assert (Done && "Unable to resolve frame index!"); +} + +/// saveScavengerRegister - Spill the register so it can be used by the +/// register scavenger. Return true. +bool +Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &UseMI, + const TargetRegisterClass *RC, + unsigned Reg) const { + // Thumb1 can't use the emergency spill slot on the stack because + // ldr/str immediate offsets must be positive, and if we're referencing + // off the frame pointer (if, for example, there are alloca() calls in + // the function, the offset will be negative. Use R12 instead since that's + // a call clobbered register that we know won't be used in Thumb1 mode. + DebugLoc DL; + BuildMI(MBB, I, DL, TII.get(ARM::tMOVtgpr2gpr)). + addReg(ARM::R12, RegState::Define).addReg(Reg, RegState::Kill); + + // The UseMI is where we would like to restore the register. If there's + // interference with R12 before then, however, we'll need to restore it + // before that instead and adjust the UseMI. + bool done = false; + for (MachineBasicBlock::iterator II = I; !done && II != UseMI ; ++II) { + if (II->isDebugValue()) + continue; + // If this instruction affects R12, adjust our restore point. + for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = II->getOperand(i); + if (!MO.isReg() || MO.isUndef() || !MO.getReg() || + TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; + if (MO.getReg() == ARM::R12) { + UseMI = II; + done = true; + break; + } + } + } + // Restore the register from R12 + BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVgpr2tgpr)). 
+ addReg(Reg, RegState::Define).addReg(ARM::R12, RegState::Kill); + + return true; +} + +void +Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS) const { + unsigned VReg = 0; + unsigned i = 0; + MachineInstr &MI = *II; + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + DebugLoc dl = MI.getDebugLoc(); + + while (!MI.getOperand(i).isFI()) { + ++i; + assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); + } + + unsigned FrameReg = ARM::SP; + int FrameIndex = MI.getOperand(i).getIndex(); + int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + + MF.getFrameInfo()->getStackSize() + SPAdj; + + if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex)) + Offset -= AFI->getGPRCalleeSavedArea1Offset(); + else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex)) + Offset -= AFI->getGPRCalleeSavedArea2Offset(); + else if (MF.getFrameInfo()->hasVarSizedObjects()) { + assert(SPAdj == 0 && hasFP(MF) && "Unexpected"); + // There are alloca()'s in this function, must reference off the frame + // pointer or base pointer instead. + if (!hasBasePointer(MF)) { + FrameReg = getFrameRegister(MF); + Offset -= AFI->getFramePtrSpillOffset(); + } else + FrameReg = BasePtr; + } + + // Special handling of dbg_value instructions. + if (MI.isDebugValue()) { + MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/); + MI.getOperand(i+1).ChangeToImmediate(Offset); + return; + } + + // Modify MI as necessary to handle as much of 'Offset' as possible + assert(AFI->isThumbFunction() && + "This eliminateFrameIndex only supports Thumb1!"); + if (rewriteFrameIndex(MI, i, FrameReg, Offset, TII)) + return; // If we get here, the immediate doesn't fit into the instruction. We folded // as much as possible above, handle the rest, providing a register that is // SP+LargeImm. assert(Offset && "This code isn't needed if offset already handled!"); + unsigned Opcode = MI.getOpcode(); + const TargetInstrDesc &Desc = MI.getDesc(); + // Remove predicate first. int PIdx = MI.findFirstPredOperandIdx(); if (PIdx != -1) @@ -637,11 +664,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.addOperand(MachineOperand::CreateReg(0, false)); } else if (Desc.mayStore()) { VReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass); - assert (Value && "Frame index virtual allocated, but Value arg is NULL!"); bool UseRR = false; - bool TrackVReg = true; - Value->first = FrameReg; // use the frame register as a kind indicator - Value->second = Offset; if (Opcode == ARM::tSpill) { if (FrameReg == ARM::SP) @@ -650,7 +673,6 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, else { emitLoadConstPool(MBB, II, dl, VReg, 0, Offset); UseRR = true; - TrackVReg = false; } } else emitThumbRegPlusImmediate(MBB, II, VReg, FrameReg, Offset, TII, @@ -661,8 +683,6 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.addOperand(MachineOperand::CreateReg(FrameReg, false)); else // tSTR has an extra register operand. 
MI.addOperand(MachineOperand::CreateReg(0, false)); - if (!ReuseFrameIndexVals || !TrackVReg) - VReg = 0; } else assert(false && "Unexpected opcode!"); @@ -671,7 +691,6 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineInstrBuilder MIB(&MI); AddDefaultPred(MIB); } - return VReg; } void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const { @@ -742,11 +761,11 @@ void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const { dl = MBBI->getDebugLoc(); } - // Darwin ABI requires FP to point to the stack slot that contains the - // previous FP. - if (STI.isTargetDarwin() || hasFP(MF)) { + // Adjust FP so it point to the stack slot that contains the previous FP. + if (hasFP(MF)) { BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) .addFrameIndex(FramePtrSpillFI).addImm(0); + AFI->setShouldRestoreSPFromFP(true); } // Determine starting offsets of spill areas. @@ -764,14 +783,20 @@ void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const { emitSPUpdate(MBB, MBBI, TII, dl, *this, -NumBytes); } - if (STI.isTargetELF() && hasFP(MF)) { + if (STI.isTargetELF() && hasFP(MF)) MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - AFI->getFramePtrSpillOffset()); - } AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); AFI->setDPRCalleeSavedAreaSize(DPRCSSize); + + // If we need a base pointer, set it up here. It's whatever the value + // of the stack pointer is at this point. Any variable size objects + // will be allocated after this, so we can still use the base pointer + // to reference locals. + if (hasBasePointer(MF)) + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), BasePtr).addReg(ARM::SP); } static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { @@ -828,7 +853,7 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF, AFI->getGPRCalleeSavedArea2Size() + AFI->getDPRCalleeSavedAreaSize()); - if (hasFP(MF)) { + if (AFI->shouldRestoreSPFromFP()) { NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; // Reset SP based on frame pointer only if the stack frame extends beyond // frame pointer stack slot or target is ELF and the function has FP. diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h index 9a0308afa20c..c578054a5d71 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.h +++ b/lib/Target/ARM/Thumb1RegisterInfo.h @@ -38,27 +38,27 @@ public: unsigned PredReg = 0) const; /// Code Generation virtual methods... - bool hasReservedCallFrame(MachineFunction &MF) const; + bool hasReservedCallFrame(const MachineFunction &MF) const; void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - // rewrite MI to access 'Offset' bytes from the FP. Return the offset that - // could not be handled directly in MI. - int rewriteFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, - unsigned FrameReg, int Offset, - unsigned MOVOpc, unsigned ADDriOpc, - unsigned SUBriOpc) const; - + // rewrite MI to access 'Offset' bytes from the FP. Update Offset to be + // however much remains to be handled. Return 'true' if no further + // work is required. 
+ bool rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, + unsigned FrameReg, int &Offset, + const ARMBaseInstrInfo &TII) const; + void resolveFrameIndex(MachineBasicBlock::iterator I, + unsigned BaseReg, int64_t Offset) const; bool saveScavengerRegister(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock::iterator &UseMI, const TargetRegisterClass *RC, unsigned Reg) const; - unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value = NULL, - RegScavenger *RS = NULL) const; + void eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS = NULL) const; void emitPrologue(MachineFunction &MF) const; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index cd15bbed9f23..45e693744b80 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -27,7 +27,7 @@ namespace { public: static char ID; - Thumb2ITBlockPass() : MachineFunctionPass(&ID) {} + Thumb2ITBlockPass() : MachineFunctionPass(ID) {} const Thumb2InstrInfo *TII; const TargetRegisterInfo *TRI; @@ -91,35 +91,53 @@ static void TrackDefUses(MachineInstr *MI, } } +static bool isCopy(MachineInstr *MI) { + switch (MI->getOpcode()) { + default: + return false; + case ARM::MOVr: + case ARM::MOVr_TC: + case ARM::tMOVr: + case ARM::tMOVgpr2tgpr: + case ARM::tMOVtgpr2gpr: + case ARM::tMOVgpr2gpr: + case ARM::t2MOVr: + return true; + } +} + bool Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI, ARMCC::CondCodes CC, ARMCC::CondCodes OCC, SmallSet<unsigned, 4> &Defs, SmallSet<unsigned, 4> &Uses) { - unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - if (TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) { - assert(SrcSubIdx == 0 && DstSubIdx == 0 && - "Sub-register indices still around?"); - // llvm models select's as two-address instructions. That means a copy - // is inserted before a t2MOVccr, etc. If the copy is scheduled in - // between selects we would end up creating multiple IT blocks. - - // First check if it's safe to move it. - if (Uses.count(DstReg) || Defs.count(SrcReg)) - return false; - - // Then peek at the next instruction to see if it's predicated on CC or OCC. - // If not, then there is nothing to be gained by moving the copy. - MachineBasicBlock::iterator I = MI; ++I; - MachineBasicBlock::iterator E = MI->getParent()->end(); - while (I != E && I->isDebugValue()) - ++I; - if (I != E) { - unsigned NPredReg = 0; - ARMCC::CondCodes NCC = llvm::getITInstrPredicate(I, NPredReg); - if (NCC == CC || NCC == OCC) - return true; - } + if (!isCopy(MI)) + return false; + // llvm models select's as two-address instructions. That means a copy + // is inserted before a t2MOVccr, etc. If the copy is scheduled in + // between selects we would end up creating multiple IT blocks. + assert(MI->getOperand(0).getSubReg() == 0 && + MI->getOperand(1).getSubReg() == 0 && + "Sub-register indices still around?"); + + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned SrcReg = MI->getOperand(1).getReg(); + + // First check if it's safe to move it. + if (Uses.count(DstReg) || Defs.count(SrcReg)) + return false; + + // Then peek at the next instruction to see if it's predicated on CC or OCC. + // If not, then there is nothing to be gained by moving the copy. 
+ MachineBasicBlock::iterator I = MI; ++I; + MachineBasicBlock::iterator E = MI->getParent()->end(); + while (I != E && I->isDebugValue()) + ++I; + if (I != E) { + unsigned NPredReg = 0; + ARMCC::CondCodes NCC = llvm::getITInstrPredicate(I, NPredReg); + if (NCC == CC || NCC == OCC) + return true; } return false; } diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index ee517279c9d7..442f41da8a2d 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -147,8 +147,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { - if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass || - RC == ARM::tcGPRRegisterClass) { + if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass || + RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass) { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); @@ -173,8 +173,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { - if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass || - RC == ARM::tcGPRRegisterClass) { + if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass || + RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass) { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index ba392f36d946..0c3962dd123d 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -173,7 +173,7 @@ namespace { char Thumb2SizeReduce::ID = 0; } -Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(&ID) { +Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) { for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) { unsigned FromOpc = ReduceTable[i].WideOpc; if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second) @@ -315,6 +315,18 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm()); if (!isARMLowRegister(BaseReg) || Mode != ARM_AM::ia) return false; + // For the non-writeback version (this one), the base register must be + // one of the registers being loaded. 
+ bool isOK = false; + for (unsigned i = 4; i < MI->getNumOperands(); ++i) { + if (MI->getOperand(i).getReg() == BaseReg) { + isOK = true; + break; + } + } + if (!isOK) + return false; + OpNum = 0; isLdStMul = true; break; diff --git a/lib/Target/Alpha/AlphaBranchSelector.cpp b/lib/Target/Alpha/AlphaBranchSelector.cpp index 001656e0121a..376811709536 100644 --- a/lib/Target/Alpha/AlphaBranchSelector.cpp +++ b/lib/Target/Alpha/AlphaBranchSelector.cpp @@ -22,7 +22,7 @@ using namespace llvm; namespace { struct AlphaBSel : public MachineFunctionPass { static char ID; - AlphaBSel() : MachineFunctionPass(&ID) {} + AlphaBSel() : MachineFunctionPass(ID) {} virtual bool runOnMachineFunction(MachineFunction &Fn); diff --git a/lib/Target/Alpha/AlphaCodeEmitter.cpp b/lib/Target/Alpha/AlphaCodeEmitter.cpp index a6c6f52704f6..3aec07035d74 100644 --- a/lib/Target/Alpha/AlphaCodeEmitter.cpp +++ b/lib/Target/Alpha/AlphaCodeEmitter.cpp @@ -34,7 +34,7 @@ namespace { public: static char ID; - AlphaCodeEmitter(JITCodeEmitter &mce) : MachineFunctionPass(&ID), + AlphaCodeEmitter(JITCodeEmitter &mce) : MachineFunctionPass(ID), MCE(mce) {} /// getBinaryCodeForInstr - This function, generated by the diff --git a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp index d526dc0827b2..d197bd15ef9c 100644 --- a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp +++ b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp @@ -113,8 +113,8 @@ namespace { static uint64_t getNearPower2(uint64_t x) { if (!x) return 0; unsigned at = CountLeadingZeros_64(x); - uint64_t complow = 1 << (63 - at); - uint64_t comphigh = 1 << (64 - at); + uint64_t complow = 1ULL << (63 - at); + uint64_t comphigh = 1ULL << (64 - at); //cerr << x << ":" << complow << ":" << comphigh << "\n"; if (abs64(complow - x) <= abs64(comphigh - x)) return complow; diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp index ad625a269417..5a2f5610fdb4 100644 --- a/lib/Target/Alpha/AlphaInstrInfo.cpp +++ b/lib/Target/Alpha/AlphaInstrInfo.cpp @@ -27,32 +27,6 @@ AlphaInstrInfo::AlphaInstrInfo() RI(*this) { } -bool AlphaInstrInfo::isMoveInstr(const MachineInstr& MI, - unsigned& sourceReg, unsigned& destReg, - unsigned& SrcSR, unsigned& DstSR) const { - unsigned oc = MI.getOpcode(); - if (oc == Alpha::BISr || - oc == Alpha::CPYSS || - oc == Alpha::CPYST || - oc == Alpha::CPYSSt || - oc == Alpha::CPYSTs) { - // or r1, r2, r2 - // cpys(s|t) r1 r2 r2 - assert(MI.getNumOperands() >= 3 && - MI.getOperand(0).isReg() && - MI.getOperand(1).isReg() && - MI.getOperand(2).isReg() && - "invalid Alpha BIS instruction!"); - if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) { - sourceReg = MI.getOperand(1).getReg(); - destReg = MI.getOperand(0).getReg(); - SrcSR = DstSR = 0; - return true; - } - } - return false; -} - unsigned AlphaInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { diff --git a/lib/Target/Alpha/AlphaInstrInfo.h b/lib/Target/Alpha/AlphaInstrInfo.h index e20e8323b64e..ee6077a4a01a 100644 --- a/lib/Target/Alpha/AlphaInstrInfo.h +++ b/lib/Target/Alpha/AlphaInstrInfo.h @@ -30,12 +30,6 @@ public: /// virtual const AlphaRegisterInfo &getRegisterInfo() const { return RI; } - /// Return true if the instruction is a register to register move and return - /// the source and dest operands and their sub-register indices by reference. 
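Editorial sketch (not part of the patch) on the AlphaISelDAGToDAG.cpp hunk above: replacing 1 << (63 - at) with 1ULL << (63 - at) matters because the shift count exceeds the width of a plain int literal, so the old expression was undefined and lost the high bits. A minimal self-contained illustration of the corrected computation, with an illustrative leading-zero count:

    #include <cassert>
    #include <stdint.h>

    int main() {
      unsigned at = 0;                        // leading-zero count of the input
      // With a 32-bit 'int' literal the shift by 63 is out of range; the
      // 64-bit 1ULL literal keeps the whole computation in uint64_t.
      uint64_t complow = 1ULL << (63 - at);   // lower bounding power of two
      assert(complow == 0x8000000000000000ULL);
      return 0;
    }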
- virtual bool isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const; - virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; virtual unsigned isStoreToStackSlot(const MachineInstr *MI, diff --git a/lib/Target/Alpha/AlphaLLRP.cpp b/lib/Target/Alpha/AlphaLLRP.cpp index 34be470f03e3..85fbfd1affe2 100644 --- a/lib/Target/Alpha/AlphaLLRP.cpp +++ b/lib/Target/Alpha/AlphaLLRP.cpp @@ -39,7 +39,7 @@ namespace { static char ID; AlphaLLRPPass(AlphaTargetMachine &tm) - : MachineFunctionPass(&ID), TM(tm) { } + : MachineFunctionPass(ID), TM(tm) { } virtual const char *getPassName() const { return "Alpha NOP inserter"; diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp index dc9d935ec047..327ddb4d9a72 100644 --- a/lib/Target/Alpha/AlphaRegisterInfo.cpp +++ b/lib/Target/Alpha/AlphaRegisterInfo.cpp @@ -137,10 +137,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, //variable locals //<- SP -unsigned +void AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value, - RegScavenger *RS) const { + int SPAdj, RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); unsigned i = 0; @@ -185,7 +184,6 @@ AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } else { MI.getOperand(i).ChangeToImmediate(Offset); } - return 0; } diff --git a/lib/Target/Alpha/AlphaRegisterInfo.h b/lib/Target/Alpha/AlphaRegisterInfo.h index f9fd87a63737..b164979a6311 100644 --- a/lib/Target/Alpha/AlphaRegisterInfo.h +++ b/lib/Target/Alpha/AlphaRegisterInfo.h @@ -38,9 +38,8 @@ struct AlphaRegisterInfo : public AlphaGenRegisterInfo { MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value = NULL, - RegScavenger *RS = NULL) const; + void eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS = NULL) const; //void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; diff --git a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp index 9f4aff6fd5f5..5428cb96173b 100644 --- a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp +++ b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp @@ -53,8 +53,6 @@ namespace { void printOp(const MachineOperand &MO, raw_ostream &O); void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O); - void printBaseOffsetPair(const MachineInstr *MI, int i, raw_ostream &O, - bool brackets=true); virtual void EmitFunctionBodyStart(); virtual void EmitFunctionBodyEnd(); void EmitStartOfAsmFile(Module &M); diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.cpp b/lib/Target/Blackfin/BlackfinInstrInfo.cpp index a74d42d59549..e50d57a31b6e 100644 --- a/lib/Target/Blackfin/BlackfinInstrInfo.cpp +++ b/lib/Target/Blackfin/BlackfinInstrInfo.cpp @@ -28,34 +28,6 @@ BlackfinInstrInfo::BlackfinInstrInfo(BlackfinSubtarget &ST) RI(ST, *this), Subtarget(ST) {} -/// Return true if the instruction is a register to register move and -/// leave the source and dest operands in the passed parameters. -bool BlackfinInstrInfo::isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, - unsigned &DstReg, - unsigned &SrcSR, - unsigned &DstSR) const { - SrcSR = DstSR = 0; // No sub-registers. 
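A recurring mechanical change in this import (Thumb2ITBlockPass, Thumb2SizeReduce, the Alpha passes above, and several passes further down) is the constructor call switching from MachineFunctionPass(&ID) to MachineFunctionPass(ID); as I read the hunks, the base-class constructor now takes the pass ID by reference instead of by address. A self-contained analogue of the idiom, with hypothetical class names standing in for the LLVM ones:

    #include <cassert>

    // Stand-in for the LLVM pass base class; only the constructor shape matters.
    struct PassBase {
      const void *PassID;
      explicit PassBase(char &pid) : PassID(&pid) {}  // new style: takes char&
    };

    struct MyPass : PassBase {
      static char ID;               // the ID's unique address identifies the pass
      MyPass() : PassBase(ID) {}    // previously written as PassBase(&ID)
    };
    char MyPass::ID = 0;

    int main() {
      MyPass P;
      assert(P.PassID == &MyPass::ID);
      return 0;
    }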
- switch (MI.getOpcode()) { - case BF::MOVE: - case BF::MOVE_ncccc: - case BF::MOVE_ccncc: - case BF::MOVECC_zext: - case BF::MOVECC_nz: - DstReg = MI.getOperand(0).getReg(); - SrcReg = MI.getOperand(1).getReg(); - return true; - case BF::SLL16i: - if (MI.getOperand(2).getImm()!=0) - return false; - DstReg = MI.getOperand(0).getReg(); - SrcReg = MI.getOperand(1).getReg(); - return true; - default: - return false; - } -} - /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of /// the destination along with the FrameIndex of the loaded stack slot. If diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.h b/lib/Target/Blackfin/BlackfinInstrInfo.h index 6c3591707269..fdc1029da588 100644 --- a/lib/Target/Blackfin/BlackfinInstrInfo.h +++ b/lib/Target/Blackfin/BlackfinInstrInfo.h @@ -30,10 +30,6 @@ namespace llvm { /// always be able to get register info as well (through this method). virtual const BlackfinRegisterInfo &getRegisterInfo() const { return RI; } - virtual bool isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const; - virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp index 06e95de1587f..a51831263e90 100644 --- a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp +++ b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp @@ -190,10 +190,9 @@ static unsigned findScratchRegister(MachineBasicBlock::iterator II, return Reg; } -unsigned +void BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value, - RegScavenger *RS) const { + int SPAdj, RegScavenger *RS) const { MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); @@ -230,20 +229,20 @@ BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.setDesc(TII.get(isStore ? BF::STORE32p_uimm6m4 : BF::LOAD32p_uimm6m4)); - return 0; + return; } if (BaseReg == BF::FP && isUInt<7>(-Offset)) { MI.setDesc(TII.get(isStore ? BF::STORE32fp_nimm7m4 : BF::LOAD32fp_nimm7m4)); MI.getOperand(FIPos+1).setImm(-Offset); - return 0; + return; } if (isInt<18>(Offset)) { MI.setDesc(TII.get(isStore ? BF::STORE32p_imm18m4 : BF::LOAD32p_imm18m4)); - return 0; + return; } // Use RegScavenger to calculate proper offset... MI.dump(); @@ -328,7 +327,6 @@ BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, llvm_unreachable("Cannot eliminate frame index"); break; } - return 0; } void BlackfinRegisterInfo:: @@ -344,10 +342,6 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } } -void BlackfinRegisterInfo:: -processFunctionBeforeFrameFinalized(MachineFunction &MF) const { -} - // Emit a prologue that sets up a stack frame. // On function entry, R0-R2 and P0 may hold arguments. 
// R3, P1, and P2 may be used as scratch registers diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.h b/lib/Target/Blackfin/BlackfinRegisterInfo.h index ead0b4a73c83..bb83c34f8003 100644 --- a/lib/Target/Blackfin/BlackfinRegisterInfo.h +++ b/lib/Target/Blackfin/BlackfinRegisterInfo.h @@ -51,15 +51,12 @@ namespace llvm { MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value = NULL, - RegScavenger *RS = NULL) const; + void eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS = NULL) const; void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const; - void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; - void emitPrologue(MachineFunction &MF) const; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp index e8d8474b5be8..270fff6064ad 100644 --- a/lib/Target/CBackend/CBackend.cpp +++ b/lib/Target/CBackend/CBackend.cpp @@ -73,7 +73,7 @@ namespace { public: static char ID; CBackendNameAllUsedStructsAndMergeFunctions() - : ModulePass(&ID) {} + : ModulePass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<FindUsedTypes>(); } @@ -110,7 +110,7 @@ namespace { public: static char ID; explicit CWriter(formatted_raw_ostream &o) - : FunctionPass(&ID), Out(o), IL(0), Mang(0), LI(0), + : FunctionPass(ID), Out(o), IL(0), Mang(0), LI(0), TheModule(0), TAsm(0), TCtx(0), TD(0), OpaqueCounter(0), NextAnonValueNumber(0) { FPCounter = 0; @@ -199,7 +199,6 @@ namespace { void lowerIntrinsics(Function &F); - void printModule(Module *M); void printModuleTypes(const TypeSymbolTable &ST); void printContainedStructs(const Type *Ty, std::set<const Type *> &); void printFloatingPointConstants(Function &F); @@ -1300,6 +1299,13 @@ void CWriter::printConstantWithCast(Constant* CPV, unsigned Opcode) { } std::string CWriter::GetValueName(const Value *Operand) { + + // Resolve potential alias. + if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(Operand)) { + if (const Value *V = GA->resolveAliasedGlobal(false)) + Operand = V; + } + // Mangle globals with the standard mangler interface for LLC compatibility. 
if (const GlobalValue *GV = dyn_cast<GlobalValue>(Operand)) { SmallString<128> Str; diff --git a/lib/Target/CellSPU/SPUCallingConv.td b/lib/Target/CellSPU/SPUCallingConv.td index ec2f663908f6..04fa2ae866d6 100644 --- a/lib/Target/CellSPU/SPUCallingConv.td +++ b/lib/Target/CellSPU/SPUCallingConv.td @@ -1,4 +1,4 @@ -//===- SPUCallingConv.td - Calling Conventions for CellSPU ------*- C++ -*-===// +//===- SPUCallingConv.td - Calling Conventions for CellSPU -*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -19,16 +19,17 @@ class CCIfSubtarget<string F, CCAction A> // Return Value Calling Convention //===----------------------------------------------------------------------===// -// Return-value convention for Cell SPU: Everything can be passed back via $3: +// Return-value convention for Cell SPU: return value to be passed in reg 3-74 def RetCC_SPU : CallingConv<[ - CCIfType<[i8], CCAssignToReg<[R3]>>, - CCIfType<[i16], CCAssignToReg<[R3]>>, - CCIfType<[i32], CCAssignToReg<[R3]>>, - CCIfType<[i64], CCAssignToReg<[R3]>>, - CCIfType<[i128], CCAssignToReg<[R3]>>, - CCIfType<[f32, f64], CCAssignToReg<[R3]>>, - CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToReg<[R3]>>, - CCIfType<[v2i32], CCAssignToReg<[R3]>> + CCIfType<[i8,i16,i32,i64,i128,f32,f64,v16i8,v8i16,v4i32,v2i64,v4f32,v2f64], + CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11, + R12, R13, R14, R15, R16, R17, R18, R19, R20, + R21, R22, R23, R24, R25, R26, R27, R28, R29, + R30, R31, R32, R33, R34, R35, R36, R37, R38, + R39, R40, R41, R42, R43, R44, R45, R46, R47, + R48, R49, R50, R51, R52, R53, R54, R55, R56, + R57, R58, R59, R60, R61, R62, R63, R64, R65, + R66, R67, R68, R69, R70, R71, R72, R73, R74]>> ]>; @@ -45,8 +46,7 @@ def CCC_SPU : CallingConv<[ R39, R40, R41, R42, R43, R44, R45, R46, R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, R62, R63, R64, R65, - R66, R67, R68, R69, R70, R71, R72, R73, R74, - R75, R76, R77, R78, R79]>>, + R66, R67, R68, R69, R70, R71, R72, R73, R74]>>, // Integer/FP values get stored in stack slots that are 8 bytes in size and // 8-byte aligned if there are no more registers to hold them. CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>, diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index 9b8c2ddd0635..2f1598441f5a 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -41,13 +41,6 @@ using namespace llvm; namespace { //! ConstantSDNode predicate for i32 sign-extended, 10-bit immediates bool - isI64IntS10Immediate(ConstantSDNode *CN) - { - return isInt<10>(CN->getSExtValue()); - } - - //! ConstantSDNode predicate for i32 sign-extended, 10-bit immediates - bool isI32IntS10Immediate(ConstantSDNode *CN) { return isInt<10>(CN->getSExtValue()); @@ -67,14 +60,6 @@ namespace { return isInt<10>(CN->getSExtValue()); } - //! SDNode predicate for i16 sign-extended, 10-bit immediate values - bool - isI16IntS10Immediate(SDNode *N) - { - ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N); - return (CN != 0 && isI16IntS10Immediate(CN)); - } - //! ConstantSDNode predicate for i16 unsigned 10-bit immediate values bool isI16IntU10Immediate(ConstantSDNode *CN) @@ -82,14 +67,6 @@ namespace { return isUInt<10>((short) CN->getZExtValue()); } - //! SDNode predicate for i16 sign-extended, 10-bit immediate values - bool - isI16IntU10Immediate(SDNode *N) - { - return (N->getOpcode() == ISD::Constant - && isI16IntU10Immediate(cast<ConstantSDNode>(N))); - } - //! 
ConstantSDNode predicate for signed 16-bit values /*! \arg CN The constant SelectionDAG node holding the value @@ -119,14 +96,6 @@ namespace { return false; } - //! SDNode predicate for signed 16-bit values. - bool - isIntS16Immediate(SDNode *N, short &Imm) - { - return (N->getOpcode() == ISD::Constant - && isIntS16Immediate(cast<ConstantSDNode>(N), Imm)); - } - //! ConstantFPSDNode predicate for representing floats as 16-bit sign ext. static bool isFPS16Immediate(ConstantFPSDNode *FPN, short &Imm) @@ -142,16 +111,6 @@ namespace { return false; } - bool - isHighLow(const SDValue &Op) - { - return (Op.getOpcode() == SPUISD::IndirectAddr - && ((Op.getOperand(0).getOpcode() == SPUISD::Hi - && Op.getOperand(1).getOpcode() == SPUISD::Lo) - || (Op.getOperand(0).getOpcode() == SPUISD::Lo - && Op.getOperand(1).getOpcode() == SPUISD::Hi))); - } - //===------------------------------------------------------------------===// //! EVT to "useful stuff" mapping structure: @@ -607,7 +566,8 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base, return true; } else if (Opc == ISD::Register ||Opc == ISD::CopyFromReg - ||Opc == ISD::UNDEF) { + ||Opc == ISD::UNDEF + ||Opc == ISD::Constant) { unsigned OpOpc = Op->getOpcode(); if (OpOpc == ISD::STORE || OpOpc == ISD::LOAD) { diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index ece19b9b89f6..46f31899be0c 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -426,9 +426,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass); addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass); - // "Odd size" vector classes that we're willing to support: - addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass); - for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { MVT::SimpleValueType VT = (MVT::SimpleValueType)i; @@ -751,7 +748,6 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { if (alignment == 16) { ConstantSDNode *CN; - // Special cases for a known aligned load to simplify the base pointer // and insertion byte: if (basePtr.getOpcode() == ISD::ADD @@ -775,6 +771,9 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, basePtr, DAG.getConstant(0, PtrVT)); + basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, + basePtr, + DAG.getConstant(0, PtrVT)); } } else { // Unaligned load: must be more pessimistic about addressing modes: @@ -811,8 +810,8 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { DAG.getConstant(0, PtrVT)); } - // Re-emit as a v16i8 vector load - alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr, + // Load the memory to which to store. 
+ alignLoadVec = DAG.getLoad(vecVT, dl, the_chain, basePtr, SN->getSrcValue(), SN->getSrcValueOffset(), SN->isVolatile(), SN->isNonTemporal(), 16); @@ -843,10 +842,10 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { } #endif - SDValue insertEltOp = - DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs); - SDValue vectorizeOp = - DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue); + SDValue insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, + insertEltOffs); + SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, + theValue); result = DAG.getNode(SPUISD::SHUFB, dl, vecVT, vectorizeOp, alignLoadVec, @@ -1325,41 +1324,23 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (Ins.empty()) return Chain; + // Now handle the return value(s) + SmallVector<CCValAssign, 16> RVLocs; + CCState CCRetInfo(CallConv, isVarArg, getTargetMachine(), + RVLocs, *DAG.getContext()); + CCRetInfo.AnalyzeCallResult(Ins, CCC_SPU); + + // If the call has results, copy the values out of the ret val registers. - switch (Ins[0].VT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unexpected ret value!"); - case MVT::Other: break; - case MVT::i32: - if (Ins.size() > 1 && Ins[1].VT == MVT::i32) { - Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4, - MVT::i32, InFlag).getValue(1); - InVals.push_back(Chain.getValue(0)); - Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32, - Chain.getValue(2)).getValue(1); - InVals.push_back(Chain.getValue(0)); - } else { - Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32, - InFlag).getValue(1); - InVals.push_back(Chain.getValue(0)); - } - break; - case MVT::i8: - case MVT::i16: - case MVT::i64: - case MVT::i128: - case MVT::f32: - case MVT::f64: - case MVT::v2f64: - case MVT::v2i64: - case MVT::v4f32: - case MVT::v4i32: - case MVT::v8i16: - case MVT::v16i8: - Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT, - InFlag).getValue(1); - InVals.push_back(Chain.getValue(0)); - break; - } + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign VA = RVLocs[i]; + + SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), + InFlag); + Chain = Val.getValue(1); + InFlag = Val.getValue(2); + InVals.push_back(Val); + } return Chain; } @@ -1621,10 +1602,6 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType()); return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T); } - case MVT::v2i32: { - SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType()); - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T); - } case MVT::v2i64: { return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl); } @@ -1748,11 +1725,12 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // If we have a single element being moved from V1 to V2, this can be handled // using the C*[DX] compute mask instructions, but the vector elements have - // to be monotonically increasing with one exception element. + // to be monotonically increasing with one exception element, and the source + // slot of the element to move must be the same as the destination. 
EVT VecVT = V1.getValueType(); EVT EltVT = VecVT.getVectorElementType(); unsigned EltsFromV2 = 0; - unsigned V2Elt = 0; + unsigned V2EltOffset = 0; unsigned V2EltIdx0 = 0; unsigned CurrElt = 0; unsigned MaxElts = VecVT.getVectorNumElements(); @@ -1785,9 +1763,13 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { if (monotonic) { if (SrcElt >= V2EltIdx0) { - if (1 >= (++EltsFromV2)) { - V2Elt = (V2EltIdx0 - SrcElt) << 2; - } + // TODO: optimize for the monotonic case when several consecutive + // elements are taken form V2. Do we ever get such a case? + if (EltsFromV2 == 0 && CurrElt == (SrcElt - V2EltIdx0)) + V2EltOffset = (SrcElt - V2EltIdx0) * (EltVT.getSizeInBits()/8); + else + monotonic = false; + ++EltsFromV2; } else if (CurrElt != SrcElt) { monotonic = false; } @@ -1823,7 +1805,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // R1 ($sp) is used here only as it is guaranteed to have last bits zero SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, DAG.getRegister(SPU::R1, PtrVT), - DAG.getConstant(V2Elt, MVT::i32)); + DAG.getConstant(V2EltOffset, MVT::i32)); SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer); @@ -1847,7 +1829,6 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { for (unsigned j = 0; j < BytesPerElement; ++j) ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8)); } - SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, &ResultMask[0], ResultMask.size()); return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask); @@ -1997,7 +1978,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { // Variable index: Rotate the requested element into slot 0, then replicate // slot 0 across the vector EVT VecVT = N.getValueType(); - if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) { + if (!VecVT.isSimple() || !VecVT.isVector()) { report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit" "vector type!"); } @@ -2072,21 +2053,25 @@ static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { SDValue IdxOp = Op.getOperand(2); DebugLoc dl = Op.getDebugLoc(); EVT VT = Op.getValueType(); + EVT eltVT = ValOp.getValueType(); // use 0 when the lane to insert to is 'undef' - int64_t Idx=0; + int64_t Offset=0; if (IdxOp.getOpcode() != ISD::UNDEF) { ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp); assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!"); - Idx = (CN->getSExtValue()); + Offset = (CN->getSExtValue()) * eltVT.getSizeInBits()/8; } EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Use $sp ($1) because it's always 16-byte aligned and it's available: SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, DAG.getRegister(SPU::R1, PtrVT), - DAG.getConstant(Idx, PtrVT)); - SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer); + DAG.getConstant(Offset, PtrVT)); + // widen the mask when dealing with half vectors + EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(), + 128/ VT.getVectorElementType().getSizeInBits()); + SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer); SDValue result = DAG.getNode(SPUISD::SHUFB, dl, VT, diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp index 69aa0887bd77..26d6b4f25ef1 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.cpp +++ b/lib/Target/CellSPU/SPUInstrInfo.cpp @@ -54,148 +54,6 @@ 
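Editorial sketch for the LowerVECTOR_SHUFFLE and LowerINSERT_VECTOR_ELT hunks above (not part of the patch): the pointer fed to SPUISD::IndirectAddr is now built from a byte offset, the lane index times the element size in bytes, rather than from the raw lane index. A small arithmetic illustration with made-up values:

    #include <assert.h>
    #include <stdint.h>

    int main() {
      // e.g. inserting into lane 2 of a v4i32: 32-bit elements, 4 bytes each.
      unsigned EltSizeInBits = 32;
      int64_t Lane = 2;
      int64_t Offset = Lane * (EltSizeInBits / 8);  // what the lowering now emits
      assert(Offset == 8);                          // the old code passed the lane, 2
      return 0;
    }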
SPUInstrInfo::SPUInstrInfo(SPUTargetMachine &tm) RI(*TM.getSubtargetImpl(), *this) { /* NOP */ } -bool -SPUInstrInfo::isMoveInstr(const MachineInstr& MI, - unsigned& sourceReg, - unsigned& destReg, - unsigned& SrcSR, unsigned& DstSR) const { - SrcSR = DstSR = 0; // No sub-registers. - - switch (MI.getOpcode()) { - default: - break; - case SPU::ORIv4i32: - case SPU::ORIr32: - case SPU::ORHIv8i16: - case SPU::ORHIr16: - case SPU::ORHIi8i16: - case SPU::ORBIv16i8: - case SPU::ORBIr8: - case SPU::ORIi16i32: - case SPU::ORIi8i32: - case SPU::AHIvec: - case SPU::AHIr16: - case SPU::AIv4i32: - assert(MI.getNumOperands() == 3 && - MI.getOperand(0).isReg() && - MI.getOperand(1).isReg() && - MI.getOperand(2).isImm() && - "invalid SPU ORI/ORHI/ORBI/AHI/AI/SFI/SFHI instruction!"); - if (MI.getOperand(2).getImm() == 0) { - sourceReg = MI.getOperand(1).getReg(); - destReg = MI.getOperand(0).getReg(); - return true; - } - break; - case SPU::AIr32: - assert(MI.getNumOperands() == 3 && - "wrong number of operands to AIr32"); - if (MI.getOperand(0).isReg() && - MI.getOperand(1).isReg() && - (MI.getOperand(2).isImm() && - MI.getOperand(2).getImm() == 0)) { - sourceReg = MI.getOperand(1).getReg(); - destReg = MI.getOperand(0).getReg(); - return true; - } - break; - case SPU::LRr8: - case SPU::LRr16: - case SPU::LRr32: - case SPU::LRf32: - case SPU::LRr64: - case SPU::LRf64: - case SPU::LRr128: - case SPU::LRv16i8: - case SPU::LRv8i16: - case SPU::LRv4i32: - case SPU::LRv4f32: - case SPU::LRv2i64: - case SPU::LRv2f64: - case SPU::ORv16i8_i8: - case SPU::ORv8i16_i16: - case SPU::ORv4i32_i32: - case SPU::ORv2i64_i64: - case SPU::ORv4f32_f32: - case SPU::ORv2f64_f64: - case SPU::ORi8_v16i8: - case SPU::ORi16_v8i16: - case SPU::ORi32_v4i32: - case SPU::ORi64_v2i64: - case SPU::ORf32_v4f32: - case SPU::ORf64_v2f64: -/* - case SPU::ORi128_r64: - case SPU::ORi128_f64: - case SPU::ORi128_r32: - case SPU::ORi128_f32: - case SPU::ORi128_r16: - case SPU::ORi128_r8: -*/ - case SPU::ORi128_vec: -/* - case SPU::ORr64_i128: - case SPU::ORf64_i128: - case SPU::ORr32_i128: - case SPU::ORf32_i128: - case SPU::ORr16_i128: - case SPU::ORr8_i128: -*/ - case SPU::ORvec_i128: -/* - case SPU::ORr16_r32: - case SPU::ORr8_r32: - case SPU::ORf32_r32: - case SPU::ORr32_f32: - case SPU::ORr32_r16: - case SPU::ORr32_r8: - case SPU::ORr16_r64: - case SPU::ORr8_r64: - case SPU::ORr64_r16: - case SPU::ORr64_r8: -*/ - case SPU::ORr64_r32: - case SPU::ORr32_r64: - case SPU::ORf32_r32: - case SPU::ORr32_f32: - case SPU::ORf64_r64: - case SPU::ORr64_f64: { - assert(MI.getNumOperands() == 2 && - MI.getOperand(0).isReg() && - MI.getOperand(1).isReg() && - "invalid SPU OR<type>_<vec> or LR instruction!"); - sourceReg = MI.getOperand(1).getReg(); - destReg = MI.getOperand(0).getReg(); - return true; - break; - } - case SPU::ORv16i8: - case SPU::ORv8i16: - case SPU::ORv4i32: - case SPU::ORv2i64: - case SPU::ORr8: - case SPU::ORr16: - case SPU::ORr32: - case SPU::ORr64: - case SPU::ORr128: - case SPU::ORf32: - case SPU::ORf64: - assert(MI.getNumOperands() == 3 && - MI.getOperand(0).isReg() && - MI.getOperand(1).isReg() && - MI.getOperand(2).isReg() && - "invalid SPU OR(vec|r32|r64|gprc) instruction!"); - if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) { - sourceReg = MI.getOperand(1).getReg(); - destReg = MI.getOperand(0).getReg(); - return true; - } - break; - } - - return false; -} - unsigned SPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { diff --git a/lib/Target/CellSPU/SPUInstrInfo.h 
b/lib/Target/CellSPU/SPUInstrInfo.h index fbb173318148..191e55d0ca61 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.h +++ b/lib/Target/CellSPU/SPUInstrInfo.h @@ -32,12 +32,6 @@ namespace llvm { /// virtual const SPURegisterInfo &getRegisterInfo() const { return RI; } - /// Return true if the instruction is a register to register move and return - /// the source and dest operands and their sub-register indices by reference. - virtual bool isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const; - unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; unsigned isStoreToStackSlot(const MachineInstr *MI, diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td index a7fb14c26a76..ca0fe00e37f8 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.td +++ b/lib/Target/CellSPU/SPUInstrInfo.td @@ -62,8 +62,6 @@ let canFoldAsLoad = 1 in { def v4f32: LoadDFormVec<v4f32>; def v2f64: LoadDFormVec<v2f64>; - def v2i32: LoadDFormVec<v2i32>; - def r128: LoadDForm<GPRC>; def r64: LoadDForm<R64C>; def r32: LoadDForm<R32C>; @@ -96,8 +94,6 @@ let canFoldAsLoad = 1 in { def v4f32: LoadAFormVec<v4f32>; def v2f64: LoadAFormVec<v2f64>; - def v2i32: LoadAFormVec<v2i32>; - def r128: LoadAForm<GPRC>; def r64: LoadAForm<R64C>; def r32: LoadAForm<R32C>; @@ -130,8 +126,6 @@ let canFoldAsLoad = 1 in { def v4f32: LoadXFormVec<v4f32>; def v2f64: LoadXFormVec<v2f64>; - def v2i32: LoadXFormVec<v2i32>; - def r128: LoadXForm<GPRC>; def r64: LoadXForm<R64C>; def r32: LoadXForm<R32C>; @@ -180,8 +174,6 @@ multiclass StoreDForms def v4f32: StoreDFormVec<v4f32>; def v2f64: StoreDFormVec<v2f64>; - def v2i32: StoreDFormVec<v2i32>; - def r128: StoreDForm<GPRC>; def r64: StoreDForm<R64C>; def r32: StoreDForm<R32C>; @@ -212,8 +204,6 @@ multiclass StoreAForms def v4f32: StoreAFormVec<v4f32>; def v2f64: StoreAFormVec<v2f64>; - def v2i32: StoreAFormVec<v2i32>; - def r128: StoreAForm<GPRC>; def r64: StoreAForm<R64C>; def r32: StoreAForm<R32C>; @@ -246,8 +236,6 @@ multiclass StoreXForms def v4f32: StoreXFormVec<v4f32>; def v2f64: StoreXFormVec<v2f64>; - def v2i32: StoreXFormVec<v2i32>; - def r128: StoreXForm<GPRC>; def r64: StoreXForm<R64C>; def r32: StoreXForm<R32C>; @@ -607,7 +595,6 @@ class ARegInst<RegisterClass rclass>: multiclass AddInstruction { def v4i32: AVecInst<v4i32>; def v16i8: AVecInst<v16i8>; - def r32: ARegInst<R32C>; } @@ -672,6 +659,7 @@ def SFvec : RRForm<0b00000010000, (outs VECREG:$rT), "sf\t$rT, $rA, $rB", IntegerOp, [(set (v4i32 VECREG:$rT), (sub (v4i32 VECREG:$rB), (v4i32 VECREG:$rA)))]>; + def SFr32 : RRForm<0b00000010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB), "sf\t$rT, $rA, $rB", IntegerOp, [(set R32C:$rT, (sub R32C:$rB, R32C:$rA))]>; @@ -1448,6 +1436,9 @@ class ORCvtGPRCVec: class ORCvtVecGPRC: ORCvtForm<(outs GPRC:$rT), (ins VECREG:$rA)>; +class ORCvtVecVec: + ORCvtForm<(outs VECREG:$rT), (ins VECREG:$rA)>; + multiclass BitwiseOr { def v16i8: ORVecInst<v16i8>; @@ -3894,6 +3885,79 @@ multiclass SFPSub defm FS : SFPSub; +class FMInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm<0b01100011010, OOL, IOL, + "fm\t$rT, $rA, $rB", SPrecFP, + pattern>; + +class FMVecInst<ValueType type>: + FMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (type VECREG:$rT), + (fmul (type VECREG:$rA), (type VECREG:$rB)))]>; + +multiclass SFPMul +{ + def v4f32: FMVecInst<v4f32>; + def f32: FMInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), + [(set R32FP:$rT, (fmul R32FP:$rA, R32FP:$rB))]>; +} + +defm FM : SFPMul; 
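Quick reference for the scalar semantics behind the FM, FMA, FMS and FNMS patterns defined in the SPUInstrInfo.td hunk above, taken from the comments in that hunk; the sketch itself is illustrative and not part of the patch:

    #include <cassert>

    static float fm_  (float a, float b)          { return a * b;     }  // fm
    static float fma_ (float a, float b, float c) { return a * b + c; }  // fma
    static float fms_ (float a, float b, float c) { return a * b - c; }  // fms
    static float fnms_(float a, float b, float c) { return c - a * b; }  // fnms = -(a*b - c)

    int main() {
      assert(fm_  (2.0f, 3.0f)       ==  6.0f);
      assert(fma_ (2.0f, 3.0f, 1.0f) ==  7.0f);
      assert(fms_ (2.0f, 3.0f, 1.0f) ==  5.0f);
      assert(fnms_(2.0f, 3.0f, 1.0f) == -5.0f);
      return 0;
    }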
+ +// Floating point multiply and add +// e.g. d = c + (a * b) +def FMAv4f32: + RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), + "fma\t$rT, $rA, $rB, $rC", SPrecFP, + [(set (v4f32 VECREG:$rT), + (fadd (v4f32 VECREG:$rC), + (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB))))]>; + +def FMAf32: + RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC), + "fma\t$rT, $rA, $rB, $rC", SPrecFP, + [(set R32FP:$rT, (fadd R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>; + +// FP multiply and subtract +// Subtracts value in rC from product +// res = a * b - c +def FMSv4f32 : + RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), + "fms\t$rT, $rA, $rB, $rC", SPrecFP, + [(set (v4f32 VECREG:$rT), + (fsub (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)), + (v4f32 VECREG:$rC)))]>; + +def FMSf32 : + RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC), + "fms\t$rT, $rA, $rB, $rC", SPrecFP, + [(set R32FP:$rT, + (fsub (fmul R32FP:$rA, R32FP:$rB), R32FP:$rC))]>; + +// Floating Negative Mulitply and Subtract +// Subtracts product from value in rC +// res = fneg(fms a b c) +// = - (a * b - c) +// = c - a * b +// NOTE: subtraction order +// fsub a b = a - b +// fs a b = b - a? +def FNMSf32 : + RRRForm<0b1101, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC), + "fnms\t$rT, $rA, $rB, $rC", SPrecFP, + [(set R32FP:$rT, (fsub R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>; + +def FNMSv4f32 : + RRRForm<0b1101, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), + "fnms\t$rT, $rA, $rB, $rC", SPrecFP, + [(set (v4f32 VECREG:$rT), + (fsub (v4f32 VECREG:$rC), + (fmul (v4f32 VECREG:$rA), + (v4f32 VECREG:$rB))))]>; + + + + // Floating point reciprocal estimate class FRESTInst<dag OOL, dag IOL>: @@ -4019,72 +4083,6 @@ def FSCRRf32 : // status and control register read //-------------------------------------- -// Floating point multiply instructions -//-------------------------------------- - -def FMv4f32: - RRForm<0b00100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "fm\t$rT, $rA, $rB", SPrecFP, - [(set (v4f32 VECREG:$rT), (fmul (v4f32 VECREG:$rA), - (v4f32 VECREG:$rB)))]>; - -def FMf32 : - RRForm<0b01100011010, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), - "fm\t$rT, $rA, $rB", SPrecFP, - [(set R32FP:$rT, (fmul R32FP:$rA, R32FP:$rB))]>; - -// Floating point multiply and add -// e.g. 
d = c + (a * b) -def FMAv4f32: - RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - "fma\t$rT, $rA, $rB, $rC", SPrecFP, - [(set (v4f32 VECREG:$rT), - (fadd (v4f32 VECREG:$rC), - (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB))))]>; - -def FMAf32: - RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC), - "fma\t$rT, $rA, $rB, $rC", SPrecFP, - [(set R32FP:$rT, (fadd R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>; - -// FP multiply and subtract -// Subtracts value in rC from product -// res = a * b - c -def FMSv4f32 : - RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - "fms\t$rT, $rA, $rB, $rC", SPrecFP, - [(set (v4f32 VECREG:$rT), - (fsub (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)), - (v4f32 VECREG:$rC)))]>; - -def FMSf32 : - RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC), - "fms\t$rT, $rA, $rB, $rC", SPrecFP, - [(set R32FP:$rT, - (fsub (fmul R32FP:$rA, R32FP:$rB), R32FP:$rC))]>; - -// Floating Negative Mulitply and Subtract -// Subtracts product from value in rC -// res = fneg(fms a b c) -// = - (a * b - c) -// = c - a * b -// NOTE: subtraction order -// fsub a b = a - b -// fs a b = b - a? -def FNMSf32 : - RRRForm<0b1101, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC), - "fnms\t$rT, $rA, $rB, $rC", SPrecFP, - [(set R32FP:$rT, (fsub R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>; - -def FNMSv4f32 : - RRRForm<0b1101, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - "fnms\t$rT, $rA, $rB, $rC", SPrecFP, - [(set (v4f32 VECREG:$rT), - (fsub (v4f32 VECREG:$rC), - (fmul (v4f32 VECREG:$rA), - (v4f32 VECREG:$rB))))]>; - -//-------------------------------------- // Floating Point Conversions // Signed conversions: def CSiFv4f32: diff --git a/lib/Target/CellSPU/SPUOperands.td b/lib/Target/CellSPU/SPUOperands.td index 6216651e48a4..e1a0358abc46 100644 --- a/lib/Target/CellSPU/SPUOperands.td +++ b/lib/Target/CellSPU/SPUOperands.td @@ -98,12 +98,6 @@ def immU8 : PatLeaf<(imm), [{ return (N->getZExtValue() <= 0xff); }]>; -// i64ImmSExt10 predicate - True if the i64 immediate fits in a 10-bit sign -// extended field. Used by RI10Form instructions like 'ldq'. -def i64ImmSExt10 : PatLeaf<(imm), [{ - return isI64IntS10Immediate(N); -}]>; - // i32ImmSExt10 predicate - True if the i32 immediate fits in a 10-bit sign // extended field. Used by RI10Form instructions like 'ldq'. def i32ImmSExt10 : PatLeaf<(imm), [{ diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp index f7cfa42f2a95..cf718917a561 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.cpp +++ b/lib/Target/CellSPU/SPURegisterInfo.cpp @@ -270,9 +270,8 @@ SPURegisterInfo::eliminateCallFramePseudoInstr(MachineFunction &MF, MBB.erase(I); } -unsigned +void SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, - FrameIndexValue *Value, RegScavenger *RS) const { unsigned i = 0; @@ -328,7 +327,6 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, } else { MO.ChangeToImmediate(Offset); } - return 0; } /// determineFrameLayout - Determine the size of the frame and maximum call @@ -417,7 +415,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const if (hasDebugInfo) { // Mark effective beginning of when frame pointer becomes valid. 
FrameLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, TII.get(SPU::DBG_LABEL)).addSym(FrameLabel); + BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(FrameLabel); } // Adjust stack pointer, spilling $lr -> 16($sp) and $sp -> -FrameSize($sp) @@ -476,7 +474,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const // Mark effective beginning of when frame pointer is ready. MCSymbol *ReadyLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, TII.get(SPU::DBG_LABEL)).addSym(ReadyLabel); + BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(ReadyLabel); MachineLocation FPDst(SPU::R1); MachineLocation FPSrc(MachineLocation::VirtualFP); @@ -491,7 +489,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const dl = MBBI->getDebugLoc(); // Insert terminator label - BuildMI(MBB, MBBI, dl, TII.get(SPU::DBG_LABEL)) + BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)) .addSym(MMI.getContext().CreateTempSymbol()); } } @@ -587,6 +585,7 @@ SPURegisterInfo::convertDFormToXForm(int dFormOpcode) const case SPU::LQDr32: return SPU::LQXr32; case SPU::LQDr128: return SPU::LQXr128; case SPU::LQDv16i8: return SPU::LQXv16i8; + case SPU::LQDv4i32: return SPU::LQXv4i32; case SPU::LQDv4f32: return SPU::LQXv4f32; case SPU::STQDr32: return SPU::STQXr32; case SPU::STQDr128: return SPU::STQXr128; diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h index 7a6ae6d43c7e..aedb769cb4fc 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.h +++ b/lib/Target/CellSPU/SPURegisterInfo.h @@ -63,9 +63,8 @@ namespace llvm { MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; //! Convert frame indicies into machine operands - unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, - FrameIndexValue *Value = NULL, - RegScavenger *RS = NULL) const; + void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, + RegScavenger *RS = NULL) const; //! 
Determine the frame's layour void determineFrameLayout(MachineFunction &MF) const; diff --git a/lib/Target/CellSPU/SPURegisterInfo.td b/lib/Target/CellSPU/SPURegisterInfo.td index bb88f2bf9a29..3e8f0979256a 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.td +++ b/lib/Target/CellSPU/SPURegisterInfo.td @@ -394,7 +394,7 @@ def R8C : RegisterClass<"SPU", [i8], 128, // The SPU's registers as vector registers: def VECREG : RegisterClass<"SPU", - [v16i8,v8i16,v2i32,v4i32,v4f32,v2i64,v2f64], + [v16i8,v8i16,v4i32,v4f32,v2i64,v2f64], 128, [ /* volatile register */ diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 145568adcd4a..f08559f6e9f2 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -104,7 +104,7 @@ namespace { public: static char ID; explicit CppWriter(formatted_raw_ostream &o) : - ModulePass(&ID), Out(o), uniqueNum(0), is_inline(false), indent_level(0){} + ModulePass(ID), Out(o), uniqueNum(0), is_inline(false), indent_level(0){} virtual const char *getPassName() const { return "C++ backend"; } @@ -288,6 +288,8 @@ void CppWriter::printLinkageType(GlobalValue::LinkageTypes LT) { Out << "GlobalValue::LinkerPrivateLinkage"; break; case GlobalValue::LinkerPrivateWeakLinkage: Out << "GlobalValue::LinkerPrivateWeakLinkage"; break; + case GlobalValue::LinkerPrivateWeakDefAutoLinkage: + Out << "GlobalValue::LinkerPrivateWeakDefAutoLinkage"; break; case GlobalValue::AvailableExternallyLinkage: Out << "GlobalValue::AvailableExternallyLinkage "; break; case GlobalValue::LinkOnceAnyLinkage: @@ -471,14 +473,22 @@ void CppWriter::printAttributes(const AttrListPtr &PAL, HANDLE_ATTR(Nest); HANDLE_ATTR(ReadNone); HANDLE_ATTR(ReadOnly); - HANDLE_ATTR(InlineHint); HANDLE_ATTR(NoInline); HANDLE_ATTR(AlwaysInline); HANDLE_ATTR(OptimizeForSize); HANDLE_ATTR(StackProtect); HANDLE_ATTR(StackProtectReq); HANDLE_ATTR(NoCapture); + HANDLE_ATTR(NoRedZone); + HANDLE_ATTR(NoImplicitFloat); + HANDLE_ATTR(Naked); + HANDLE_ATTR(InlineHint); #undef HANDLE_ATTR + if (attrs & Attribute::StackAlignment) + Out << " | Attribute::constructStackAlignmentFromInt(" + << Attribute::getStackAlignmentFromAttrs(attrs) + << ")"; + attrs &= ~Attribute::StackAlignment; assert(attrs == 0 && "Unhandled attribute!"); Out << ";"; nl(Out); @@ -1404,7 +1414,8 @@ void CppWriter::printInstruction(const Instruction *I, nl(Out); } Out << "CallInst* " << iName << " = CallInst::Create(" - << opNames[call->getNumArgOperands()] << ", " << iName << "_params.begin(), " + << opNames[call->getNumArgOperands()] << ", " + << iName << "_params.begin(), " << iName << "_params.end(), \""; } else if (call->getNumArgOperands() == 1) { Out << "CallInst* " << iName << " = CallInst::Create(" diff --git a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp index b6e4d654f4aa..f4b30ad271f1 100644 --- a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp +++ b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp @@ -65,11 +65,8 @@ namespace { void printFSLImm(const MachineInstr *MI, int opNum, raw_ostream &O); void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O, const char *Modifier = 0); - void printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O, - const char *Modifier = 0); void printSavedRegsBitmask(raw_ostream &OS); - const char *emitCurrentABIString(); void emitFrameDirective(); void printInstruction(const MachineInstr *MI, raw_ostream &O); @@ -292,13 +289,6 @@ printMemOperand(const MachineInstr *MI, 
int opNum, raw_ostream &O, printOperand(MI, opNum, O); } -void MBlazeAsmPrinter:: -printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O, - const char *Modifier) { - const MachineOperand& MO = MI->getOperand(opNum); - O << MBlaze::MBlazeFCCToString((MBlaze::CondCode)MO.getImm()); -} - // Force static initialization. extern "C" void LLVMInitializeMBlazeAsmPrinter() { RegisterAsmPrinter<MBlazeAsmPrinter> X(TheMBlazeTarget); diff --git a/lib/Target/MBlaze/MBlaze.td b/lib/Target/MBlaze/MBlaze.td index 482ddd3963fb..3815b6d0a398 100644 --- a/lib/Target/MBlaze/MBlaze.td +++ b/lib/Target/MBlaze/MBlaze.td @@ -1,4 +1,4 @@ -//===- MBlaze.td - Describe the MBlaze Target Machine -----------*- C++ -*-===// +//===- MBlaze.td - Describe the MBlaze Target Machine ------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MBlaze/MBlazeCallingConv.td b/lib/Target/MBlaze/MBlazeCallingConv.td index ddd49980e0a2..8622e0d74bcd 100644 --- a/lib/Target/MBlaze/MBlazeCallingConv.td +++ b/lib/Target/MBlaze/MBlazeCallingConv.td @@ -1,4 +1,4 @@ -//===- MBlazeCallingConv.td - Calling Conventions for MBlaze ----*- C++ -*-===// +//===- MBlazeCallingConv.td - Calling Conventions for MBlaze -*- tablegen -*-=// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp index 42fea2507332..b551b79b291e 100644 --- a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp +++ b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp @@ -32,7 +32,7 @@ namespace { static char ID; Filler(TargetMachine &tm) - : MachineFunctionPass(&ID), TM(tm), TII(tm.getInstrInfo()) { } + : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { } virtual const char *getPassName() const { return "MBlaze Delay Slot Filler"; diff --git a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp index c7cd5f4e44a9..e64dd0e3e2c3 100644 --- a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp +++ b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp @@ -219,7 +219,7 @@ SelectAddr(SDNode *Op, SDValue Addr, SDValue &Offset, SDValue &Base) { // Operand is a result from an ADD. 
if (Addr.getOpcode() == ISD::ADD) { if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) { - if (Predicate_immSExt16(CN)) { + if (isUInt<16>(CN->getZExtValue())) { // If the first operand is a FI, get the TargetFI Node if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode> diff --git a/lib/Target/MBlaze/MBlazeInstrFPU.td b/lib/Target/MBlaze/MBlazeInstrFPU.td index a48a8c972353..657b1d4940a7 100644 --- a/lib/Target/MBlaze/MBlazeInstrFPU.td +++ b/lib/Target/MBlaze/MBlazeInstrFPU.td @@ -1,4 +1,4 @@ -//===- MBlazeInstrFPU.td - MBlaze FPU Instruction defs ----------*- C++ -*-===// +//===- MBlazeInstrFPU.td - MBlaze FPU Instruction defs -----*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MBlaze/MBlazeInstrFSL.td b/lib/Target/MBlaze/MBlazeInstrFSL.td index b59999e76ae5..51584111e666 100644 --- a/lib/Target/MBlaze/MBlazeInstrFSL.td +++ b/lib/Target/MBlaze/MBlazeInstrFSL.td @@ -1,4 +1,4 @@ -//===- MBlazeInstrFSL.td - MBlaze FSL Instruction defs ----------*- C++ -*-===// +//===- MBlazeInstrFSL.td - MBlaze FSL Instruction defs -----*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MBlaze/MBlazeInstrFormats.td b/lib/Target/MBlaze/MBlazeInstrFormats.td index 7d655433d4e4..28e8e4402225 100644 --- a/lib/Target/MBlaze/MBlazeInstrFormats.td +++ b/lib/Target/MBlaze/MBlazeInstrFormats.td @@ -1,4 +1,4 @@ -//===- MBlazeInstrFormats.td - MB Instruction defs --------------*- C++ -*-===// +//===- MBlazeInstrFormats.td - MB Instruction defs ---------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/lib/Target/MBlaze/MBlazeInstrInfo.cpp index 6ff5825a26b6..b590c090e095 100644 --- a/lib/Target/MBlaze/MBlazeInstrInfo.cpp +++ b/lib/Target/MBlaze/MBlazeInstrInfo.cpp @@ -30,41 +30,6 @@ static bool isZeroImm(const MachineOperand &op) { return op.isImm() && op.getImm() == 0; } -/// Return true if the instruction is a register to register move and -/// leave the source and dest operands in the passed parameters. -bool MBlazeInstrInfo:: -isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const { - SrcSubIdx = DstSubIdx = 0; // No sub-registers. - - // add $dst, $src, $zero || addu $dst, $zero, $src - // or $dst, $src, $zero || or $dst, $zero, $src - if ((MI.getOpcode() == MBlaze::ADD) || (MI.getOpcode() == MBlaze::OR)) { - if (MI.getOperand(1).isReg() && MI.getOperand(1).getReg() == MBlaze::R0) { - DstReg = MI.getOperand(0).getReg(); - SrcReg = MI.getOperand(2).getReg(); - return true; - } else if (MI.getOperand(2).isReg() && - MI.getOperand(2).getReg() == MBlaze::R0) { - DstReg = MI.getOperand(0).getReg(); - SrcReg = MI.getOperand(1).getReg(); - return true; - } - } - - // addi $dst, $src, 0 - // ori $dst, $src, 0 - if ((MI.getOpcode() == MBlaze::ADDI) || (MI.getOpcode() == MBlaze::ORI)) { - if ((MI.getOperand(1).isReg()) && (isZeroImm(MI.getOperand(2)))) { - DstReg = MI.getOperand(0).getReg(); - SrcReg = MI.getOperand(1).getReg(); - return true; - } - } - - return false; -} - /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of /// the destination along with the FrameIndex of the loaded stack slot. 
If diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.h b/lib/Target/MBlaze/MBlazeInstrInfo.h index f0743705f010..b3dba0ec768c 100644 --- a/lib/Target/MBlaze/MBlazeInstrInfo.h +++ b/lib/Target/MBlaze/MBlazeInstrInfo.h @@ -173,12 +173,6 @@ public: /// virtual const MBlazeRegisterInfo &getRegisterInfo() const { return RI; } - /// Return true if the instruction is a register to register move and return - /// the source and dest operands and their sub-register indices by reference. - virtual bool isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const; - /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of /// the destination along with the FrameIndex of the loaded stack slot. If diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.td b/lib/Target/MBlaze/MBlazeInstrInfo.td index 3c406dda0591..e5d153474a7e 100644 --- a/lib/Target/MBlaze/MBlazeInstrInfo.td +++ b/lib/Target/MBlaze/MBlazeInstrInfo.td @@ -1,4 +1,4 @@ -//===- MBlazeInstrInfo.td - MBlaze Instruction defs -------------*- C++ -*-===// +//===- MBlazeInstrInfo.td - MBlaze Instruction defs --------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MBlaze/MBlazeIntrinsics.td b/lib/Target/MBlaze/MBlazeIntrinsics.td index 82552fa4b343..a27cb5ba0dc4 100644 --- a/lib/Target/MBlaze/MBlazeIntrinsics.td +++ b/lib/Target/MBlaze/MBlazeIntrinsics.td @@ -17,17 +17,11 @@ // MBlaze intrinsic classes. let TargetPrefix = "mblaze", isTarget = 1 in { - class MBFSL_Get_Intrinsic : Intrinsic<[llvm_i32_ty], - [llvm_i32_ty], - [IntrWriteMem]>; + class MBFSL_Get_Intrinsic : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; - class MBFSL_Put_Intrinsic : Intrinsic<[], - [llvm_i32_ty, llvm_i32_ty], - [IntrWriteMem]>; + class MBFSL_Put_Intrinsic : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], []>; - class MBFSL_PutT_Intrinsic : Intrinsic<[], - [llvm_i32_ty], - [IntrWriteMem]>; + class MBFSL_PutT_Intrinsic : Intrinsic<[], [llvm_i32_ty], []>; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp index 8cafa8c519c6..22b6a30470d1 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp @@ -242,9 +242,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // FrameIndex represent objects inside a abstract stack. // We must replace FrameIndex with an stack/frame pointer // direct reference. 
-unsigned MBlazeRegisterInfo:: +void MBlazeRegisterInfo:: eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, - FrameIndexValue *Value, RegScavenger *RS) const { + RegScavenger *RS) const { MachineInstr &MI = *II; MachineFunction &MF = *MI.getParent()->getParent(); @@ -277,7 +277,6 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, MI.getOperand(oi).ChangeToImmediate(Offset); MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false); - return 0; } void MBlazeRegisterInfo:: diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h index af97b0e2d79e..1e1fde14ab7b 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.h +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h @@ -63,9 +63,8 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo { MachineBasicBlock::iterator I) const; /// Stack Frame Processing Methods - unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value = NULL, - RegScavenger *RS = NULL) const; + void eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS = NULL) const; void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.td b/lib/Target/MBlaze/MBlazeRegisterInfo.td index d0a1e7556c43..5e935103389e 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.td +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.td @@ -1,4 +1,4 @@ -//===- MBlazeRegisterInfo.td - MBlaze Register defs -------------*- C++ -*-===// +//===- MBlazeRegisterInfo.td - MBlaze Register defs --------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MBlaze/MBlazeSchedule.td b/lib/Target/MBlaze/MBlazeSchedule.td index 1fec9e694005..4a65542a447c 100644 --- a/lib/Target/MBlaze/MBlazeSchedule.td +++ b/lib/Target/MBlaze/MBlazeSchedule.td @@ -1,4 +1,4 @@ -//===- MBlazeSchedule.td - MBlaze Scheduling Definitions --------*- C++ -*-===// +//===- MBlazeSchedule.td - MBlaze Scheduling Definitions ---*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MSIL/CMakeLists.txt b/lib/Target/MSIL/CMakeLists.txt deleted file mode 100644 index b1d47ef05ec5..000000000000 --- a/lib/Target/MSIL/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -add_llvm_target(MSIL - MSILWriter.cpp - ) diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp deleted file mode 100644 index cc350e8a4f89..000000000000 --- a/lib/Target/MSIL/MSILWriter.cpp +++ /dev/null @@ -1,1706 +0,0 @@ -//===-- MSILWriter.cpp - Library for converting LLVM code to MSIL ---------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This library converts LLVM code to MSIL code. 
-// -//===----------------------------------------------------------------------===// - -#include "MSILWriter.h" -#include "llvm/CallingConv.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Intrinsics.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/TypeSymbolTable.h" -#include "llvm/Analysis/ConstantsScanner.h" -#include "llvm/Support/CallSite.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/InstVisitor.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetRegistry.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/CodeGen/Passes.h" -using namespace llvm; - -namespace llvm { - // TargetMachine for the MSIL - struct MSILTarget : public TargetMachine { - MSILTarget(const Target &T, const std::string &TT, const std::string &FS) - : TargetMachine(T) {} - - virtual bool addPassesToEmitFile(PassManagerBase &PM, - formatted_raw_ostream &Out, - CodeGenFileType FileType, - CodeGenOpt::Level OptLevel, - bool DisableVerify); - - virtual const TargetData *getTargetData() const { return 0; } - }; -} - -extern "C" void LLVMInitializeMSILTarget() { - // Register the target. - RegisterTargetMachine<MSILTarget> X(TheMSILTarget); -} - -bool MSILModule::runOnModule(Module &M) { - ModulePtr = &M; - TD = &getAnalysis<TargetData>(); - bool Changed = false; - // Find named types. - TypeSymbolTable& Table = M.getTypeSymbolTable(); - std::set<const Type *> Types = getAnalysis<FindUsedTypes>().getTypes(); - for (TypeSymbolTable::iterator I = Table.begin(), E = Table.end(); I!=E; ) { - if (!I->second->isStructTy() && !I->second->isOpaqueTy()) - Table.remove(I++); - else { - std::set<const Type *>::iterator T = Types.find(I->second); - if (T==Types.end()) - Table.remove(I++); - else { - Types.erase(T); - ++I; - } - } - } - // Find unnamed types. - unsigned RenameCounter = 0; - for (std::set<const Type *>::const_iterator I = Types.begin(), - E = Types.end(); I!=E; ++I) - if (const StructType *STy = dyn_cast<StructType>(*I)) { - while (ModulePtr->addTypeName("unnamed$"+utostr(RenameCounter), STy)) - ++RenameCounter; - Changed = true; - } - // Pointer for FunctionPass. - UsedTypes = &getAnalysis<FindUsedTypes>().getTypes(); - return Changed; -} - -char MSILModule::ID = 0; -char MSILWriter::ID = 0; - -bool MSILWriter::runOnFunction(Function &F) { - if (F.isDeclaration()) return false; - - // Do not codegen any 'available_externally' functions at all, they have - // definitions outside the translation unit. 
- if (F.hasAvailableExternallyLinkage()) - return false; - - LInfo = &getAnalysis<LoopInfo>(); - printFunction(F); - return false; -} - - -bool MSILWriter::doInitialization(Module &M) { - ModulePtr = &M; - Out << ".assembly extern mscorlib {}\n"; - Out << ".assembly MSIL {}\n\n"; - Out << "// External\n"; - printExternals(); - Out << "// Declarations\n"; - printDeclarations(M.getTypeSymbolTable()); - Out << "// Definitions\n"; - printGlobalVariables(); - Out << "// Startup code\n"; - printModuleStartup(); - return false; -} - - -bool MSILWriter::doFinalization(Module &M) { - return false; -} - - -void MSILWriter::printModuleStartup() { - Out << - ".method static public int32 $MSIL_Startup() {\n" - "\t.entrypoint\n" - "\t.locals (native int i)\n" - "\t.locals (native int argc)\n" - "\t.locals (native int ptr)\n" - "\t.locals (void* argv)\n" - "\t.locals (string[] args)\n" - "\tcall\tstring[] [mscorlib]System.Environment::GetCommandLineArgs()\n" - "\tdup\n" - "\tstloc\targs\n" - "\tldlen\n" - "\tconv.i4\n" - "\tdup\n" - "\tstloc\targc\n"; - printPtrLoad(TD->getPointerSize()); - Out << - "\tmul\n" - "\tlocalloc\n" - "\tstloc\targv\n" - "\tldc.i4.0\n" - "\tstloc\ti\n" - "L_01:\n" - "\tldloc\ti\n" - "\tldloc\targc\n" - "\tceq\n" - "\tbrtrue\tL_02\n" - "\tldloc\targs\n" - "\tldloc\ti\n" - "\tldelem.ref\n" - "\tcall\tnative int [mscorlib]System.Runtime.InteropServices.Marshal::" - "StringToHGlobalAnsi(string)\n" - "\tstloc\tptr\n" - "\tldloc\targv\n" - "\tldloc\ti\n"; - printPtrLoad(TD->getPointerSize()); - Out << - "\tmul\n" - "\tadd\n" - "\tldloc\tptr\n" - "\tstind.i\n" - "\tldloc\ti\n" - "\tldc.i4.1\n" - "\tadd\n" - "\tstloc\ti\n" - "\tbr\tL_01\n" - "L_02:\n" - "\tcall void $MSIL_Init()\n"; - - // Call user 'main' function. - const Function* F = ModulePtr->getFunction("main"); - if (!F || F->isDeclaration()) { - Out << "\tldc.i4.0\n\tret\n}\n"; - return; - } - bool BadSig = true; - std::string Args(""); - Function::const_arg_iterator Arg1,Arg2; - - switch (F->arg_size()) { - case 0: - BadSig = false; - break; - case 1: - Arg1 = F->arg_begin(); - if (Arg1->getType()->isIntegerTy()) { - Out << "\tldloc\targc\n"; - Args = getTypeName(Arg1->getType()); - BadSig = false; - } - break; - case 2: - Arg1 = Arg2 = F->arg_begin(); ++Arg2; - if (Arg1->getType()->isIntegerTy() && - Arg2->getType()->getTypeID() == Type::PointerTyID) { - Out << "\tldloc\targc\n\tldloc\targv\n"; - Args = getTypeName(Arg1->getType())+","+getTypeName(Arg2->getType()); - BadSig = false; - } - break; - default: - BadSig = true; - } - - bool RetVoid = (F->getReturnType()->getTypeID() == Type::VoidTyID); - if (BadSig || (!F->getReturnType()->isIntegerTy() && !RetVoid)) { - Out << "\tldc.i4.0\n"; - } else { - Out << "\tcall\t" << getTypeName(F->getReturnType()) << - getConvModopt(F->getCallingConv()) << "main(" << Args << ")\n"; - if (RetVoid) - Out << "\tldc.i4.0\n"; - else - Out << "\tconv.i4\n"; - } - Out << "\tret\n}\n"; -} - -bool MSILWriter::isZeroValue(const Value* V) { - if (const Constant *C = dyn_cast<Constant>(V)) - return C->isNullValue(); - return false; -} - - -std::string MSILWriter::getValueName(const Value* V) { - std::string Name; - if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) - Name = GV->getName(); - else { - unsigned &No = AnonValueNumbers[V]; - if (No == 0) No = ++NextAnonValueNumber; - Name = "tmp" + utostr(No); - } - - // Name into the quotes allow control and space characters. 
- return "'"+Name+"'"; -} - - -std::string MSILWriter::getLabelName(const std::string& Name) { - if (Name.find('.')!=std::string::npos) { - std::string Tmp(Name); - // Replace unaccepable characters in the label name. - for (std::string::iterator I = Tmp.begin(), E = Tmp.end(); I!=E; ++I) - if (*I=='.') *I = '@'; - return Tmp; - } - return Name; -} - - -std::string MSILWriter::getLabelName(const Value* V) { - std::string Name; - if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) - Name = GV->getName(); - else { - unsigned &No = AnonValueNumbers[V]; - if (No == 0) No = ++NextAnonValueNumber; - Name = "tmp" + utostr(No); - } - - return getLabelName(Name); -} - - -std::string MSILWriter::getConvModopt(CallingConv::ID CallingConvID) { - switch (CallingConvID) { - case CallingConv::C: - case CallingConv::Cold: - case CallingConv::Fast: - return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvCdecl) "; - case CallingConv::X86_FastCall: - return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvFastcall) "; - case CallingConv::X86_StdCall: - return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvStdcall) "; - case CallingConv::X86_ThisCall: - return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvThiscall) "; - default: - errs() << "CallingConvID = " << CallingConvID << '\n'; - llvm_unreachable("Unsupported calling convention"); - } - return ""; // Not reached -} - - -std::string MSILWriter::getArrayTypeName(Type::TypeID TyID, const Type* Ty) { - std::string Tmp = ""; - const Type* ElemTy = Ty; - assert(Ty->getTypeID()==TyID && "Invalid type passed"); - // Walk trought array element types. - for (;;) { - // Multidimensional array. - if (ElemTy->getTypeID()==TyID) { - if (const ArrayType* ATy = dyn_cast<ArrayType>(ElemTy)) - Tmp += utostr(ATy->getNumElements()); - else if (const VectorType* VTy = dyn_cast<VectorType>(ElemTy)) - Tmp += utostr(VTy->getNumElements()); - ElemTy = cast<SequentialType>(ElemTy)->getElementType(); - } - // Base element type found. 
- if (ElemTy->getTypeID()!=TyID) break; - Tmp += ","; - } - return getTypeName(ElemTy, false, true)+"["+Tmp+"]"; -} - - -std::string MSILWriter::getPrimitiveTypeName(const Type* Ty, bool isSigned) { - unsigned NumBits = 0; - switch (Ty->getTypeID()) { - case Type::VoidTyID: - return "void "; - case Type::IntegerTyID: - NumBits = getBitWidth(Ty); - if(NumBits==1) - return "bool "; - if (!isSigned) - return "unsigned int"+utostr(NumBits)+" "; - return "int"+utostr(NumBits)+" "; - case Type::FloatTyID: - return "float32 "; - case Type::DoubleTyID: - return "float64 "; - default: - errs() << "Type = " << *Ty << '\n'; - llvm_unreachable("Invalid primitive type"); - } - return ""; // Not reached -} - - -std::string MSILWriter::getTypeName(const Type* Ty, bool isSigned, - bool isNested) { - if (Ty->isPrimitiveType() || Ty->isIntegerTy()) - return getPrimitiveTypeName(Ty,isSigned); - // FIXME: "OpaqueType" support - switch (Ty->getTypeID()) { - case Type::PointerTyID: - return "void* "; - case Type::StructTyID: - if (isNested) - return ModulePtr->getTypeName(Ty); - return "valuetype '"+ModulePtr->getTypeName(Ty)+"' "; - case Type::ArrayTyID: - if (isNested) - return getArrayTypeName(Ty->getTypeID(),Ty); - return "valuetype '"+getArrayTypeName(Ty->getTypeID(),Ty)+"' "; - case Type::VectorTyID: - if (isNested) - return getArrayTypeName(Ty->getTypeID(),Ty); - return "valuetype '"+getArrayTypeName(Ty->getTypeID(),Ty)+"' "; - default: - errs() << "Type = " << *Ty << '\n'; - llvm_unreachable("Invalid type in getTypeName()"); - } - return ""; // Not reached -} - - -MSILWriter::ValueType MSILWriter::getValueLocation(const Value* V) { - // Function argument - if (isa<Argument>(V)) - return ArgumentVT; - // Function - else if (const Function* F = dyn_cast<Function>(V)) - return F->hasLocalLinkage() ? InternalVT : GlobalVT; - // Variable - else if (const GlobalVariable* G = dyn_cast<GlobalVariable>(V)) - return G->hasLocalLinkage() ? InternalVT : GlobalVT; - // Constant - else if (isa<Constant>(V)) - return isa<ConstantExpr>(V) ? ConstExprVT : ConstVT; - // Local variable - return LocalVT; -} - - -std::string MSILWriter::getTypePostfix(const Type* Ty, bool Expand, - bool isSigned) { - unsigned NumBits = 0; - switch (Ty->getTypeID()) { - // Integer constant, expanding for stack operations. - case Type::IntegerTyID: - NumBits = getBitWidth(Ty); - // Expand integer value to "int32" or "int64". - if (Expand) return (NumBits<=32 ? "i4" : "i8"); - if (NumBits==1) return "i1"; - return (isSigned ? "i" : "u")+utostr(NumBits/8); - // Float constant. - case Type::FloatTyID: - return "r4"; - case Type::DoubleTyID: - return "r8"; - case Type::PointerTyID: - return "i"+utostr(TD->getTypeAllocSize(Ty)); - default: - errs() << "TypeID = " << Ty->getTypeID() << '\n'; - llvm_unreachable("Invalid type in TypeToPostfix()"); - } - return ""; // Not reached -} - - -void MSILWriter::printConvToPtr() { - switch (ModulePtr->getPointerSize()) { - case Module::Pointer32: - printSimpleInstruction("conv.u4"); - break; - case Module::Pointer64: - printSimpleInstruction("conv.u8"); - break; - default: - llvm_unreachable("Module use not supporting pointer size"); - } -} - - -void MSILWriter::printPtrLoad(uint64_t N) { - switch (ModulePtr->getPointerSize()) { - case Module::Pointer32: - printSimpleInstruction("ldc.i4",utostr(N).c_str()); - // FIXME: Need overflow test? 
- if (!isUInt<32>(N)) { - errs() << "Value = " << utostr(N) << '\n'; - llvm_unreachable("32-bit pointer overflowed"); - } - break; - case Module::Pointer64: - printSimpleInstruction("ldc.i8",utostr(N).c_str()); - break; - default: - llvm_unreachable("Module use not supporting pointer size"); - } -} - - -void MSILWriter::printValuePtrLoad(const Value* V) { - printValueLoad(V); - printConvToPtr(); -} - - -void MSILWriter::printConstLoad(const Constant* C) { - if (const ConstantInt* CInt = dyn_cast<ConstantInt>(C)) { - // Integer constant - Out << "\tldc." << getTypePostfix(C->getType(),true) << '\t'; - if (CInt->isMinValue(true)) - Out << CInt->getSExtValue(); - else - Out << CInt->getZExtValue(); - } else if (const ConstantFP* FP = dyn_cast<ConstantFP>(C)) { - // Float constant - uint64_t X; - unsigned Size; - if (FP->getType()->getTypeID()==Type::FloatTyID) { - X = (uint32_t)FP->getValueAPF().bitcastToAPInt().getZExtValue(); - Size = 4; - } else { - X = FP->getValueAPF().bitcastToAPInt().getZExtValue(); - Size = 8; - } - Out << "\tldc.r" << Size << "\t( " << utohexstr(X) << ')'; - } else if (isa<UndefValue>(C)) { - // Undefined constant value = NULL. - printPtrLoad(0); - } else { - errs() << "Constant = " << *C << '\n'; - llvm_unreachable("Invalid constant value"); - } - Out << '\n'; -} - - -void MSILWriter::printValueLoad(const Value* V) { - MSILWriter::ValueType Location = getValueLocation(V); - switch (Location) { - // Global variable or function address. - case GlobalVT: - case InternalVT: - if (const Function* F = dyn_cast<Function>(V)) { - std::string Name = getConvModopt(F->getCallingConv())+getValueName(F); - printSimpleInstruction("ldftn", - getCallSignature(F->getFunctionType(),NULL,Name).c_str()); - } else { - std::string Tmp; - const Type* ElemTy = cast<PointerType>(V->getType())->getElementType(); - if (Location==GlobalVT && cast<GlobalVariable>(V)->hasDLLImportLinkage()) { - Tmp = "void* "+getValueName(V); - printSimpleInstruction("ldsfld",Tmp.c_str()); - } else { - Tmp = getTypeName(ElemTy)+getValueName(V); - printSimpleInstruction("ldsflda",Tmp.c_str()); - } - } - break; - // Function argument. - case ArgumentVT: - printSimpleInstruction("ldarg",getValueName(V).c_str()); - break; - // Local function variable. - case LocalVT: - printSimpleInstruction("ldloc",getValueName(V).c_str()); - break; - // Constant value. - case ConstVT: - if (isa<ConstantPointerNull>(V)) - printPtrLoad(0); - else - printConstLoad(cast<Constant>(V)); - break; - // Constant expression. 
- case ConstExprVT: - printConstantExpr(cast<ConstantExpr>(V)); - break; - default: - errs() << "Value = " << *V << '\n'; - llvm_unreachable("Invalid value location"); - } -} - - -void MSILWriter::printValueSave(const Value* V) { - switch (getValueLocation(V)) { - case ArgumentVT: - printSimpleInstruction("starg",getValueName(V).c_str()); - break; - case LocalVT: - printSimpleInstruction("stloc",getValueName(V).c_str()); - break; - default: - errs() << "Value = " << *V << '\n'; - llvm_unreachable("Invalid value location"); - } -} - - -void MSILWriter::printBinaryInstruction(const char* Name, const Value* Left, - const Value* Right) { - printValueLoad(Left); - printValueLoad(Right); - Out << '\t' << Name << '\n'; -} - - -void MSILWriter::printSimpleInstruction(const char* Inst, const char* Operand) { - if(Operand) - Out << '\t' << Inst << '\t' << Operand << '\n'; - else - Out << '\t' << Inst << '\n'; -} - - -void MSILWriter::printPHICopy(const BasicBlock* Src, const BasicBlock* Dst) { - for (BasicBlock::const_iterator I = Dst->begin(); isa<PHINode>(I); ++I) { - const PHINode* Phi = cast<PHINode>(I); - const Value* Val = Phi->getIncomingValueForBlock(Src); - if (isa<UndefValue>(Val)) continue; - printValueLoad(Val); - printValueSave(Phi); - } -} - - -void MSILWriter::printBranchToBlock(const BasicBlock* CurrBB, - const BasicBlock* TrueBB, - const BasicBlock* FalseBB) { - if (TrueBB==FalseBB) { - // "TrueBB" and "FalseBB" destination equals - printPHICopy(CurrBB,TrueBB); - printSimpleInstruction("pop"); - printSimpleInstruction("br",getLabelName(TrueBB).c_str()); - } else if (FalseBB==NULL) { - // If "FalseBB" not used the jump have condition - printPHICopy(CurrBB,TrueBB); - printSimpleInstruction("brtrue",getLabelName(TrueBB).c_str()); - } else if (TrueBB==NULL) { - // If "TrueBB" not used the jump is unconditional - printPHICopy(CurrBB,FalseBB); - printSimpleInstruction("br",getLabelName(FalseBB).c_str()); - } else { - // Copy PHI instructions for each block - std::string TmpLabel; - // Print PHI instructions for "TrueBB" - if (isa<PHINode>(TrueBB->begin())) { - TmpLabel = getLabelName(TrueBB)+"$phi_"+utostr(getUniqID()); - printSimpleInstruction("brtrue",TmpLabel.c_str()); - } else { - printSimpleInstruction("brtrue",getLabelName(TrueBB).c_str()); - } - // Print PHI instructions for "FalseBB" - if (isa<PHINode>(FalseBB->begin())) { - printPHICopy(CurrBB,FalseBB); - printSimpleInstruction("br",getLabelName(FalseBB).c_str()); - } else { - printSimpleInstruction("br",getLabelName(FalseBB).c_str()); - } - if (isa<PHINode>(TrueBB->begin())) { - // Handle "TrueBB" PHI Copy - Out << TmpLabel << ":\n"; - printPHICopy(CurrBB,TrueBB); - printSimpleInstruction("br",getLabelName(TrueBB).c_str()); - } - } -} - - -void MSILWriter::printBranchInstruction(const BranchInst* Inst) { - if (Inst->isUnconditional()) { - printBranchToBlock(Inst->getParent(),NULL,Inst->getSuccessor(0)); - } else { - printValueLoad(Inst->getCondition()); - printBranchToBlock(Inst->getParent(),Inst->getSuccessor(0), - Inst->getSuccessor(1)); - } -} - - -void MSILWriter::printSelectInstruction(const Value* Cond, const Value* VTrue, - const Value* VFalse) { - std::string TmpLabel = std::string("select$true_")+utostr(getUniqID()); - printValueLoad(VTrue); - printValueLoad(Cond); - printSimpleInstruction("brtrue",TmpLabel.c_str()); - printSimpleInstruction("pop"); - printValueLoad(VFalse); - Out << TmpLabel << ":\n"; -} - - -void MSILWriter::printIndirectLoad(const Value* V) { - const Type* Ty = V->getType(); - printValueLoad(V); - if 
(const PointerType* P = dyn_cast<PointerType>(Ty)) - Ty = P->getElementType(); - std::string Tmp = "ldind."+getTypePostfix(Ty, false); - printSimpleInstruction(Tmp.c_str()); -} - - -void MSILWriter::printIndirectSave(const Value* Ptr, const Value* Val) { - printValueLoad(Ptr); - printValueLoad(Val); - printIndirectSave(Val->getType()); -} - - -void MSILWriter::printIndirectSave(const Type* Ty) { - // Instruction need signed postfix for any type. - std::string postfix = getTypePostfix(Ty, false); - if (*postfix.begin()=='u') *postfix.begin() = 'i'; - postfix = "stind."+postfix; - printSimpleInstruction(postfix.c_str()); -} - - -void MSILWriter::printCastInstruction(unsigned int Op, const Value* V, - const Type* Ty, const Type* SrcTy) { - std::string Tmp(""); - printValueLoad(V); - switch (Op) { - // Signed - case Instruction::SExt: - // If sign extending int, convert first from unsigned to signed - // with the same bit size - because otherwise we will loose the sign. - if (SrcTy) { - Tmp = "conv."+getTypePostfix(SrcTy,false,true); - printSimpleInstruction(Tmp.c_str()); - } - // FALLTHROUGH - case Instruction::SIToFP: - case Instruction::FPToSI: - Tmp = "conv."+getTypePostfix(Ty,false,true); - printSimpleInstruction(Tmp.c_str()); - break; - // Unsigned - case Instruction::FPTrunc: - case Instruction::FPExt: - case Instruction::UIToFP: - case Instruction::Trunc: - case Instruction::ZExt: - case Instruction::FPToUI: - case Instruction::PtrToInt: - case Instruction::IntToPtr: - Tmp = "conv."+getTypePostfix(Ty,false); - printSimpleInstruction(Tmp.c_str()); - break; - // Do nothing - case Instruction::BitCast: - // FIXME: meaning that ld*/st* instruction do not change data format. - break; - default: - errs() << "Opcode = " << Op << '\n'; - llvm_unreachable("Invalid conversion instruction"); - } -} - - -void MSILWriter::printGepInstruction(const Value* V, gep_type_iterator I, - gep_type_iterator E) { - unsigned Size; - // Load address - printValuePtrLoad(V); - // Calculate element offset. - for (; I!=E; ++I){ - Size = 0; - const Value* IndexValue = I.getOperand(); - if (const StructType* StrucTy = dyn_cast<StructType>(*I)) { - uint64_t FieldIndex = cast<ConstantInt>(IndexValue)->getZExtValue(); - // Offset is the sum of all previous structure fields. - for (uint64_t F = 0; F<FieldIndex; ++F) - Size += TD->getTypeAllocSize(StrucTy->getContainedType((unsigned)F)); - printPtrLoad(Size); - printSimpleInstruction("add"); - continue; - } else if (const SequentialType* SeqTy = dyn_cast<SequentialType>(*I)) { - Size = TD->getTypeAllocSize(SeqTy->getElementType()); - } else { - Size = TD->getTypeAllocSize(*I); - } - // Add offset of current element to stack top. - if (!isZeroValue(IndexValue)) { - // Constant optimization. - if (const ConstantInt* C = dyn_cast<ConstantInt>(IndexValue)) { - if (C->getValue().isNegative()) { - printPtrLoad(C->getValue().abs().getZExtValue()*Size); - printSimpleInstruction("sub"); - continue; - } else - printPtrLoad(C->getZExtValue()*Size); - } else { - printPtrLoad(Size); - printValuePtrLoad(IndexValue); - printSimpleInstruction("mul"); - } - printSimpleInstruction("add"); - } - } -} - - -std::string MSILWriter::getCallSignature(const FunctionType* Ty, - const Instruction* Inst, - std::string Name) { - std::string Tmp(""); - if (Ty->isVarArg()) Tmp += "vararg "; - // Name and return type. - Tmp += getTypeName(Ty->getReturnType())+Name+"("; - // Function argument type list. 
- unsigned NumParams = Ty->getNumParams(); - for (unsigned I = 0; I!=NumParams; ++I) { - if (I!=0) Tmp += ","; - Tmp += getTypeName(Ty->getParamType(I)); - } - // CLR needs to know the exact amount of parameters received by vararg - // function, because caller cleans the stack. - if (Ty->isVarArg() && Inst) { - // Origin to function arguments in "CallInst" or "InvokeInst". - unsigned Org = isa<InvokeInst>(Inst) ? 3 : 1; - // Print variable argument types. - unsigned NumOperands = Inst->getNumOperands()-Org; - if (NumParams<NumOperands) { - if (NumParams!=0) Tmp += ", "; - Tmp += "... , "; - for (unsigned J = NumParams; J!=NumOperands; ++J) { - if (J!=NumParams) Tmp += ", "; - Tmp += getTypeName(Inst->getOperand(J+Org)->getType()); - } - } - } - return Tmp+")"; -} - - -void MSILWriter::printFunctionCall(const Value* FnVal, - const Instruction* Inst) { - // Get function calling convention. - std::string Name = ""; - if (const CallInst* Call = dyn_cast<CallInst>(Inst)) - Name = getConvModopt(Call->getCallingConv()); - else if (const InvokeInst* Invoke = dyn_cast<InvokeInst>(Inst)) - Name = getConvModopt(Invoke->getCallingConv()); - else { - errs() << "Instruction = " << Inst->getName() << '\n'; - llvm_unreachable("Need \"Invoke\" or \"Call\" instruction only"); - } - if (const Function* F = dyn_cast<Function>(FnVal)) { - // Direct call. - Name += getValueName(F); - printSimpleInstruction("call", - getCallSignature(F->getFunctionType(),Inst,Name).c_str()); - } else { - // Indirect function call. - const PointerType* PTy = cast<PointerType>(FnVal->getType()); - const FunctionType* FTy = cast<FunctionType>(PTy->getElementType()); - // Load function address. - printValueLoad(FnVal); - printSimpleInstruction("calli",getCallSignature(FTy,Inst,Name).c_str()); - } -} - - -void MSILWriter::printIntrinsicCall(const IntrinsicInst* Inst) { - std::string Name; - switch (Inst->getIntrinsicID()) { - case Intrinsic::vastart: - Name = getValueName(Inst->getArgOperand(0)); - Name.insert(Name.length()-1,"$valist"); - // Obtain the argument handle. - printSimpleInstruction("ldloca",Name.c_str()); - printSimpleInstruction("arglist"); - printSimpleInstruction("call", - "instance void [mscorlib]System.ArgIterator::.ctor" - "(valuetype [mscorlib]System.RuntimeArgumentHandle)"); - // Save as pointer type "void*" - printValueLoad(Inst->getArgOperand(0)); - printSimpleInstruction("ldloca",Name.c_str()); - printIndirectSave(PointerType::getUnqual( - IntegerType::get(Inst->getContext(), 8))); - break; - case Intrinsic::vaend: - // Close argument list handle. - printIndirectLoad(Inst->getArgOperand(0)); - printSimpleInstruction("call","instance void [mscorlib]System.ArgIterator::End()"); - break; - case Intrinsic::vacopy: - // Copy "ArgIterator" valuetype. - printIndirectLoad(Inst->getArgOperand(0)); - printIndirectLoad(Inst->getArgOperand(1)); - printSimpleInstruction("cpobj","[mscorlib]System.ArgIterator"); - break; - default: - errs() << "Intrinsic ID = " << Inst->getIntrinsicID() << '\n'; - llvm_unreachable("Invalid intrinsic function"); - } -} - - -void MSILWriter::printCallInstruction(const Instruction* Inst) { - if (isa<IntrinsicInst>(Inst)) { - // Handle intrinsic function. - printIntrinsicCall(cast<IntrinsicInst>(Inst)); - } else { - const CallInst *CI = cast<CallInst>(Inst); - // Load arguments to stack and call function. 
- for (int I = 0, E = CI->getNumArgOperands(); I!=E; ++I) - printValueLoad(CI->getArgOperand(I)); - printFunctionCall(CI->getCalledFunction(), Inst); - } -} - - -void MSILWriter::printICmpInstruction(unsigned Predicate, const Value* Left, - const Value* Right) { - switch (Predicate) { - case ICmpInst::ICMP_EQ: - printBinaryInstruction("ceq",Left,Right); - break; - case ICmpInst::ICMP_NE: - // Emulate = not neg (Op1 eq Op2) - printBinaryInstruction("ceq",Left,Right); - printSimpleInstruction("neg"); - printSimpleInstruction("not"); - break; - case ICmpInst::ICMP_ULE: - case ICmpInst::ICMP_SLE: - // Emulate = (Op1 eq Op2) or (Op1 lt Op2) - printBinaryInstruction("ceq",Left,Right); - if (Predicate==ICmpInst::ICMP_ULE) - printBinaryInstruction("clt.un",Left,Right); - else - printBinaryInstruction("clt",Left,Right); - printSimpleInstruction("or"); - break; - case ICmpInst::ICMP_UGE: - case ICmpInst::ICMP_SGE: - // Emulate = (Op1 eq Op2) or (Op1 gt Op2) - printBinaryInstruction("ceq",Left,Right); - if (Predicate==ICmpInst::ICMP_UGE) - printBinaryInstruction("cgt.un",Left,Right); - else - printBinaryInstruction("cgt",Left,Right); - printSimpleInstruction("or"); - break; - case ICmpInst::ICMP_ULT: - printBinaryInstruction("clt.un",Left,Right); - break; - case ICmpInst::ICMP_SLT: - printBinaryInstruction("clt",Left,Right); - break; - case ICmpInst::ICMP_UGT: - printBinaryInstruction("cgt.un",Left,Right); - break; - case ICmpInst::ICMP_SGT: - printBinaryInstruction("cgt",Left,Right); - break; - default: - errs() << "Predicate = " << Predicate << '\n'; - llvm_unreachable("Invalid icmp predicate"); - } -} - - -void MSILWriter::printFCmpInstruction(unsigned Predicate, const Value* Left, - const Value* Right) { - // FIXME: Correct comparison - std::string NanFunc = "bool [mscorlib]System.Double::IsNaN(float64)"; - switch (Predicate) { - case FCmpInst::FCMP_UGT: - // X > Y || llvm_fcmp_uno(X, Y) - printBinaryInstruction("cgt",Left,Right); - printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right); - printSimpleInstruction("or"); - break; - case FCmpInst::FCMP_OGT: - // X > Y - printBinaryInstruction("cgt",Left,Right); - break; - case FCmpInst::FCMP_UGE: - // X >= Y || llvm_fcmp_uno(X, Y) - printBinaryInstruction("ceq",Left,Right); - printBinaryInstruction("cgt",Left,Right); - printSimpleInstruction("or"); - printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right); - printSimpleInstruction("or"); - break; - case FCmpInst::FCMP_OGE: - // X >= Y - printBinaryInstruction("ceq",Left,Right); - printBinaryInstruction("cgt",Left,Right); - printSimpleInstruction("or"); - break; - case FCmpInst::FCMP_ULT: - // X < Y || llvm_fcmp_uno(X, Y) - printBinaryInstruction("clt",Left,Right); - printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right); - printSimpleInstruction("or"); - break; - case FCmpInst::FCMP_OLT: - // X < Y - printBinaryInstruction("clt",Left,Right); - break; - case FCmpInst::FCMP_ULE: - // X <= Y || llvm_fcmp_uno(X, Y) - printBinaryInstruction("ceq",Left,Right); - printBinaryInstruction("clt",Left,Right); - printSimpleInstruction("or"); - printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right); - printSimpleInstruction("or"); - break; - case FCmpInst::FCMP_OLE: - // X <= Y - printBinaryInstruction("ceq",Left,Right); - printBinaryInstruction("clt",Left,Right); - printSimpleInstruction("or"); - break; - case FCmpInst::FCMP_UEQ: - // X == Y || llvm_fcmp_uno(X, Y) - printBinaryInstruction("ceq",Left,Right); - printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right); - printSimpleInstruction("or"); - break; - case 
FCmpInst::FCMP_OEQ: - // X == Y - printBinaryInstruction("ceq",Left,Right); - break; - case FCmpInst::FCMP_UNE: - // X != Y - printBinaryInstruction("ceq",Left,Right); - printSimpleInstruction("neg"); - printSimpleInstruction("not"); - break; - case FCmpInst::FCMP_ONE: - // X != Y && llvm_fcmp_ord(X, Y) - printBinaryInstruction("ceq",Left,Right); - printSimpleInstruction("not"); - break; - case FCmpInst::FCMP_ORD: - // return X == X && Y == Y - printBinaryInstruction("ceq",Left,Left); - printBinaryInstruction("ceq",Right,Right); - printSimpleInstruction("or"); - break; - case FCmpInst::FCMP_UNO: - // X != X || Y != Y - printBinaryInstruction("ceq",Left,Left); - printSimpleInstruction("not"); - printBinaryInstruction("ceq",Right,Right); - printSimpleInstruction("not"); - printSimpleInstruction("or"); - break; - default: - llvm_unreachable("Illegal FCmp predicate"); - } -} - - -void MSILWriter::printInvokeInstruction(const InvokeInst* Inst) { - std::string Label = "leave$normal_"+utostr(getUniqID()); - Out << ".try {\n"; - // Load arguments - for (int I = 0, E = Inst->getNumArgOperands(); I!=E; ++I) - printValueLoad(Inst->getArgOperand(I)); - // Print call instruction - printFunctionCall(Inst->getOperand(0),Inst); - // Save function result and leave "try" block - printValueSave(Inst); - printSimpleInstruction("leave",Label.c_str()); - Out << "}\n"; - Out << "catch [mscorlib]System.Exception {\n"; - // Redirect to unwind block - printSimpleInstruction("pop"); - printBranchToBlock(Inst->getParent(),NULL,Inst->getUnwindDest()); - Out << "}\n" << Label << ":\n"; - // Redirect to continue block - printBranchToBlock(Inst->getParent(),NULL,Inst->getNormalDest()); -} - - -void MSILWriter::printSwitchInstruction(const SwitchInst* Inst) { - // FIXME: Emulate with IL "switch" instruction - // Emulate = if () else if () else if () else ... - for (unsigned int I = 1, E = Inst->getNumCases(); I!=E; ++I) { - printValueLoad(Inst->getCondition()); - printValueLoad(Inst->getCaseValue(I)); - printSimpleInstruction("ceq"); - // Condition jump to successor block - printBranchToBlock(Inst->getParent(),Inst->getSuccessor(I),NULL); - } - // Jump to default block - printBranchToBlock(Inst->getParent(),NULL,Inst->getDefaultDest()); -} - - -void MSILWriter::printVAArgInstruction(const VAArgInst* Inst) { - printIndirectLoad(Inst->getOperand(0)); - printSimpleInstruction("call", - "instance typedref [mscorlib]System.ArgIterator::GetNextArg()"); - printSimpleInstruction("refanyval","void*"); - std::string Name = - "ldind."+getTypePostfix(PointerType::getUnqual( - IntegerType::get(Inst->getContext(), 8)),false); - printSimpleInstruction(Name.c_str()); -} - - -void MSILWriter::printAllocaInstruction(const AllocaInst* Inst) { - uint64_t Size = TD->getTypeAllocSize(Inst->getAllocatedType()); - // Constant optimization. - if (const ConstantInt* CInt = dyn_cast<ConstantInt>(Inst->getOperand(0))) { - printPtrLoad(CInt->getZExtValue()*Size); - } else { - printPtrLoad(Size); - printValueLoad(Inst->getOperand(0)); - printSimpleInstruction("mul"); - } - printSimpleInstruction("localloc"); -} - - -void MSILWriter::printInstruction(const Instruction* Inst) { - const Value *Left = 0, *Right = 0; - if (Inst->getNumOperands()>=1) Left = Inst->getOperand(0); - if (Inst->getNumOperands()>=2) Right = Inst->getOperand(1); - // Print instruction - // FIXME: "ShuffleVector","ExtractElement","InsertElement" support. 
- switch (Inst->getOpcode()) { - // Terminator - case Instruction::Ret: - if (Inst->getNumOperands()) { - printValueLoad(Left); - printSimpleInstruction("ret"); - } else - printSimpleInstruction("ret"); - break; - case Instruction::Br: - printBranchInstruction(cast<BranchInst>(Inst)); - break; - // Binary - case Instruction::Add: - case Instruction::FAdd: - printBinaryInstruction("add",Left,Right); - break; - case Instruction::Sub: - case Instruction::FSub: - printBinaryInstruction("sub",Left,Right); - break; - case Instruction::Mul: - case Instruction::FMul: - printBinaryInstruction("mul",Left,Right); - break; - case Instruction::UDiv: - printBinaryInstruction("div.un",Left,Right); - break; - case Instruction::SDiv: - case Instruction::FDiv: - printBinaryInstruction("div",Left,Right); - break; - case Instruction::URem: - printBinaryInstruction("rem.un",Left,Right); - break; - case Instruction::SRem: - case Instruction::FRem: - printBinaryInstruction("rem",Left,Right); - break; - // Binary Condition - case Instruction::ICmp: - printICmpInstruction(cast<ICmpInst>(Inst)->getPredicate(),Left,Right); - break; - case Instruction::FCmp: - printFCmpInstruction(cast<FCmpInst>(Inst)->getPredicate(),Left,Right); - break; - // Bitwise Binary - case Instruction::And: - printBinaryInstruction("and",Left,Right); - break; - case Instruction::Or: - printBinaryInstruction("or",Left,Right); - break; - case Instruction::Xor: - printBinaryInstruction("xor",Left,Right); - break; - case Instruction::Shl: - printValueLoad(Left); - printValueLoad(Right); - printSimpleInstruction("conv.i4"); - printSimpleInstruction("shl"); - break; - case Instruction::LShr: - printValueLoad(Left); - printValueLoad(Right); - printSimpleInstruction("conv.i4"); - printSimpleInstruction("shr.un"); - break; - case Instruction::AShr: - printValueLoad(Left); - printValueLoad(Right); - printSimpleInstruction("conv.i4"); - printSimpleInstruction("shr"); - break; - case Instruction::Select: - printSelectInstruction(Inst->getOperand(0),Inst->getOperand(1),Inst->getOperand(2)); - break; - case Instruction::Load: - printIndirectLoad(Inst->getOperand(0)); - break; - case Instruction::Store: - printIndirectSave(Inst->getOperand(1), Inst->getOperand(0)); - break; - case Instruction::SExt: - printCastInstruction(Inst->getOpcode(),Left, - cast<CastInst>(Inst)->getDestTy(), - cast<CastInst>(Inst)->getSrcTy()); - break; - case Instruction::Trunc: - case Instruction::ZExt: - case Instruction::FPTrunc: - case Instruction::FPExt: - case Instruction::UIToFP: - case Instruction::SIToFP: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::PtrToInt: - case Instruction::IntToPtr: - case Instruction::BitCast: - printCastInstruction(Inst->getOpcode(),Left, - cast<CastInst>(Inst)->getDestTy()); - break; - case Instruction::GetElementPtr: - printGepInstruction(Inst->getOperand(0),gep_type_begin(Inst), - gep_type_end(Inst)); - break; - case Instruction::Call: - printCallInstruction(cast<CallInst>(Inst)); - break; - case Instruction::Invoke: - printInvokeInstruction(cast<InvokeInst>(Inst)); - break; - case Instruction::Unwind: - printSimpleInstruction("newobj", - "instance void [mscorlib]System.Exception::.ctor()"); - printSimpleInstruction("throw"); - break; - case Instruction::Switch: - printSwitchInstruction(cast<SwitchInst>(Inst)); - break; - case Instruction::Alloca: - printAllocaInstruction(cast<AllocaInst>(Inst)); - break; - case Instruction::Unreachable: - printSimpleInstruction("ldstr", "\"Unreachable instruction\""); - 
printSimpleInstruction("newobj", - "instance void [mscorlib]System.Exception::.ctor(string)"); - printSimpleInstruction("throw"); - break; - case Instruction::VAArg: - printVAArgInstruction(cast<VAArgInst>(Inst)); - break; - default: - errs() << "Instruction = " << Inst->getName() << '\n'; - llvm_unreachable("Unsupported instruction"); - } -} - - -void MSILWriter::printLoop(const Loop* L) { - Out << getLabelName(L->getHeader()->getName()) << ":\n"; - const std::vector<BasicBlock*>& blocks = L->getBlocks(); - for (unsigned I = 0, E = blocks.size(); I!=E; I++) { - BasicBlock* BB = blocks[I]; - Loop* BBLoop = LInfo->getLoopFor(BB); - if (BBLoop == L) - printBasicBlock(BB); - else if (BB==BBLoop->getHeader() && BBLoop->getParentLoop()==L) - printLoop(BBLoop); - } - printSimpleInstruction("br",getLabelName(L->getHeader()->getName()).c_str()); -} - - -void MSILWriter::printBasicBlock(const BasicBlock* BB) { - Out << getLabelName(BB) << ":\n"; - for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) { - const Instruction* Inst = I; - // Comment llvm original instruction - // Out << "\n//" << *Inst << "\n"; - // Do not handle PHI instruction in current block - if (Inst->getOpcode()==Instruction::PHI) continue; - // Print instruction - printInstruction(Inst); - // Save result - if (Inst->getType()!=Type::getVoidTy(BB->getContext())) { - // Do not save value after invoke, it done in "try" block - if (Inst->getOpcode()==Instruction::Invoke) continue; - printValueSave(Inst); - } - } -} - - -void MSILWriter::printLocalVariables(const Function& F) { - std::string Name; - const Type* Ty = NULL; - std::set<const Value*> Printed; - const Value* VaList = NULL; - unsigned StackDepth = 8; - // Find local variables - for (const_inst_iterator I = inst_begin(&F), E = inst_end(&F); I!=E; ++I) { - if (I->getOpcode()==Instruction::Call || - I->getOpcode()==Instruction::Invoke) { - // Test stack depth. - if (StackDepth<I->getNumOperands()) - StackDepth = I->getNumOperands(); - } - const AllocaInst* AI = dyn_cast<AllocaInst>(&*I); - if (AI && !isa<GlobalVariable>(AI)) { - // Local variable allocation. - Ty = PointerType::getUnqual(AI->getAllocatedType()); - Name = getValueName(AI); - Out << "\t.locals (" << getTypeName(Ty) << Name << ")\n"; - } else if (I->getType()!=Type::getVoidTy(F.getContext())) { - // Operation result. - Ty = I->getType(); - Name = getValueName(&*I); - Out << "\t.locals (" << getTypeName(Ty) << Name << ")\n"; - } - // Test on 'va_list' variable - bool isVaList = false; - if (const VAArgInst* VaInst = dyn_cast<VAArgInst>(&*I)) { - // "va_list" as "va_arg" instruction operand. - isVaList = true; - VaList = VaInst->getOperand(0); - } else if (const IntrinsicInst* Inst = dyn_cast<IntrinsicInst>(&*I)) { - // "va_list" as intrinsic function operand. - switch (Inst->getIntrinsicID()) { - case Intrinsic::vastart: - case Intrinsic::vaend: - case Intrinsic::vacopy: - isVaList = true; - VaList = Inst->getArgOperand(0); - break; - default: - isVaList = false; - } - } - // Print "va_list" variable. 
- if (isVaList && Printed.insert(VaList).second) { - Name = getValueName(VaList); - Name.insert(Name.length()-1,"$valist"); - Out << "\t.locals (valuetype [mscorlib]System.ArgIterator " - << Name << ")\n"; - } - } - printSimpleInstruction(".maxstack",utostr(StackDepth*2).c_str()); -} - - -void MSILWriter::printFunctionBody(const Function& F) { - // Print body - for (Function::const_iterator I = F.begin(), E = F.end(); I!=E; ++I) { - if (Loop *L = LInfo->getLoopFor(I)) { - if (L->getHeader()==I && L->getParentLoop()==0) - printLoop(L); - } else { - printBasicBlock(I); - } - } -} - - -void MSILWriter::printConstantExpr(const ConstantExpr* CE) { - const Value *left = 0, *right = 0; - if (CE->getNumOperands()>=1) left = CE->getOperand(0); - if (CE->getNumOperands()>=2) right = CE->getOperand(1); - // Print instruction - switch (CE->getOpcode()) { - case Instruction::Trunc: - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::FPTrunc: - case Instruction::FPExt: - case Instruction::UIToFP: - case Instruction::SIToFP: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::PtrToInt: - case Instruction::IntToPtr: - case Instruction::BitCast: - printCastInstruction(CE->getOpcode(),left,CE->getType()); - break; - case Instruction::GetElementPtr: - printGepInstruction(CE->getOperand(0),gep_type_begin(CE),gep_type_end(CE)); - break; - case Instruction::ICmp: - printICmpInstruction(CE->getPredicate(),left,right); - break; - case Instruction::FCmp: - printFCmpInstruction(CE->getPredicate(),left,right); - break; - case Instruction::Select: - printSelectInstruction(CE->getOperand(0),CE->getOperand(1),CE->getOperand(2)); - break; - case Instruction::Add: - case Instruction::FAdd: - printBinaryInstruction("add",left,right); - break; - case Instruction::Sub: - case Instruction::FSub: - printBinaryInstruction("sub",left,right); - break; - case Instruction::Mul: - case Instruction::FMul: - printBinaryInstruction("mul",left,right); - break; - case Instruction::UDiv: - printBinaryInstruction("div.un",left,right); - break; - case Instruction::SDiv: - case Instruction::FDiv: - printBinaryInstruction("div",left,right); - break; - case Instruction::URem: - printBinaryInstruction("rem.un",left,right); - break; - case Instruction::SRem: - case Instruction::FRem: - printBinaryInstruction("rem",left,right); - break; - case Instruction::And: - printBinaryInstruction("and",left,right); - break; - case Instruction::Or: - printBinaryInstruction("or",left,right); - break; - case Instruction::Xor: - printBinaryInstruction("xor",left,right); - break; - case Instruction::Shl: - printBinaryInstruction("shl",left,right); - break; - case Instruction::LShr: - printBinaryInstruction("shr.un",left,right); - break; - case Instruction::AShr: - printBinaryInstruction("shr",left,right); - break; - default: - errs() << "Expression = " << *CE << "\n"; - llvm_unreachable("Invalid constant expression"); - } -} - - -void MSILWriter::printStaticInitializerList() { - // List of global variables with uninitialized fields. - for (std::map<const GlobalVariable*,std::vector<StaticInitializer> >::iterator - VarI = StaticInitList.begin(), VarE = StaticInitList.end(); VarI!=VarE; - ++VarI) { - const std::vector<StaticInitializer>& InitList = VarI->second; - if (InitList.empty()) continue; - // For each uninitialized field. 
- for (std::vector<StaticInitializer>::const_iterator I = InitList.begin(), - E = InitList.end(); I!=E; ++I) { - if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(I->constant)) { - // Out << "\n// Init " << getValueName(VarI->first) << ", offset " << - // utostr(I->offset) << ", type "<< *I->constant->getType() << "\n\n"; - // Load variable address - printValueLoad(VarI->first); - // Add offset - if (I->offset!=0) { - printPtrLoad(I->offset); - printSimpleInstruction("add"); - } - // Load value - printConstantExpr(CE); - // Save result at offset - std::string postfix = getTypePostfix(CE->getType(),true); - if (*postfix.begin()=='u') *postfix.begin() = 'i'; - postfix = "stind."+postfix; - printSimpleInstruction(postfix.c_str()); - } else { - errs() << "Constant = " << *I->constant << '\n'; - llvm_unreachable("Invalid static initializer"); - } - } - } -} - - -void MSILWriter::printFunction(const Function& F) { - bool isSigned = F.paramHasAttr(0, Attribute::SExt); - Out << "\n.method static "; - Out << (F.hasLocalLinkage() ? "private " : "public "); - if (F.isVarArg()) Out << "vararg "; - Out << getTypeName(F.getReturnType(),isSigned) << - getConvModopt(F.getCallingConv()) << getValueName(&F) << '\n'; - // Arguments - Out << "\t("; - unsigned ArgIdx = 1; - for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I!=E; - ++I, ++ArgIdx) { - isSigned = F.paramHasAttr(ArgIdx, Attribute::SExt); - if (I!=F.arg_begin()) Out << ", "; - Out << getTypeName(I->getType(),isSigned) << getValueName(I); - } - Out << ") cil managed\n"; - // Body - Out << "{\n"; - printLocalVariables(F); - printFunctionBody(F); - Out << "}\n"; -} - - -void MSILWriter::printDeclarations(const TypeSymbolTable& ST) { - std::string Name; - std::set<const Type*> Printed; - for (std::set<const Type*>::const_iterator - UI = UsedTypes->begin(), UE = UsedTypes->end(); UI!=UE; ++UI) { - const Type* Ty = *UI; - if (Ty->isArrayTy() || Ty->isVectorTy() || Ty->isStructTy()) - Name = getTypeName(Ty, false, true); - // Type with no need to declare. - else continue; - // Print not duplicated type - if (Printed.insert(Ty).second) { - Out << ".class value explicit ansi sealed '" << Name << "'"; - Out << " { .pack " << 1 << " .size " << TD->getTypeAllocSize(Ty); - Out << " }\n\n"; - } - } -} - - -unsigned int MSILWriter::getBitWidth(const Type* Ty) { - unsigned int N = Ty->getPrimitiveSizeInBits(); - assert(N!=0 && "Invalid type in getBitWidth()"); - switch (N) { - case 1: - case 8: - case 16: - case 32: - case 64: - return N; - default: - errs() << "Bits = " << N << '\n'; - llvm_unreachable("Unsupported integer width"); - } - return 0; // Not reached -} - - -void MSILWriter::printStaticConstant(const Constant* C, uint64_t& Offset) { - uint64_t TySize = 0; - const Type* Ty = C->getType(); - // Print zero initialized constant. 
- if (isa<ConstantAggregateZero>(C) || C->isNullValue()) { - TySize = TD->getTypeAllocSize(C->getType()); - Offset += TySize; - Out << "int8 (0) [" << TySize << "]"; - return; - } - // Print constant initializer - switch (Ty->getTypeID()) { - case Type::IntegerTyID: { - TySize = TD->getTypeAllocSize(Ty); - const ConstantInt* Int = cast<ConstantInt>(C); - Out << getPrimitiveTypeName(Ty,true) << "(" << Int->getSExtValue() << ")"; - break; - } - case Type::FloatTyID: - case Type::DoubleTyID: { - TySize = TD->getTypeAllocSize(Ty); - const ConstantFP* FP = cast<ConstantFP>(C); - if (Ty->getTypeID() == Type::FloatTyID) - Out << "int32 (" << - (uint32_t)FP->getValueAPF().bitcastToAPInt().getZExtValue() << ')'; - else - Out << "int64 (" << - FP->getValueAPF().bitcastToAPInt().getZExtValue() << ')'; - break; - } - case Type::ArrayTyID: - case Type::VectorTyID: - case Type::StructTyID: - for (unsigned I = 0, E = C->getNumOperands(); I<E; I++) { - if (I!=0) Out << ",\n"; - printStaticConstant(cast<Constant>(C->getOperand(I)), Offset); - } - break; - case Type::PointerTyID: - TySize = TD->getTypeAllocSize(C->getType()); - // Initialize with global variable address - if (const GlobalVariable *G = dyn_cast<GlobalVariable>(C)) { - std::string name = getValueName(G); - Out << "&(" << name.insert(name.length()-1,"$data") << ")"; - } else { - // Dynamic initialization - if (!isa<ConstantPointerNull>(C) && !C->isNullValue()) - InitListPtr->push_back(StaticInitializer(C,Offset)); - // Null pointer initialization - if (TySize==4) Out << "int32 (0)"; - else if (TySize==8) Out << "int64 (0)"; - else llvm_unreachable("Invalid pointer size"); - } - break; - default: - errs() << "TypeID = " << Ty->getTypeID() << '\n'; - llvm_unreachable("Invalid type in printStaticConstant()"); - } - // Increase offset. - Offset += TySize; -} - - -void MSILWriter::printStaticInitializer(const Constant* C, - const std::string& Name) { - switch (C->getType()->getTypeID()) { - case Type::IntegerTyID: - case Type::FloatTyID: - case Type::DoubleTyID: - Out << getPrimitiveTypeName(C->getType(), false); - break; - case Type::ArrayTyID: - case Type::VectorTyID: - case Type::StructTyID: - case Type::PointerTyID: - Out << getTypeName(C->getType()); - break; - default: - errs() << "Type = " << *C << "\n"; - llvm_unreachable("Invalid constant type"); - } - // Print initializer - std::string label = Name; - label.insert(label.length()-1,"$data"); - Out << Name << " at " << label << '\n'; - Out << ".data " << label << " = {\n"; - uint64_t offset = 0; - printStaticConstant(C,offset); - Out << "\n}\n\n"; -} - - -void MSILWriter::printVariableDefinition(const GlobalVariable* G) { - const Constant* C = G->getInitializer(); - if (C->isNullValue() || isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) - InitListPtr = 0; - else - InitListPtr = &StaticInitList[G]; - printStaticInitializer(C,getValueName(G)); -} - - -void MSILWriter::printGlobalVariables() { - if (ModulePtr->global_empty()) return; - Module::global_iterator I,E; - for (I = ModulePtr->global_begin(), E = ModulePtr->global_end(); I!=E; ++I) { - // Variable definition - Out << ".field static " << (I->isDeclaration() ? 
"public " : - "private "); - if (I->isDeclaration()) { - Out << getTypeName(I->getType()) << getValueName(&*I) << "\n\n"; - } else - printVariableDefinition(&*I); - } -} - - -const char* MSILWriter::getLibraryName(const Function* F) { - return getLibraryForSymbol(F->getName(), true, F->getCallingConv()); -} - - -const char* MSILWriter::getLibraryName(const GlobalVariable* GV) { - return getLibraryForSymbol(GV->getName(), false, CallingConv::C); -} - - -const char* MSILWriter::getLibraryForSymbol(StringRef Name, bool isFunction, - CallingConv::ID CallingConv) { - // TODO: Read *.def file with function and libraries definitions. - return "MSVCRT.DLL"; -} - - -void MSILWriter::printExternals() { - Module::const_iterator I,E; - // Functions. - for (I=ModulePtr->begin(),E=ModulePtr->end(); I!=E; ++I) { - // Skip intrisics - if (I->isIntrinsic()) continue; - if (I->isDeclaration()) { - const Function* F = I; - std::string Name = getConvModopt(F->getCallingConv())+getValueName(F); - std::string Sig = - getCallSignature(cast<FunctionType>(F->getFunctionType()), NULL, Name); - Out << ".method static hidebysig pinvokeimpl(\"" - << getLibraryName(F) << "\")\n\t" << Sig << " preservesig {}\n\n"; - } - } - // External variables and static initialization. - Out << - ".method public hidebysig static pinvokeimpl(\"KERNEL32.DLL\" ansi winapi)" - " native int LoadLibrary(string) preservesig {}\n" - ".method public hidebysig static pinvokeimpl(\"KERNEL32.DLL\" ansi winapi)" - " native int GetProcAddress(native int, string) preservesig {}\n"; - Out << - ".method private static void* $MSIL_Import(string lib,string sym)\n" - " managed cil\n{\n" - "\tldarg\tlib\n" - "\tcall\tnative int LoadLibrary(string)\n" - "\tldarg\tsym\n" - "\tcall\tnative int GetProcAddress(native int,string)\n" - "\tdup\n" - "\tbrtrue\tL_01\n" - "\tldstr\t\"Can no import variable\"\n" - "\tnewobj\tinstance void [mscorlib]System.Exception::.ctor(string)\n" - "\tthrow\n" - "L_01:\n" - "\tret\n" - "}\n\n" - ".method static private void $MSIL_Init() managed cil\n{\n"; - printStaticInitializerList(); - // Foreach global variable. - for (Module::global_iterator I = ModulePtr->global_begin(), - E = ModulePtr->global_end(); I!=E; ++I) { - if (!I->isDeclaration() || !I->hasDLLImportLinkage()) continue; - // Use "LoadLibrary"/"GetProcAddress" to recive variable address. 
- std::string Tmp = getTypeName(I->getType())+getValueName(&*I); - printSimpleInstruction("ldsflda",Tmp.c_str()); - Out << "\tldstr\t\"" << getLibraryName(&*I) << "\"\n"; - Out << "\tldstr\t\"" << I->getName() << "\"\n"; - printSimpleInstruction("call","void* $MSIL_Import(string,string)"); - printIndirectSave(I->getType()); - } - printSimpleInstruction("ret"); - Out << "}\n\n"; -} - - -//===----------------------------------------------------------------------===// -// External Interface declaration -//===----------------------------------------------------------------------===// - -bool MSILTarget::addPassesToEmitFile(PassManagerBase &PM, - formatted_raw_ostream &o, - CodeGenFileType FileType, - CodeGenOpt::Level OptLevel, - bool DisableVerify) -{ - if (FileType != TargetMachine::CGFT_AssemblyFile) return true; - MSILWriter* Writer = new MSILWriter(o); - PM.add(createGCLoweringPass()); - // FIXME: Handle switch through native IL instruction "switch" - PM.add(createLowerSwitchPass()); - PM.add(createCFGSimplificationPass()); - PM.add(new MSILModule(Writer->UsedTypes,Writer->TD)); - PM.add(Writer); - PM.add(createGCInfoDeleter()); - return false; -} diff --git a/lib/Target/MSIL/MSILWriter.h b/lib/Target/MSIL/MSILWriter.h deleted file mode 100644 index 92a3abe5c0a7..000000000000 --- a/lib/Target/MSIL/MSILWriter.h +++ /dev/null @@ -1,258 +0,0 @@ -//===-- MSILWriter.h - TargetMachine for the MSIL ---------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the MSILWriter that is used by the MSIL. -// -//===----------------------------------------------------------------------===// -#ifndef MSILWRITER_H -#define MSILWRITER_H - -#include "llvm/CallingConv.h" -#include "llvm/Constants.h" -#include "llvm/Module.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Pass.h" -#include "llvm/PassManager.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Analysis/FindUsedTypes.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Support/FormattedStream.h" -#include "llvm/Support/GetElementPtrTypeIterator.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetMachine.h" - -namespace llvm { - extern Target TheMSILTarget; - - class MSILModule : public ModulePass { - Module *ModulePtr; - const std::set<const Type *>*& UsedTypes; - const TargetData*& TD; - - public: - static char ID; - MSILModule(const std::set<const Type *>*& _UsedTypes, - const TargetData*& _TD) - : ModulePass(&ID), UsedTypes(_UsedTypes), TD(_TD) {} - - void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<FindUsedTypes>(); - AU.addRequired<TargetData>(); - } - - virtual const char *getPassName() const { - return "MSIL backend definitions"; - } - - virtual bool runOnModule(Module &M); - - }; - - class MSILWriter : public FunctionPass { - struct StaticInitializer { - const Constant* constant; - uint64_t offset; - - StaticInitializer() - : constant(0), offset(0) {} - - StaticInitializer(const Constant* _constant, uint64_t _offset) - : constant(_constant), offset(_offset) {} - }; - - uint64_t UniqID; - - uint64_t getUniqID() { - return ++UniqID; - } - - public: - formatted_raw_ostream &Out; - Module* ModulePtr; - const TargetData* TD; - LoopInfo *LInfo; - std::vector<StaticInitializer>* InitListPtr; - std::map<const 
GlobalVariable*,std::vector<StaticInitializer> > - StaticInitList; - const std::set<const Type *>* UsedTypes; - static char ID; - DenseMap<const Value*, unsigned> AnonValueNumbers; - unsigned NextAnonValueNumber; - - MSILWriter(formatted_raw_ostream &o) : FunctionPass(&ID), Out(o), - NextAnonValueNumber(0) { - UniqID = 0; - } - - enum ValueType { - UndefVT, - GlobalVT, - InternalVT, - ArgumentVT, - LocalVT, - ConstVT, - ConstExprVT - }; - - bool isVariable(ValueType V) { - return V==GlobalVT || V==InternalVT || V==ArgumentVT || V==LocalVT; - } - - bool isConstValue(ValueType V) { - return V==ConstVT || V==ConstExprVT; - } - - virtual const char *getPassName() const { return "MSIL backend"; } - - void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<LoopInfo>(); - AU.setPreservesAll(); - } - - bool runOnFunction(Function &F); - - virtual bool doInitialization(Module &M); - - virtual bool doFinalization(Module &M); - - void printModuleStartup(); - - bool isZeroValue(const Value* V); - - std::string getValueName(const Value* V); - - std::string getLabelName(const Value* V); - - std::string getLabelName(const std::string& Name); - - std::string getConvModopt(CallingConv::ID CallingConvID); - - std::string getArrayTypeName(Type::TypeID TyID, const Type* Ty); - - std::string getPrimitiveTypeName(const Type* Ty, bool isSigned); - - std::string getFunctionTypeName(const Type* Ty); - - std::string getPointerTypeName(const Type* Ty); - - std::string getTypeName(const Type* Ty, bool isSigned = false, - bool isNested = false); - - ValueType getValueLocation(const Value* V); - - std::string getTypePostfix(const Type* Ty, bool Expand, - bool isSigned = false); - - void printConvToPtr(); - - void printPtrLoad(uint64_t N); - - void printValuePtrLoad(const Value* V); - - void printConstLoad(const Constant* C); - - void printValueLoad(const Value* V); - - void printValueSave(const Value* V); - - void printBinaryInstruction(const char* Name, const Value* Left, - const Value* Right); - - void printSimpleInstruction(const char* Inst, const char* Operand = NULL); - - void printPHICopy(const BasicBlock* Src, const BasicBlock* Dst); - - void printBranchToBlock(const BasicBlock* CurrBB, - const BasicBlock* TrueBB, - const BasicBlock* FalseBB); - - void printBranchInstruction(const BranchInst* Inst); - - void printSelectInstruction(const Value* Cond, const Value* VTrue, - const Value* VFalse); - - void printIndirectLoad(const Value* V); - - void printIndirectSave(const Value* Ptr, const Value* Val); - - void printIndirectSave(const Type* Ty); - - void printCastInstruction(unsigned int Op, const Value* V, - const Type* Ty, const Type* SrcTy=0); - - void printGepInstruction(const Value* V, gep_type_iterator I, - gep_type_iterator E); - - std::string getCallSignature(const FunctionType* Ty, - const Instruction* Inst, - std::string Name); - - void printFunctionCall(const Value* FnVal, const Instruction* Inst); - - void printIntrinsicCall(const IntrinsicInst* Inst); - - void printCallInstruction(const Instruction* Inst); - - void printICmpInstruction(unsigned Predicate, const Value* Left, - const Value* Right); - - void printFCmpInstruction(unsigned Predicate, const Value* Left, - const Value* Right); - - void printInvokeInstruction(const InvokeInst* Inst); - - void printSwitchInstruction(const SwitchInst* Inst); - - void printVAArgInstruction(const VAArgInst* Inst); - - void printAllocaInstruction(const AllocaInst* Inst); - - void printInstruction(const Instruction* Inst); - - void printLoop(const Loop* L); 
- - void printBasicBlock(const BasicBlock* BB); - - void printLocalVariables(const Function& F); - - void printFunctionBody(const Function& F); - - void printConstantExpr(const ConstantExpr* CE); - - void printStaticInitializerList(); - - void printFunction(const Function& F); - - void printDeclarations(const TypeSymbolTable& ST); - - unsigned int getBitWidth(const Type* Ty); - - void printStaticConstant(const Constant* C, uint64_t& Offset); - - void printStaticInitializer(const Constant* C, const std::string& Name); - - void printVariableDefinition(const GlobalVariable* G); - - void printGlobalVariables(); - - const char* getLibraryName(const Function* F); - - const char* getLibraryName(const GlobalVariable* GV); - - const char* getLibraryForSymbol(StringRef Name, bool isFunction, - CallingConv::ID CallingConv); - - void printExternals(); - }; - -} - -#endif - diff --git a/lib/Target/MSIL/Makefile b/lib/Target/MSIL/Makefile deleted file mode 100644 index 70eadb32e360..000000000000 --- a/lib/Target/MSIL/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/MSIL/Makefile ----------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. -LIBRARYNAME = LLVMMSIL -DIRS = TargetInfo - -include $(LEVEL)/Makefile.common - -CompileCommonOpts := $(CompileCommonOpts) -Wno-format diff --git a/lib/Target/MSIL/README.TXT b/lib/Target/MSIL/README.TXT deleted file mode 100644 index d797c71fd39f..000000000000 --- a/lib/Target/MSIL/README.TXT +++ /dev/null @@ -1,26 +0,0 @@ -//===---------------------------------------------------------------------===// - -Vector instructions support. - -ShuffleVector -ExtractElement -InsertElement - -//===---------------------------------------------------------------------===// - -Add "OpaqueType" type. - -//===---------------------------------------------------------------------===// - -"switch" instruction emulation with CLI "switch" instruction. - -//===---------------------------------------------------------------------===// - -Write linker for external function, because function export need to know -dynamic library where function located. - -.method static hidebysig pinvokeimpl("msvcrt.dll" cdecl) - void free(void*) preservesig {} - - - diff --git a/lib/Target/MSIL/TargetInfo/CMakeLists.txt b/lib/Target/MSIL/TargetInfo/CMakeLists.txt deleted file mode 100644 index 9f0c3a09341a..000000000000 --- a/lib/Target/MSIL/TargetInfo/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMMSILInfo - MSILTargetInfo.cpp - ) - diff --git a/lib/Target/MSIL/TargetInfo/MSILTargetInfo.cpp b/lib/Target/MSIL/TargetInfo/MSILTargetInfo.cpp deleted file mode 100644 index dfd42814e51c..000000000000 --- a/lib/Target/MSIL/TargetInfo/MSILTargetInfo.cpp +++ /dev/null @@ -1,26 +0,0 @@ -//===-- MSILTargetInfo.cpp - MSIL Target Implementation -------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#include "MSILWriter.h" -#include "llvm/Module.h" -#include "llvm/Target/TargetRegistry.h" -using namespace llvm; - -Target llvm::TheMSILTarget; - -static unsigned MSIL_TripleMatchQuality(const std::string &TT) { - // This class always works, but shouldn't be the default in most cases. - return 1; -} - -extern "C" void LLVMInitializeMSILTargetInfo() { - TargetRegistry::RegisterTarget(TheMSILTarget, "msil", - "MSIL backend", - &MSIL_TripleMatchQuality); -} diff --git a/lib/Target/MSIL/TargetInfo/Makefile b/lib/Target/MSIL/TargetInfo/Makefile deleted file mode 100644 index 30b0950db0f7..000000000000 --- a/lib/Target/MSIL/TargetInfo/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/MSIL/TargetInfo/Makefile -----------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMMSILInfo - -# Hack: we need to include 'main' target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/MSP430/MSP430BranchSelector.cpp b/lib/Target/MSP430/MSP430BranchSelector.cpp index 68cb342b08f4..bd644435c76f 100644 --- a/lib/Target/MSP430/MSP430BranchSelector.cpp +++ b/lib/Target/MSP430/MSP430BranchSelector.cpp @@ -10,7 +10,7 @@ // This file contains a pass that scans a machine function to determine which // conditional branches need more than 10 bits of displacement to reach their // target basic block. It does this in two passes; a calculation of basic block -// positions pass, and a branch psuedo op to machine branch opcode pass. This +// positions pass, and a branch pseudo op to machine branch opcode pass. This // pass should be run last, just before the assembly printer. // //===----------------------------------------------------------------------===// @@ -30,7 +30,7 @@ STATISTIC(NumExpanded, "Number of branches expanded to long format"); namespace { struct MSP430BSel : public MachineFunctionPass { static char ID; - MSP430BSel() : MachineFunctionPass(&ID) {} + MSP430BSel() : MachineFunctionPass(ID) {} /// BlockSizes - The sizes of the basic blocks in the function. std::vector<unsigned> BlockSizes; @@ -52,7 +52,8 @@ FunctionPass *llvm::createMSP430BranchSelectionPass() { } bool MSP430BSel::runOnMachineFunction(MachineFunction &Fn) { - const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo(); + const MSP430InstrInfo *TII = + static_cast<const MSP430InstrInfo*>(Fn.getTarget().getInstrInfo()); // Give the blocks of the function a dense, in-order, numbering. Fn.RenumberBlocks(); BlockSizes.resize(Fn.getNumBlockIDs()); diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp index df28d07f5d71..bfab844f5b1a 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -100,27 +100,6 @@ void MSP430InstrInfo::copyPhysReg(MachineBasicBlock &MBB, } bool -MSP430InstrInfo::isMoveInstr(const MachineInstr& MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const { - SrcSubIdx = DstSubIdx = 0; // No sub-registers yet. 
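The MSP430 and PowerPC branch-selector passes in this diff both describe the same two-pass scheme: compute basic-block sizes and positions, then rewrite any conditional branch whose displacement no longer fits the instruction's signed offset field (10 bits on MSP430, 16 on PowerPC) into a long-form sequence. The following is a minimal standalone C++ sketch of that idea, not the LLVM pass itself; the block sizes, the 10-bit limit and the 4-byte expansion cost are illustrative assumptions.

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

// Does a signed displacement fit in a `bits`-wide immediate field?
static bool fitsInSignedBits(int64_t disp, unsigned bits) {
  int64_t lo = -(int64_t(1) << (bits - 1));
  int64_t hi = (int64_t(1) << (bits - 1)) - 1;
  return disp >= lo && disp <= hi;
}

struct Branch { unsigned fromBlock, toBlock; bool isLong; };

int main() {
  // Byte size of each basic block (illustrative numbers only).
  std::vector<unsigned> blockSize = {12, 4000, 8, 16};
  std::vector<Branch> branches = {{0, 3, false}, {2, 0, false}};

  // Iterate until stable: expanding one branch can push another out of range.
  bool changed = true;
  while (changed) {
    changed = false;
    // Pass 1: block start offsets from the accumulated sizes.
    std::vector<int64_t> start(blockSize.size(), 0);
    for (std::size_t i = 1; i < blockSize.size(); ++i)
      start[i] = start[i - 1] + blockSize[i - 1];
    // Pass 2: relax every short branch whose displacement no longer fits.
    for (Branch &b : branches) {
      int64_t disp = start[b.toBlock] - start[b.fromBlock];
      if (!b.isLong && !fitsInSignedBits(disp, 10)) {
        b.isLong = true;               // switch to the long-form branch sequence
        blockSize[b.fromBlock] += 4;   // which costs extra bytes
        changed = true;
      }
    }
  }
  for (const Branch &b : branches)
    std::cout << "branch " << b.fromBlock << " -> " << b.toBlock
              << (b.isLong ? ": long form\n" : ": short form\n");
  return 0;
}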
- - switch (MI.getOpcode()) { - default: - return false; - case MSP430::MOV8rr: - case MSP430::MOV16rr: - assert(MI.getNumOperands() >= 2 && - MI.getOperand(0).isReg() && - MI.getOperand(1).isReg() && - "invalid register-register move instruction"); - SrcReg = MI.getOperand(1).getReg(); - DstReg = MI.getOperand(0).getReg(); - return true; - } -} - -bool MSP430InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, @@ -361,7 +340,7 @@ unsigned MSP430InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { switch (Desc.getOpcode()) { default: assert(0 && "Unknown instruction size!"); - case TargetOpcode::DBG_LABEL: + case TargetOpcode::PROLOG_LABEL: case TargetOpcode::EH_LABEL: case TargetOpcode::IMPLICIT_DEF: case TargetOpcode::KILL: diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h index ebbda1aeef51..49ccc032bf29 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.h +++ b/lib/Target/MSP430/MSP430InstrInfo.h @@ -54,10 +54,6 @@ public: unsigned DestReg, unsigned SrcReg, bool KillSrc) const; - bool isMoveInstr(const MachineInstr& MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const; - virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp index 608ca49fcf78..3c3fa73477a5 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.cpp +++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -101,7 +101,7 @@ bool MSP430RegisterInfo::hasFP(const MachineFunction &MF) const { MFI->isFrameAddressTaken()); } -bool MSP430RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const { +bool MSP430RegisterInfo::hasReservedCallFrame(const MachineFunction &MF) const { return !MF.getFrameInfo()->hasVarSizedObjects(); } @@ -163,10 +163,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MBB.erase(I); } -unsigned +void MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value, - RegScavenger *RS) const { + int SPAdj, RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); unsigned i = 0; @@ -204,7 +203,7 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(i).ChangeToRegister(BasePtr, false); if (Offset == 0) - return 0; + return; // We need to materialize the offset via add instruction. 
unsigned DstReg = MI.getOperand(0).getReg(); @@ -215,12 +214,11 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, BuildMI(MBB, llvm::next(II), dl, TII.get(MSP430::ADD16ri), DstReg) .addReg(DstReg).addImm(Offset); - return 0; + return; } MI.getOperand(i).ChangeToRegister(BasePtr, false); MI.getOperand(i+1).ChangeToImmediate(Offset); - return 0; } void diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h index 6e58d3116d27..4d2795bb4020 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.h +++ b/lib/Target/MSP430/MSP430RegisterInfo.h @@ -40,15 +40,14 @@ public: const TargetRegisterClass* getPointerRegClass(unsigned Kind = 0) const; bool hasFP(const MachineFunction &MF) const; - bool hasReservedCallFrame(MachineFunction &MF) const; + bool hasReservedCallFrame(const MachineFunction &MF) const; void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value = NULL, - RegScavenger *RS = NULL) const; + void eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS = NULL) const; void emitPrologue(MachineFunction &MF) const; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; diff --git a/lib/Target/Mangler.cpp b/lib/Target/Mangler.cpp index 2037a9114559..49efe75d79d8 100644 --- a/lib/Target/Mangler.cpp +++ b/lib/Target/Mangler.cpp @@ -180,7 +180,8 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName, ManglerPrefixTy PrefixTy = Mangler::Default; if (GV->hasPrivateLinkage() || isImplicitlyPrivate) PrefixTy = Mangler::Private; - else if (GV->hasLinkerPrivateLinkage() || GV->hasLinkerPrivateWeakLinkage()) + else if (GV->hasLinkerPrivateLinkage() || GV->hasLinkerPrivateWeakLinkage() || + GV->hasLinkerPrivateWeakDefAutoLinkage()) PrefixTy = Mangler::LinkerPrivate; // If this global has a name, handle it simply. diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp index 8ae05b75e919..6660f6b62430 100644 --- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp @@ -18,6 +18,8 @@ #include "MipsInstrInfo.h" #include "MipsTargetMachine.h" #include "MipsMachineFunction.h" +#include "llvm/BasicBlock.h" +#include "llvm/Instructions.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineConstantPool.h" @@ -75,6 +77,7 @@ namespace { } virtual void EmitFunctionBodyStart(); virtual void EmitFunctionBodyEnd(); + virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const; static const char *getRegisterName(unsigned RegNo); virtual void EmitFunctionEntryLabel(); @@ -227,6 +230,23 @@ void MipsAsmPrinter::EmitFunctionBodyEnd() { } +/// isBlockOnlyReachableByFallthough - Return true if the basic block has +/// exactly one predecessor and the control transfer mechanism between +/// the predecessor and this block is a fall-through. +bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) + const { + // The predecessor has to be immediately before this block. + const MachineBasicBlock *Pred = *MBB->pred_begin(); + + // If the predecessor is a switch statement, assume a jump table + // implementation, so it is not a fall through. 
+ if (const BasicBlock *bb = Pred->getBasicBlock()) + if (isa<SwitchInst>(bb->getTerminator())) + return false; + + return AsmPrinter::isBlockOnlyReachableByFallthrough(MBB); +} + // Print out an operand for an inline asm expression. bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant,const char *ExtraCode, diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td index aa036aef83d0..a51c3779c7f4 100644 --- a/lib/Target/Mips/Mips.td +++ b/lib/Target/Mips/Mips.td @@ -1,4 +1,4 @@ -//===- Mips.td - Describe the Mips Target Machine ---------------*- C++ -*-===// +//===- Mips.td - Describe the Mips Target Machine ----------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td index c2bfb8fa738c..8f313efaf8da 100644 --- a/lib/Target/Mips/MipsCallingConv.td +++ b/lib/Target/Mips/MipsCallingConv.td @@ -1,4 +1,4 @@ -//===- MipsCallingConv.td - Calling Conventions for Mips --------*- C++ -*-===// +//===- MipsCallingConv.td - Calling Conventions for Mips ---*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp index a2b615d8add2..597ea0d6c207 100644 --- a/lib/Target/Mips/MipsDelaySlotFiller.cpp +++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -32,7 +32,7 @@ namespace { static char ID; Filler(TargetMachine &tm) - : MachineFunctionPass(&ID), TM(tm), TII(tm.getInstrInfo()) { } + : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { } virtual const char *getPassName() const { return "Mips Delay Slot Filler"; diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index 3888bbf09ec7..a47cf7b4f201 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -137,7 +137,7 @@ SelectAddr(SDNode *Op, SDValue Addr, SDValue &Offset, SDValue &Base) // Operand is a result from an ADD. if (Addr.getOpcode() == ISD::ADD) { if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) { - if (Predicate_immSExt16(CN)) { + if (isInt<16>(CN->getSExtValue())) { // If the first operand is a FI, get the TargetFI Node if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode> @@ -184,8 +184,9 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) { if (!Subtarget.isMips1() || NVT != MVT::f64) return NULL; - if (!Predicate_unindexedload(N) || - !Predicate_load(N)) + LoadSDNode *LN = cast<LoadSDNode>(N); + if (LN->getExtensionType() != ISD::NON_EXTLOAD || + LN->getAddressingMode() != ISD::UNINDEXED) return NULL; SDValue Chain = N->getOperand(0); @@ -248,8 +249,8 @@ SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDNode *N) { SDValue Chain = N->getOperand(0); - if (!Predicate_unindexedstore(N) || - !Predicate_store(N)) + StoreSDNode *SN = cast<StoreSDNode>(N); + if (SN->isTruncatingStore() || SN->getAddressingMode() != ISD::UNINDEXED) return NULL; SDValue N1 = N->getOperand(1); diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index b6ff2c371d5c..b0b99bad1607 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -317,13 +317,13 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BB->addSuccessor(sinkMBB); // sinkMBB: - // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] + // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] // ... 
BB = sinkMBB; BuildMI(*BB, BB->begin(), dl, TII->get(Mips::PHI), MI->getOperand(0).getReg()) - .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB) - .addReg(MI->getOperand(3).getReg()).addMBB(thisMBB); + .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB) + .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB); MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; @@ -542,7 +542,7 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) const SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag); - if (IsPIC) { + if (!IsPIC) { SDValue Ops[] = { JTI }; HiPart = DAG.getNode(MipsISD::Hi, dl, DAG.getVTList(MVT::i32), Ops, 1); } else // Emit Load from Global Pointer diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index e948917eb80e..cff79966dcd3 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -1,4 +1,4 @@ -//===- MipsInstrFPU.td - Mips FPU Instruction Information -------*- C++ -*-===// +//===- MipsInstrFPU.td - Mips FPU Instruction Information --*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td index 0853272f7280..98ae2fa7da45 100644 --- a/lib/Target/Mips/MipsInstrFormats.td +++ b/lib/Target/Mips/MipsInstrFormats.td @@ -1,4 +1,4 @@ -//===- MipsRegisterInfo.td - Mips Register defs -----------------*- C++ -*-===// +//===- MipsRegisterInfo.td - Mips Register defs ------------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index 6c09a3e10785..aaf307b1ce3f 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -30,53 +30,6 @@ static bool isZeroImm(const MachineOperand &op) { return op.isImm() && op.getImm() == 0; } -/// Return true if the instruction is a register to register move and -/// leave the source and dest operands in the passed parameters. -bool MipsInstrInfo:: -isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const -{ - SrcSubIdx = DstSubIdx = 0; // No sub-registers. - - // addu $dst, $src, $zero || addu $dst, $zero, $src - // or $dst, $src, $zero || or $dst, $zero, $src - if ((MI.getOpcode() == Mips::ADDu) || (MI.getOpcode() == Mips::OR)) { - if (MI.getOperand(1).getReg() == Mips::ZERO) { - DstReg = MI.getOperand(0).getReg(); - SrcReg = MI.getOperand(2).getReg(); - return true; - } else if (MI.getOperand(2).getReg() == Mips::ZERO) { - DstReg = MI.getOperand(0).getReg(); - SrcReg = MI.getOperand(1).getReg(); - return true; - } - } - - // mov $fpDst, $fpSrc - // mfc $gpDst, $fpSrc - // mtc $fpDst, $gpSrc - if (MI.getOpcode() == Mips::FMOV_S32 || - MI.getOpcode() == Mips::FMOV_D32 || - MI.getOpcode() == Mips::MFC1 || - MI.getOpcode() == Mips::MTC1 || - MI.getOpcode() == Mips::MOVCCRToCCR) { - DstReg = MI.getOperand(0).getReg(); - SrcReg = MI.getOperand(1).getReg(); - return true; - } - - // addiu $dst, $src, 0 - if (MI.getOpcode() == Mips::ADDiu) { - if ((MI.getOperand(1).isReg()) && (isZeroImm(MI.getOperand(2)))) { - DstReg = MI.getOperand(0).getReg(); - SrcReg = MI.getOperand(1).getReg(); - return true; - } - } - - return false; -} - /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of /// the destination along with the FrameIndex of the loaded stack slot. 
If diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index d6f87f9b0ce8..52a3d39840ba 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -174,12 +174,6 @@ public: /// virtual const MipsRegisterInfo &getRegisterInfo() const { return RI; } - /// Return true if the instruction is a register to register move and return - /// the source and dest operands and their sub-register indices by reference. - virtual bool isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const; - /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of /// the destination along with the FrameIndex of the loaded stack slot. If diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 5337c9fb816a..320c5b883483 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -1,4 +1,4 @@ -//===- MipsInstrInfo.td - Mips Register defs --------------------*- C++ -*-===// +//===- MipsInstrInfo.td - Mips Register defs ---------------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -96,12 +96,7 @@ def HI16 : SDNodeXForm<imm, [{ // Node immediate fits as 16-bit sign extended on target immediate. // e.g. addi, andi -def immSExt16 : PatLeaf<(imm), [{ - if (N->getValueType(0) == MVT::i32) - return (int32_t)N->getZExtValue() == (short)N->getZExtValue(); - else - return (int64_t)N->getZExtValue() == (short)N->getZExtValue(); -}]>; +def immSExt16 : PatLeaf<(imm), [{ return isInt<16>(N->getSExtValue()); }]>; // Node immediate fits as 16-bit zero extended on target immediate. // The LO16 param means that only the lower 16 bits of the node diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index e15f0a58e501..69436d2acb54 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -327,10 +327,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // FrameIndex represent objects inside a abstract stack. // We must replace FrameIndex with an stack/frame pointer // direct reference. 
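Several hunks in this diff (the Mips SelectAddr and immSExt16 changes above, and the SPARC simm11/simm13 predicates further down) replace hand-rolled shift-and-compare tests with isInt<N>(...). As a rough standalone illustration of what such a fit test computes, here is a small C++ sketch; it assumes only that "fits as an N-bit sign-extended immediate" means the value lies in [-2^(N-1), 2^(N-1)-1], and it is not LLVM's MathExtras implementation.

#include <cassert>
#include <cstdint>

// True if `value` survives a round trip through an N-bit two's-complement
// field, i.e. sign-extending its low N bits reproduces the original value.
template <unsigned N>
bool fitsSignedImm(int64_t value) {
  static_assert(N > 0 && N < 64, "field width out of range");
  return value >= -(int64_t(1) << (N - 1)) &&
         value <=  (int64_t(1) << (N - 1)) - 1;
}

int main() {
  assert(fitsSignedImm<16>(32767));   // largest 16-bit signed immediate
  assert(fitsSignedImm<16>(-32768));
  assert(!fitsSignedImm<16>(32768));  // would need a multi-instruction sequence
  assert(fitsSignedImm<13>(-4096));   // SPARC simm13 lower bound
  assert(!fitsSignedImm<13>(4096));
  return 0;
}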
-unsigned MipsRegisterInfo:: +void MipsRegisterInfo:: eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, - FrameIndexValue *Value, RegScavenger *RS) const -{ + RegScavenger *RS) const { MachineInstr &MI = *II; MachineFunction &MF = *MI.getParent()->getParent(); @@ -361,7 +360,6 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, MI.getOperand(i-1).ChangeToImmediate(Offset); MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false); - return 0; } void MipsRegisterInfo:: diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h index b500a650f7cc..89282f8fa146 100644 --- a/lib/Target/Mips/MipsRegisterInfo.h +++ b/lib/Target/Mips/MipsRegisterInfo.h @@ -51,9 +51,8 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo { MachineBasicBlock::iterator I) const; /// Stack Frame Processing Methods - unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value = NULL, - RegScavenger *RS = NULL) const; + void eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS = NULL) const; void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index be78a2266268..60efe31fbaf8 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -1,4 +1,4 @@ -//===- MipsRegisterInfo.td - Mips Register defs -----------------*- C++ -*-===// +//===- MipsRegisterInfo.td - Mips Register defs ------------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/Mips/MipsSchedule.td b/lib/Target/Mips/MipsSchedule.td index 616a79bf831c..055ff3237218 100644 --- a/lib/Target/Mips/MipsSchedule.td +++ b/lib/Target/Mips/MipsSchedule.td @@ -1,4 +1,4 @@ -//===- MipsSchedule.td - Mips Scheduling Definitions ------------*- C++ -*-===// +//===- MipsSchedule.td - Mips Scheduling Definitions -------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/PIC16/CMakeLists.txt b/lib/Target/PIC16/CMakeLists.txt index cd4afe8e2342..2b6cb9e4e461 100644 --- a/lib/Target/PIC16/CMakeLists.txt +++ b/lib/Target/PIC16/CMakeLists.txt @@ -10,7 +10,7 @@ tablegen(PIC16GenDAGISel.inc -gen-dag-isel) tablegen(PIC16GenCallingConv.inc -gen-callingconv) tablegen(PIC16GenSubtarget.inc -gen-subtarget) -add_llvm_target(PIC16 +add_llvm_target(PIC16CodeGen PIC16DebugInfo.cpp PIC16InstrInfo.cpp PIC16ISelDAGToDAG.cpp diff --git a/lib/Target/PIC16/PIC16.h b/lib/Target/PIC16/PIC16.h index cee55f4f260f..08bb3e6f055b 100644 --- a/lib/Target/PIC16/PIC16.h +++ b/lib/Target/PIC16/PIC16.h @@ -58,13 +58,10 @@ namespace PIC16CC { ESNames() {} public: ~ESNames() { - std::vector<char*>::iterator it = stk.end(); - it--; - while(stk.end() != stk.begin()) + while (!stk.empty()) { - char* p = *it; + char* p = stk.back(); delete [] p; - it--; stk.pop_back(); } } diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp index 54a6a28992bf..527b31d0cc9f 100644 --- a/lib/Target/PIC16/PIC16ISelLowering.cpp +++ b/lib/Target/PIC16/PIC16ISelLowering.cpp @@ -312,6 +312,16 @@ PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM) computeRegisterProperties(); } +std::pair<const TargetRegisterClass*, uint8_t> +PIC16TargetLowering::findRepresentativeClass(EVT VT) const { + switch (VT.getSimpleVT().SimpleTy) { + default: + return TargetLowering::findRepresentativeClass(VT); + case MVT::i16: + return std::make_pair(PIC16::FSR16RegisterClass, 1); + 
} +} + // getOutFlag - Extract the flag result if the Op has it. static SDValue getOutFlag(SDValue &Op) { // Flag is the last value of the node. diff --git a/lib/Target/PIC16/PIC16ISelLowering.h b/lib/Target/PIC16/PIC16ISelLowering.h index 0a7506cb497f..d942af46a9e9 100644 --- a/lib/Target/PIC16/PIC16ISelLowering.h +++ b/lib/Target/PIC16/PIC16ISelLowering.h @@ -50,7 +50,7 @@ namespace llvm { CALL, // PIC16 Call instruction CALLW, // PIC16 CALLW instruction SUBCC, // Compare for equality or inequality. - SELECT_ICC, // Psuedo to be caught in schedular and expanded to brcond. + SELECT_ICC, // Pseudo to be caught in scheduler and expanded to brcond. BRCOND, // Conditional branch. RET, // Return. Dummy @@ -181,6 +181,9 @@ namespace llvm { // FIXME: The function never seems to be aligned. return 1; } + protected: + std::pair<const TargetRegisterClass*, uint8_t> + findRepresentativeClass(EVT VT) const; private: // If the Node is a BUILD_PAIR representing a direct Address, // then this function will return true. diff --git a/lib/Target/PIC16/PIC16InstrInfo.cpp b/lib/Target/PIC16/PIC16InstrInfo.cpp index e784f746f7f9..81257f3c4108 100644 --- a/lib/Target/PIC16/PIC16InstrInfo.cpp +++ b/lib/Target/PIC16/PIC16InstrInfo.cpp @@ -167,21 +167,6 @@ void PIC16InstrInfo::copyPhysReg(MachineBasicBlock &MBB, .addReg(SrcReg, getKillRegState(KillSrc)); } -bool PIC16InstrInfo::isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DestReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const { - SrcSubIdx = DstSubIdx = 0; // No sub-registers. - - if (MI.getOpcode() == PIC16::copy_fsr - || MI.getOpcode() == PIC16::copy_w) { - DestReg = MI.getOperand(0).getReg(); - SrcReg = MI.getOperand(1).getReg(); - return true; - } - - return false; -} - /// InsertBranch - Insert a branch into the end of the specified /// MachineBasicBlock. This operands to this method are the same as those /// returned by AnalyzeBranch. This is invoked in cases where AnalyzeBranch diff --git a/lib/Target/PIC16/PIC16InstrInfo.h b/lib/Target/PIC16/PIC16InstrInfo.h index a3a77f11ba16..661b335d3b6c 100644 --- a/lib/Target/PIC16/PIC16InstrInfo.h +++ b/lib/Target/PIC16/PIC16InstrInfo.h @@ -61,10 +61,6 @@ public: MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const; - virtual bool isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const; - virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, diff --git a/lib/Target/PIC16/PIC16MemSelOpt.cpp b/lib/Target/PIC16/PIC16MemSelOpt.cpp index 241170b11c2a..b6aa38f765ea 100644 --- a/lib/Target/PIC16/PIC16MemSelOpt.cpp +++ b/lib/Target/PIC16/PIC16MemSelOpt.cpp @@ -38,7 +38,7 @@ using namespace llvm; namespace { struct MemSelOpt : public MachineFunctionPass { static char ID; - MemSelOpt() : MachineFunctionPass(&ID) {} + MemSelOpt() : MachineFunctionPass(ID) {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreservedID(MachineLoopInfoID); diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp index 27f1cf572ae6..56f021157092 100644 --- a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp +++ b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp @@ -256,7 +256,7 @@ PIC16Cloner::cloneFunction(Function *OrgF) { CloneAutos(OrgF); // Now create the clone. 
- ClonedF = CloneFunction(OrgF, VMap); + ClonedF = CloneFunction(OrgF, VMap, /*ModuleLevelChanges=*/false); // The new function should be for interrupt line. Therefore should have // the name suffixed with IL and section attribute marked with IL. diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h index e8b5aa45cdca..e7d67ce09629 100644 --- a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h +++ b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h @@ -35,7 +35,7 @@ namespace llvm { class PIC16Cloner : public ModulePass { public: static char ID; // Class identification - PIC16Cloner() : ModulePass(&ID) {} + PIC16Cloner() : ModulePass(ID) {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<CallGraph>(); diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp index 5ecb6aa55157..0f8928a4b5f5 100644 --- a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp +++ b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp @@ -171,8 +171,9 @@ void PIC16Overlay::MarkIndirectlyCalledFunctions(Module &M) { for (Module::iterator MI = M.begin(), E = M.end(); MI != E; ++MI) { for (Value::use_iterator I = MI->use_begin(), E = MI->use_end(); I != E; ++I) { - if ((!isa<CallInst>(I) && !isa<InvokeInst>(I)) - || !CallSite(cast<Instruction>(I)).isCallee(I)) { + User *U = *I; + if ((!isa<CallInst>(U) && !isa<InvokeInst>(U)) + || !CallSite(cast<Instruction>(U)).isCallee(I)) { setColor(MI, ++IndirectCallColor); break; } diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h index 5a2551fabcda..2f611e65de1f 100644 --- a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h +++ b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h @@ -39,7 +39,7 @@ namespace llvm { unsigned IndirectCallColor; public: static char ID; // Class identification - PIC16Overlay() : ModulePass(&ID) { + PIC16Overlay() : ModulePass(ID) { OverlayStr = "Overlay="; InterruptDepth = PIC16OVERLAY::StartInterruptColor; IndirectCallColor = PIC16OVERLAY::StartIndirectCallColor; diff --git a/lib/Target/PIC16/PIC16RegisterInfo.cpp b/lib/Target/PIC16/PIC16RegisterInfo.cpp index dff98d12c2ae..76de47fdf0f4 100644 --- a/lib/Target/PIC16/PIC16RegisterInfo.cpp +++ b/lib/Target/PIC16/PIC16RegisterInfo.cpp @@ -44,13 +44,10 @@ bool PIC16RegisterInfo::hasFP(const MachineFunction &MF) const { return false; } -unsigned PIC16RegisterInfo:: +void PIC16RegisterInfo:: eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, - FrameIndexValue *Value, RegScavenger *RS) const -{ - /* NOT YET IMPLEMENTED */ - return 0; -} + RegScavenger *RS) const +{ /* NOT YET IMPLEMENTED */ } void PIC16RegisterInfo::emitPrologue(MachineFunction &MF) const { /* NOT YET IMPLEMENTED */ } diff --git a/lib/Target/PIC16/PIC16RegisterInfo.h b/lib/Target/PIC16/PIC16RegisterInfo.h index 5536a617d2be..20052b003442 100644 --- a/lib/Target/PIC16/PIC16RegisterInfo.h +++ b/lib/Target/PIC16/PIC16RegisterInfo.h @@ -44,9 +44,8 @@ class PIC16RegisterInfo : public PIC16GenRegisterInfo { virtual BitVector getReservedRegs(const MachineFunction &MF) const; virtual bool hasFP(const MachineFunction &MF) const; - virtual unsigned eliminateFrameIndex(MachineBasicBlock::iterator MI, - int SPAdj, FrameIndexValue *Value = NULL, - RegScavenger *RS=NULL) const; + virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, + int SPAdj, RegScavenger *RS=NULL) const; void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, diff --git 
a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp index e35dc579f2cd..c1a5663be931 100644 --- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp @@ -43,6 +43,7 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -327,6 +328,19 @@ namespace { void printPredicateOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O, const char *Modifier); + + MachineLocation getDebugValueLocation(const MachineInstr *MI) const { + + MachineLocation Location; + assert (MI->getNumOperands() == 4 && "Invalid no. of machine operands!"); + // Frame address. Currently handles register +- offset only. + if (MI->getOperand(0).isReg() && MI->getOperand(2).isImm()) + Location.set(MI->getOperand(0).getReg(), MI->getOperand(2).getImm()); + else { + DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n"); + } + return Location; + } }; /// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp index 52948c868b9c..e161d23600e2 100644 --- a/lib/Target/PowerPC/PPCBranchSelector.cpp +++ b/lib/Target/PowerPC/PPCBranchSelector.cpp @@ -10,7 +10,7 @@ // This file contains a pass that scans a machine function to determine which // conditional branches need more than 16 bits of displacement to reach their // target basic block. It does this in two passes; a calculation of basic block -// positions pass, and a branch psuedo op to machine branch opcode pass. This +// positions pass, and a branch pseudo op to machine branch opcode pass. This // pass should be run last, just before the assembly printer. // //===----------------------------------------------------------------------===// @@ -31,7 +31,7 @@ STATISTIC(NumExpanded, "Number of branches expanded to long format"); namespace { struct PPCBSel : public MachineFunctionPass { static char ID; - PPCBSel() : MachineFunctionPass(&ID) {} + PPCBSel() : MachineFunctionPass(ID) {} /// BlockSizes - The sizes of the basic blocks in the function. std::vector<unsigned> BlockSizes; @@ -53,7 +53,8 @@ FunctionPass *llvm::createPPCBranchSelectionPass() { } bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { - const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo(); + const PPCInstrInfo *TII = + static_cast<const PPCInstrInfo*>(Fn.getTarget().getInstrInfo()); // Give the blocks of the function a dense, in-order, numbering. 
Fn.RenumberBlocks(); BlockSizes.resize(Fn.getNumBlockIDs()); diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td index 155fba22d9d7..441db94581ae 100644 --- a/lib/Target/PowerPC/PPCCallingConv.td +++ b/lib/Target/PowerPC/PPCCallingConv.td @@ -1,4 +1,4 @@ -//===- PPCCallingConv.td - Calling Conventions for PowerPC ------*- C++ -*-===// +//===- PPCCallingConv.td - Calling Conventions for PowerPC -*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp index 361fa70fb4c4..df9ab52389ba 100644 --- a/lib/Target/PowerPC/PPCCodeEmitter.cpp +++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp @@ -45,7 +45,7 @@ namespace { public: PPCCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) - : MachineFunctionPass(&ID), TM(tm), MCE(mce) {} + : MachineFunctionPass(ID), TM(tm), MCE(mce) {} /// getBinaryCodeForInstr - This function, generated by the /// CodeEmitterGenerator using TableGen, produces the binary encoding for @@ -110,7 +110,7 @@ void PPCCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) { default: MCE.emitWordBE(getBinaryCodeForInstr(MI)); break; - case TargetOpcode::DBG_LABEL: + case TargetOpcode::PROLOG_LABEL: case TargetOpcode::EH_LABEL: MCE.emitLabel(MI.getOperand(0).getMCSymbol()); break; diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index d47d989b34c0..14d1b154a5c9 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -2467,18 +2467,31 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, unsigned CallOpc = isSVR4ABI ? PPCISD::CALL_SVR4 : PPCISD::CALL_Darwin; - // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every - // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol - // node so that legalize doesn't hack it. - if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, - Callee.getValueType()); - else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) - Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType()); - else if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) + bool needIndirectCall = true; + if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) { // If this is an absolute destination address, use the munged value. Callee = SDValue(Dest, 0); - else { + needIndirectCall = false; + } + // XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201 + // Use indirect calls for ALL functions calls in JIT mode, since the + // far-call stubs may be outside relocation limits for a BL instruction. + if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) { + // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every + // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol + // node so that legalize doesn't hack it. + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, + Callee.getValueType()); + needIndirectCall = false; + } + } + if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { + Callee = DAG.getTargetExternalSymbol(S->getSymbol(), + Callee.getValueType()); + needIndirectCall = false; + } + if (needIndirectCall) { // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair // to do the call, we can't use PPCISD::CALL. 
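The PrepareCall change above switches the JIT to indirect (MTCTR/BCTRL) calls for every function call because, as the referenced bug report (PR 5201) notes, far-call stubs may sit outside the reach of a direct BL. A rough standalone sketch of the range check that motivates this, assuming the usual I-form encoding in which the branch offset is a 24-bit signed count of 4-byte words (about +/-32 MB); treat the exact field width as an illustration rather than a statement about the backend.

#include <cstdint>
#include <iostream>

// Can `target` be reached from `pc` by a PC-relative branch whose encoded
// offset is a 24-bit signed number of 4-byte words?
static bool reachableByDirectBranch(uint64_t pc, uint64_t target) {
  int64_t byteOffset = int64_t(target) - int64_t(pc);
  if (byteOffset % 4 != 0)
    return false;                       // branch targets are word aligned
  int64_t wordOffset = byteOffset / 4;
  return wordOffset >= -(int64_t(1) << 23) &&
         wordOffset <=  (int64_t(1) << 23) - 1;
}

int main() {
  uint64_t pc = 0x10000000;
  std::cout << reachableByDirectBranch(pc, pc + 0x100) << "\n";        // 1: nearby callee, BL is fine
  std::cout << reachableByDirectBranch(pc, pc + (64u << 20)) << "\n";  // 0: stub 64 MB away, needs MTCTR/BCTRL
  return 0;
}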
SDValue MTCTROps[] = {Chain, Callee, InFlag}; @@ -3942,17 +3955,17 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, } // t = vsplti c, result = vsldoi t, t, 1 - if (SextVal == ((i << 8) | (i >> (TypeShiftAmt-8)))) { + if (SextVal == ((i << 8) | (i < 0 ? 0xFF : 0))) { SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl); } // t = vsplti c, result = vsldoi t, t, 2 - if (SextVal == ((i << 16) | (i >> (TypeShiftAmt-16)))) { + if (SextVal == ((i << 16) | (i < 0 ? 0xFFFF : 0))) { SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl); } // t = vsplti c, result = vsldoi t, t, 3 - if (SextVal == ((i << 24) | (i >> (TypeShiftAmt-24)))) { + if (SextVal == ((i << 24) | (i < 0 ? 0xFFFFFF : 0))) { SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl); } diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 1574aa3fb23a..c17108fa9230 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -18,8 +18,11 @@ #include "PPCGenInstrInfo.inc" #include "PPCTargetMachine.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -36,67 +39,6 @@ PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm) : TargetInstrInfoImpl(PPCInsts, array_lengthof(PPCInsts)), TM(tm), RI(*TM.getSubtargetImpl(), *this) {} -bool PPCInstrInfo::isMoveInstr(const MachineInstr& MI, - unsigned& sourceReg, - unsigned& destReg, - unsigned& sourceSubIdx, - unsigned& destSubIdx) const { - sourceSubIdx = destSubIdx = 0; // No sub-registers. 
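The three rewritten checks in LowerBUILD_VECTOR above change how the byte vacated by the shifted splat immediate is filled: an arithmetic right shift of the immediate is replaced by an explicit 0xFF/0x0 mask. The two forms only diverge for negative immediates, where the arithmetic shift drags the sign bit across the whole word instead of producing a single byte of ones. A tiny standalone computation showing that two's-complement behaviour; the constant shift of 8 stands in for the original TypeShiftAmt-8 and is an assumption, and left-shifting a negative int relies on two's-complement wrapping just as the original expressions do.

#include <cstdio>

int main() {
  int i = -2;  // a negative splat immediate, e.g. vsplti -2
  // Old-style fill: arithmetic right shift of a negative value sets every
  // bit above bit 7, collapsing the result to plain -1.
  int oldStyle = (i << 8) | (i >> 8);
  // New-style fill: exactly one byte of ones below the shifted immediate.
  int newStyle = (i << 8) | (i < 0 ? 0xFF : 0);
  std::printf("old = 0x%08x\n", (unsigned)oldStyle);  // 0xffffffff
  std::printf("new = 0x%08x\n", (unsigned)newStyle);  // 0xfffffeff
  return 0;
}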
- - unsigned oc = MI.getOpcode(); - if (oc == PPC::OR || oc == PPC::OR8 || oc == PPC::VOR || - oc == PPC::OR4To8 || oc == PPC::OR8To4) { // or r1, r2, r2 - assert(MI.getNumOperands() >= 3 && - MI.getOperand(0).isReg() && - MI.getOperand(1).isReg() && - MI.getOperand(2).isReg() && - "invalid PPC OR instruction!"); - if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) { - sourceReg = MI.getOperand(1).getReg(); - destReg = MI.getOperand(0).getReg(); - return true; - } - } else if (oc == PPC::ADDI) { // addi r1, r2, 0 - assert(MI.getNumOperands() >= 3 && - MI.getOperand(0).isReg() && - MI.getOperand(2).isImm() && - "invalid PPC ADDI instruction!"); - if (MI.getOperand(1).isReg() && MI.getOperand(2).getImm() == 0) { - sourceReg = MI.getOperand(1).getReg(); - destReg = MI.getOperand(0).getReg(); - return true; - } - } else if (oc == PPC::ORI) { // ori r1, r2, 0 - assert(MI.getNumOperands() >= 3 && - MI.getOperand(0).isReg() && - MI.getOperand(1).isReg() && - MI.getOperand(2).isImm() && - "invalid PPC ORI instruction!"); - if (MI.getOperand(2).getImm() == 0) { - sourceReg = MI.getOperand(1).getReg(); - destReg = MI.getOperand(0).getReg(); - return true; - } - } else if (oc == PPC::FMR || oc == PPC::FMRSD) { // fmr r1, r2 - assert(MI.getNumOperands() >= 2 && - MI.getOperand(0).isReg() && - MI.getOperand(1).isReg() && - "invalid PPC FMR instruction"); - sourceReg = MI.getOperand(1).getReg(); - destReg = MI.getOperand(0).getReg(); - return true; - } else if (oc == PPC::MCRF) { // mcrf cr1, cr2 - assert(MI.getNumOperands() >= 2 && - MI.getOperand(0).isReg() && - MI.getOperand(1).isReg() && - "invalid PPC MCRF instruction"); - sourceReg = MI.getOperand(1).getReg(); - destReg = MI.getOperand(0).getReg(); - return true; - } - return false; -} - unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { switch (MI->getOpcode()) { @@ -524,6 +466,14 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, for (unsigned i = 0, e = NewMIs.size(); i != e; ++i) MBB.insert(MI, NewMIs[i]); + + const MachineFrameInfo &MFI = *MF.getFrameInfo(); + MachineMemOperand *MMO = + MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx), + MachineMemOperand::MOStore, /*Offset=*/0, + MFI.getObjectSize(FrameIdx), + MFI.getObjectAlignment(FrameIdx)); + NewMIs.back()->addMemOperand(MF, MMO); } void @@ -637,6 +587,14 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs); for (unsigned i = 0, e = NewMIs.size(); i != e; ++i) MBB.insert(MI, NewMIs[i]); + + const MachineFrameInfo &MFI = *MF.getFrameInfo(); + MachineMemOperand *MMO = + MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx), + MachineMemOperand::MOLoad, /*Offset=*/0, + MFI.getObjectSize(FrameIdx), + MFI.getObjectAlignment(FrameIdx)); + NewMIs.back()->addMemOperand(MF, MMO); } MachineInstr* @@ -667,7 +625,7 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { const char *AsmStr = MI->getOperand(0).getSymbolName(); return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo()); } - case PPC::DBG_LABEL: + case PPC::PROLOG_LABEL: case PPC::EH_LABEL: case PPC::GC_LABEL: case PPC::DBG_VALUE: diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index eadb21e21702..fc7b7b3cb897 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -82,12 +82,6 @@ public: /// virtual const PPCRegisterInfo &getRegisterInfo() const { return RI; } - /// Return true if 
the instruction is a register to register move and return - /// the source and dest operands and their sub-register indices by reference. - virtual bool isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const; - unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; unsigned isStoreToStackSlot(const MachineInstr *MI, diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 63b4581a37f9..eb100ec75280 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -1022,9 +1022,7 @@ let Uses = [RM] in { } } -/// FMR is split into 2 versions, one for 4/8 byte FP, and one for extending. -/// -/// Note that these are defined as pseudo-ops on the PPC970 because they are +/// Note that FMR is defined as pseudo-ops on the PPC970 because they are /// often coalesced away and we don't want the dispatch group builder to think /// that they will fill slots (which could cause the load of a LSU reject to /// sneak into a d-group with a store). @@ -1032,10 +1030,6 @@ def FMR : XForm_26<63, 72, (outs F4RC:$frD), (ins F4RC:$frB), "fmr $frD, $frB", FPGeneral, []>, // (set F4RC:$frD, F4RC:$frB) PPC970_Unit_Pseudo; -def FMRSD : XForm_26<63, 72, (outs F8RC:$frD), (ins F4RC:$frB), - "fmr $frD, $frB", FPGeneral, - [(set F8RC:$frD, (fextend F4RC:$frB))]>, - PPC970_Unit_Pseudo; let PPC970_Unit = 3 in { // FPU Operations. // These are artificially split into two different forms, for 4/8 byte FP. @@ -1476,10 +1470,13 @@ def : Pat<(extloadi16 iaddr:$src), (LHZ iaddr:$src)>; def : Pat<(extloadi16 xaddr:$src), (LHZX xaddr:$src)>; -def : Pat<(extloadf32 iaddr:$src), - (FMRSD (LFS iaddr:$src))>; -def : Pat<(extloadf32 xaddr:$src), - (FMRSD (LFSX xaddr:$src))>; +def : Pat<(f64 (extloadf32 iaddr:$src)), + (COPY_TO_REGCLASS (LFS iaddr:$src), F8RC)>; +def : Pat<(f64 (extloadf32 xaddr:$src)), + (COPY_TO_REGCLASS (LFSX xaddr:$src), F8RC)>; + +def : Pat<(f64 (fextend F4RC:$src)), + (COPY_TO_REGCLASS F4RC:$src, F8RC)>; // Memory barriers def : Pat<(membarrier (i32 imm /*ll*/), diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 4d6132a9ec50..653e143ba407 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -449,8 +449,8 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II, // Get stack alignments. unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); unsigned MaxAlign = MFI->getMaxAlignment(); - assert(MaxAlign <= TargetAlign && - "Dynamic alloca with large aligns not supported"); + if (MaxAlign > TargetAlign) + report_fatal_error("Dynamic alloca with large aligns not supported"); // Determine the previous frame's address. If FrameSize can't be // represented as 16 bits or we need special alignment, then we load the @@ -580,10 +580,9 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II, MBB.erase(II); } -unsigned +void PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value, - RegScavenger *RS) const { + int SPAdj, RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); // Get the instruction. @@ -622,14 +621,14 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (FPSI && FrameIndex == FPSI && (OpC == PPC::DYNALLOC || OpC == PPC::DYNALLOC8)) { lowerDynamicAlloc(II, SPAdj, RS); - return 0; + return; } // Special case for pseudo-op SPILL_CR. 
if (EnableRegisterScavenging) // FIXME (64-bit): Enable by default. if (OpC == PPC::SPILL_CR) { lowerCRSpilling(II, FrameIndex, SPAdj, RS); - return 0; + return; } // Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP). @@ -674,7 +673,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (isIXAddr) Offset >>= 2; // The actual encoded value has the low two bits zero. MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset); - return 0; + return; } // The offset doesn't fit into a single register, scavenge one to build the @@ -710,11 +709,10 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } else { OperandBase = OffsetOperandNo; } - + unsigned StackReg = MI.getOperand(FIOperandNo).getReg(); MI.getOperand(OperandBase).ChangeToRegister(StackReg, false); MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false); - return 0; } /// VRRegNo - Map from a numbered VR register to its enum value. @@ -1318,7 +1316,7 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { if (needsFrameMoves) { // Mark effective beginning of when frame pointer becomes valid. FrameLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addSym(FrameLabel); + BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(FrameLabel); // Show update of SP. if (NegFrameSize) { @@ -1361,7 +1359,7 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { ReadyLabel = MMI.getContext().CreateTempSymbol(); // Mark effective beginning of when frame pointer is ready. - BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addSym(ReadyLabel); + BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(ReadyLabel); MachineLocation FPDst(HasFP ? (isPPC64 ? PPC::X31 : PPC::R31) : (isPPC64 ? PPC::X1 : PPC::R1)); diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index f026847a540b..890b24b9c0a8 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -63,9 +63,8 @@ public: int SPAdj, RegScavenger *RS) const; void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex, int SPAdj, RegScavenger *RS) const; - unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value = NULL, - RegScavenger *RS = NULL) const; + void eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS = NULL) const; /// determineFrameLayout - Determine the size of the frame and maximum call /// frame size. diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index 40914ba62a70..5d46065d96f2 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -69,6 +69,7 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &FS, , HasFSQRT(false) , HasSTFIWX(false) , HasLazyResolverStubs(false) + , IsJITCodeModel(false) , DarwinVers(0) { // Determine default and user specified characteristics @@ -117,6 +118,9 @@ void PPCSubtarget::SetJITMode() { // everything is. This matters for PPC64, which codegens in PIC mode without // stubs. 
HasLazyResolverStubs = false; + + // Calls to external functions need to use indirect calls + IsJITCodeModel = true; } diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 75fcf6238a27..00ec7474c9e3 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -63,6 +63,7 @@ protected: bool HasFSQRT; bool HasSTFIWX; bool HasLazyResolverStubs; + bool IsJITCodeModel; /// DarwinVers - Nonzero if this is a darwin platform. Otherwise, the numeric /// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc. @@ -124,6 +125,9 @@ public: bool hasLazyResolverStub(const GlobalValue *GV, const TargetMachine &TM) const; + // isJITCodeModel - True if we're generating code for the JIT + bool isJITCodeModel() const { return IsJITCodeModel; } + // Specific obvious features. bool hasFSQRT() const { return HasFSQRT; } bool hasSTFIWX() const { return HasSTFIWX; } diff --git a/lib/Target/README.txt b/lib/Target/README.txt index 4d7ee08de1de..4faf8bcfd419 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -1919,5 +1919,21 @@ something like the following, which eliminates a branch: ret .LBB0_2: jmp foo # TAILCALL +//===---------------------------------------------------------------------===// +Given a branch where the two target blocks are identical ("ret i32 %b" in +both), simplifycfg will simplify them away. But not so for a switch statement: + +define i32 @f(i32 %a, i32 %b) nounwind readnone { +entry: + switch i32 %a, label %bb3 [ + i32 4, label %bb + i32 6, label %bb + ] +bb: ; preds = %entry, %entry + ret i32 %b + +bb3: ; preds = %entry + ret i32 %b +} //===---------------------------------------------------------------------===// diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp index 9e148ada8853..aae5da856005 100644 --- a/lib/Target/Sparc/DelaySlotFiller.cpp +++ b/lib/Target/Sparc/DelaySlotFiller.cpp @@ -32,7 +32,7 @@ namespace { static char ID; Filler(TargetMachine &tm) - : MachineFunctionPass(&ID), TM(tm), TII(tm.getInstrInfo()) { } + : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { } virtual const char *getPassName() const { return "SPARC Delay Slot Filler"; diff --git a/lib/Target/Sparc/FPMover.cpp b/lib/Target/Sparc/FPMover.cpp index 88b0927b3550..1423b1e64d66 100644 --- a/lib/Target/Sparc/FPMover.cpp +++ b/lib/Target/Sparc/FPMover.cpp @@ -36,7 +36,7 @@ namespace { static char ID; explicit FPMover(TargetMachine &tm) - : MachineFunctionPass(&ID), TM(tm) { } + : MachineFunctionPass(ID), TM(tm) { } virtual const char *getPassName() const { return "Sparc Double-FP Move Fixer"; diff --git a/lib/Target/Sparc/Sparc.td b/lib/Target/Sparc/Sparc.td index 925d782d988b..764336665d0b 100644 --- a/lib/Target/Sparc/Sparc.td +++ b/lib/Target/Sparc/Sparc.td @@ -1,4 +1,4 @@ -//===- Sparc.td - Describe the Sparc Target Machine -------------*- C++ -*-===// +//===- Sparc.td - Describe the Sparc Target Machine --------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp index 698923e3c9e0..4ea94c4cb560 100644 --- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp +++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp @@ -84,7 +84,7 @@ bool SparcDAGToDAGISel::SelectADDRri(SDNode *Op, SDValue Addr, if (Addr.getOpcode() == ISD::ADD) { if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) { - if (Predicate_simm13(CN)) { + if (isInt<13>(CN->getSExtValue())) { if (FrameIndexSDNode *FIN 
= dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) { // Constant offset from frame ref. @@ -120,9 +120,9 @@ bool SparcDAGToDAGISel::SelectADDRrr(SDNode *Op, SDValue Addr, return false; // direct calls. if (Addr.getOpcode() == ISD::ADD) { - if (isa<ConstantSDNode>(Addr.getOperand(1)) && - Predicate_simm13(Addr.getOperand(1).getNode())) - return false; // Let the reg+imm pattern catch this! + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) + if (isInt<13>(CN->getSExtValue())) + return false; // Let the reg+imm pattern catch this! if (Addr.getOperand(0).getOpcode() == SPISD::Lo || Addr.getOperand(1).getOpcode() == SPISD::Lo) return false; // Let the reg+imm pattern catch this! diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp index 3a4c80ad076a..7ede8e7ebbe4 100644 --- a/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/lib/Target/Sparc/SparcInstrInfo.cpp @@ -28,46 +28,6 @@ SparcInstrInfo::SparcInstrInfo(SparcSubtarget &ST) RI(ST, *this), Subtarget(ST) { } -static bool isZeroImm(const MachineOperand &op) { - return op.isImm() && op.getImm() == 0; -} - -/// Return true if the instruction is a register to register move and -/// leave the source and dest operands in the passed parameters. -/// -bool SparcInstrInfo::isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSR, unsigned &DstSR) const { - SrcSR = DstSR = 0; // No sub-registers. - - // We look for 3 kinds of patterns here: - // or with G0 or 0 - // add with G0 or 0 - // fmovs or FpMOVD (pseudo double move). - if (MI.getOpcode() == SP::ORrr || MI.getOpcode() == SP::ADDrr) { - if (MI.getOperand(1).getReg() == SP::G0) { - DstReg = MI.getOperand(0).getReg(); - SrcReg = MI.getOperand(2).getReg(); - return true; - } else if (MI.getOperand(2).getReg() == SP::G0) { - DstReg = MI.getOperand(0).getReg(); - SrcReg = MI.getOperand(1).getReg(); - return true; - } - } else if ((MI.getOpcode() == SP::ORri || MI.getOpcode() == SP::ADDri) && - isZeroImm(MI.getOperand(2)) && MI.getOperand(1).isReg()) { - DstReg = MI.getOperand(0).getReg(); - SrcReg = MI.getOperand(1).getReg(); - return true; - } else if (MI.getOpcode() == SP::FMOVS || MI.getOpcode() == SP::FpMOVD || - MI.getOpcode() == SP::FMOVD) { - SrcReg = MI.getOperand(1).getReg(); - DstReg = MI.getOperand(0).getReg(); - return true; - } - return false; -} - /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of /// the destination along with the FrameIndex of the loaded stack slot. If diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h index 133471857bad..c00bd2198765 100644 --- a/lib/Target/Sparc/SparcInstrInfo.h +++ b/lib/Target/Sparc/SparcInstrInfo.h @@ -43,12 +43,6 @@ public: /// virtual const SparcRegisterInfo &getRegisterInfo() const { return RI; } - /// Return true if the instruction is a register to register move and return - /// the source and dest operands and their sub-register indices by reference. - virtual bool isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const; - /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of /// the destination along with the FrameIndex of the loaded stack slot. 
If diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index ddadd51a93a4..467ed48487ad 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -43,17 +43,9 @@ def UseDeprecatedInsts : Predicate<"Subtarget.useDeprecatedV8Instructions()">; // Instruction Pattern Stuff //===----------------------------------------------------------------------===// -def simm11 : PatLeaf<(imm), [{ - // simm11 predicate - True if the imm fits in a 11-bit sign extended field. - return (((int)N->getZExtValue() << (32-11)) >> (32-11)) == - (int)N->getZExtValue(); -}]>; +def simm11 : PatLeaf<(imm), [{ return isInt<11>(N->getSExtValue()); }]>; -def simm13 : PatLeaf<(imm), [{ - // simm13 predicate - True if the imm fits in a 13-bit sign extended field. - return (((int)N->getZExtValue() << (32-13)) >> (32-13)) == - (int)N->getZExtValue(); -}]>; +def simm13 : PatLeaf<(imm), [{ return isInt<13>(N->getSExtValue()); }]>; def LO10 : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant((unsigned)N->getZExtValue() & 1023, diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp index 427cc7fd4577..c85db20d2b74 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -69,10 +69,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MBB.erase(I); } -unsigned +void SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value, - RegScavenger *RS) const { + int SPAdj, RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); unsigned i = 0; @@ -108,7 +107,6 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(i).ChangeToRegister(SP::G1, false); MI.getOperand(i+1).ChangeToImmediate(Offset & ((1 << 10)-1)); } - return 0; } void SparcRegisterInfo:: diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h index 9f0cda707b3e..020ce567c956 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.h +++ b/lib/Target/Sparc/SparcRegisterInfo.h @@ -40,9 +40,8 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo { MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value = NULL, - RegScavenger *RS = NULL) const; + void eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS = NULL) const; void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index c03864fe41e4..367bed3a8539 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -141,31 +141,6 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, .addReg(SrcReg, getKillRegState(KillSrc)); } -bool -SystemZInstrInfo::isMoveInstr(const MachineInstr& MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const { - switch (MI.getOpcode()) { - default: - return false; - case SystemZ::MOV32rr: - case SystemZ::MOV64rr: - case SystemZ::MOV64rrP: - case SystemZ::MOV128rr: - case SystemZ::FMOV32rr: - case SystemZ::FMOV64rr: - assert(MI.getNumOperands() >= 2 && - MI.getOperand(0).isReg() && - MI.getOperand(1).isReg() && - "invalid register-register move instruction"); - SrcReg = MI.getOperand(1).getReg(); - DstReg = MI.getOperand(0).getReg(); - SrcSubIdx = MI.getOperand(1).getSubReg(); - DstSubIdx = 
MI.getOperand(0).getSubReg(); - return true; - } -} - unsigned SystemZInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { switch (MI->getOpcode()) { diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h index 0559619248a6..c248f2489c49 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/lib/Target/SystemZ/SystemZInstrInfo.h @@ -65,9 +65,6 @@ public: unsigned DestReg, unsigned SrcReg, bool KillSrc) const; - bool isMoveInstr(const MachineInstr& MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const; unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const; diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp index ae96b0b08ff6..f8d3e6ac8a6f 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -92,10 +92,9 @@ int SystemZRegisterInfo::getFrameIndexOffset(const MachineFunction &MF, return Offset; } -unsigned +void SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value, - RegScavenger *RS) const { + int SPAdj, RegScavenger *RS) const { assert(SPAdj == 0 && "Unxpected"); unsigned i = 0; @@ -117,13 +116,13 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Offset is a either 12-bit unsigned or 20-bit signed integer. // FIXME: handle "too long" displacements. - int Offset = getFrameIndexOffset(MF, FrameIndex) + MI.getOperand(i+1).getImm(); + int Offset = + getFrameIndexOffset(MF, FrameIndex) + MI.getOperand(i+1).getImm(); // Check whether displacement is too long to fit into 12 bit zext field. MI.setDesc(TII.getMemoryInstr(MI.getOpcode(), Offset)); MI.getOperand(i+1).ChangeToImmediate(Offset); - return 0; } void diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h index 670025f86e08..5dae865cb79a 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.h +++ b/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -34,7 +34,7 @@ struct SystemZRegisterInfo : public SystemZGenRegisterInfo { BitVector getReservedRegs(const MachineFunction &MF) const; - bool hasReservedCallFrame(MachineFunction &MF) const { return true; } + bool hasReservedCallFrame(const MachineFunction &MF) const { return true; } bool hasFP(const MachineFunction &MF) const; int getFrameIndexOffset(const MachineFunction &MF, int FI) const; @@ -43,9 +43,8 @@ struct SystemZRegisterInfo : public SystemZGenRegisterInfo { MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value = NULL, - RegScavenger *RS = NULL) const; + void eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS = NULL) const; void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp index 5870d8a87004..f35c96dadcee 100644 --- a/lib/Target/TargetData.cpp +++ b/lib/Target/TargetData.cpp @@ -34,8 +34,7 @@ using namespace llvm; // Handle the Pass registration stuff necessary to use TargetData's. // Register the default SparcV9 implementation... 
-static RegisterPass<TargetData> X("targetdata", "Target Data Layout", false, - true); +INITIALIZE_PASS(TargetData, "targetdata", "Target Data Layout", false, true); char TargetData::ID = 0; //===----------------------------------------------------------------------===// @@ -98,8 +97,8 @@ unsigned StructLayout::getElementContainingOffset(uint64_t Offset) const { //===----------------------------------------------------------------------===// TargetAlignElem -TargetAlignElem::get(AlignTypeEnum align_type, unsigned char abi_align, - unsigned char pref_align, uint32_t bit_width) { +TargetAlignElem::get(AlignTypeEnum align_type, unsigned abi_align, + unsigned pref_align, uint32_t bit_width) { assert(abi_align <= pref_align && "Preferred alignment worse than ABI!"); TargetAlignElem retval; retval.AlignType = align_type; @@ -197,10 +196,10 @@ void TargetData::init(StringRef Desc) { } unsigned Size = getInt(Specifier.substr(1)); Split = Token.split(':'); - unsigned char ABIAlign = getInt(Split.first) / 8; + unsigned ABIAlign = getInt(Split.first) / 8; Split = Split.second.split(':'); - unsigned char PrefAlign = getInt(Split.first) / 8; + unsigned PrefAlign = getInt(Split.first) / 8; if (PrefAlign == 0) PrefAlign = ABIAlign; setAlignment(AlignType, ABIAlign, PrefAlign, Size); @@ -227,19 +226,19 @@ void TargetData::init(StringRef Desc) { /// /// @note This has to exist, because this is a pass, but it should never be /// used. -TargetData::TargetData() : ImmutablePass(&ID) { +TargetData::TargetData() : ImmutablePass(ID) { report_fatal_error("Bad TargetData ctor used. " "Tool did not specify a TargetData to use?"); } TargetData::TargetData(const Module *M) - : ImmutablePass(&ID) { + : ImmutablePass(ID) { init(M->getDataLayout()); } void -TargetData::setAlignment(AlignTypeEnum align_type, unsigned char abi_align, - unsigned char pref_align, uint32_t bit_width) { +TargetData::setAlignment(AlignTypeEnum align_type, unsigned abi_align, + unsigned pref_align, uint32_t bit_width) { assert(abi_align <= pref_align && "Preferred alignment worse than ABI!"); for (unsigned i = 0, e = Alignments.size(); i != e; ++i) { if (Alignments[i].AlignType == align_type && @@ -455,15 +454,6 @@ uint64_t TargetData::getTypeSizeInBits(const Type *Ty) const { case Type::StructTyID: // Get the layout annotation... which is lazily created on demand. return getStructLayout(cast<StructType>(Ty))->getSizeInBits(); - case Type::UnionTyID: { - const UnionType *UnTy = cast<UnionType>(Ty); - uint64_t Size = 0; - for (UnionType::element_iterator i = UnTy->element_begin(), - e = UnTy->element_end(); i != e; ++i) { - Size = std::max(Size, getTypeSizeInBits(*i)); - } - return Size; - } case Type::IntegerTyID: return cast<IntegerType>(Ty)->getBitWidth(); case Type::VoidTyID: @@ -496,7 +486,7 @@ uint64_t TargetData::getTypeSizeInBits(const Type *Ty) const { Get the ABI (\a abi_or_pref == true) or preferred alignment (\a abi_or_pref == false) for the requested type \a Ty. */ -unsigned char TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const { +unsigned TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const { int AlignType = -1; assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!"); @@ -518,18 +508,7 @@ unsigned char TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const { // Get the layout annotation... which is lazily created on demand. 
const StructLayout *Layout = getStructLayout(cast<StructType>(Ty)); unsigned Align = getAlignmentInfo(AGGREGATE_ALIGN, 0, abi_or_pref, Ty); - return std::max(Align, (unsigned)Layout->getAlignment()); - } - case Type::UnionTyID: { - const UnionType *UnTy = cast<UnionType>(Ty); - unsigned Align = 1; - - // Unions need the maximum alignment of all their entries - for (UnionType::element_iterator i = UnTy->element_begin(), - e = UnTy->element_end(); i != e; ++i) { - Align = std::max(Align, (unsigned)getAlignment(*i, abi_or_pref)); - } - return Align; + return std::max(Align, Layout->getAlignment()); } case Type::IntegerTyID: case Type::VoidTyID: @@ -556,18 +535,18 @@ unsigned char TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const { abi_or_pref, Ty); } -unsigned char TargetData::getABITypeAlignment(const Type *Ty) const { +unsigned TargetData::getABITypeAlignment(const Type *Ty) const { return getAlignment(Ty, true); } /// getABIIntegerTypeAlignment - Return the minimum ABI-required alignment for /// an integer type of the specified bitwidth. -unsigned char TargetData::getABIIntegerTypeAlignment(unsigned BitWidth) const { +unsigned TargetData::getABIIntegerTypeAlignment(unsigned BitWidth) const { return getAlignmentInfo(INTEGER_ALIGN, BitWidth, true, 0); } -unsigned char TargetData::getCallFrameTypeAlignment(const Type *Ty) const { +unsigned TargetData::getCallFrameTypeAlignment(const Type *Ty) const { for (unsigned i = 0, e = Alignments.size(); i != e; ++i) if (Alignments[i].AlignType == STACK_ALIGN) return Alignments[i].ABIAlign; @@ -575,12 +554,12 @@ unsigned char TargetData::getCallFrameTypeAlignment(const Type *Ty) const { return getABITypeAlignment(Ty); } -unsigned char TargetData::getPrefTypeAlignment(const Type *Ty) const { +unsigned TargetData::getPrefTypeAlignment(const Type *Ty) const { return getAlignment(Ty, false); } -unsigned char TargetData::getPreferredTypeAlignmentShift(const Type *Ty) const { - unsigned Align = (unsigned) getPrefTypeAlignment(Ty); +unsigned TargetData::getPreferredTypeAlignmentShift(const Type *Ty) const { + unsigned Align = getPrefTypeAlignment(Ty); assert(!(Align & (Align-1)) && "Alignment is not a power of two!"); return Log2_32(Align); } @@ -615,18 +594,13 @@ uint64_t TargetData::getIndexedOffset(const Type *ptrTy, Value* const* Indices, // Update Ty to refer to current element Ty = STy->getElementType(FieldNo); - } else if (const UnionType *UnTy = dyn_cast<UnionType>(*TI)) { - unsigned FieldNo = cast<ConstantInt>(Indices[CurIDX])->getZExtValue(); - - // Offset into union is canonically 0, but type changes - Ty = UnTy->getElementType(FieldNo); } else { // Update Ty to refer to current element Ty = cast<SequentialType>(Ty)->getElementType(); // Get the array index and the size of each array element. 
if (int64_t arrayIdx = cast<ConstantInt>(Indices[CurIDX])->getSExtValue()) - Result += arrayIdx * (int64_t)getTypeAllocSize(Ty); + Result += (uint64_t)arrayIdx * getTypeAllocSize(Ty); } } diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index 47c91df1400e..705b1c097e55 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -30,7 +30,8 @@ namespace llvm { bool NoFramePointerElimNonLeaf; bool NoExcessFPPrecision; bool UnsafeFPMath; - bool FiniteOnlyFPMathOption; + bool NoInfsFPMath; + bool NoNaNsFPMath; bool HonorSignDependentRoundingFPMathOption; bool UseSoftFloat; FloatABI::ABIType FloatABIType; @@ -80,9 +81,14 @@ EnableUnsafeFPMath("enable-unsafe-fp-math", cl::location(UnsafeFPMath), cl::init(false)); static cl::opt<bool, true> -EnableFiniteOnlyFPMath("enable-finite-only-fp-math", - cl::desc("Enable optimizations that assumes non- NaNs / +-Infs"), - cl::location(FiniteOnlyFPMathOption), +EnableNoInfsFPMath("enable-no-infs-fp-math", + cl::desc("Enable FP math optimizations that assume no +-Infs"), + cl::location(NoInfsFPMath), + cl::init(false)); +static cl::opt<bool, true> +EnableNoNaNsFPMath("enable-no-nans-fp-math", + cl::desc("Enable FP math optimizations that assume no NaNs"), + cl::location(NoNaNsFPMath), cl::init(false)); static cl::opt<bool, true> EnableHonorSignDependentRoundingFPMath("enable-sign-dependent-rounding-fp-math", @@ -290,12 +296,6 @@ namespace llvm { /// result is "less precise" than doing those operations individually. bool LessPreciseFPMAD() { return UnsafeFPMath || LessPreciseFPMADOption; } - /// FiniteOnlyFPMath - This returns true when the -enable-finite-only-fp-math - /// option is specified on the command line. If this returns false (default), - /// the code generator is not allowed to assume that FP arithmetic arguments - /// and results are never NaNs or +-Infs. - bool FiniteOnlyFPMath() { return FiniteOnlyFPMathOption; } - /// HonorSignDependentRoundingFPMath - Return true if the codegen must assume /// that the rounding mode of the FPU can change from its default. bool HonorSignDependentRoundingFPMath() { diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp index 49bfad54136d..55f222c7c1c9 100644 --- a/lib/Target/TargetRegisterInfo.cpp +++ b/lib/Target/TargetRegisterInfo.cpp @@ -63,7 +63,7 @@ TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, EVT VT) const { /// getAllocatableSetForRC - Toggle the bits that represent allocatable /// registers for the specific register class. 
static void getAllocatableSetForRC(const MachineFunction &MF, - const TargetRegisterClass *RC, BitVector &R){ + const TargetRegisterClass *RC, BitVector &R){ for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF), E = RC->allocation_order_end(MF); I != E; ++I) R.set(*I); @@ -74,12 +74,16 @@ BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF, BitVector Allocatable(NumRegs); if (RC) { getAllocatableSetForRC(MF, RC, Allocatable); - return Allocatable; + } else { + for (TargetRegisterInfo::regclass_iterator I = regclass_begin(), + E = regclass_end(); I != E; ++I) + getAllocatableSetForRC(MF, *I, Allocatable); } - for (TargetRegisterInfo::regclass_iterator I = regclass_begin(), - E = regclass_end(); I != E; ++I) - getAllocatableSetForRC(MF, *I, Allocatable); + // Mask out the reserved registers + BitVector Reserved = getReservedRegs(MF); + Allocatable ^= Reserved & Allocatable; + return Allocatable; } diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index f1e66ab9d2c3..f8588d818b75 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -9,6 +9,8 @@ #include "llvm/Target/TargetAsmParser.h" #include "X86.h" +#include "X86Subtarget.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" @@ -19,6 +21,7 @@ #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/Target/TargetAsmParser.h" using namespace llvm; @@ -28,6 +31,7 @@ struct X86Operand; class X86ATTAsmParser : public TargetAsmParser { MCAsmParser &Parser; + TargetMachine &TM; protected: unsigned Is64Bit : 1; @@ -37,8 +41,6 @@ private: MCAsmLexer &getLexer() const { return Parser.getLexer(); } - void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); } - bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); @@ -48,13 +50,14 @@ private: bool ParseDirectiveWord(unsigned Size, SMLoc L); - void InstructionCleanup(MCInst &Inst); + bool MatchInstruction(SMLoc IDLoc, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands, + MCInst &Inst); - /// @name Auto-generated Match Functions + /// @name Auto-generated Matcher Functions /// { - bool MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCInst &Inst); + unsigned ComputeAvailableFeatures(const X86Subtarget *Subtarget) const; bool MatchInstructionImpl( const SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCInst &Inst); @@ -62,27 +65,32 @@ private: /// } public: - X86ATTAsmParser(const Target &T, MCAsmParser &_Parser) - : TargetAsmParser(T), Parser(_Parser) {} + X86ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM) + : TargetAsmParser(T), Parser(_Parser), TM(TM) { + + // Initialize the set of available features. 
+ setAvailableFeatures(ComputeAvailableFeatures( + &TM.getSubtarget<X86Subtarget>())); + } virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands); virtual bool ParseDirective(AsmToken DirectiveID); }; - + class X86_32ATTAsmParser : public X86ATTAsmParser { public: - X86_32ATTAsmParser(const Target &T, MCAsmParser &_Parser) - : X86ATTAsmParser(T, _Parser) { + X86_32ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM) + : X86ATTAsmParser(T, _Parser, TM) { Is64Bit = false; } }; class X86_64ATTAsmParser : public X86ATTAsmParser { public: - X86_64ATTAsmParser(const Target &T, MCAsmParser &_Parser) - : X86ATTAsmParser(T, _Parser) { + X86_64ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM) + : X86ATTAsmParser(T, _Parser, TM) { Is64Bit = true; } }; @@ -90,7 +98,7 @@ public: } // end anonymous namespace /// @name Auto-generated Match Functions -/// { +/// { static unsigned MatchRegisterName(StringRef Name); @@ -109,7 +117,7 @@ struct X86Operand : public MCParsedAsmOperand { } Kind; SMLoc StartLoc, EndLoc; - + union { struct { const char *Data; @@ -141,6 +149,8 @@ struct X86Operand : public MCParsedAsmOperand { /// getEndLoc - Get the location of the last token of this operand. SMLoc getEndLoc() const { return EndLoc; } + virtual void dump(raw_ostream &OS) const {} + StringRef getToken() const { assert(Kind == Token && "Invalid access!"); return StringRef(Tok.Data, Tok.Length); @@ -185,7 +195,7 @@ struct X86Operand : public MCParsedAsmOperand { bool isToken() const {return Kind == Token; } bool isImm() const { return Kind == Immediate; } - + bool isImmSExti16i8() const { if (!isImm()) return false; @@ -260,10 +270,6 @@ struct X86Operand : public MCParsedAsmOperand { !getMemIndexReg() && getMemScale() == 1; } - bool isNoSegMem() const { - return Kind == Memory && !getMemSegReg(); - } - bool isReg() const { return Kind == Register; } void addExpr(MCInst &Inst, const MCExpr *Expr) const { @@ -298,14 +304,6 @@ struct X86Operand : public MCParsedAsmOperand { Inst.addOperand(MCOperand::CreateExpr(getMemDisp())); } - void addNoSegMemOperands(MCInst &Inst, unsigned N) const { - assert((N == 4) && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateReg(getMemBaseReg())); - Inst.addOperand(MCOperand::CreateImm(getMemScale())); - Inst.addOperand(MCOperand::CreateReg(getMemIndexReg())); - addExpr(Inst, getMemDisp()); - } - static X86Operand *CreateToken(StringRef Str, SMLoc Loc) { X86Operand *Res = new X86Operand(Token, Loc, Loc); Res->Tok.Data = Str.data(); @@ -376,13 +374,19 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo, // FIXME: Validate register for the current architecture; we have to do // validation later, so maybe there is no need for this here. RegNo = MatchRegisterName(Tok.getString()); - + + // FIXME: This should be done using Requires<In32BitMode> and + // Requires<In64BitMode> so "eiz" usage in 64-bit instructions + // can be also checked. + if (RegNo == X86::RIZ && !Is64Bit) + return Error(Tok.getLoc(), "riz register in 64-bit mode only"); + // Parse %st(1) and "%st" as "%st(0)" if (RegNo == 0 && Tok.getString() == "st") { RegNo = X86::ST0; EndLoc = Tok.getLoc(); Parser.Lex(); // Eat 'st' - + // Check to see if we have '(4)' after %st. 
if (getLexer().isNot(AsmToken::LParen)) return false; @@ -403,15 +407,15 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo, case 7: RegNo = X86::ST7; break; default: return Error(IntTok.getLoc(), "invalid stack index"); } - + if (getParser().Lex().isNot(AsmToken::RParen)) return Error(Parser.getTok().getLoc(), "expected ')'"); - + EndLoc = Tok.getLoc(); Parser.Lex(); // Eat ')' return false; } - + // If this is "db[0-7]", match it as an alias // for dr[0-7]. if (RegNo == 0 && Tok.getString().size() == 3 && @@ -426,14 +430,14 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo, case '6': RegNo = X86::DR6; break; case '7': RegNo = X86::DR7; break; } - + if (RegNo != 0) { EndLoc = Tok.getLoc(); Parser.Lex(); // Eat it. return false; } } - + if (RegNo == 0) return Error(Tok.getLoc(), "invalid register name"); @@ -452,13 +456,17 @@ X86Operand *X86ATTAsmParser::ParseOperand() { unsigned RegNo; SMLoc Start, End; if (ParseRegister(RegNo, Start, End)) return 0; - + if (RegNo == X86::EIZ || RegNo == X86::RIZ) { + Error(Start, "eiz and riz can only be used as index registers"); + return 0; + } + // If this is a segment register followed by a ':', then this is the start // of a memory reference, otherwise this is a normal register reference. if (getLexer().isNot(AsmToken::Colon)) return X86Operand::CreateReg(RegNo, Start, End); - - + + getParser().Lex(); // Eat the colon. return ParseMemOperand(RegNo, Start); } @@ -477,7 +485,7 @@ X86Operand *X86ATTAsmParser::ParseOperand() { /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix /// has already been parsed if present. X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { - + // We have to disambiguate a parenthesized expression "(4+5)" from the start // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The // only way to do this without lookahead is to eat the '(' and see what is @@ -486,7 +494,7 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { if (getLexer().isNot(AsmToken::LParen)) { SMLoc ExprEnd; if (getParser().ParseExpression(Disp, ExprEnd)) return 0; - + // After parsing the base expression we could either have a parenthesized // memory address or not. If not, return now. If so, eat the (. if (getLexer().isNot(AsmToken::LParen)) { @@ -495,7 +503,7 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { return X86Operand::CreateMem(Disp, MemStart, ExprEnd); return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd); } - + // Eat the '('. Parser.Lex(); } else { @@ -503,17 +511,17 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { // so we have to eat the ( to see beyond it. SMLoc LParenLoc = Parser.getTok().getLoc(); Parser.Lex(); // Eat the '('. - + if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) { // Nothing to do here, fall into the code below with the '(' part of the // memory operand consumed. } else { SMLoc ExprEnd; - + // It must be an parenthesized expression, parse it now. if (getParser().ParseParenExpression(Disp, ExprEnd)) return 0; - + // After parsing the base expression we could either have a parenthesized // memory address or not. If not, return now. If so, eat the (. 
if (getLexer().isNot(AsmToken::LParen)) { @@ -522,21 +530,25 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd); return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd); } - + // Eat the '('. Parser.Lex(); } } - + // If we reached here, then we just ate the ( of the memory operand. Process // the rest of the memory operand. unsigned BaseReg = 0, IndexReg = 0, Scale = 1; - + if (getLexer().is(AsmToken::Percent)) { SMLoc L; if (ParseRegister(BaseReg, L, L)) return 0; + if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) { + Error(L, "eiz and riz can only be used as index registers"); + return 0; + } } - + if (getLexer().is(AsmToken::Comma)) { Parser.Lex(); // Eat the comma. @@ -545,11 +557,11 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { // correctly. // // Not that even though it would be completely consistent to support syntax - // like "1(%eax,,1)", the assembler doesn't. + // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this. if (getLexer().is(AsmToken::Percent)) { SMLoc L; if (ParseRegister(IndexReg, L, L)) return 0; - + if (getLexer().isNot(AsmToken::RParen)) { // Parse the scale amount: // ::= ',' [scale-expression] @@ -566,7 +578,7 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { int64_t ScaleVal; if (getParser().ParseAbsoluteExpression(ScaleVal)) return 0; - + // Validate the scale amount. if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){ Error(Loc, "scale factor in address must be 1, 2, 4 or 8"); @@ -576,19 +588,20 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { } } } else if (getLexer().isNot(AsmToken::RParen)) { - // Otherwise we have the unsupported form of a scale amount without an + // A scale amount without an index is ignored. // index. SMLoc Loc = Parser.getTok().getLoc(); int64_t Value; if (getParser().ParseAbsoluteExpression(Value)) return 0; - - Error(Loc, "cannot have scale factor without index register"); - return 0; + + if (Value != 1) + Warning(Loc, "scale factor without index register is ignored"); + Scale = 1; } } - + // Ok, we've eaten the memory operand, verify we have a ')' and eat it too. if (getLexer().isNot(AsmToken::RParen)) { Error(Parser.getTok().getLoc(), "unexpected token in memory operand"); @@ -596,7 +609,7 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { } SMLoc MemEnd = Parser.getTok().getLoc(); Parser.Lex(); // Eat the ')'. 
- + return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, MemStart, MemEnd); } @@ -743,6 +756,23 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, } } } + + // FIXME: Hack to recognize vpclmul<src1_quadword, src2_quadword>dq + if (PatchedName.startswith("vpclmul")) { + unsigned CLMULQuadWordSelect = StringSwitch<unsigned>( + PatchedName.slice(7, PatchedName.size() - 2)) + .Case("lqlq", 0x00) // src1[63:0], src2[63:0] + .Case("hqlq", 0x01) // src1[127:64], src2[63:0] + .Case("lqhq", 0x10) // src1[63:0], src2[127:64] + .Case("hqhq", 0x11) // src1[127:64], src2[127:64] + .Default(~0U); + if (CLMULQuadWordSelect != ~0U) { + ExtraImmOp = MCConstantExpr::Create(CLMULQuadWordSelect, + getParser().getContext()); + assert(PatchedName.endswith("dq") && "Unexpected mnemonic!"); + PatchedName = "vpclmulqdq"; + } + } Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); if (ExtraImmOp) @@ -785,6 +815,20 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, Operands.erase(Operands.begin() + 1); } + // FIXME: Hack to handle "out[bwl]? %al, (%dx)" -> "outb %al, %dx". + if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") && + Operands.size() == 3) { + X86Operand &Op = *(X86Operand*)Operands.back(); + if (Op.isMem() && Op.Mem.SegReg == 0 && + isa<MCConstantExpr>(Op.Mem.Disp) && + cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 && + Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) { + SMLoc Loc = Op.getEndLoc(); + Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc); + delete &Op; + } + } + // FIXME: Hack to handle "f{mul*,add*,sub*,div*} $op, st(0)" the same as // "f{mul*,add*,sub*,div*} $op" if ((Name.startswith("fmul") || Name.startswith("fadd") || @@ -796,6 +840,16 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, Operands.erase(Operands.begin() + 2); } + // FIXME: Hack to handle "imul <imm>, B" which is an alias for "imul <imm>, B, + // B". + if (Name.startswith("imul") && Operands.size() == 3 && + static_cast<X86Operand*>(Operands[1])->isImm() && + static_cast<X86Operand*>(Operands.back())->isReg()) { + X86Operand *Op = static_cast<X86Operand*>(Operands.back()); + Operands.push_back(X86Operand::CreateReg(Op->getReg(), Op->getStartLoc(), + Op->getEndLoc())); + } + return false; } @@ -819,7 +873,7 @@ bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { if (getLexer().is(AsmToken::EndOfStatement)) break; - + // FIXME: Improve diagnostic. if (getLexer().isNot(AsmToken::Comma)) return Error(L, "unexpected token in directive"); @@ -831,82 +885,32 @@ bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { return false; } -/// LowerMOffset - Lower an 'moffset' form of an instruction, which just has a -/// imm operand, to having "rm" or "mr" operands with the offset in the disp -/// field. -static void LowerMOffset(MCInst &Inst, unsigned Opc, unsigned RegNo, - bool isMR) { - MCOperand Disp = Inst.getOperand(0); - - // Start over with an empty instruction. - Inst = MCInst(); - Inst.setOpcode(Opc); - - if (!isMR) - Inst.addOperand(MCOperand::CreateReg(RegNo)); - - // Add the mem operand. 
- Inst.addOperand(MCOperand::CreateReg(0)); // Segment - Inst.addOperand(MCOperand::CreateImm(1)); // Scale - Inst.addOperand(MCOperand::CreateReg(0)); // IndexReg - Inst.addOperand(Disp); // Displacement - Inst.addOperand(MCOperand::CreateReg(0)); // BaseReg - - if (isMR) - Inst.addOperand(MCOperand::CreateReg(RegNo)); -} - -// FIXME: Custom X86 cleanup function to implement a temporary hack to handle -// matching INCL/DECL correctly for x86_64. This needs to be replaced by a -// proper mechanism for supporting (ambiguous) feature dependent instructions. -void X86ATTAsmParser::InstructionCleanup(MCInst &Inst) { - if (!Is64Bit) return; - - switch (Inst.getOpcode()) { - case X86::DEC16r: Inst.setOpcode(X86::DEC64_16r); break; - case X86::DEC16m: Inst.setOpcode(X86::DEC64_16m); break; - case X86::DEC32r: Inst.setOpcode(X86::DEC64_32r); break; - case X86::DEC32m: Inst.setOpcode(X86::DEC64_32m); break; - case X86::INC16r: Inst.setOpcode(X86::INC64_16r); break; - case X86::INC16m: Inst.setOpcode(X86::INC64_16m); break; - case X86::INC32r: Inst.setOpcode(X86::INC64_32r); break; - case X86::INC32m: Inst.setOpcode(X86::INC64_32m); break; - - // moffset instructions are x86-32 only. - case X86::MOV8o8a: LowerMOffset(Inst, X86::MOV8rm , X86::AL , false); break; - case X86::MOV16o16a: LowerMOffset(Inst, X86::MOV16rm, X86::AX , false); break; - case X86::MOV32o32a: LowerMOffset(Inst, X86::MOV32rm, X86::EAX, false); break; - case X86::MOV8ao8: LowerMOffset(Inst, X86::MOV8mr , X86::AL , true); break; - case X86::MOV16ao16: LowerMOffset(Inst, X86::MOV16mr, X86::AX , true); break; - case X86::MOV32ao32: LowerMOffset(Inst, X86::MOV32mr, X86::EAX, true); break; - } -} bool -X86ATTAsmParser::MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> +X86ATTAsmParser::MatchInstruction(SMLoc IDLoc, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCInst &Inst) { + assert(!Operands.empty() && "Unexpect empty operand list!"); + + X86Operand *Op = static_cast<X86Operand*>(Operands[0]); + assert(Op->isToken() && "Leading operand should always be a mnemonic!"); + // First, try a direct match. if (!MatchInstructionImpl(Operands, Inst)) return false; - // Ignore anything which is obviously not a suffix match. - if (Operands.size() == 0) - return true; - X86Operand *Op = static_cast<X86Operand*>(Operands[0]); - if (!Op->isToken() || Op->getToken().size() > 15) - return true; - // FIXME: Ideally, we would only attempt suffix matches for things which are // valid prefixes, and we could just infer the right unambiguous // type. However, that requires substantially more matcher support than the // following hack. // Change the operand to point to a temporary token. - char Tmp[16]; StringRef Base = Op->getToken(); - memcpy(Tmp, Base.data(), Base.size()); - Op->setTokenValue(StringRef(Tmp, Base.size() + 1)); + SmallString<16> Tmp; + Tmp += Base; + Tmp += ' '; + Op->setTokenValue(Tmp.str()); // Check for the various suffix matches. Tmp[Base.size()] = 'b'; @@ -928,6 +932,38 @@ X86ATTAsmParser::MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> return false; // Otherwise, the match failed. + + // If we had multiple suffix matches, then identify this as an ambiguous + // match. 
+ if (MatchB + MatchW + MatchL + MatchQ != 4) { + char MatchChars[4]; + unsigned NumMatches = 0; + if (!MatchB) + MatchChars[NumMatches++] = 'b'; + if (!MatchW) + MatchChars[NumMatches++] = 'w'; + if (!MatchL) + MatchChars[NumMatches++] = 'l'; + if (!MatchQ) + MatchChars[NumMatches++] = 'q'; + + SmallString<126> Msg; + raw_svector_ostream OS(Msg); + OS << "ambiguous instructions require an explicit suffix (could be "; + for (unsigned i = 0; i != NumMatches; ++i) { + if (i != 0) + OS << ", "; + if (i + 1 == NumMatches) + OS << "or "; + OS << "'" << Base << MatchChars[i] << "'"; + } + OS << ")"; + Error(IDLoc, OS.str()); + } else { + // FIXME: We should give nicer diagnostics about the exact failure. + Error(IDLoc, "unrecognized instruction"); + } + return true; } diff --git a/lib/Target/X86/AsmPrinter/CMakeLists.txt b/lib/Target/X86/AsmPrinter/CMakeLists.txt index b70a587ec4e2..033973eeeff9 100644 --- a/lib/Target/X86/AsmPrinter/CMakeLists.txt +++ b/lib/Target/X86/AsmPrinter/CMakeLists.txt @@ -2,8 +2,7 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/ add_llvm_library(LLVMX86AsmPrinter X86ATTInstPrinter.cpp - X86AsmPrinter.cpp X86IntelInstPrinter.cpp - X86MCInstLower.cpp + X86InstComments.cpp ) add_dependencies(LLVMX86AsmPrinter X86CodeGenTable_gen) diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp index f2cdb5ba55eb..554b96c96e0e 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp @@ -14,6 +14,7 @@ #define DEBUG_TYPE "asm-printer" #include "X86ATTInstPrinter.h" +#include "X86InstComments.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" @@ -31,6 +32,10 @@ using namespace llvm; void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) { printInstruction(MI, OS); + + // If verbose assembly is enabled, we can print some informative comments. + if (CommentStream) + EmitAnyX86InstComments(MI, *CommentStream, getRegisterName); } StringRef X86ATTInstPrinter::getOpcodeName(unsigned Opcode) const { return getInstructionName(Opcode); diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h index 3be4bae5bec2..eb986643014c 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h @@ -56,6 +56,9 @@ public: void printi128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { printMemReference(MI, OpNo, O); } + void printi256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + printMemReference(MI, OpNo, O); + } void printf32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { printMemReference(MI, OpNo, O); } diff --git a/lib/Target/X86/AsmPrinter/X86InstComments.cpp b/lib/Target/X86/AsmPrinter/X86InstComments.cpp new file mode 100644 index 000000000000..da9d5a3579e5 --- /dev/null +++ b/lib/Target/X86/AsmPrinter/X86InstComments.cpp @@ -0,0 +1,232 @@ +//===-- X86InstComments.cpp - Generate verbose-asm comments for instrs ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This defines functionality used to emit comments about X86 instructions to +// an output stream for -fverbose-asm. 
+// +//===----------------------------------------------------------------------===// + +#include "X86InstComments.h" +#include "X86GenInstrNames.inc" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/raw_ostream.h" +#include "../X86ShuffleDecode.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Top Level Entrypoint +//===----------------------------------------------------------------------===// + +/// EmitAnyX86InstComments - This function decodes x86 instructions and prints +/// newline terminated strings to the specified string if desired. This +/// information is shown in disassembly dumps when verbose assembly is enabled. +void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, + const char *(*getRegName)(unsigned)) { + // If this is a shuffle operation, the switch should fill in this state. + SmallVector<unsigned, 8> ShuffleMask; + const char *DestName = 0, *Src1Name = 0, *Src2Name = 0; + + switch (MI->getOpcode()) { + case X86::INSERTPSrr: + Src1Name = getRegName(MI->getOperand(1).getReg()); + Src2Name = getRegName(MI->getOperand(2).getReg()); + DecodeINSERTPSMask(MI->getOperand(3).getImm(), ShuffleMask); + break; + + case X86::MOVLHPSrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + Src1Name = getRegName(MI->getOperand(0).getReg()); + DecodeMOVLHPSMask(2, ShuffleMask); + break; + + case X86::MOVHLPSrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + Src1Name = getRegName(MI->getOperand(0).getReg()); + DecodeMOVHLPSMask(2, ShuffleMask); + break; + + case X86::PSHUFDri: + Src1Name = getRegName(MI->getOperand(1).getReg()); + // FALL THROUGH. + case X86::PSHUFDmi: + DestName = getRegName(MI->getOperand(0).getReg()); + DecodePSHUFMask(4, MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); + break; + + case X86::PSHUFHWri: + Src1Name = getRegName(MI->getOperand(1).getReg()); + // FALL THROUGH. + case X86::PSHUFHWmi: + DestName = getRegName(MI->getOperand(0).getReg()); + DecodePSHUFHWMask(MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); + break; + case X86::PSHUFLWri: + Src1Name = getRegName(MI->getOperand(1).getReg()); + // FALL THROUGH. + case X86::PSHUFLWmi: + DestName = getRegName(MI->getOperand(0).getReg()); + DecodePSHUFLWMask(MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); + break; + + case X86::PUNPCKHBWrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::PUNPCKHBWrm: + Src1Name = getRegName(MI->getOperand(0).getReg()); + DecodePUNPCKHMask(16, ShuffleMask); + break; + case X86::PUNPCKHWDrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::PUNPCKHWDrm: + Src1Name = getRegName(MI->getOperand(0).getReg()); + DecodePUNPCKHMask(8, ShuffleMask); + break; + case X86::PUNPCKHDQrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::PUNPCKHDQrm: + Src1Name = getRegName(MI->getOperand(0).getReg()); + DecodePUNPCKHMask(4, ShuffleMask); + break; + case X86::PUNPCKHQDQrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::PUNPCKHQDQrm: + Src1Name = getRegName(MI->getOperand(0).getReg()); + DecodePUNPCKHMask(2, ShuffleMask); + break; + + case X86::PUNPCKLBWrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. 
+ case X86::PUNPCKLBWrm: + Src1Name = getRegName(MI->getOperand(0).getReg()); + DecodePUNPCKLMask(16, ShuffleMask); + break; + case X86::PUNPCKLWDrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::PUNPCKLWDrm: + Src1Name = getRegName(MI->getOperand(0).getReg()); + DecodePUNPCKLMask(8, ShuffleMask); + break; + case X86::PUNPCKLDQrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::PUNPCKLDQrm: + Src1Name = getRegName(MI->getOperand(0).getReg()); + DecodePUNPCKLMask(4, ShuffleMask); + break; + case X86::PUNPCKLQDQrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::PUNPCKLQDQrm: + Src1Name = getRegName(MI->getOperand(0).getReg()); + DecodePUNPCKLMask(2, ShuffleMask); + break; + + case X86::SHUFPDrri: + DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask); + Src1Name = getRegName(MI->getOperand(0).getReg()); + Src2Name = getRegName(MI->getOperand(2).getReg()); + break; + + case X86::SHUFPSrri: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::SHUFPSrmi: + DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask); + Src1Name = getRegName(MI->getOperand(0).getReg()); + break; + + case X86::UNPCKLPDrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::UNPCKLPDrm: + DecodeUNPCKLPMask(2, ShuffleMask); + Src1Name = getRegName(MI->getOperand(0).getReg()); + break; + case X86::UNPCKLPSrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::UNPCKLPSrm: + DecodeUNPCKLPMask(4, ShuffleMask); + Src1Name = getRegName(MI->getOperand(0).getReg()); + break; + case X86::UNPCKHPDrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::UNPCKHPDrm: + DecodeUNPCKHPMask(2, ShuffleMask); + Src1Name = getRegName(MI->getOperand(0).getReg()); + break; + case X86::UNPCKHPSrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::UNPCKHPSrm: + DecodeUNPCKHPMask(4, ShuffleMask); + Src1Name = getRegName(MI->getOperand(0).getReg()); + break; + } + + + // If this was a shuffle operation, print the shuffle mask. + if (!ShuffleMask.empty()) { + if (DestName == 0) DestName = Src1Name; + OS << (DestName ? DestName : "mem") << " = "; + + // If the two sources are the same, canonicalize the input elements to be + // from the first src so that we get larger element spans. + if (Src1Name == Src2Name) { + for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) { + if ((int)ShuffleMask[i] >= 0 && // Not sentinel. + ShuffleMask[i] >= e) // From second mask. + ShuffleMask[i] -= e; + } + } + + // The shuffle mask specifies which elements of the src1/src2 fill in the + // destination, with a few sentinel values. Loop through and print them + // out. + for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) { + if (i != 0) + OS << ','; + if (ShuffleMask[i] == SM_SentinelZero) { + OS << "zero"; + continue; + } + + // Otherwise, it must come from src1 or src2. Print the span of elements + // that comes from this src. + bool isSrc1 = ShuffleMask[i] < ShuffleMask.size(); + const char *SrcName = isSrc1 ? Src1Name : Src2Name; + OS << (SrcName ? SrcName : "mem") << '['; + bool IsFirst = true; + while (i != e && + (int)ShuffleMask[i] >= 0 && + (ShuffleMask[i] < ShuffleMask.size()) == isSrc1) { + if (!IsFirst) + OS << ','; + else + IsFirst = false; + OS << ShuffleMask[i] % ShuffleMask.size(); + ++i; + } + OS << ']'; + --i; // For loop increments element #. 
+ } + //MI->print(OS, 0); + OS << "\n"; + } + +} diff --git a/lib/Target/X86/AsmPrinter/X86InstComments.h b/lib/Target/X86/AsmPrinter/X86InstComments.h new file mode 100644 index 000000000000..6b86db4f9e5c --- /dev/null +++ b/lib/Target/X86/AsmPrinter/X86InstComments.h @@ -0,0 +1,25 @@ +//===-- X86InstComments.h - Generate verbose-asm comments for instrs ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This defines functionality used to emit comments about X86 instructions to +// an output stream for -fverbose-asm. +// +//===----------------------------------------------------------------------===// + +#ifndef X86_INST_COMMENTS_H +#define X86_INST_COMMENTS_H + +namespace llvm { + class MCInst; + class raw_ostream; + void EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, + const char *(*getRegName)(unsigned)); +} + +#endif diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp index a632047f6592..5625b0ea618f 100644 --- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp @@ -14,6 +14,7 @@ #define DEBUG_TYPE "asm-printer" #include "X86IntelInstPrinter.h" +#include "X86InstComments.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" @@ -30,6 +31,10 @@ using namespace llvm; void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) { printInstruction(MI, OS); + + // If verbose assembly is enabled, we can print some informative comments. + if (CommentStream) + EmitAnyX86InstComments(MI, *CommentStream, getRegisterName); } StringRef X86IntelInstPrinter::getOpcodeName(unsigned Opcode) const { return getInstructionName(Opcode); diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h index 4d680744dd60..6f120322742b 100644 --- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h @@ -64,6 +64,10 @@ public: O << "XMMWORD PTR "; printMemReference(MI, OpNo, O); } + void printi256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + O << "YMMWORD PTR "; + printMemReference(MI, OpNo, O); + } void printf32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { O << "DWORD PTR "; printMemReference(MI, OpNo, O); diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index 133482036ce1..e9399f5c8322 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -18,23 +18,24 @@ tablegen(X86GenEDInfo.inc -gen-enhanced-disassembly-info) set(sources SSEDomainFix.cpp X86AsmBackend.cpp - X86CodeEmitter.cpp + X86AsmPrinter.cpp X86COFFMachineModuleInfo.cpp + X86CodeEmitter.cpp X86ELFWriterInfo.cpp + X86FastISel.cpp X86FloatingPoint.cpp - X86FloatingPointRegKill.cpp X86ISelDAGToDAG.cpp X86ISelLowering.cpp X86InstrInfo.cpp X86JITInfo.cpp X86MCAsmInfo.cpp X86MCCodeEmitter.cpp + X86MCInstLower.cpp X86RegisterInfo.cpp + X86SelectionDAGInfo.cpp X86Subtarget.cpp X86TargetMachine.cpp X86TargetObjectFile.cpp - X86FastISel.cpp - X86SelectionDAGInfo.cpp ) if( CMAKE_CL_64 ) @@ -49,4 +50,3 @@ endif() add_llvm_target(X86CodeGen ${sources}) -target_link_libraries (LLVMX86CodeGen LLVMSelectionDAG) diff --git a/lib/Target/X86/README-FPStack.txt b/lib/Target/X86/README-FPStack.txt index 
be28e8b394a4..39efd2dbcf1a 100644 --- a/lib/Target/X86/README-FPStack.txt +++ b/lib/Target/X86/README-FPStack.txt @@ -27,8 +27,8 @@ def FpIADD32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW, //===---------------------------------------------------------------------===// -The FP stackifier needs to be global. Also, it should handle simple permutates -to reduce number of shuffle instructions, e.g. turning: +The FP stackifier should handle simple permutates to reduce number of shuffle +instructions, e.g. turning: fld P -> fld Q fld Q fld P diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt index b6aba93f3738..f96b22f1e204 100644 --- a/lib/Target/X86/README-SSE.txt +++ b/lib/Target/X86/README-SSE.txt @@ -2,8 +2,46 @@ // Random ideas for the X86 backend: SSE-specific stuff. //===---------------------------------------------------------------------===// -- Consider eliminating the unaligned SSE load intrinsics, replacing them with - unaligned LLVM load instructions. +//===---------------------------------------------------------------------===// + +SSE Variable shift can be custom lowered to something like this, which uses a +small table + unaligned load + shuffle instead of going through memory. + +__m128i_shift_right: + .byte 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 + .byte -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 + +... +__m128i shift_right(__m128i value, unsigned long offset) { + return _mm_shuffle_epi8(value, + _mm_loadu_si128((__m128 *) (___m128i_shift_right + offset))); +} + +//===---------------------------------------------------------------------===// + +SSE has instructions for doing operations on complex numbers, we should pattern +match them. Compiling this: + +_Complex float f32(_Complex float A, _Complex float B) { + return A+B; +} + +into: + +_f32: + movdqa %xmm0, %xmm2 + addss %xmm1, %xmm2 + pshufd $16, %xmm2, %xmm2 + pshufd $1, %xmm1, %xmm1 + pshufd $1, %xmm0, %xmm0 + addss %xmm1, %xmm0 + pshufd $16, %xmm0, %xmm1 + movdqa %xmm2, %xmm0 + unpcklps %xmm1, %xmm0 + ret + +seems silly. + //===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index efc0cd82f23e..a305ae6ec550 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -1135,13 +1135,6 @@ void test(double *P) { //===---------------------------------------------------------------------===// -handling llvm.memory.barrier on pre SSE2 cpus - -should generate: -lock ; mov %esp, %esp - -//===---------------------------------------------------------------------===// - The generated code on x86 for checking for signed overflow on a multiply the obvious way is much longer than it needs to be. @@ -1870,3 +1863,100 @@ The code produced by gcc is 3 bytes shorter. This sort of construct often shows up with bitfields. 
//===---------------------------------------------------------------------===// + +Take the following C code: +int f(int a, int b) { return (unsigned char)a == (unsigned char)b; } + +We generate the following IR with clang: +define i32 @f(i32 %a, i32 %b) nounwind readnone { +entry: + %tmp = xor i32 %b, %a ; <i32> [#uses=1] + %tmp6 = and i32 %tmp, 255 ; <i32> [#uses=1] + %cmp = icmp eq i32 %tmp6, 0 ; <i1> [#uses=1] + %conv5 = zext i1 %cmp to i32 ; <i32> [#uses=1] + ret i32 %conv5 +} + +And the following x86 code: + xorl %esi, %edi + testb $-1, %dil + sete %al + movzbl %al, %eax + ret + +A cmpb instead of the xorl+testb would be one instruction shorter. + +//===---------------------------------------------------------------------===// + +Given the following C code: +int f(int a, int b) { return (signed char)a == (signed char)b; } + +We generate the following IR with clang: +define i32 @f(i32 %a, i32 %b) nounwind readnone { +entry: + %sext = shl i32 %a, 24 ; <i32> [#uses=1] + %conv1 = ashr i32 %sext, 24 ; <i32> [#uses=1] + %sext6 = shl i32 %b, 24 ; <i32> [#uses=1] + %conv4 = ashr i32 %sext6, 24 ; <i32> [#uses=1] + %cmp = icmp eq i32 %conv1, %conv4 ; <i1> [#uses=1] + %conv5 = zext i1 %cmp to i32 ; <i32> [#uses=1] + ret i32 %conv5 +} + +And the following x86 code: + movsbl %sil, %eax + movsbl %dil, %ecx + cmpl %eax, %ecx + sete %al + movzbl %al, %eax + ret + + +It should be possible to eliminate the sign extensions. + +//===---------------------------------------------------------------------===// + +LLVM misses a load+store narrowing opportunity in this code: + +%struct.bf = type { i64, i16, i16, i32 } + +@bfi = external global %struct.bf* ; <%struct.bf**> [#uses=2] + +define void @t1() nounwind ssp { +entry: + %0 = load %struct.bf** @bfi, align 8 ; <%struct.bf*> [#uses=1] + %1 = getelementptr %struct.bf* %0, i64 0, i32 1 ; <i16*> [#uses=1] + %2 = bitcast i16* %1 to i32* ; <i32*> [#uses=2] + %3 = load i32* %2, align 1 ; <i32> [#uses=1] + %4 = and i32 %3, -65537 ; <i32> [#uses=1] + store i32 %4, i32* %2, align 1 + %5 = load %struct.bf** @bfi, align 8 ; <%struct.bf*> [#uses=1] + %6 = getelementptr %struct.bf* %5, i64 0, i32 1 ; <i16*> [#uses=1] + %7 = bitcast i16* %6 to i32* ; <i32*> [#uses=2] + %8 = load i32* %7, align 1 ; <i32> [#uses=1] + %9 = and i32 %8, -131073 ; <i32> [#uses=1] + store i32 %9, i32* %7, align 1 + ret void +} + +LLVM currently emits this: + + movq bfi(%rip), %rax + andl $-65537, 8(%rax) + movq bfi(%rip), %rax + andl $-131073, 8(%rax) + ret + +It could narrow the loads and stores to emit this: + + movq bfi(%rip), %rax + andb $-2, 10(%rax) + movq bfi(%rip), %rax + andb $-3, 10(%rax) + ret + +The trouble is that there is a TokenFactor between the store and the +load, making it non-trivial to determine if there's anything between +the load and the store which would prohibit narrowing. 
+ +//===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/SSEDomainFix.cpp b/lib/Target/X86/SSEDomainFix.cpp index dab070e1febd..13680c592e01 100644 --- a/lib/Target/X86/SSEDomainFix.cpp +++ b/lib/Target/X86/SSEDomainFix.cpp @@ -115,7 +115,7 @@ class SSEDomainFixPass : public MachineFunctionPass { unsigned Distance; public: - SSEDomainFixPass() : MachineFunctionPass(&ID) {} + SSEDomainFixPass() : MachineFunctionPass(ID) {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index 677781d3730e..27e88505150b 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -49,11 +49,6 @@ FunctionPass *createX86FloatingPointStackifierPass(); /// crossings. FunctionPass *createSSEDomainFixPass(); -/// createX87FPRegKillInserterPass - This function returns a pass which -/// inserts FP_REG_KILL instructions where needed. -/// -FunctionPass *createX87FPRegKillInserterPass(); - /// createX86CodeEmitterPass - Return a pass that emits the collected X86 code /// to the specified MCE object. FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM, diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index a53f973c1c43..a19f1acffaca 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -67,6 +67,8 @@ def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true", def FeatureAVX : SubtargetFeature<"avx", "HasAVX", "true", "Enable AVX instructions">; +def FeatureCLMUL : SubtargetFeature<"clmul", "HasCLMUL", "true", + "Enable carry-less multiplication instructions">; def FeatureFMA3 : SubtargetFeature<"fma3", "HasFMA3", "true", "Enable three-operand fused multiple-add">; def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true", @@ -180,8 +182,6 @@ include "X86CallingConv.td" // Currently the X86 assembly parser only supports ATT syntax. def ATTAsmParser : AsmParser { string AsmParserClassName = "ATTAsmParser"; - string AsmParserInstCleanup = "InstructionCleanup"; - string MatchInstructionName = "MatchInstructionImpl"; int Variant = 0; // Discard comments in assembly strings. 
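The SSEDomainFix hunk above, like the DelaySlotFiller, FPMover, and TargetData hunks earlier in this diff, changes the pass constructor argument from &ID to ID: the base-class constructors now take the identifying static char by reference rather than by address. Below is a minimal sketch of the resulting boilerplate, assuming the usual static char ID convention; ExamplePass and its body are illustrative only and are not code from this import.

#include "llvm/CodeGen/MachineFunctionPass.h"

namespace {
  // Hypothetical no-op pass, shown only to illustrate the ID-by-reference
  // constructor form used throughout this import.
  struct ExamplePass : public llvm::MachineFunctionPass {
    static char ID;                               // the address of ID uniquely identifies the pass
    ExamplePass() : MachineFunctionPass(ID) {}    // previously written as MachineFunctionPass(&ID)

    virtual const char *getPassName() const {
      return "Example No-op Pass";
    }

    virtual bool runOnMachineFunction(llvm::MachineFunction &) {
      return false;                               // nothing in the function is modified
    }
  };
}

char ExamplePass::ID = 0;

Call sites that previously passed &ID (a char*) now pass ID itself, which is why every pass touched by this import drops the ampersand but keeps the same static member.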
diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp index 2cf65c11f94a..69dc967f9d88 100644 --- a/lib/Target/X86/X86AsmBackend.cpp +++ b/lib/Target/X86/X86AsmBackend.cpp @@ -11,9 +11,11 @@ #include "X86.h" #include "X86FixupKinds.h" #include "llvm/ADT/Twine.h" +#include "llvm/MC/ELFObjectWriter.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MachObjectWriter.h" @@ -190,10 +192,6 @@ public: HasScatteredSymbols = true; } - MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return 0; - } - bool isVirtualSection(const MCSection &Section) const { const MCSectionELF &SE = static_cast<const MCSectionELF&>(Section); return SE.getType() == MCSectionELF::SHT_NOBITS;; @@ -204,12 +202,43 @@ class ELFX86_32AsmBackend : public ELFX86AsmBackend { public: ELFX86_32AsmBackend(const Target &T) : ELFX86AsmBackend(T) {} + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + return new ELFObjectWriter(OS, /*Is64Bit=*/false, + /*IsLittleEndian=*/true, + /*HasRelocationAddend=*/false); + } }; class ELFX86_64AsmBackend : public ELFX86AsmBackend { public: ELFX86_64AsmBackend(const Target &T) : ELFX86AsmBackend(T) {} + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + return new ELFObjectWriter(OS, /*Is64Bit=*/true, + /*IsLittleEndian=*/true, + /*HasRelocationAddend=*/true); + } +}; + +class WindowsX86AsmBackend : public X86AsmBackend { + bool Is64Bit; +public: + WindowsX86AsmBackend(const Target &T, bool is64Bit) + : X86AsmBackend(T) + , Is64Bit(is64Bit) { + HasScatteredSymbols = true; + } + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + return createWinCOFFObjectWriter(OS, Is64Bit); + } + + bool isVirtualSection(const MCSection &Section) const { + const MCSectionCOFF &SE = static_cast<const MCSectionCOFF&>(Section); + return SE.getCharacteristics() & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA; + } }; class DarwinX86AsmBackend : public X86AsmBackend { @@ -290,6 +319,10 @@ TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T, switch (Triple(TT).getOS()) { case Triple::Darwin: return new DarwinX86_32AsmBackend(T); + case Triple::MinGW32: + case Triple::Cygwin: + case Triple::Win32: + return new WindowsX86AsmBackend(T, false); default: return new ELFX86_32AsmBackend(T); } @@ -300,6 +333,10 @@ TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T, switch (Triple(TT).getOS()) { case Triple::Darwin: return new DarwinX86_64AsmBackend(T); + case Triple::MinGW64: + case Triple::Cygwin: + case Triple::Win32: + return new WindowsX86AsmBackend(T, true); default: return new ELFX86_64AsmBackend(T); } diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index 08e6486d5b7a..20110ad788cd 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -13,8 +13,8 @@ //===----------------------------------------------------------------------===// #include "X86AsmPrinter.h" -#include "X86ATTInstPrinter.h" -#include "X86IntelInstPrinter.h" +#include "AsmPrinter/X86ATTInstPrinter.h" +#include "AsmPrinter/X86IntelInstPrinter.h" #include "X86MCInstLower.h" #include "X86.h" #include "X86COFFMachineModuleInfo.h" @@ -24,6 +24,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/Module.h" #include "llvm/Type.h" +#include "llvm/Analysis/DebugInfo.h" #include "llvm/Assembly/Writer.h" #include 
"llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -35,6 +36,7 @@ #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/Support/COFF.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetOptions.h" @@ -218,6 +220,10 @@ void X86AsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo, const MachineOperand &MO = MI->getOperand(OpNo); switch (MO.getType()) { default: llvm_unreachable("Unknown pcrel immediate operand"); + case MachineOperand::MO_Register: + // pc-relativeness was handled when computing the value in the reg. + printOperand(MI, OpNo, O); + return; case MachineOperand::MO_Immediate: O << MO.getImm(); return; @@ -655,6 +661,47 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { } } +MachineLocation +X86AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const { + MachineLocation Location; + assert (MI->getNumOperands() == 7 && "Invalid no. of machine operands!"); + // Frame address. Currently handles register +- offset only. + + if (MI->getOperand(0).isReg() && MI->getOperand(3).isImm()) + Location.set(MI->getOperand(0).getReg(), MI->getOperand(3).getImm()); + else { + DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n"); + } + return Location; +} + +void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, + raw_ostream &O) { + // Only the target-dependent form of DBG_VALUE should get here. + // Referencing the offset and metadata as NOps-2 and NOps-1 is + // probably portable to other targets; frame pointer location is not. + unsigned NOps = MI->getNumOperands(); + assert(NOps==7); + O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; + // cast away const; DIetc do not take const operands for some reason. + DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata())); + if (V.getContext().isSubprogram()) + O << DISubprogram(V.getContext()).getDisplayName() << ":"; + O << V.getName(); + O << " <- "; + // Frame address. Currently handles register +- offset only. + O << '['; + if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg()) + printOperand(MI, 0, O); + else + O << "undef"; + O << '+'; printOperand(MI, 3, O); + O << ']'; + O << "+"; + printOperand(MI, NOps-2, O); +} + + //===----------------------------------------------------------------------===// // Target Registry Stuff diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h index b5a7f8dc321a..e61be66c75a2 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h +++ b/lib/Target/X86/X86AsmPrinter.h @@ -14,9 +14,9 @@ #ifndef X86ASMPRINTER_H #define X86ASMPRINTER_H -#include "../X86.h" -#include "../X86MachineFunctionInfo.h" -#include "../X86TargetMachine.h" +#include "X86.h" +#include "X86MachineFunctionInfo.h" +#include "X86TargetMachine.h" #include "llvm/ADT/StringSet.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineModuleInfo.h" diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index a6a1e4e573cf..e3409effc318 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -33,13 +33,19 @@ def RetCC_X86Common : CallingConv<[ CCIfType<[i16], CCAssignToReg<[AX, DX]>>, CCIfType<[i32], CCAssignToReg<[EAX, EDX]>>, CCIfType<[i64], CCAssignToReg<[RAX, RDX]>>, - - // Vector types are returned in XMM0 and XMM1, when they fit. XMMM2 and XMM3 + + // Vector types are returned in XMM0 and XMM1, when they fit. 
XMM2 and XMM3 // can only be used by ABI non-compliant code. If the target doesn't have XMM // registers, it won't have vector types. CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToReg<[XMM0,XMM1,XMM2,XMM3]>>, + // 256-bit vectors are returned in YMM0 and XMM1, when they fit. YMM2 and YMM3 + // can only be used by ABI non-compliant code. This vector type is only + // supported while using the AVX target feature. + CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], + CCIfSubtarget<"hasAVX()", CCAssignToReg<[YMM0,YMM1,YMM2,YMM3]>>>, + // MMX vector types are always returned in MM0. If the target doesn't have // MM0, it doesn't support these vector types. CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToReg<[MM0]>>, @@ -164,11 +170,16 @@ def CC_X86_64_C : CallingConv<[ CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCIfSubtarget<"hasSSE1()", CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>, - + + // The first 8 256-bit vector arguments are passed in YMM registers. + CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], + CCIfSubtarget<"hasAVX()", + CCAssignToReg<[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7]>>>, + // Integer/FP values get stored in stack slots that are 8 bytes in size and // 8-byte aligned if there are no more registers to hold them. CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>, - + // Long doubles get stack slots whose size and alignment depends on the // subtarget. CCIfType<[f80], CCAssignToStack<0, 0>>, @@ -176,6 +187,10 @@ def CC_X86_64_C : CallingConv<[ // Vectors get 16-byte stack slots that are 16-byte aligned. CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>, + // 256-bit vectors get 32-byte stack slots that are 32-byte aligned. + CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], + CCAssignToStack<32, 32>>, + // __m64 vectors get 8-byte stack slots that are 8-byte aligned. CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>> ]>; @@ -271,9 +286,18 @@ def CC_X86_32_Common : CallingConv<[ CCIfNotVarArg<CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>>, + // The first 4 AVX 256-bit vector arguments are passed in YMM registers. + CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], + CCIfSubtarget<"hasAVX()", + CCAssignToReg<[YMM0, YMM1, YMM2, YMM3]>>>>, + // Other SSE vectors get 16-byte stack slots that are 16-byte aligned. CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>, + // 256-bit AVX vectors get 32-byte stack slots that are 32-byte aligned. + CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], + CCAssignToStack<32, 32>>, + // __m64 vectors get 8-byte stack slots that are 4-byte aligned. They are // passed in the parameter area. 
CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 4>>]>; diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index f13669bd741d..824021c0c882 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -53,12 +53,12 @@ namespace { public: static char ID; explicit Emitter(X86TargetMachine &tm, CodeEmitter &mce) - : MachineFunctionPass(&ID), II(0), TD(0), TM(tm), + : MachineFunctionPass(ID), II(0), TD(0), TM(tm), MCE(mce), PICBaseOffset(0), Is64BitMode(false), IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} Emitter(X86TargetMachine &tm, CodeEmitter &mce, const X86InstrInfo &ii, const TargetData &td, bool is64) - : MachineFunctionPass(&ID), II(&ii), TD(&td), TM(tm), + : MachineFunctionPass(ID), II(&ii), TD(&td), TM(tm), MCE(mce), PICBaseOffset(0), Is64BitMode(is64), IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} @@ -146,6 +146,103 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { return false; } +/// determineREX - Determine if the MachineInstr has to be encoded with a X86-64 +/// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand +/// size, and 3) use of X86-64 extended registers. +static unsigned determineREX(const MachineInstr &MI) { + unsigned REX = 0; + const TargetInstrDesc &Desc = MI.getDesc(); + + // Pseudo instructions do not need REX prefix byte. + if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo) + return 0; + if (Desc.TSFlags & X86II::REX_W) + REX |= 1 << 3; + + unsigned NumOps = Desc.getNumOperands(); + if (NumOps) { + bool isTwoAddr = NumOps > 1 && + Desc.getOperandConstraint(1, TOI::TIED_TO) != -1; + + // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix. + unsigned i = isTwoAddr ? 1 : 0; + for (unsigned e = NumOps; i != e; ++i) { + const MachineOperand& MO = MI.getOperand(i); + if (MO.isReg()) { + unsigned Reg = MO.getReg(); + if (X86InstrInfo::isX86_64NonExtLowByteReg(Reg)) + REX |= 0x40; + } + } + + switch (Desc.TSFlags & X86II::FormMask) { + case X86II::MRMInitReg: + if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0))) + REX |= (1 << 0) | (1 << 2); + break; + case X86II::MRMSrcReg: { + if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0))) + REX |= 1 << 2; + i = isTwoAddr ? 2 : 1; + for (unsigned e = NumOps; i != e; ++i) { + const MachineOperand& MO = MI.getOperand(i); + if (X86InstrInfo::isX86_64ExtendedReg(MO)) + REX |= 1 << 0; + } + break; + } + case X86II::MRMSrcMem: { + if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0))) + REX |= 1 << 2; + unsigned Bit = 0; + i = isTwoAddr ? 2 : 1; + for (; i != NumOps; ++i) { + const MachineOperand& MO = MI.getOperand(i); + if (MO.isReg()) { + if (X86InstrInfo::isX86_64ExtendedReg(MO)) + REX |= 1 << Bit; + Bit++; + } + } + break; + } + case X86II::MRM0m: case X86II::MRM1m: + case X86II::MRM2m: case X86II::MRM3m: + case X86II::MRM4m: case X86II::MRM5m: + case X86II::MRM6m: case X86II::MRM7m: + case X86II::MRMDestMem: { + unsigned e = (isTwoAddr ? X86::AddrNumOperands+1 : X86::AddrNumOperands); + i = isTwoAddr ? 1 : 0; + if (NumOps > e && X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(e))) + REX |= 1 << 2; + unsigned Bit = 0; + for (; i != e; ++i) { + const MachineOperand& MO = MI.getOperand(i); + if (MO.isReg()) { + if (X86InstrInfo::isX86_64ExtendedReg(MO)) + REX |= 1 << Bit; + Bit++; + } + } + break; + } + default: { + if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0))) + REX |= 1 << 0; + i = isTwoAddr ? 
2 : 1; + for (unsigned e = NumOps; i != e; ++i) { + const MachineOperand& MO = MI.getOperand(i); + if (X86InstrInfo::isX86_64ExtendedReg(MO)) + REX |= 1 << 2; + } + break; + } + } + } + return REX; +} + + /// emitPCRelativeBlockAddress - This method keeps track of the information /// necessary to resolve the address of this block later and emits a dummy /// value. @@ -569,7 +666,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, // Handle REX prefix. if (Is64BitMode) { - if (unsigned REX = X86InstrInfo::determineREX(MI)) + if (unsigned REX = determineREX(MI)) MCE.emitByte(0x40 | REX); } @@ -605,24 +702,29 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, // base address. switch (Opcode) { default: - llvm_unreachable("psuedo instructions should be removed before code" + llvm_unreachable("pseudo instructions should be removed before code" " emission"); break; + // Do nothing for Int_MemBarrier - it's just a comment. Add a debug + // to make it slightly easier to see. + case X86::Int_MemBarrier: + DEBUG(dbgs() << "#MEMBARRIER\n"); + break; + case TargetOpcode::INLINEASM: // We allow inline assembler nodes with empty bodies - they can // implicitly define registers, which is ok for JIT. if (MI.getOperand(0).getSymbolName()[0]) report_fatal_error("JIT does not support inline asm!"); break; - case TargetOpcode::DBG_LABEL: + case TargetOpcode::PROLOG_LABEL: case TargetOpcode::GC_LABEL: case TargetOpcode::EH_LABEL: MCE.emitLabel(MI.getOperand(0).getMCSymbol()); break; - + case TargetOpcode::IMPLICIT_DEF: case TargetOpcode::KILL: - case X86::FP_REG_KILL: break; case X86::MOVPC32r: { // This emits the "call" portion of this pseudo instruction. @@ -674,7 +776,8 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, } assert(MO.isImm() && "Unknown RawFrm operand!"); - if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32) { + if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32 || + Opcode == X86::WINCALL64pcrel32) { // Fix up immediate operand for pc relative calls. intptr_t Imm = (intptr_t)MO.getImm(); Imm = Imm - MCE.getCurrentPCValue() - 4; diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index ce1370763b77..0c70eec4827f 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -960,9 +960,11 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; - // Fold the common case of a conditional branch with a comparison. + // Fold the common case of a conditional branch with a comparison + // in the same block (values defined on other blocks may not have + // initialized registers). if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { - if (CI->hasOneUse()) { + if (CI->hasOneUse() && CI->getParent() == I->getParent()) { EVT VT = TLI.getValueType(CI->getOperand(0)->getType()); // Try to take advantage of fallthrough opportunities. @@ -1058,10 +1060,8 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { const MachineInstr &MI = *RI; if (MI.definesRegister(Reg)) { - unsigned Src, Dst, SrcSR, DstSR; - - if (getInstrInfo()->isMoveInstr(MI, Src, Dst, SrcSR, DstSR)) { - Reg = Src; + if (MI.isCopy()) { + Reg = MI.getOperand(1).getReg(); continue; } @@ -1648,15 +1648,26 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { MachineInstrBuilder MIB; if (CalleeOp) { // Register-indirect call. 
- unsigned CallOpc = Subtarget->is64Bit() ? X86::CALL64r : X86::CALL32r; + unsigned CallOpc; + if (Subtarget->isTargetWin64()) + CallOpc = X86::WINCALL64r; + else if (Subtarget->is64Bit()) + CallOpc = X86::CALL64r; + else + CallOpc = X86::CALL32r; MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc)) .addReg(CalleeOp); } else { // Direct call. assert(GV && "Not a direct call"); - unsigned CallOpc = - Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32; + unsigned CallOpc; + if (Subtarget->isTargetWin64()) + CallOpc = X86::WINCALL64pcrel32; + else if (Subtarget->is64Bit()) + CallOpc = X86::CALL64pcrel32; + else + CallOpc = X86::CALLpcrel32; // See if we need any target-specific flags on the GV operand. unsigned char OpFlags = 0; diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index cee4ad70201a..e6ebf669587d 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -8,23 +8,18 @@ //===----------------------------------------------------------------------===// // // This file defines the pass which converts floating point instructions from -// virtual registers into register stack instructions. This pass uses live +// pseudo registers into register stack instructions. This pass uses live // variable information to indicate where the FPn registers are used and their // lifetimes. // -// This pass is hampered by the lack of decent CFG manipulation routines for -// machine code. In particular, this wants to be able to split critical edges -// as necessary, traverse the machine basic block CFG in depth-first order, and -// allow there to be multiple machine basic blocks for each LLVM basicblock -// (needed for critical edge splitting). +// The x87 hardware tracks liveness of the stack registers, so it is necessary +// to implement exact liveness tracking between basic blocks. The CFG edges are +// partitioned into bundles where the same FP registers must be live in +// identical stack positions. Instructions are inserted at the end of each basic +// block to rearrange the live registers to match the outgoing bundle. // -// In particular, this pass currently barfs on critical edges. Because of this, -// it requires the instruction selector to insert FP_REG_KILL instructions on -// the exits of any basic block that has critical edges going from it, or which -// branch to a critical basic block. -// -// FIXME: this is not implemented yet. The stackifier pass only works on local -// basic blocks. +// This approach avoids splitting critical edges at the potential cost of more +// live register shuffling instructions when critical edges are present. // //===----------------------------------------------------------------------===// @@ -32,6 +27,7 @@ #include "X86.h" #include "X86InstrInfo.h" #include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -54,7 +50,12 @@ STATISTIC(NumFP , "Number of floating point instructions"); namespace { struct FPS : public MachineFunctionPass { static char ID; - FPS() : MachineFunctionPass(&ID) {} + FPS() : MachineFunctionPass(ID) { + // This is really only to keep valgrind quiet. + // The logic in isLive() is too much for it. 
+ memset(Stack, 0, sizeof(Stack)); + memset(RegMap, 0, sizeof(RegMap)); + } virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); @@ -69,11 +70,71 @@ namespace { private: const TargetInstrInfo *TII; // Machine instruction info. + + // Two CFG edges are related if they leave the same block, or enter the same + // block. The transitive closure of an edge under this relation is a + // LiveBundle. It represents a set of CFG edges where the live FP stack + // registers must be allocated identically in the x87 stack. + // + // A LiveBundle is usually all the edges leaving a block, or all the edges + // entering a block, but it can contain more edges if critical edges are + // present. + // + // The set of live FP registers in a LiveBundle is calculated by bundleCFG, + // but the exact mapping of FP registers to stack slots is fixed later. + struct LiveBundle { + // Bit mask of live FP registers. Bit 0 = FP0, bit 1 = FP1, &c. + unsigned Mask; + + // Number of pre-assigned live registers in FixStack. This is 0 when the + // stack order has not yet been fixed. + unsigned FixCount; + + // Assigned stack order for live-in registers. + // FixStack[i] == getStackEntry(i) for all i < FixCount. + unsigned char FixStack[8]; + + LiveBundle(unsigned m = 0) : Mask(m), FixCount(0) {} + + // Have the live registers been assigned a stack order yet? + bool isFixed() const { return !Mask || FixCount; } + }; + + // Numbered LiveBundle structs. LiveBundles[0] is used for all CFG edges + // with no live FP registers. + SmallVector<LiveBundle, 8> LiveBundles; + + // Map each MBB in the current function to an (ingoing, outgoing) index into + // LiveBundles. Blocks with no FP registers live in or out map to (0, 0) + // and are not actually stored in the map. + DenseMap<MachineBasicBlock*, std::pair<unsigned, unsigned> > BlockBundle; + + // Return a bitmask of FP registers in block's live-in list. + unsigned calcLiveInMask(MachineBasicBlock *MBB) { + unsigned Mask = 0; + for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(), + E = MBB->livein_end(); I != E; ++I) { + unsigned Reg = *I - X86::FP0; + if (Reg < 8) + Mask |= 1 << Reg; + } + return Mask; + } + + // Partition all the CFG edges into LiveBundles. + void bundleCFG(MachineFunction &MF); + MachineBasicBlock *MBB; // Current basic block unsigned Stack[8]; // FP<n> Registers in each stack slot... unsigned RegMap[8]; // Track which stack slot contains each register unsigned StackTop; // The current top of the FP stack. + // Set up our stack model to match the incoming registers to MBB. + void setupBlockStack(); + + // Shuffle live registers to match the expectations of successor blocks. + void finishBlockStack(); + void dumpStack() const { dbgs() << "Stack contents:"; for (unsigned i = 0; i != StackTop; ++i) { @@ -82,27 +143,36 @@ namespace { } dbgs() << "\n"; } - private: - /// isStackEmpty - Return true if the FP stack is empty. - bool isStackEmpty() const { - return StackTop == 0; - } - - // getSlot - Return the stack slot number a particular register number is - // in. + + /// getSlot - Return the stack slot number a particular register number is + /// in. unsigned getSlot(unsigned RegNo) const { assert(RegNo < 8 && "Regno out of range!"); return RegMap[RegNo]; } - // getStackEntry - Return the X86::FP<n> register in register ST(i). + /// isLive - Is RegNo currently live in the stack? 
+ bool isLive(unsigned RegNo) const { + unsigned Slot = getSlot(RegNo); + return Slot < StackTop && Stack[Slot] == RegNo; + } + + /// getScratchReg - Return an FP register that is not currently in use. + unsigned getScratchReg() { + for (int i = 7; i >= 0; --i) + if (!isLive(i)) + return i; + llvm_unreachable("Ran out of scratch FP registers"); + } + + /// getStackEntry - Return the X86::FP<n> register in register ST(i). unsigned getStackEntry(unsigned STi) const { assert(STi < StackTop && "Access past stack top!"); return Stack[StackTop-1-STi]; } - // getSTReg - Return the X86::ST(i) register which contains the specified - // FP<RegNo> register. + /// getSTReg - Return the X86::ST(i) register which contains the specified + /// FP<RegNo> register. unsigned getSTReg(unsigned RegNo) const { return StackTop - 1 - getSlot(RegNo) + llvm::X86::ST0; } @@ -117,10 +187,9 @@ namespace { bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; } void moveToTop(unsigned RegNo, MachineBasicBlock::iterator I) { - MachineInstr *MI = I; - DebugLoc dl = MI->getDebugLoc(); + DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc(); if (isAtTop(RegNo)) return; - + unsigned STReg = getSTReg(RegNo); unsigned RegOnTop = getStackEntry(0); @@ -137,24 +206,37 @@ namespace { } void duplicateToTop(unsigned RegNo, unsigned AsReg, MachineInstr *I) { - DebugLoc dl = I->getDebugLoc(); + DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc(); unsigned STReg = getSTReg(RegNo); pushReg(AsReg); // New register on top of stack BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg); } - // popStackAfter - Pop the current value off of the top of the FP stack - // after the specified instruction. + /// popStackAfter - Pop the current value off of the top of the FP stack + /// after the specified instruction. void popStackAfter(MachineBasicBlock::iterator &I); - // freeStackSlotAfter - Free the specified register from the register stack, - // so that it is no longer in a register. If the register is currently at - // the top of the stack, we just pop the current instruction, otherwise we - // store the current top-of-stack into the specified slot, then pop the top - // of stack. + /// freeStackSlotAfter - Free the specified register from the register + /// stack, so that it is no longer in a register. If the register is + /// currently at the top of the stack, we just pop the current instruction, + /// otherwise we store the current top-of-stack into the specified slot, + /// then pop the top of stack. void freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned Reg); + /// freeStackSlotBefore - Just the pop, no folding. Return the inserted + /// instruction. + MachineBasicBlock::iterator + freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo); + + /// Adjust the live registers to be the set in Mask. + void adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I); + + /// Shuffle the top FixCount stack entries susch that FP reg FixStack[0] is + /// st(0), FP reg FixStack[1] is st(1) etc. + void shuffleStackTop(const unsigned char *FixStack, unsigned FixCount, + MachineBasicBlock::iterator I); + bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB); void handleZeroArgFP(MachineBasicBlock::iterator &I); @@ -181,7 +263,6 @@ static unsigned getFPReg(const MachineOperand &MO) { return Reg - X86::FP0; } - /// runOnMachineFunction - Loop over all of the basic blocks, transforming FP /// register references into FP stack references. 
/// @@ -201,6 +282,10 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) { if (!FPIsUsed) return false; TII = MF.getTarget().getInstrInfo(); + + // Prepare cross-MBB liveness. + bundleCFG(MF); + StackTop = 0; // Process the function in depth first order so that we process at least one @@ -215,16 +300,111 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) { Changed |= processBasicBlock(MF, **I); // Process any unreachable blocks in arbitrary order now. - if (MF.size() == Processed.size()) - return Changed; + if (MF.size() != Processed.size()) + for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) + if (Processed.insert(BB)) + Changed |= processBasicBlock(MF, *BB); + + BlockBundle.clear(); + LiveBundles.clear(); - for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) - if (Processed.insert(BB)) - Changed |= processBasicBlock(MF, *BB); - return Changed; } +/// bundleCFG - Scan all the basic blocks to determine consistent live-in and +/// live-out sets for the FP registers. Consistent means that the set of +/// registers live-out from a block is identical to the live-in set of all +/// successors. This is not enforced by the normal live-in lists since +/// registers may be implicitly defined, or not used by all successors. +void FPS::bundleCFG(MachineFunction &MF) { + assert(LiveBundles.empty() && "Stale data in LiveBundles"); + assert(BlockBundle.empty() && "Stale data in BlockBundle"); + SmallPtrSet<MachineBasicBlock*, 8> PropDown, PropUp; + + // LiveBundle[0] is the empty live-in set. + LiveBundles.resize(1); + + // First gather the actual live-in masks for all MBBs. + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + MachineBasicBlock *MBB = I; + const unsigned Mask = calcLiveInMask(MBB); + if (!Mask) + continue; + // Ingoing bundle index. + unsigned &Idx = BlockBundle[MBB].first; + // Already assigned an ingoing bundle? + if (Idx) + continue; + // Allocate a new LiveBundle struct for this block's live-ins. + const unsigned BundleIdx = Idx = LiveBundles.size(); + DEBUG(dbgs() << "Creating LB#" << BundleIdx << ": in:BB#" + << MBB->getNumber()); + LiveBundles.push_back(Mask); + LiveBundle &Bundle = LiveBundles.back(); + + // Make sure all predecessors have the same live-out set. + PropUp.insert(MBB); + + // Keep pushing liveness up and down the CFG until convergence. + // Only critical edges cause iteration here, but when they do, multiple + // blocks can be assigned to the same LiveBundle index. + do { + // Assign BundleIdx as liveout from predecessors in PropUp. + for (SmallPtrSet<MachineBasicBlock*, 16>::iterator I = PropUp.begin(), + E = PropUp.end(); I != E; ++I) { + MachineBasicBlock *MBB = *I; + for (MachineBasicBlock::const_pred_iterator LinkI = MBB->pred_begin(), + LinkE = MBB->pred_end(); LinkI != LinkE; ++LinkI) { + MachineBasicBlock *PredMBB = *LinkI; + // PredMBB's liveout bundle should be set to LIIdx. + unsigned &Idx = BlockBundle[PredMBB].second; + if (Idx) { + assert(Idx == BundleIdx && "Inconsistent CFG"); + continue; + } + Idx = BundleIdx; + DEBUG(dbgs() << " out:BB#" << PredMBB->getNumber()); + // Propagate to siblings. + if (PredMBB->succ_size() > 1) + PropDown.insert(PredMBB); + } + } + PropUp.clear(); + + // Assign BundleIdx as livein to successors in PropDown. 
+ for (SmallPtrSet<MachineBasicBlock*, 16>::iterator I = PropDown.begin(), + E = PropDown.end(); I != E; ++I) { + MachineBasicBlock *MBB = *I; + for (MachineBasicBlock::const_succ_iterator LinkI = MBB->succ_begin(), + LinkE = MBB->succ_end(); LinkI != LinkE; ++LinkI) { + MachineBasicBlock *SuccMBB = *LinkI; + // LinkMBB's livein bundle should be set to BundleIdx. + unsigned &Idx = BlockBundle[SuccMBB].first; + if (Idx) { + assert(Idx == BundleIdx && "Inconsistent CFG"); + continue; + } + Idx = BundleIdx; + DEBUG(dbgs() << " in:BB#" << SuccMBB->getNumber()); + // Propagate to siblings. + if (SuccMBB->pred_size() > 1) + PropUp.insert(SuccMBB); + // Also accumulate the bundle liveness mask from the liveins here. + Bundle.Mask |= calcLiveInMask(SuccMBB); + } + } + PropDown.clear(); + } while (!PropUp.empty()); + DEBUG({ + dbgs() << " live:"; + for (unsigned i = 0; i < 8; ++i) + if (Bundle.Mask & (1<<i)) + dbgs() << " %FP" << i; + dbgs() << '\n'; + }); + } +} + /// processBasicBlock - Loop over all of the instructions in the basic block, /// transforming FP instructions into their stack form. /// @@ -232,10 +412,12 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { bool Changed = false; MBB = &BB; + setupBlockStack(); + for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) { MachineInstr *MI = I; uint64_t Flags = MI->getDesc().TSFlags; - + unsigned FPInstClass = Flags & X86II::FPTypeMask; if (MI->isInlineAsm()) FPInstClass = X86II::SpecialFP; @@ -302,10 +484,82 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { Changed = true; } - assert(isStackEmpty() && "Stack not empty at end of basic block?"); + finishBlockStack(); + return Changed; } +/// setupBlockStack - Use the BlockBundle map to set up our model of the stack +/// to match predecessors' live out stack. +void FPS::setupBlockStack() { + DEBUG(dbgs() << "\nSetting up live-ins for BB#" << MBB->getNumber() + << " derived from " << MBB->getName() << ".\n"); + StackTop = 0; + const LiveBundle &Bundle = LiveBundles[BlockBundle.lookup(MBB).first]; + + if (!Bundle.Mask) { + DEBUG(dbgs() << "Block has no FP live-ins.\n"); + return; + } + + // Depth-first iteration should ensure that we always have an assigned stack. + assert(Bundle.isFixed() && "Reached block before any predecessors"); + + // Push the fixed live-in registers. + for (unsigned i = Bundle.FixCount; i > 0; --i) { + MBB->addLiveIn(X86::ST0+i-1); + DEBUG(dbgs() << "Live-in st(" << (i-1) << "): %FP" + << unsigned(Bundle.FixStack[i-1]) << '\n'); + pushReg(Bundle.FixStack[i-1]); + } + + // Kill off unwanted live-ins. This can happen with a critical edge. + // FIXME: We could keep these live registers around as zombies. They may need + // to be revived at the end of a short block. It might save a few instrs. + adjustLiveRegs(calcLiveInMask(MBB), MBB->begin()); + DEBUG(MBB->dump()); +} + +/// finishBlockStack - Revive live-outs that are implicitly defined out of +/// MBB. Shuffle live registers to match the expected fixed stack of any +/// predecessors, and ensure that all predecessors are expecting the same +/// stack. +void FPS::finishBlockStack() { + // The RET handling below takes care of return blocks for us. 
+ if (MBB->succ_empty()) + return; + + DEBUG(dbgs() << "Setting up live-outs for BB#" << MBB->getNumber() + << " derived from " << MBB->getName() << ".\n"); + + unsigned BundleIdx = BlockBundle.lookup(MBB).second; + LiveBundle &Bundle = LiveBundles[BundleIdx]; + + // We may need to kill and define some registers to match successors. + // FIXME: This can probably be combined with the shuffle below. + MachineBasicBlock::iterator Term = MBB->getFirstTerminator(); + adjustLiveRegs(Bundle.Mask, Term); + + if (!Bundle.Mask) { + DEBUG(dbgs() << "No live-outs.\n"); + return; + } + + // Has the stack order been fixed yet? + DEBUG(dbgs() << "LB#" << BundleIdx << ": "); + if (Bundle.isFixed()) { + DEBUG(dbgs() << "Shuffling stack to match.\n"); + shuffleStackTop(Bundle.FixStack, Bundle.FixCount, Term); + } else { + // Not fixed yet, we get to choose. + DEBUG(dbgs() << "Fixing stack order now.\n"); + Bundle.FixCount = StackTop; + for (unsigned i = 0; i < StackTop; ++i) + Bundle.FixStack[i] = getStackEntry(i); + } +} + + //===----------------------------------------------------------------------===// // Efficient Lookup Table Support //===----------------------------------------------------------------------===// @@ -318,7 +572,7 @@ namespace { friend bool operator<(const TableEntry &TE, unsigned V) { return TE.from < V; } - friend bool operator<(unsigned V, const TableEntry &TE) { + friend bool ATTRIBUTE_USED operator<(unsigned V, const TableEntry &TE) { return V < TE.from; } }; @@ -597,6 +851,13 @@ void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) { // Otherwise, store the top of stack into the dead slot, killing the operand // without having to add in an explicit xchg then pop. // + I = freeStackSlotBefore(++I, FPRegNo); +} + +/// freeStackSlotBefore - Free the specified register without trying any +/// folding. +MachineBasicBlock::iterator +FPS::freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo) { unsigned STReg = getSTReg(FPRegNo); unsigned OldSlot = getSlot(FPRegNo); unsigned TopReg = Stack[StackTop-1]; @@ -604,9 +865,90 @@ void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) { RegMap[TopReg] = OldSlot; RegMap[FPRegNo] = ~0; Stack[--StackTop] = ~0; - MachineInstr *MI = I; - DebugLoc dl = MI->getDebugLoc(); - I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(STReg); + return BuildMI(*MBB, I, DebugLoc(), TII->get(X86::ST_FPrr)).addReg(STReg); +} + +/// adjustLiveRegs - Kill and revive registers such that exactly the FP +/// registers with a bit in Mask are live. +void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) { + unsigned Defs = Mask; + unsigned Kills = 0; + for (unsigned i = 0; i < StackTop; ++i) { + unsigned RegNo = Stack[i]; + if (!(Defs & (1 << RegNo))) + // This register is live, but we don't want it. + Kills |= (1 << RegNo); + else + // We don't need to imp-def this live register. + Defs &= ~(1 << RegNo); + } + assert((Kills & Defs) == 0 && "Register needs killing and def'ing?"); + + // Produce implicit-defs for free by using killed registers. + while (Kills && Defs) { + unsigned KReg = CountTrailingZeros_32(Kills); + unsigned DReg = CountTrailingZeros_32(Defs); + DEBUG(dbgs() << "Renaming %FP" << KReg << " as imp %FP" << DReg << "\n"); + std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]); + std::swap(RegMap[KReg], RegMap[DReg]); + Kills &= ~(1 << KReg); + Defs &= ~(1 << DReg); + } + + // Kill registers by popping. 
+ if (Kills && I != MBB->begin()) { + MachineBasicBlock::iterator I2 = llvm::prior(I); + for (;;) { + unsigned KReg = getStackEntry(0); + if (!(Kills & (1 << KReg))) + break; + DEBUG(dbgs() << "Popping %FP" << KReg << "\n"); + popStackAfter(I2); + Kills &= ~(1 << KReg); + } + } + + // Manually kill the rest. + while (Kills) { + unsigned KReg = CountTrailingZeros_32(Kills); + DEBUG(dbgs() << "Killing %FP" << KReg << "\n"); + freeStackSlotBefore(I, KReg); + Kills &= ~(1 << KReg); + } + + // Load zeros for all the imp-defs. + while(Defs) { + unsigned DReg = CountTrailingZeros_32(Defs); + DEBUG(dbgs() << "Defining %FP" << DReg << " as 0\n"); + BuildMI(*MBB, I, DebugLoc(), TII->get(X86::LD_F0)); + pushReg(DReg); + Defs &= ~(1 << DReg); + } + + // Now we should have the correct registers live. + DEBUG(dumpStack()); + assert(StackTop == CountPopulation_32(Mask) && "Live count mismatch"); +} + +/// shuffleStackTop - emit fxch instructions before I to shuffle the top +/// FixCount entries into the order given by FixStack. +/// FIXME: Is there a better algorithm than insertion sort? +void FPS::shuffleStackTop(const unsigned char *FixStack, + unsigned FixCount, + MachineBasicBlock::iterator I) { + // Move items into place, starting from the desired stack bottom. + while (FixCount--) { + // Old register at position FixCount. + unsigned OldReg = getStackEntry(FixCount); + // Desired register at position FixCount. + unsigned Reg = FixStack[FixCount]; + if (Reg == OldReg) + continue; + // (Reg st0) (OldReg st0) = (Reg OldReg st0) + moveToTop(Reg, I); + moveToTop(OldReg, I); + } + DEBUG(dumpStack()); } @@ -660,7 +1002,7 @@ void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) { MI->getOpcode() == X86::ISTT_Fp32m80 || MI->getOpcode() == X86::ISTT_Fp64m80 || MI->getOpcode() == X86::ST_FpP80m)) { - duplicateToTop(Reg, 7 /*temp register*/, I); + duplicateToTop(Reg, getScratchReg(), I); } else { moveToTop(Reg, I); // Move to the top of the stack... } @@ -1013,8 +1355,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { if (!MI->killsRegister(X86::FP0 + Op0)) { // Duplicate Op0 into a temporary on the stack top. - // This actually assumes that FP7 is dead. - duplicateToTop(Op0, 7, I); + duplicateToTop(Op0, getScratchReg(), I); } else { // Op0 is killed, so just swap it into position. moveToTop(Op0, I); @@ -1034,8 +1375,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { ++StackTop; unsigned RegOnTop = getStackEntry(0); // This reg must remain in st(0). if (!MI->killsRegister(X86::FP0 + Op0)) { - // Assume FP6 is not live, use it as a scratch register. - duplicateToTop(Op0, 6, I); + duplicateToTop(Op0, getScratchReg(), I); moveToTop(RegOnTop, I); } else if (getSTReg(Op0) != X86::ST1) { // We have the wrong value at st(1). Shuffle! Untested! @@ -1119,11 +1459,11 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { case X86::RETI: // If RET has an FP register use operand, pass the first one in ST(0) and // the second one in ST(1). - if (isStackEmpty()) return; // Quick check to see if any are possible. - + // Find the register operands. 
unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U; - + unsigned LiveMask = 0; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &Op = MI->getOperand(i); if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6) @@ -1142,12 +1482,18 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { assert(SecondFPRegOp == ~0U && "More than two fp operands!"); SecondFPRegOp = getFPReg(Op); } + LiveMask |= (1 << getFPReg(Op)); // Remove the operand so that later passes don't see it. MI->RemoveOperand(i); --i, --e; } - + + // We may have been carrying spurious live-ins, so make sure only the returned + // registers are left live. + adjustLiveRegs(LiveMask, MI); + if (!LiveMask) return; // Quick check to see if any are possible. + // There are only four possibilities here: // 1) we are returning a single FP value. In this case, it has to be in // ST(0) already, so just declare success by removing the value from the @@ -1173,7 +1519,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { // Duplicate the TOS so that we return it twice. Just pick some other FPx // register to hold it. - unsigned NewReg = (FirstFPRegOp+1)%7; + unsigned NewReg = getScratchReg(); duplicateToTop(FirstFPRegOp, NewReg, MI); FirstFPRegOp = NewReg; } @@ -1197,7 +1543,14 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { } I = MBB->erase(I); // Remove the pseudo instruction - --I; + + // We want to leave I pointing to the previous instruction, but what if we + // just erased the first instruction? + if (I == MBB->begin()) { + DEBUG(dbgs() << "Inserting dummy KILL\n"); + I = BuildMI(*MBB, I, DebugLoc(), TII->get(TargetOpcode::KILL)); + } else + --I; } // Translate a COPY instruction to a pseudo-op that handleSpecialFP understands. diff --git a/lib/Target/X86/X86FloatingPointRegKill.cpp b/lib/Target/X86/X86FloatingPointRegKill.cpp deleted file mode 100644 index 2c98b96c510b..000000000000 --- a/lib/Target/X86/X86FloatingPointRegKill.cpp +++ /dev/null @@ -1,153 +0,0 @@ -//===-- X86FloatingPoint.cpp - FP_REG_KILL inserter -----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the pass which inserts FP_REG_KILL instructions. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "x86-codegen" -#include "X86.h" -#include "X86InstrInfo.h" -#include "llvm/Instructions.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/CFG.h" -#include "llvm/ADT/Statistic.h" -using namespace llvm; - -STATISTIC(NumFPKill, "Number of FP_REG_KILL instructions added"); - -namespace { - struct FPRegKiller : public MachineFunctionPass { - static char ID; - FPRegKiller() : MachineFunctionPass(&ID) {} - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - AU.addPreservedID(MachineLoopInfoID); - AU.addPreservedID(MachineDominatorsID); - MachineFunctionPass::getAnalysisUsage(AU); - } - - virtual bool runOnMachineFunction(MachineFunction &MF); - - virtual const char *getPassName() const { - return "X86 FP_REG_KILL inserter"; - } - }; - char FPRegKiller::ID = 0; -} - -FunctionPass *llvm::createX87FPRegKillInserterPass() { - return new FPRegKiller(); -} - -/// isFPStackVReg - Return true if the specified vreg is from a fp stack -/// register class. -static bool isFPStackVReg(unsigned RegNo, const MachineRegisterInfo &MRI) { - if (!TargetRegisterInfo::isVirtualRegister(RegNo)) - return false; - - switch (MRI.getRegClass(RegNo)->getID()) { - default: return false; - case X86::RFP32RegClassID: - case X86::RFP64RegClassID: - case X86::RFP80RegClassID: - return true; - } -} - - -/// ContainsFPStackCode - Return true if the specific MBB has floating point -/// stack code, and thus needs an FP_REG_KILL. -static bool ContainsFPStackCode(MachineBasicBlock *MBB, - const MachineRegisterInfo &MRI) { - // Scan the block, looking for instructions that define or use fp stack vregs. - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); - I != E; ++I) { - for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) { - if (!I->getOperand(op).isReg()) - continue; - if (unsigned Reg = I->getOperand(op).getReg()) - if (isFPStackVReg(Reg, MRI)) - return true; - } - } - - // Check PHI nodes in successor blocks. These PHI's will be lowered to have - // a copy of the input value in this block, which is a definition of the - // value. - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - E = MBB->succ_end(); SI != E; ++ SI) { - MachineBasicBlock *SuccBB = *SI; - for (MachineBasicBlock::iterator I = SuccBB->begin(), E = SuccBB->end(); - I != E; ++I) { - // All PHI nodes are at the top of the block. - if (!I->isPHI()) break; - - if (isFPStackVReg(I->getOperand(0).getReg(), MRI)) - return true; - } - } - - return false; -} - -bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) { - // If we are emitting FP stack code, scan the basic block to determine if this - // block defines or uses any FP values. If so, put an FP_REG_KILL instruction - // before the terminator of the block. - - // Note that FP stack instructions are used in all modes for long double, - // so we always need to do this check. - // Also note that it's possible for an FP stack register to be live across - // an instruction that produces multiple basic blocks (SSE CMOV) so we - // must check all the generated basic blocks. - - // Scan all of the machine instructions in these MBBs, checking for FP - // stores. 
(RFP32 and RFP64 will not exist in SSE mode, but RFP80 might.) - - // Fast-path: If nothing is using the x87 registers, we don't need to do - // any scanning. - const MachineRegisterInfo &MRI = MF.getRegInfo(); - if (MRI.getRegClassVirtRegs(X86::RFP80RegisterClass).empty() && - MRI.getRegClassVirtRegs(X86::RFP64RegisterClass).empty() && - MRI.getRegClassVirtRegs(X86::RFP32RegisterClass).empty()) - return false; - - bool Changed = false; - MachineFunction::iterator MBBI = MF.begin(); - MachineFunction::iterator EndMBB = MF.end(); - for (; MBBI != EndMBB; ++MBBI) { - MachineBasicBlock *MBB = MBBI; - - // If this block returns, ignore it. We don't want to insert an FP_REG_KILL - // before the return. - if (!MBB->empty()) { - MachineBasicBlock::iterator EndI = MBB->end(); - --EndI; - if (EndI->getDesc().isReturn()) - continue; - } - - // If we find any FP stack code, emit the FP_REG_KILL instruction. - if (ContainsFPStackCode(MBB, MRI)) { - BuildMI(*MBB, MBBI->getFirstTerminator(), DebugLoc(), - MF.getTarget().getInstrInfo()->get(X86::FP_REG_KILL)); - ++NumFPKill; - Changed = true; - } - } - - return Changed; -} diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 72f2bc11d7cc..c5234413aba6 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -171,6 +171,17 @@ namespace { virtual void PreprocessISelDAG(); + inline bool immSext8(SDNode *N) const { + return isInt<8>(cast<ConstantSDNode>(N)->getSExtValue()); + } + + // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit + // sign extended field. + inline bool i64immSExt32(SDNode *N) const { + uint64_t v = cast<ConstantSDNode>(N)->getZExtValue(); + return (int64_t)v == (int32_t)v; + } + // Include the pieces autogenerated from the target description. 
#include "X86GenDAGISel.inc" @@ -1312,13 +1323,6 @@ SDNode *X86DAGToDAGISel::getGlobalBaseReg() { return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode(); } -static SDNode *FindCallStartFromCall(SDNode *Node) { - if (Node->getOpcode() == ISD::CALLSEQ_START) return Node; - assert(Node->getOperand(0).getValueType() == MVT::Other && - "Node doesn't have a token chain argument!"); - return FindCallStartFromCall(Node->getOperand(0).getNode()); -} - SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { SDValue Chain = Node->getOperand(0); SDValue In1 = Node->getOperand(1); @@ -1403,7 +1407,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { Opc = X86::LOCK_DEC16m; else if (isSub) { if (isCN) { - if (Predicate_immSext8(Val.getNode())) + if (immSext8(Val.getNode())) Opc = X86::LOCK_SUB16mi8; else Opc = X86::LOCK_SUB16mi; @@ -1411,7 +1415,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { Opc = X86::LOCK_SUB16mr; } else { if (isCN) { - if (Predicate_immSext8(Val.getNode())) + if (immSext8(Val.getNode())) Opc = X86::LOCK_ADD16mi8; else Opc = X86::LOCK_ADD16mi; @@ -1426,7 +1430,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { Opc = X86::LOCK_DEC32m; else if (isSub) { if (isCN) { - if (Predicate_immSext8(Val.getNode())) + if (immSext8(Val.getNode())) Opc = X86::LOCK_SUB32mi8; else Opc = X86::LOCK_SUB32mi; @@ -1434,7 +1438,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { Opc = X86::LOCK_SUB32mr; } else { if (isCN) { - if (Predicate_immSext8(Val.getNode())) + if (immSext8(Val.getNode())) Opc = X86::LOCK_ADD32mi8; else Opc = X86::LOCK_ADD32mi; @@ -1450,17 +1454,17 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { else if (isSub) { Opc = X86::LOCK_SUB64mr; if (isCN) { - if (Predicate_immSext8(Val.getNode())) + if (immSext8(Val.getNode())) Opc = X86::LOCK_SUB64mi8; - else if (Predicate_i64immSExt32(Val.getNode())) + else if (i64immSExt32(Val.getNode())) Opc = X86::LOCK_SUB64mi32; } } else { Opc = X86::LOCK_ADD64mr; if (isCN) { - if (Predicate_immSext8(Val.getNode())) + if (immSext8(Val.getNode())) Opc = X86::LOCK_ADD64mi8; - else if (Predicate_i64immSExt32(Val.getNode())) + else if (i64immSExt32(Val.getNode())) Opc = X86::LOCK_ADD64mi32; } } @@ -1841,7 +1845,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to // use a smaller encoding. - if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse()) + if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && + HasNoSignedComparisonUses(Node)) // Look past the truncate if CMP is the only use of it. N0 = N0.getOperand(0); if (N0.getNode()->getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index b3c48862898f..95dbb6176687 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -16,6 +16,7 @@ #include "X86.h" #include "X86InstrBuilder.h" #include "X86ISelLowering.h" +#include "X86ShuffleDecode.h" #include "X86TargetMachine.h" #include "X86TargetObjectFile.h" #include "llvm/CallingConv.h" @@ -343,8 +344,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) if (Subtarget->hasSSE1()) setOperationAction(ISD::PREFETCH , MVT::Other, Legal); - if (!Subtarget->hasSSE2()) - setOperationAction(ISD::MEMBARRIER , MVT::Other, Expand); + // We may not have a libcall for MEMBARRIER so we should lower this. 
+ setOperationAction(ISD::MEMBARRIER , MVT::Other, Custom); + // On X86 and X86-64, atomic operations are lowered to locked instructions. // Locked instructions, in turn, have implicit fence semantics (all memory // operations are flushed before issuing the locked instruction, and they @@ -837,6 +839,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // FIXME: Do we need to handle scalar-to-vector here? setOperationAction(ISD::MUL, MVT::v4i32, Legal); + // Can turn SHL into an integer multiply. + setOperationAction(ISD::SHL, MVT::v4i32, Custom); + setOperationAction(ISD::SHL, MVT::v16i8, Custom); + // i8 and i16 vectors are custom , because the source register and source // source memory operand types are not the same width. f32 vectors are // custom since the immediate controlling the insert encodes additional @@ -866,6 +872,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) addRegisterClass(MVT::v4f64, X86::VR256RegisterClass); addRegisterClass(MVT::v8i32, X86::VR256RegisterClass); addRegisterClass(MVT::v4i64, X86::VR256RegisterClass); + addRegisterClass(MVT::v32i8, X86::VR256RegisterClass); setOperationAction(ISD::LOAD, MVT::v8f32, Legal); setOperationAction(ISD::LOAD, MVT::v8i32, Legal); @@ -877,7 +884,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FDIV, MVT::v8f32, Legal); setOperationAction(ISD::FSQRT, MVT::v8f32, Legal); setOperationAction(ISD::FNEG, MVT::v8f32, Custom); - //setOperationAction(ISD::BUILD_VECTOR, MVT::v8f32, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v8f32, Custom); //setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Custom); //setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8f32, Custom); //setOperationAction(ISD::SELECT, MVT::v8f32, Custom); @@ -1189,6 +1196,50 @@ unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const { return F->hasFnAttr(Attribute::OptimizeForSize) ? 0 : 4; } +std::pair<const TargetRegisterClass*, uint8_t> +X86TargetLowering::findRepresentativeClass(EVT VT) const{ + const TargetRegisterClass *RRC = 0; + uint8_t Cost = 1; + switch (VT.getSimpleVT().SimpleTy) { + default: + return TargetLowering::findRepresentativeClass(VT); + case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64: + RRC = (Subtarget->is64Bit() + ? X86::GR64RegisterClass : X86::GR32RegisterClass); + break; + case MVT::v8i8: case MVT::v4i16: + case MVT::v2i32: case MVT::v1i64: + RRC = X86::VR64RegisterClass; + break; + case MVT::f32: case MVT::f64: + case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64: + case MVT::v4f32: case MVT::v2f64: + case MVT::v32i8: case MVT::v8i32: case MVT::v4i64: case MVT::v8f32: + case MVT::v4f64: + RRC = X86::VR128RegisterClass; + break; + } + return std::make_pair(RRC, Cost); +} + +unsigned +X86TargetLowering::getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const { + unsigned FPDiff = RegInfo->hasFP(MF) ? 1 : 0; + switch (RC->getID()) { + default: + return 0; + case X86::GR32RegClassID: + return 4 - FPDiff; + case X86::GR64RegClassID: + return 8 - FPDiff; + case X86::VR128RegClassID: + return Subtarget->is64Bit() ? 
10 : 4; + case X86::VR64RegClassID: + return 4; + } +} + bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace, unsigned &Offset) const { if (!Subtarget->isTargetLinux()) @@ -1259,6 +1310,19 @@ X86TargetLowering::LowerReturn(SDValue Chain, CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); SDValue ValToCopy = OutVals[i]; + EVT ValVT = ValToCopy.getValueType(); + + // If this is x86-64, and we disabled SSE, we can't return FP values + if ((ValVT == MVT::f32 || ValVT == MVT::f64) && + (Subtarget->is64Bit() && !Subtarget->hasSSE1())) { + report_fatal_error("SSE register return with SSE disabled"); + } + // Likewise we can't return F64 values with SSE1 only. gcc does so, but + // llvm-gcc has never done it right and no one has noticed, so this + // should be OK for now. + if (ValVT == MVT::f64 && + (Subtarget->is64Bit() && !Subtarget->hasSSE2())) + report_fatal_error("SSE2 register return with SSE2 disabled"); // Returns in ST0/ST1 are handled specially: these are pushed as operands to // the RET instruction and handled by the FP Stackifier. @@ -1276,14 +1340,20 @@ X86TargetLowering::LowerReturn(SDValue Chain, // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64 // which is returned in RAX / RDX. if (Subtarget->is64Bit()) { - EVT ValVT = ValToCopy.getValueType(); if (ValVT.isVector() && ValVT.getSizeInBits() == 64) { ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, ValToCopy); - if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) - ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, ValToCopy); + if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) { + ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, + ValToCopy); + + // If we don't have SSE2 available, convert to v4f32 so the generated + // register is legal. + if (!Subtarget->hasSSE2()) + ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,ValToCopy); + } } } - + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag); Flag = Chain.getValue(1); } @@ -1570,6 +1640,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, RC = X86::FR32RegisterClass; else if (RegVT == MVT::f64) RC = X86::FR64RegisterClass; + else if (RegVT.isVector() && RegVT.getSizeInBits() == 256) + RC = X86::VR256RegisterClass; else if (RegVT.isVector() && RegVT.getSizeInBits() == 128) RC = X86::VR128RegisterClass; else if (RegVT.isVector() && RegVT.getSizeInBits() == 64) @@ -1937,6 +2009,19 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (VA.isRegLoc()) { RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + if (isVarArg && Subtarget->isTargetWin64()) { + // Win64 ABI requires argument XMM reg to be copied to the corresponding + // shadow reg if callee is a varargs function. 
+ unsigned ShadowReg = 0; + switch (VA.getLocReg()) { + case X86::XMM0: ShadowReg = X86::RCX; break; + case X86::XMM1: ShadowReg = X86::RDX; break; + case X86::XMM2: ShadowReg = X86::R8; break; + case X86::XMM3: ShadowReg = X86::R9; break; + } + if (ShadowReg) + RegsToPass.push_back(std::make_pair(ShadowReg, Arg)); + } } else if (!IsSibcall && (!isTailCall || isByVal)) { assert(VA.isMemLoc()); if (StackPtr.getNode() == 0) @@ -1990,7 +2075,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, } } - if (Is64Bit && isVarArg) { + if (Is64Bit && isVarArg && !Subtarget->isTargetWin64()) { // From AMD64 ABI document: // For calls that may call functions that use varargs or stdargs // (prototype-less calls or calls to functions containing ellipsis (...) in @@ -1999,7 +2084,6 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // the number of registers, but must be an ubound on the number of SSE // registers used and is in the range 0 - 8 inclusive. - // FIXME: Verify this on Win64 // Count the number of XMM registers allocated. static const unsigned XMMArgRegs[] = { X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, @@ -2165,8 +2249,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (!isTailCall && Subtarget->isPICStyleGOT()) Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy())); - // Add an implicit use of AL for x86 vararg functions. - if (Is64Bit && isVarArg) + // Add an implicit use of AL for non-Windows x86 64-bit vararg functions. + if (Is64Bit && isVarArg && !Subtarget->isTargetWin64()) Ops.push_back(DAG.getRegister(X86::AL, MVT::i8)); if (InFlag.getNode()) @@ -2356,8 +2440,8 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, if (RegInfo->needsStackRealignment(MF)) return false; - // Do not sibcall optimize vararg calls unless the call site is not passing any - // arguments. + // Do not sibcall optimize vararg calls unless the call site is not passing + // any arguments. 
if (isVarArg && !Outs.empty()) return false; @@ -2493,6 +2577,112 @@ X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const { // Other Lowering Hooks //===----------------------------------------------------------------------===// +static bool MayFoldLoad(SDValue Op) { + return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode()); +} + +static bool MayFoldIntoStore(SDValue Op) { + return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin()); +} + +static bool isTargetShuffle(unsigned Opcode) { + switch(Opcode) { + default: return false; + case X86ISD::PSHUFD: + case X86ISD::PSHUFHW: + case X86ISD::PSHUFLW: + case X86ISD::SHUFPD: + case X86ISD::SHUFPS: + case X86ISD::MOVLHPS: + case X86ISD::MOVLHPD: + case X86ISD::MOVHLPS: + case X86ISD::MOVLPS: + case X86ISD::MOVLPD: + case X86ISD::MOVSHDUP: + case X86ISD::MOVSLDUP: + case X86ISD::MOVSS: + case X86ISD::MOVSD: + case X86ISD::UNPCKLPS: + case X86ISD::UNPCKLPD: + case X86ISD::PUNPCKLWD: + case X86ISD::PUNPCKLBW: + case X86ISD::PUNPCKLDQ: + case X86ISD::PUNPCKLQDQ: + case X86ISD::UNPCKHPS: + case X86ISD::UNPCKHPD: + case X86ISD::PUNPCKHWD: + case X86ISD::PUNPCKHBW: + case X86ISD::PUNPCKHDQ: + case X86ISD::PUNPCKHQDQ: + return true; + } + return false; +} + +static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, + SDValue V1, SelectionDAG &DAG) { + switch(Opc) { + default: llvm_unreachable("Unknown x86 shuffle node"); + case X86ISD::MOVSHDUP: + case X86ISD::MOVSLDUP: + return DAG.getNode(Opc, dl, VT, V1); + } + + return SDValue(); +} + +static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, + SDValue V1, unsigned TargetMask, SelectionDAG &DAG) { + switch(Opc) { + default: llvm_unreachable("Unknown x86 shuffle node"); + case X86ISD::PSHUFD: + case X86ISD::PSHUFHW: + case X86ISD::PSHUFLW: + return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8)); + } + + return SDValue(); +} + +static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, + SDValue V1, SDValue V2, unsigned TargetMask, SelectionDAG &DAG) { + switch(Opc) { + default: llvm_unreachable("Unknown x86 shuffle node"); + case X86ISD::SHUFPD: + case X86ISD::SHUFPS: + return DAG.getNode(Opc, dl, VT, V1, V2, + DAG.getConstant(TargetMask, MVT::i8)); + } + return SDValue(); +} + +static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, + SDValue V1, SDValue V2, SelectionDAG &DAG) { + switch(Opc) { + default: llvm_unreachable("Unknown x86 shuffle node"); + case X86ISD::MOVLHPS: + case X86ISD::MOVLHPD: + case X86ISD::MOVHLPS: + case X86ISD::MOVLPS: + case X86ISD::MOVLPD: + case X86ISD::MOVSS: + case X86ISD::MOVSD: + case X86ISD::UNPCKLPS: + case X86ISD::UNPCKLPD: + case X86ISD::PUNPCKLWD: + case X86ISD::PUNPCKLBW: + case X86ISD::PUNPCKLDQ: + case X86ISD::PUNPCKLQDQ: + case X86ISD::UNPCKHPS: + case X86ISD::UNPCKHPD: + case X86ISD::PUNPCKHWD: + case X86ISD::PUNPCKHBW: + case X86ISD::PUNPCKHDQ: + case X86ISD::PUNPCKHQDQ: + return DAG.getNode(Opc, dl, VT, V1, V2); + } + return SDValue(); +} SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); @@ -3347,18 +3537,27 @@ static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG, DebugLoc dl) { assert(VT.isVector() && "Expected a vector type"); - // Always build zero vectors as <4 x i32> or <2 x i32> bitcasted to their dest - // type. This ensures they get CSE'd. + // Always build zero vectors as <4 x i32> or <2 x i32> bitcasted + // to their dest type. 
This ensures they get CSE'd. SDValue Vec; if (VT.getSizeInBits() == 64) { // MMX SDValue Cst = DAG.getTargetConstant(0, MVT::i32); Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst); - } else if (HasSSE2) { // SSE2 - SDValue Cst = DAG.getTargetConstant(0, MVT::i32); - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); - } else { // SSE1 + } else if (VT.getSizeInBits() == 128) { + if (HasSSE2) { // SSE2 + SDValue Cst = DAG.getTargetConstant(0, MVT::i32); + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); + } else { // SSE1 + SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32); + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst); + } + } else if (VT.getSizeInBits() == 256) { // AVX + // 256-bit logic and arithmetic instructions in AVX are + // all floating-point, no support for integer ops. Default + // to emitting fp zeroed vectors then. SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32); - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst); + SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, 8); } return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec); } @@ -3372,9 +3571,9 @@ static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { // type. This ensures they get CSE'd. SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32); SDValue Vec; - if (VT.getSizeInBits() == 64) // MMX + if (VT.getSizeInBits() == 64) // MMX Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst); - else // SSE + else // SSE Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec); } @@ -3439,9 +3638,8 @@ static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1, return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]); } -/// PromoteSplat - Promote a splat of v4f32, v8i16 or v16i8 to v4i32. -static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG, - bool HasSSE2) { +/// PromoteSplat - Promote a splat of v4i32, v8i16 or v16i8 to v4f32. +static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) { if (SV->getValueType(0).getVectorNumElements() <= 4) return SDValue(SV, 0); @@ -3488,68 +3686,253 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx, return DAG.getVectorShuffle(VT, V2.getDebugLoc(), V1, V2, &MaskVec[0]); } -/// getNumOfConsecutiveZeros - Return the number of elements in a result of -/// a shuffle that is zero. -static -unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, int NumElems, - bool Low, SelectionDAG &DAG) { - unsigned NumZeros = 0; - for (int i = 0; i < NumElems; ++i) { - unsigned Index = Low ? i : NumElems-i-1; - int Idx = SVOp->getMaskElt(Index); - if (Idx < 0) { - ++NumZeros; - continue; - } - SDValue Elt = DAG.getShuffleScalarElt(SVOp, Index); - if (Elt.getNode() && X86::isZeroNode(Elt)) - ++NumZeros; - else +/// getShuffleScalarElt - Returns the scalar element that will make up the ith +/// element of the result of the vector shuffle. +SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, + unsigned Depth) { + if (Depth == 6) + return SDValue(); // Limit search depth. + + SDValue V = SDValue(N, 0); + EVT VT = V.getValueType(); + unsigned Opcode = V.getOpcode(); + + // Recurse into ISD::VECTOR_SHUFFLE node to find scalars. 
+ if (const ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(N)) { + Index = SV->getMaskElt(Index); + + if (Index < 0) + return DAG.getUNDEF(VT.getVectorElementType()); + + int NumElems = VT.getVectorNumElements(); + SDValue NewV = (Index < NumElems) ? SV->getOperand(0) : SV->getOperand(1); + return getShuffleScalarElt(NewV.getNode(), Index % NumElems, DAG, Depth+1); + } + + // Recurse into target specific vector shuffles to find scalars. + if (isTargetShuffle(Opcode)) { + int NumElems = VT.getVectorNumElements(); + SmallVector<unsigned, 16> ShuffleMask; + SDValue ImmN; + + switch(Opcode) { + case X86ISD::SHUFPS: + case X86ISD::SHUFPD: + ImmN = N->getOperand(N->getNumOperands()-1); + DecodeSHUFPSMask(NumElems, + cast<ConstantSDNode>(ImmN)->getZExtValue(), + ShuffleMask); + break; + case X86ISD::PUNPCKHBW: + case X86ISD::PUNPCKHWD: + case X86ISD::PUNPCKHDQ: + case X86ISD::PUNPCKHQDQ: + DecodePUNPCKHMask(NumElems, ShuffleMask); break; + case X86ISD::UNPCKHPS: + case X86ISD::UNPCKHPD: + DecodeUNPCKHPMask(NumElems, ShuffleMask); + break; + case X86ISD::PUNPCKLBW: + case X86ISD::PUNPCKLWD: + case X86ISD::PUNPCKLDQ: + case X86ISD::PUNPCKLQDQ: + DecodePUNPCKLMask(NumElems, ShuffleMask); + break; + case X86ISD::UNPCKLPS: + case X86ISD::UNPCKLPD: + DecodeUNPCKLPMask(NumElems, ShuffleMask); + break; + case X86ISD::MOVHLPS: + DecodeMOVHLPSMask(NumElems, ShuffleMask); + break; + case X86ISD::MOVLHPS: + DecodeMOVLHPSMask(NumElems, ShuffleMask); + break; + case X86ISD::PSHUFD: + ImmN = N->getOperand(N->getNumOperands()-1); + DecodePSHUFMask(NumElems, + cast<ConstantSDNode>(ImmN)->getZExtValue(), + ShuffleMask); + break; + case X86ISD::PSHUFHW: + ImmN = N->getOperand(N->getNumOperands()-1); + DecodePSHUFHWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), + ShuffleMask); + break; + case X86ISD::PSHUFLW: + ImmN = N->getOperand(N->getNumOperands()-1); + DecodePSHUFLWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), + ShuffleMask); + break; + case X86ISD::MOVSS: + case X86ISD::MOVSD: { + // The index 0 always comes from the first element of the second source, + // this is why MOVSS and MOVSD are used in the first place. The other + // elements come from the other positions of the first source vector. + unsigned OpNum = (Index == 0) ? 1 : 0; + return getShuffleScalarElt(V.getOperand(OpNum).getNode(), Index, DAG, + Depth+1); + } + default: + assert("not implemented for target shuffle node"); + return SDValue(); + } + + Index = ShuffleMask[Index]; + if (Index < 0) + return DAG.getUNDEF(VT.getVectorElementType()); + + SDValue NewV = (Index < NumElems) ? N->getOperand(0) : N->getOperand(1); + return getShuffleScalarElt(NewV.getNode(), Index % NumElems, DAG, + Depth+1); } - return NumZeros; -} -/// isVectorShift - Returns true if the shuffle can be implemented as a -/// logical left or right shift of a vector. -/// FIXME: split into pslldqi, psrldqi, palignr variants. 
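// Illustrative sketch (editor's addition, not part of the imported patch).
// getShuffleScalarElt above walks backwards through layered shuffles to find
// which scalar feeds a given result lane, with a small depth limit and
// negative mask entries meaning undef. The same idea on plain integer masks
// (the Shuffle struct and traceLane helper are hypothetical):
#include <cstdio>
#include <utility>
#include <vector>
struct Shuffle {
  const Shuffle *Src0, *Src1;   // null for a leaf input vector
  std::vector<int> Mask;        // result lane -> source lane, -1 = undef
  int LeafId;                   // identifies a leaf input
};
// Returns {leaf id, lane} feeding result lane Index, or {-1,-1} for undef.
static std::pair<int,int> traceLane(const Shuffle &S, int Index, int Depth = 0) {
  if (Depth == 6) return {-1, -1};           // limit search depth, as above
  if (!S.Src0) return {S.LeafId, Index};     // reached a concrete input
  int M = S.Mask[Index];
  if (M < 0) return {-1, -1};                // undef lane
  int N = (int)S.Mask.size();
  const Shuffle &Src = (M < N) ? *S.Src0 : *S.Src1;
  return traceLane(Src, M % N, Depth + 1);
}
int main() {
  Shuffle A{nullptr, nullptr, {}, 0}, B{nullptr, nullptr, {}, 1};
  Shuffle S{&A, &B, {6, -1, 0, 3}, -1};      // result = <B[2], undef, A[0], A[3]>
  std::pair<int,int> R = traceLane(S, 0);
  std::printf("lane 0 comes from leaf %d, lane %d\n", R.first, R.second);
}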
-static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, - bool &isLeft, SDValue &ShVal, unsigned &ShAmt) { - unsigned NumElems = SVOp->getValueType(0).getVectorNumElements(); + // Actual nodes that may contain scalar elements + if (Opcode == ISD::BIT_CONVERT) { + V = V.getOperand(0); + EVT SrcVT = V.getValueType(); + unsigned NumElems = VT.getVectorNumElements(); - isLeft = true; - unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems, true, DAG); - if (!NumZeros) { - isLeft = false; - NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems, false, DAG); - if (!NumZeros) - return false; + if (!SrcVT.isVector() || SrcVT.getVectorNumElements() != NumElems) + return SDValue(); + } + + if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) + return (Index == 0) ? V.getOperand(0) + : DAG.getUNDEF(VT.getVectorElementType()); + + if (V.getOpcode() == ISD::BUILD_VECTOR) + return V.getOperand(Index); + + return SDValue(); +} + +/// getNumOfConsecutiveZeros - Return the number of elements of a vector +/// shuffle operation which come from a consecutively from a zero. The +/// search can start in two diferent directions, from left or right. +static +unsigned getNumOfConsecutiveZeros(SDNode *N, int NumElems, + bool ZerosFromLeft, SelectionDAG &DAG) { + int i = 0; + + while (i < NumElems) { + unsigned Index = ZerosFromLeft ? i : NumElems-i-1; + SDValue Elt = getShuffleScalarElt(N, Index, DAG, 0); + if (!(Elt.getNode() && + (Elt.getOpcode() == ISD::UNDEF || X86::isZeroNode(Elt)))) + break; + ++i; } + + return i; +} + +/// isShuffleMaskConsecutive - Check if the shuffle mask indicies from MaskI to +/// MaskE correspond consecutively to elements from one of the vector operands, +/// starting from its index OpIdx. Also tell OpNum which source vector operand. +static +bool isShuffleMaskConsecutive(ShuffleVectorSDNode *SVOp, int MaskI, int MaskE, + int OpIdx, int NumElems, unsigned &OpNum) { bool SeenV1 = false; bool SeenV2 = false; - for (unsigned i = NumZeros; i < NumElems; ++i) { - unsigned Val = isLeft ? (i - NumZeros) : i; - int Idx_ = SVOp->getMaskElt(isLeft ? i : (i - NumZeros)); - if (Idx_ < 0) + + for (int i = MaskI; i <= MaskE; ++i, ++OpIdx) { + int Idx = SVOp->getMaskElt(i); + // Ignore undef indicies + if (Idx < 0) continue; - unsigned Idx = (unsigned) Idx_; + if (Idx < NumElems) SeenV1 = true; - else { - Idx -= NumElems; + else SeenV2 = true; - } - if (Idx != Val) + + // Only accept consecutive elements from the same vector + if ((Idx % NumElems != OpIdx) || (SeenV1 && SeenV2)) return false; } - if (SeenV1 && SeenV2) + + OpNum = SeenV1 ? 0 : 1; + return true; +} + +/// isVectorShiftRight - Returns true if the shuffle can be implemented as a +/// logical left shift of a vector. +static bool isVectorShiftRight(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, + bool &isLeft, SDValue &ShVal, unsigned &ShAmt) { + unsigned NumElems = SVOp->getValueType(0).getVectorNumElements(); + unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems, + false /* check zeros from right */, DAG); + unsigned OpSrc; + + if (!NumZeros) + return false; + + // Considering the elements in the mask that are not consecutive zeros, + // check if they consecutively come from only one of the source vectors. 
+ // + // V1 = {X, A, B, C} 0 + // \ \ \ / + // vector_shuffle V1, V2 <1, 2, 3, X> + // + if (!isShuffleMaskConsecutive(SVOp, + 0, // Mask Start Index + NumElems-NumZeros-1, // Mask End Index + NumZeros, // Where to start looking in the src vector + NumElems, // Number of elements in vector + OpSrc)) // Which source operand ? + return false; + + isLeft = false; + ShAmt = NumZeros; + ShVal = SVOp->getOperand(OpSrc); + return true; +} + +/// isVectorShiftLeft - Returns true if the shuffle can be implemented as a +/// logical left shift of a vector. +static bool isVectorShiftLeft(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, + bool &isLeft, SDValue &ShVal, unsigned &ShAmt) { + unsigned NumElems = SVOp->getValueType(0).getVectorNumElements(); + unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems, + true /* check zeros from left */, DAG); + unsigned OpSrc; + + if (!NumZeros) + return false; + + // Considering the elements in the mask that are not consecutive zeros, + // check if they consecutively come from only one of the source vectors. + // + // 0 { A, B, X, X } = V2 + // / \ / / + // vector_shuffle V1, V2 <X, X, 4, 5> + // + if (!isShuffleMaskConsecutive(SVOp, + NumZeros, // Mask Start Index + NumElems-1, // Mask End Index + 0, // Where to start looking in the src vector + NumElems, // Number of elements in vector + OpSrc)) // Which source operand ? return false; - ShVal = SeenV1 ? SVOp->getOperand(0) : SVOp->getOperand(1); + isLeft = true; ShAmt = NumZeros; + ShVal = SVOp->getOperand(OpSrc); return true; } +/// isVectorShift - Returns true if the shuffle can be implemented as a +/// logical left or right shift of a vector. +static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, + bool &isLeft, SDValue &ShVal, unsigned &ShAmt) { + if (isVectorShiftLeft(SVOp, DAG, isLeft, ShVal, ShAmt) || + isVectorShiftRight(SVOp, DAG, isLeft, ShVal, ShAmt)) + return true; + + return false; +} /// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8. /// @@ -3779,9 +4162,13 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts, SDValue X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); - // All zero's are handled with pxor, all one's are handled with pcmpeqd. - if (ISD::isBuildVectorAllZeros(Op.getNode()) - || ISD::isBuildVectorAllOnes(Op.getNode())) { + // All zero's are handled with pxor in SSE2 and above, xorps in SSE1. + // All one's are handled with pcmpeqd. In AVX, zero's are handled with + // vpxor in 128-bit and xor{pd,ps} in 256-bit, but no 256 version of pcmpeqd + // is present, so AllOnes is ignored. + if (ISD::isBuildVectorAllZeros(Op.getNode()) || + (Op.getValueType().getSizeInBits() != 256 && + ISD::isBuildVectorAllOnes(Op.getNode()))) { // Canonicalize this to either <4 x i32> or <2 x i32> (SSE vs MMX) to // 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are // eliminated on x86-32 hosts. @@ -3819,10 +4206,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { } } - if (NumNonZero == 0) { - // All undef vector. Return an UNDEF. All zero vectors were handled above. + // All undef vector. Return an UNDEF. All zero vectors were handled above. + if (NumNonZero == 0) return DAG.getUNDEF(VT); - } // Special case for single non-zero, non-undef, element. 
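// Illustrative sketch (editor's addition, not part of the imported patch).
// The isVectorShiftLeft/isVectorShiftRight helpers earlier in this hunk
// recognize a shuffle as a whole-vector logical shift: a run of zero or undef
// lanes on one end, with the remaining lanes taken consecutively from a single
// source. A simplified standalone check for the left-shift case, restricted to
// the first source operand (IsZeroLane says whether a source lane is known zero):
#include <cstdio>
#include <vector>
static bool isShiftLeftMask(const std::vector<int> &Mask,
                            bool (*IsZeroLane)(int), unsigned &ShAmt) {
  const int N = (int)Mask.size();
  int Z = 0;
  while (Z < N && (Mask[Z] < 0 || IsZeroLane(Mask[Z])))  // zeros in low lanes
    ++Z;
  if (Z == 0 || Z == N)
    return false;
  for (int i = Z; i < N; ++i)                 // remaining lanes must be source
    if (Mask[i] >= 0 && Mask[i] != i - Z)     // lanes 0,1,2,... in order
      return false;
  ShAmt = (unsigned)Z;
  return true;
}
int main() {
  std::vector<int> M = {-1, -1, 0, 1};        // <zero, zero, A0, A1>
  unsigned Amt = 0;
  bool Ok = isShiftLeftMask(M, [](int){ return false; }, Amt);
  std::printf("shift-left? %d, amount %u lanes\n", Ok, Amt);
}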
if (NumNonZero == 1) { @@ -3960,7 +4346,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (EVTBits == 16 && NumElems == 8) { SDValue V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG, - *this); + *this); if (V.getNode()) return V; } @@ -4014,28 +4400,51 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (LD.getNode()) return LD; - // For SSE 4.1, use inserts into undef. + // For SSE 4.1, use insertps to put the high elements into the low element. if (getSubtarget()->hasSSE41()) { - V[0] = DAG.getUNDEF(VT); - for (unsigned i = 0; i < NumElems; ++i) - if (Op.getOperand(i).getOpcode() != ISD::UNDEF) - V[0] = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, V[0], + SDValue Result; + if (Op.getOperand(0).getOpcode() != ISD::UNDEF) + Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0)); + else + Result = DAG.getUNDEF(VT); + + for (unsigned i = 1; i < NumElems; ++i) { + if (Op.getOperand(i).getOpcode() == ISD::UNDEF) continue; + Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Result, Op.getOperand(i), DAG.getIntPtrConstant(i)); - return V[0]; + } + return Result; } - // Otherwise, expand into a number of unpckl* - // e.g. for v4f32 + // Otherwise, expand into a number of unpckl*, start by extending each of + // our (non-undef) elements to the full vector width with the element in the + // bottom slot of the vector (which generates no code for SSE). + for (unsigned i = 0; i < NumElems; ++i) { + if (Op.getOperand(i).getOpcode() != ISD::UNDEF) + V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i)); + else + V[i] = DAG.getUNDEF(VT); + } + + // Next, we iteratively mix elements, e.g. for v4f32: // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0> // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1> // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> - for (unsigned i = 0; i < NumElems; ++i) - V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i)); - NumElems >>= 1; - while (NumElems != 0) { - for (unsigned i = 0; i < NumElems; ++i) - V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + NumElems]); - NumElems >>= 1; + unsigned EltStride = NumElems >> 1; + while (EltStride != 0) { + for (unsigned i = 0; i < EltStride; ++i) { + // If V[i+EltStride] is undef and this is the first round of mixing, + // then it is safe to just drop this shuffle: V[i] is already in the + // right place, the one element (since it's the first round) being + // inserted as undef can be dropped. This isn't safe for successive + // rounds because they will permute elements within both vectors. + if (V[i+EltStride].getOpcode() == ISD::UNDEF && + EltStride == NumElems/2) + continue; + + V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + EltStride]); + } + EltStride >>= 1; } return V[0]; } @@ -4074,10 +4483,10 @@ X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { // 2. [ssse3] 1 x pshufb // 3. [ssse3] 2 x pshufb + 1 x por // 4. [all] mov + pshuflw + pshufhw + N x (pextrw + pinsrw) -static -SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, - SelectionDAG &DAG, - const X86TargetLowering &TLI) { +SDValue +X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op, + SelectionDAG &DAG) const { + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); DebugLoc dl = SVOp->getDebugLoc(); @@ -4128,7 +4537,7 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, // quads, disable the next transformation since it does not help SSSE3. 
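// Illustrative sketch (editor's addition, not part of the imported patch).
// The rewritten BUILD_VECTOR expansion above puts each scalar into lane 0 of
// its own vector and then interleaves pairs log2(N) times with unpckl*. The
// mixing order for v4f32, with lanes listed low to high:
#include <array>
#include <cstdio>
// unpcklps: result = <A0, B0, A1, B1>
static std::array<float,4> unpackLow(std::array<float,4> A, std::array<float,4> B) {
  std::array<float,4> R = { A[0], B[0], A[1], B[1] };
  return R;
}
int main() {
  // Scalars to assemble, each notionally in lane 0 of its own vector.
  std::array<float,4> V0 = {0, 0, 0, 0}, V1 = {1, 0, 0, 0},
                      V2 = {2, 0, 0, 0}, V3 = {3, 0, 0, 0};
  // Step 1: unpcklps 0,2 -> <0,2,..>   and   unpcklps 1,3 -> <1,3,..>
  std::array<float,4> X = unpackLow(V0, V2), Y = unpackLow(V1, V3);
  // Step 2: unpcklps X,Y -> <0,1,2,3>
  std::array<float,4> R = unpackLow(X, Y);
  std::printf("%g %g %g %g\n", R[0], R[1], R[2], R[3]);   // 0 1 2 3
}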
bool V1Used = InputQuads[0] || InputQuads[1]; bool V2Used = InputQuads[2] || InputQuads[3]; - if (TLI.getSubtarget()->hasSSSE3()) { + if (Subtarget->hasSSSE3()) { if (InputQuads.count() == 2 && V1Used && V2Used) { BestLoQuad = InputQuads.find_first(); BestHiQuad = InputQuads.find_next(BestLoQuad); @@ -4187,15 +4596,21 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, // If we've eliminated the use of V2, and the new mask is a pshuflw or // pshufhw, that's as cheap as it gets. Return the new shuffle. if ((pshufhw && InOrder[0]) || (pshuflw && InOrder[1])) { - return DAG.getVectorShuffle(MVT::v8i16, dl, NewV, + unsigned Opc = pshufhw ? X86ISD::PSHUFHW : X86ISD::PSHUFLW; + unsigned TargetMask = 0; + NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16), &MaskVals[0]); + TargetMask = pshufhw ? X86::getShufflePSHUFHWImmediate(NewV.getNode()): + X86::getShufflePSHUFLWImmediate(NewV.getNode()); + V1 = NewV.getOperand(0); + return getTargetShuffleNode(Opc, dl, MVT::v8i16, V1, TargetMask, DAG); } } // If we have SSSE3, and all words of the result are from 1 input vector, // case 2 is generated, otherwise case 3 is generated. If no SSSE3 // is present, fall back to case 4. - if (TLI.getSubtarget()->hasSSSE3()) { + if (Subtarget->hasSSSE3()) { SmallVector<SDValue,16> pshufbMask; // If we have elements from both input vectors, set the high bit of the @@ -4262,6 +4677,12 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, MaskV.push_back(i); NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16), &MaskV[0]); + + if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3()) + NewV = getTargetShuffleNode(X86ISD::PSHUFLW, dl, MVT::v8i16, + NewV.getOperand(0), + X86::getShufflePSHUFLWImmediate(NewV.getNode()), + DAG); } // If BestHi >= 0, generate a pshufhw to put the high elements in order, @@ -4284,6 +4705,12 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, } NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16), &MaskV[0]); + + if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3()) + NewV = getTargetShuffleNode(X86ISD::PSHUFHW, dl, MVT::v8i16, + NewV.getOperand(0), + X86::getShufflePSHUFHWImmediate(NewV.getNode()), + DAG); } // In case BestHi & BestLo were both -1, which means each quadword has a word @@ -4473,7 +4900,7 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp, SDValue V2 = SVOp->getOperand(1); unsigned NumElems = VT.getVectorNumElements(); unsigned NewWidth = (NumElems == 4) ? 2 : 4; - EVT MaskVT = MVT::getIntVectorWithNumElements(NewWidth); + EVT MaskVT = (NewWidth == 4) ? 
MVT::v4i16 : MVT::v2i32; EVT NewVT = MaskVT; switch (VT.getSimpleVT().SimpleTy) { default: assert(false && "Unexpected!"); @@ -4697,6 +5124,129 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { return DAG.getVectorShuffle(VT, dl, LoShuffle, HiShuffle, &MaskOps[0]); } +static bool MayFoldVectorLoad(SDValue V) { + if (V.hasOneUse() && V.getOpcode() == ISD::BIT_CONVERT) + V = V.getOperand(0); + if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR) + V = V.getOperand(0); + if (MayFoldLoad(V)) + return true; + return false; +} + +static +SDValue getMOVLowToHigh(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, + bool HasSSE2) { + SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); + EVT VT = Op.getValueType(); + + assert(VT != MVT::v2i64 && "unsupported shuffle type"); + + if (HasSSE2 && VT == MVT::v2f64) + return getTargetShuffleNode(X86ISD::MOVLHPD, dl, VT, V1, V2, DAG); + + // v4f32 or v4i32 + return getTargetShuffleNode(X86ISD::MOVLHPS, dl, VT, V1, V2, DAG); +} + +static +SDValue getMOVHighToLow(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG) { + SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); + EVT VT = Op.getValueType(); + + assert((VT == MVT::v4i32 || VT == MVT::v4f32) && + "unsupported shuffle type"); + + if (V2.getOpcode() == ISD::UNDEF) + V2 = V1; + + // v4i32 or v4f32 + return getTargetShuffleNode(X86ISD::MOVHLPS, dl, VT, V1, V2, DAG); +} + +static +SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) { + SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); + EVT VT = Op.getValueType(); + unsigned NumElems = VT.getVectorNumElements(); + + // Use MOVLPS and MOVLPD in case V1 or V2 are loads. During isel, the second + // operand of these instructions is only memory, so check if there's a + // potencial load folding here, otherwise use SHUFPS or MOVSD to match the + // same masks. + bool CanFoldLoad = false; + + // Trivial case, when V2 comes from a load. + if (MayFoldVectorLoad(V2)) + CanFoldLoad = true; + + // When V1 is a load, it can be folded later into a store in isel, example: + // (store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)), addr:$src1) + // turns into: + // (MOVLPSmr addr:$src1, VR128:$src2) + // So, recognize this potential and also use MOVLPS or MOVLPD + if (MayFoldVectorLoad(V1) && MayFoldIntoStore(Op)) + CanFoldLoad = true; + + if (CanFoldLoad) { + if (HasSSE2 && NumElems == 2) + return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG); + + if (NumElems == 4) + return getTargetShuffleNode(X86ISD::MOVLPS, dl, VT, V1, V2, DAG); + } + + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); + // movl and movlp will both match v2i64, but v2i64 is never matched by + // movl earlier because we make it strict to avoid messing with the movlp load + // folding logic (see the code above getMOVLP call). Match it here then, + // this is horrible, but will stay like this until we move all shuffle + // matching to x86 specific nodes. Note that for the 1st condition all + // types are matched with movsd. + if ((HasSSE2 && NumElems == 2) || !X86::isMOVLMask(SVOp)) + return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG); + else if (HasSSE2) + return getTargetShuffleNode(X86ISD::MOVSS, dl, VT, V1, V2, DAG); + + + assert(VT != MVT::v4i32 && "unsupported shuffle type"); + + // Invert the operand order and use SHUFPS to match it. 
+ return getTargetShuffleNode(X86ISD::SHUFPS, dl, VT, V2, V1, + X86::getShuffleSHUFImmediate(SVOp), DAG); +} + +static inline unsigned getUNPCKLOpcode(EVT VT) { + switch(VT.getSimpleVT().SimpleTy) { + case MVT::v4i32: return X86ISD::PUNPCKLDQ; + case MVT::v2i64: return X86ISD::PUNPCKLQDQ; + case MVT::v4f32: return X86ISD::UNPCKLPS; + case MVT::v2f64: return X86ISD::UNPCKLPD; + case MVT::v16i8: return X86ISD::PUNPCKLBW; + case MVT::v8i16: return X86ISD::PUNPCKLWD; + default: + llvm_unreachable("Unknow type for unpckl"); + } + return 0; +} + +static inline unsigned getUNPCKHOpcode(EVT VT) { + switch(VT.getSimpleVT().SimpleTy) { + case MVT::v4i32: return X86ISD::PUNPCKHDQ; + case MVT::v2i64: return X86ISD::PUNPCKHQDQ; + case MVT::v4f32: return X86ISD::UNPCKHPS; + case MVT::v2f64: return X86ISD::UNPCKHPD; + case MVT::v16i8: return X86ISD::PUNPCKHBW; + case MVT::v8i16: return X86ISD::PUNPCKHWD; + default: + llvm_unreachable("Unknow type for unpckh"); + } + return 0; +} + SDValue X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); @@ -4710,6 +5260,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; bool V1IsSplat = false; bool V2IsSplat = false; + bool HasSSE2 = Subtarget->hasSSE2() || Subtarget->hasAVX(); + bool HasSSE3 = Subtarget->hasSSE3() || Subtarget->hasAVX(); + MachineFunction &MF = DAG.getMachineFunction(); + bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); if (isZeroShuffle(SVOp)) return getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl); @@ -4718,7 +5272,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { if (SVOp->isSplat()) { if (isMMX || NumElems < 4) return Op; - return PromoteSplat(SVOp, DAG, Subtarget->hasSSE2()); + return PromoteSplat(SVOp, DAG); } // If the shuffle can be profitably rewritten as a narrower shuffle, then @@ -4746,8 +5300,35 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { } } - if (X86::isPSHUFDMask(SVOp)) - return Op; + // NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and + // unpckh_undef). Only use pshufd if speed is more important than size. + if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp)) + if (VT != MVT::v2i64 && VT != MVT::v2f64) + return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG); + if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp)) + if (VT != MVT::v2i64 && VT != MVT::v2f64) + return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG); + + if (X86::isPSHUFDMask(SVOp)) { + // The actual implementation will match the mask in the if above and then + // during isel it can match several different instructions, not only pshufd + // as its name says, sad but true, emulate the behavior for now... + if (X86::isMOVDDUPMask(SVOp) && ((VT == MVT::v4f32 || VT == MVT::v2i64))) + return getTargetShuffleNode(X86ISD::MOVLHPS, dl, VT, V1, V1, DAG); + + unsigned TargetMask = X86::getShuffleSHUFImmediate(SVOp); + + if (HasSSE2 && (VT == MVT::v4f32 || VT == MVT::v4i32)) + return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1, TargetMask, DAG); + + if (HasSSE2 && (VT == MVT::v2i64 || VT == MVT::v2f64)) + return getTargetShuffleNode(X86ISD::SHUFPD, dl, VT, V1, V1, + TargetMask, DAG); + + if (VT == MVT::v4f32) + return getTargetShuffleNode(X86ISD::SHUFPS, dl, VT, V1, V1, + TargetMask, DAG); + } // Check if this can be converted into a logical shift. 
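// Illustrative sketch (editor's addition, not part of the imported patch).
// The pshufd/shufps/shufpd emission above passes an 8-bit immediate built from
// the 4-lane mask: two bits per destination lane, lane 0 in bits 0-1 (for
// shufps the two high selectors index the second source). A standalone model
// of that encoding (shufImmediate is a hypothetical helper, shown here only to
// make the bit layout concrete; undef lanes are arbitrarily encoded as 0):
#include <cstdio>
static unsigned shufImmediate(const int Mask[4]) {
  unsigned Imm = 0;
  for (int i = 3; i >= 0; --i) {
    Imm <<= 2;
    Imm |= (Mask[i] < 0 ? 0 : Mask[i]) & 3;
  }
  return Imm;
}
int main() {
  const int Identity[4] = {0, 1, 2, 3};
  const int Reverse[4]  = {3, 2, 1, 0};
  std::printf("identity = 0x%02x, reverse = 0x%02x\n",
              shufImmediate(Identity), shufImmediate(Reverse));  // 0xe4, 0x1b
}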
bool isLeft = false; @@ -4768,17 +5349,32 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return V2; if (ISD::isBuildVectorAllZeros(V1.getNode())) return getVZextMovL(VT, VT, V2, DAG, Subtarget, dl); - if (!isMMX) - return Op; + if (!isMMX && !X86::isMOVLPMask(SVOp)) { + if (HasSSE2 && (VT == MVT::v2i64 || VT == MVT::v2f64)) + return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG); + + if (VT == MVT::v4i32 || VT == MVT::v4f32) + return getTargetShuffleNode(X86ISD::MOVSS, dl, VT, V1, V2, DAG); + } } // FIXME: fold these into legal mask. - if (!isMMX && (X86::isMOVSHDUPMask(SVOp) || - X86::isMOVSLDUPMask(SVOp) || - X86::isMOVHLPSMask(SVOp) || - X86::isMOVLHPSMask(SVOp) || - X86::isMOVLPMask(SVOp))) - return Op; + if (!isMMX) { + if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp)) + return getMOVLowToHigh(Op, dl, DAG, HasSSE2); + + if (X86::isMOVHLPSMask(SVOp)) + return getMOVHighToLow(Op, dl, DAG); + + if (X86::isMOVSHDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4) + return getTargetShuffleNode(X86ISD::MOVSHDUP, dl, VT, V1, DAG); + + if (X86::isMOVSLDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4) + return getTargetShuffleNode(X86ISD::MOVSLDUP, dl, VT, V1, DAG); + + if (X86::isMOVLPMask(SVOp)) + return getMOVLP(Op, dl, DAG, HasSSE2); + } if (ShouldXformToMOVHLPS(SVOp) || ShouldXformToMOVLP(V1.getNode(), V2.getNode(), SVOp)) @@ -4818,11 +5414,13 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return getMOVL(DAG, dl, VT, V2, V1); } - if (X86::isUNPCKL_v_undef_Mask(SVOp) || - X86::isUNPCKH_v_undef_Mask(SVOp) || - X86::isUNPCKLMask(SVOp) || - X86::isUNPCKHMask(SVOp)) - return Op; + if (X86::isUNPCKLMask(SVOp)) + return (isMMX) ? + Op : getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG); + + if (X86::isUNPCKHMask(SVOp)) + return (isMMX) ? + Op : getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG); if (V2IsSplat) { // Normalize mask so all entries that point to V2 points to its first @@ -4844,11 +5442,14 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { // FIXME: this seems wrong. SDValue NewOp = CommuteVectorShuffle(SVOp, DAG); ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp); - if (X86::isUNPCKL_v_undef_Mask(NewSVOp) || - X86::isUNPCKH_v_undef_Mask(NewSVOp) || - X86::isUNPCKLMask(NewSVOp) || - X86::isUNPCKHMask(NewSVOp)) - return NewOp; + + if (X86::isUNPCKLMask(NewSVOp)) + return (isMMX) ? + NewOp : getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG); + + if (X86::isUNPCKHMask(NewSVOp)) + return (isMMX) ? + NewOp : getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG); } // FIXME: for mmx, bitcast v2i32 to v4i16 for shuffle. @@ -4857,15 +5458,52 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { if (!isMMX && V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp)) return CommuteVectorShuffle(SVOp, DAG); - // Check for legal shuffle and return? - SmallVector<int, 16> PermMask; - SVOp->getMask(PermMask); - if (isShuffleMaskLegal(PermMask, VT)) + // The checks below are all present in isShuffleMaskLegal, but they are + // inlined here right now to enable us to directly emit target specific + // nodes, and remove one by one until they don't return Op anymore. + SmallVector<int, 16> M; + SVOp->getMask(M); + + // Very little shuffling can be done for 64-bit vectors right now. + if (VT.getSizeInBits() == 64) + return isPALIGNRMask(M, VT, Subtarget->hasSSSE3()) ? 
Op : SDValue(); + + // FIXME: pshufb, blends, shifts. + if (VT.getVectorNumElements() == 2 || + ShuffleVectorSDNode::isSplatMask(&M[0], VT) || + isPALIGNRMask(M, VT, Subtarget->hasSSSE3())) return Op; + if (isPSHUFHWMask(M, VT)) + return getTargetShuffleNode(X86ISD::PSHUFHW, dl, VT, V1, + X86::getShufflePSHUFHWImmediate(SVOp), + DAG); + + if (isPSHUFLWMask(M, VT)) + return getTargetShuffleNode(X86ISD::PSHUFLW, dl, VT, V1, + X86::getShufflePSHUFLWImmediate(SVOp), + DAG); + + if (isSHUFPMask(M, VT)) { + unsigned TargetMask = X86::getShuffleSHUFImmediate(SVOp); + if (VT == MVT::v4f32 || VT == MVT::v4i32) + return getTargetShuffleNode(X86ISD::SHUFPS, dl, VT, V1, V2, + TargetMask, DAG); + if (VT == MVT::v2f64 || VT == MVT::v2i64) + return getTargetShuffleNode(X86ISD::SHUFPD, dl, VT, V1, V2, + TargetMask, DAG); + } + + if (X86::isUNPCKL_v_undef_Mask(SVOp)) + if (VT != MVT::v2i64 && VT != MVT::v2f64) + return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG); + if (X86::isUNPCKH_v_undef_Mask(SVOp)) + if (VT != MVT::v2i64 && VT != MVT::v2f64) + return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG); + // Handle v8i16 specifically since SSE can do byte extraction and insertion. if (VT == MVT::v8i16) { - SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(SVOp, DAG, *this); + SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(Op, DAG); if (NewOp.getNode()) return NewOp; } @@ -6922,24 +7560,58 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const DAG.getConstant(X86CC, MVT::i8), Cond); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); } - // ptest intrinsics. The intrinsic these come from are designed to return - // an integer value, not just an instruction so lower it to the ptest - // pattern and a setcc for the result. + // ptest and testp intrinsics. The intrinsic these come from are designed to + // return an integer value, not just an instruction so lower it to the ptest + // or testp pattern and a setcc for the result. 
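// Illustrative sketch (editor's addition, not part of the imported patch).
// The ptest/testp lowering in this hunk maps each intrinsic to a flag-setting
// compare plus a setcc. A scalar model of the flag behaviour it relies on,
// shown on 64-bit masks instead of 128/256-bit vectors (the AVX testp forms
// apply the same logic to just the sign bits of each element):
//   ZF = ((a & b) == 0)    -> *testz,  selected with COND_E
//   CF = ((~a & b) == 0)   -> *testc,  selected with COND_B
//   testnzc = !ZF && !CF   ->          selected with COND_A
#include <cstdint>
#include <cstdio>
static int testz  (uint64_t a, uint64_t b) { return ( a & b) == 0; }
static int testc  (uint64_t a, uint64_t b) { return (~a & b) == 0; }
static int testnzc(uint64_t a, uint64_t b) { return !testz(a, b) && !testc(a, b); }
int main() {
  std::printf("%d %d %d\n", testz(0xF0, 0x0F), testc(0xFF, 0x0F),
              testnzc(0xF0, 0xFF));   // 1 1 1
}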
case Intrinsic::x86_sse41_ptestz: case Intrinsic::x86_sse41_ptestc: - case Intrinsic::x86_sse41_ptestnzc:{ + case Intrinsic::x86_sse41_ptestnzc: + case Intrinsic::x86_avx_ptestz_256: + case Intrinsic::x86_avx_ptestc_256: + case Intrinsic::x86_avx_ptestnzc_256: + case Intrinsic::x86_avx_vtestz_ps: + case Intrinsic::x86_avx_vtestc_ps: + case Intrinsic::x86_avx_vtestnzc_ps: + case Intrinsic::x86_avx_vtestz_pd: + case Intrinsic::x86_avx_vtestc_pd: + case Intrinsic::x86_avx_vtestnzc_pd: + case Intrinsic::x86_avx_vtestz_ps_256: + case Intrinsic::x86_avx_vtestc_ps_256: + case Intrinsic::x86_avx_vtestnzc_ps_256: + case Intrinsic::x86_avx_vtestz_pd_256: + case Intrinsic::x86_avx_vtestc_pd_256: + case Intrinsic::x86_avx_vtestnzc_pd_256: { + bool IsTestPacked = false; unsigned X86CC = 0; switch (IntNo) { default: llvm_unreachable("Bad fallthrough in Intrinsic lowering."); + case Intrinsic::x86_avx_vtestz_ps: + case Intrinsic::x86_avx_vtestz_pd: + case Intrinsic::x86_avx_vtestz_ps_256: + case Intrinsic::x86_avx_vtestz_pd_256: + IsTestPacked = true; // Fallthrough case Intrinsic::x86_sse41_ptestz: + case Intrinsic::x86_avx_ptestz_256: // ZF = 1 X86CC = X86::COND_E; break; + case Intrinsic::x86_avx_vtestc_ps: + case Intrinsic::x86_avx_vtestc_pd: + case Intrinsic::x86_avx_vtestc_ps_256: + case Intrinsic::x86_avx_vtestc_pd_256: + IsTestPacked = true; // Fallthrough case Intrinsic::x86_sse41_ptestc: + case Intrinsic::x86_avx_ptestc_256: // CF = 1 X86CC = X86::COND_B; break; + case Intrinsic::x86_avx_vtestnzc_ps: + case Intrinsic::x86_avx_vtestnzc_pd: + case Intrinsic::x86_avx_vtestnzc_ps_256: + case Intrinsic::x86_avx_vtestnzc_pd_256: + IsTestPacked = true; // Fallthrough case Intrinsic::x86_sse41_ptestnzc: + case Intrinsic::x86_avx_ptestnzc_256: // ZF and CF = 0 X86CC = X86::COND_A; break; @@ -6947,7 +7619,8 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const SDValue LHS = Op.getOperand(1); SDValue RHS = Op.getOperand(2); - SDValue Test = DAG.getNode(X86ISD::PTEST, dl, MVT::i32, LHS, RHS); + unsigned TestOpc = IsTestPacked ? X86ISD::TESTP : X86ISD::PTEST; + SDValue Test = DAG.getNode(TestOpc, dl, MVT::i32, LHS, RHS); SDValue CC = DAG.getConstant(X86CC, MVT::i8); SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); @@ -7110,12 +7783,13 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { SDValue Handler = Op.getOperand(2); DebugLoc dl = Op.getDebugLoc(); - SDValue Frame = DAG.getRegister(Subtarget->is64Bit() ? X86::RBP : X86::EBP, - getPointerTy()); + SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl, + Subtarget->is64Bit() ? X86::RBP : X86::EBP, + getPointerTy()); unsigned StoreAddrReg = (Subtarget->is64Bit() ? 
X86::RCX : X86::ECX); - SDValue StoreAddr = DAG.getNode(ISD::SUB, dl, getPointerTy(), Frame, - DAG.getIntPtrConstant(-TD->getPointerSize())); + SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Frame, + DAG.getIntPtrConstant(TD->getPointerSize())); StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset); Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, NULL, 0, false, false, 0); Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr); @@ -7218,7 +7892,8 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32; if (InRegCount > 2) { - report_fatal_error("Nest register in use - reduce number of inreg parameters!"); + report_fatal_error("Nest register in use - reduce number of inreg" + " parameters!"); } } break; @@ -7439,6 +8114,86 @@ SDValue X86TargetLowering::LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) const { return Res; } +SDValue X86TargetLowering::LowerSHL(SDValue Op, SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + SDValue R = Op.getOperand(0); + + LLVMContext *Context = DAG.getContext(); + + assert(Subtarget->hasSSE41() && "Cannot lower SHL without SSE4.1 or later"); + + if (VT == MVT::v4i32) { + Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32), + Op.getOperand(1), DAG.getConstant(23, MVT::i32)); + + ConstantInt *CI = ConstantInt::get(*Context, APInt(32, 0x3f800000U)); + + std::vector<Constant*> CV(4, CI); + Constant *C = ConstantVector::get(CV); + SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); + SDValue Addend = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, + PseudoSourceValue::getConstantPool(), 0, + false, false, 16); + + Op = DAG.getNode(ISD::ADD, dl, VT, Op, Addend); + Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32, Op); + Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op); + return DAG.getNode(ISD::MUL, dl, VT, Op, R); + } + if (VT == MVT::v16i8) { + // a = a << 5; + Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), + Op.getOperand(1), DAG.getConstant(5, MVT::i32)); + + ConstantInt *CM1 = ConstantInt::get(*Context, APInt(8, 15)); + ConstantInt *CM2 = ConstantInt::get(*Context, APInt(8, 63)); + + std::vector<Constant*> CVM1(16, CM1); + std::vector<Constant*> CVM2(16, CM2); + Constant *C = ConstantVector::get(CVM1); + SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); + SDValue M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, + PseudoSourceValue::getConstantPool(), 0, + false, false, 16); + + // r = pblendv(r, psllw(r & (char16)15, 4), a); + M = DAG.getNode(ISD::AND, dl, VT, R, M); + M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M, + DAG.getConstant(4, MVT::i32)); + R = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_sse41_pblendvb, MVT::i32), + R, M, Op); + // a += a + Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op); + + C = ConstantVector::get(CVM2); + CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); + M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, + PseudoSourceValue::getConstantPool(), 0, false, false, 16); + + // r = pblendv(r, psllw(r & (char16)63, 2), a); + M = DAG.getNode(ISD::AND, dl, VT, R, M); + M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M, + DAG.getConstant(2, MVT::i32)); + R = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + 
DAG.getConstant(Intrinsic::x86_sse41_pblendvb, MVT::i32), + R, M, Op); + // a += a + Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op); + + // return pblendv(r, r+r, a); + R = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_sse41_pblendvb, MVT::i32), + R, DAG.getNode(ISD::ADD, dl, VT, R, R), Op); + return R; + } + return SDValue(); +} SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { // Lower the "add/sub/mul with overflow" instruction into a regular ins plus @@ -7508,6 +8263,50 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { return Sum; } +SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{ + DebugLoc dl = Op.getDebugLoc(); + + if (!Subtarget->hasSSE2()) { + SDValue Chain = Op.getOperand(0); + SDValue Zero = DAG.getConstant(0, + Subtarget->is64Bit() ? MVT::i64 : MVT::i32); + SDValue Ops[] = { + DAG.getRegister(X86::ESP, MVT::i32), // Base + DAG.getTargetConstant(1, MVT::i8), // Scale + DAG.getRegister(0, MVT::i32), // Index + DAG.getTargetConstant(0, MVT::i32), // Disp + DAG.getRegister(0, MVT::i32), // Segment. + Zero, + Chain + }; + SDNode *Res = + DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops, + array_lengthof(Ops)); + return SDValue(Res, 0); + } + + unsigned isDev = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue(); + if (!isDev) + return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); + + unsigned Op1 = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + unsigned Op2 = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); + unsigned Op3 = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); + unsigned Op4 = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); + + // def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>; + if (!Op1 && !Op2 && !Op3 && Op4) + return DAG.getNode(X86ISD::SFENCE, dl, MVT::Other, Op.getOperand(0)); + + // def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>; + if (Op1 && !Op2 && !Op3 && !Op4) + return DAG.getNode(X86ISD::LFENCE, dl, MVT::Other, Op.getOperand(0)); + + // def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 1)), + // (MFENCE)>; + return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0)); +} + SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const { EVT T = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); @@ -7597,6 +8396,7 @@ SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Should not custom lower this!"); + case ISD::MEMBARRIER: return LowerMEMBARRIER(Op,DAG); case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op,DAG); case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); @@ -7640,6 +8440,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::CTLZ: return LowerCTLZ(Op, DAG); case ISD::CTTZ: return LowerCTTZ(Op, DAG); case ISD::MUL: return LowerMUL_V2I64(Op, DAG); + case ISD::SHL: return LowerSHL(Op, DAG); case ISD::SADDO: case ISD::UADDO: case ISD::SSUBO: @@ -7852,6 +8653,40 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::AND: return "X86ISD::AND"; case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM"; case X86ISD::PTEST: return "X86ISD::PTEST"; + case X86ISD::TESTP: return "X86ISD::TESTP"; + case 
X86ISD::PALIGN: return "X86ISD::PALIGN"; + case X86ISD::PSHUFD: return "X86ISD::PSHUFD"; + case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW"; + case X86ISD::PSHUFHW_LD: return "X86ISD::PSHUFHW_LD"; + case X86ISD::PSHUFLW: return "X86ISD::PSHUFLW"; + case X86ISD::PSHUFLW_LD: return "X86ISD::PSHUFLW_LD"; + case X86ISD::SHUFPS: return "X86ISD::SHUFPS"; + case X86ISD::SHUFPD: return "X86ISD::SHUFPD"; + case X86ISD::MOVLHPS: return "X86ISD::MOVLHPS"; + case X86ISD::MOVLHPD: return "X86ISD::MOVLHPD"; + case X86ISD::MOVHLPS: return "X86ISD::MOVHLPS"; + case X86ISD::MOVHLPD: return "X86ISD::MOVHLPD"; + case X86ISD::MOVLPS: return "X86ISD::MOVLPS"; + case X86ISD::MOVLPD: return "X86ISD::MOVLPD"; + case X86ISD::MOVDDUP: return "X86ISD::MOVDDUP"; + case X86ISD::MOVSHDUP: return "X86ISD::MOVSHDUP"; + case X86ISD::MOVSLDUP: return "X86ISD::MOVSLDUP"; + case X86ISD::MOVSHDUP_LD: return "X86ISD::MOVSHDUP_LD"; + case X86ISD::MOVSLDUP_LD: return "X86ISD::MOVSLDUP_LD"; + case X86ISD::MOVSD: return "X86ISD::MOVSD"; + case X86ISD::MOVSS: return "X86ISD::MOVSS"; + case X86ISD::UNPCKLPS: return "X86ISD::UNPCKLPS"; + case X86ISD::UNPCKLPD: return "X86ISD::UNPCKLPD"; + case X86ISD::UNPCKHPS: return "X86ISD::UNPCKHPS"; + case X86ISD::UNPCKHPD: return "X86ISD::UNPCKHPD"; + case X86ISD::PUNPCKLBW: return "X86ISD::PUNPCKLBW"; + case X86ISD::PUNPCKLWD: return "X86ISD::PUNPCKLWD"; + case X86ISD::PUNPCKLDQ: return "X86ISD::PUNPCKLDQ"; + case X86ISD::PUNPCKLQDQ: return "X86ISD::PUNPCKLQDQ"; + case X86ISD::PUNPCKHBW: return "X86ISD::PUNPCKHBW"; + case X86ISD::PUNPCKHWD: return "X86ISD::PUNPCKHWD"; + case X86ISD::PUNPCKHDQ: return "X86ISD::PUNPCKHDQ"; + case X86ISD::PUNPCKHQDQ: return "X86ISD::PUNPCKHQDQ"; case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS"; case X86ISD::MINGW_ALLOCA: return "X86ISD::MINGW_ALLOCA"; } @@ -7863,6 +8698,7 @@ bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM, const Type *Ty) const { // X86 supports extremely general addressing modes. CodeModel::Model M = getTargetMachine().getCodeModel(); + Reloc::Model R = getTargetMachine().getRelocationModel(); // X86 allows a sign-extended 32-bit immediate field as a displacement. if (!X86::isOffsetSuitableForCodeModel(AM.BaseOffs, M, AM.BaseGV != NULL)) @@ -7882,7 +8718,8 @@ bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM, return false; // If lower 4G is not available, then we must use rip-relative addressing. - if (Subtarget->is64Bit() && (AM.BaseOffs || AM.Scale > 1)) + if ((M != CodeModel::Small || R != Reloc::Static) && + Subtarget->is64Bit() && (AM.BaseOffs || AM.Scale > 1)) return false; } @@ -8368,19 +9205,31 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, } // FIXME: When we get size specific XMM0 registers, i.e. XMM0_V16I8 -// all of this code can be replaced with that in the .td file. +// or XMM0_V32I8 in AVX all of this code can be replaced with that +// in the .td file. MachineBasicBlock * X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB, unsigned numArgs, bool memArg) const { + assert((Subtarget->hasSSE42() || Subtarget->hasAVX()) && + "Target must have SSE4.2 or AVX features enabled"); + DebugLoc dl = MI->getDebugLoc(); const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); unsigned Opc; - if (memArg) - Opc = numArgs == 3 ? X86::PCMPISTRM128rm : X86::PCMPESTRM128rm; - else - Opc = numArgs == 3 ? X86::PCMPISTRM128rr : X86::PCMPESTRM128rr; + + if (!Subtarget->hasAVX()) { + if (memArg) + Opc = numArgs == 3 ? 
X86::PCMPISTRM128rm : X86::PCMPESTRM128rm; + else + Opc = numArgs == 3 ? X86::PCMPISTRM128rr : X86::PCMPESTRM128rr; + } else { + if (memArg) + Opc = numArgs == 3 ? X86::VPCMPISTRM128rm : X86::VPCMPESTRM128rm; + else + Opc = numArgs == 3 ? X86::VPCMPISTRM128rr : X86::VPCMPESTRM128rr; + } MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(Opc)); @@ -8562,7 +9411,8 @@ X86TargetLowering::EmitLoweredMingwAlloca(MachineInstr *MI, .addReg(X86::EAX, RegState::Implicit) .addReg(X86::ESP, RegState::Implicit) .addReg(X86::EAX, RegState::Define | RegState::Implicit) - .addReg(X86::ESP, RegState::Define | RegState::Implicit); + .addReg(X86::ESP, RegState::Define | RegState::Implicit) + .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; @@ -8579,6 +9429,7 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI, = static_cast<const X86InstrInfo*>(getTargetMachine().getInstrInfo()); DebugLoc DL = MI->getDebugLoc(); MachineFunction *F = BB->getParent(); + bool IsWin64 = Subtarget->isTargetWin64(); assert(MI->getOperand(3).isGlobal() && "This should be a global"); @@ -8590,7 +9441,7 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI, .addGlobalAddress(MI->getOperand(3).getGlobal(), 0, MI->getOperand(3).getTargetFlags()) .addReg(0); - MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m)); + MIB = BuildMI(*BB, MI, DL, TII->get(IsWin64 ? X86::WINCALL64m : X86::CALL64m)); addDirectMem(MIB, X86::RDI); } else if (getTargetMachine().getRelocationModel() != Reloc::PIC_) { MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, @@ -8727,12 +9578,16 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, } // String/text processing lowering. case X86::PCMPISTRM128REG: + case X86::VPCMPISTRM128REG: return EmitPCMP(MI, BB, 3, false /* in-mem */); case X86::PCMPISTRM128MEM: + case X86::VPCMPISTRM128MEM: return EmitPCMP(MI, BB, 3, true /* in-mem */); case X86::PCMPESTRM128REG: + case X86::VPCMPESTRM128REG: return EmitPCMP(MI, BB, 5, false /* in mem */); case X86::PCMPESTRM128MEM: + case X86::VPCMPESTRM128MEM: return EmitPCMP(MI, BB, 5, true /* in mem */); // Atomic Lowering. @@ -8966,21 +9821,20 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI) { DebugLoc dl = N->getDebugLoc(); EVT VT = N->getValueType(0); - ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); if (VT.getSizeInBits() != 128) return SDValue(); SmallVector<SDValue, 16> Elts; for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) - Elts.push_back(DAG.getShuffleScalarElt(SVN, i)); - + Elts.push_back(getShuffleScalarElt(N, i, DAG, 0)); + return EltsFromConsecutiveLoads(VT, Elts, dl, DAG); } -/// PerformShuffleCombine - Detect vector gather/scatter index generation -/// and convert it from being a bunch of shuffles and extracts to a simple -/// store and scalar loads to extract the elements. +/// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index +/// generation and convert it from being a bunch of shuffles and extracts +/// to a simple store and scalar loads to extract the elements. static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI) { SDValue InputVector = N->getOperand(0); @@ -9030,8 +9884,8 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, // Store the value to a temporary stack slot. 
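// Illustrative sketch (editor's addition, not part of the imported patch).
// The extract_vector_elt combine around here replaces a pile of per-lane
// extracts with a single store of the vector to a stack temporary followed by
// scalar loads at byte offset EltSize * Index. The same idea in plain C++:
#include <cstdio>
#include <cstring>
int main() {
  float Vec[4] = {1.0f, 2.0f, 3.0f, 4.0f};            // stand-in for a v4f32
  unsigned char Slot[sizeof Vec];                      // temporary stack slot
  std::memcpy(Slot, Vec, sizeof Vec);                  // one store of the vector
  for (unsigned i = 0; i < 4; ++i) {
    float Elt;
    std::memcpy(&Elt, Slot + sizeof(float) * i, sizeof Elt);   // scalar load
    std::printf("lane %u = %g\n", i, Elt);
  }
}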
SDValue StackPtr = DAG.CreateStackTemporary(InputVector.getValueType()); - SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr, NULL, 0, - false, false, 0); + SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr, NULL, + 0, false, false, 0); // Replace each use (extract) with a load of the appropriate element. for (SmallVectorImpl<SDNode *>::iterator UI = Uses.begin(), @@ -9045,11 +9899,12 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, uint64_t Offset = EltSize * cast<ConstantSDNode>(Idx)->getZExtValue(); SDValue OffsetVal = DAG.getConstant(Offset, TLI.getPointerTy()); - SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), OffsetVal, StackPtr); + SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), + OffsetVal, StackPtr); // Load the scalar. - SDValue LoadScalar = DAG.getLoad(Extract->getValueType(0), dl, Ch, ScalarAddr, - NULL, 0, false, false, 0); + SDValue LoadScalar = DAG.getLoad(Extract->getValueType(0), dl, Ch, + ScalarAddr, NULL, 0, false, false, 0); // Replace the exact with the load. DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), LoadScalar); @@ -9087,8 +9942,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // Converting this to a min would handle NaNs incorrectly, and swapping // the operands would cause it to handle comparisons between positive // and negative zero incorrectly. - if (!FiniteOnlyFPMath() && - (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) { + if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) { if (!UnsafeFPMath && !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) break; @@ -9126,8 +9980,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // Converting this to a max would handle NaNs incorrectly, and swapping // the operands would cause it to handle comparisons between positive // and negative zero incorrectly. - if (!FiniteOnlyFPMath() && - (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) { + if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) { if (!UnsafeFPMath && !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) break; @@ -9156,8 +10009,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // cause it to handle NaNs incorrectly. if (!UnsafeFPMath && !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) { - if (!FiniteOnlyFPMath() && - (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) + if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) break; std::swap(LHS, RHS); } @@ -9182,8 +10034,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, case ISD::SETULT: // Converting this to a max would handle NaNs incorrectly. - if (!FiniteOnlyFPMath() && - (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) + if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) break; Opcode = X86ISD::FMAX; break; @@ -9193,8 +10044,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // cause it to handle NaNs incorrectly. 
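// Illustrative sketch (editor's addition, not part of the imported patch).
// The select -> FMIN/FMAX combine in this hunk has to be careful because the
// SSE scalar min/max instructions are not commutative with NaNs or signed
// zeros: minss/minsd compute "a < b ? a : b", so whenever the comparison is
// false (either operand NaN, or -0.0 vs +0.0) they simply return the second
// operand. A scalar model:
#include <cmath>
#include <cstdio>
static double minsd_model(double a, double b) { return a < b ? a : b; }
int main() {
  std::printf("%g %g\n", minsd_model(-0.0, +0.0), minsd_model(+0.0, -0.0)); // 0 -0
  std::printf("%g %g\n", minsd_model(NAN, 1.0),   minsd_model(1.0, NAN));   // 1 nan
}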
if (!UnsafeFPMath && !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) { - if (!FiniteOnlyFPMath() && - (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) + if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) break; std::swap(LHS, RHS); } @@ -9905,7 +10755,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, SelectionDAG &DAG = DCI.DAG; switch (N->getOpcode()) { default: break; - case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this); case ISD::EXTRACT_VECTOR_ELT: return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this); case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget); @@ -9922,6 +10771,28 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::BT: return PerformBTCombine(N, DAG, DCI); case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG); case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG); + case X86ISD::SHUFPS: // Handle all target specific shuffles + case X86ISD::SHUFPD: + case X86ISD::PUNPCKHBW: + case X86ISD::PUNPCKHWD: + case X86ISD::PUNPCKHDQ: + case X86ISD::PUNPCKHQDQ: + case X86ISD::UNPCKHPS: + case X86ISD::UNPCKHPD: + case X86ISD::PUNPCKLBW: + case X86ISD::PUNPCKLWD: + case X86ISD::PUNPCKLDQ: + case X86ISD::PUNPCKLQDQ: + case X86ISD::UNPCKLPS: + case X86ISD::UNPCKLPD: + case X86ISD::MOVHLPS: + case X86ISD::MOVLHPS: + case X86ISD::PSHUFD: + case X86ISD::PSHUFHW: + case X86ISD::PSHUFLW: + case X86ISD::MOVSS: + case X86ISD::MOVSD: + case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this); } return SDValue(); @@ -9956,14 +10827,6 @@ bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const { } } -static bool MayFoldLoad(SDValue Op) { - return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode()); -} - -static bool MayFoldIntoStore(SDValue Op) { - return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin()); -} - /// IsDesirableToPromoteOp - This method query the target whether it is /// beneficial for dag combiner to promote the specified node. If true, it /// should return the desired promotion type by reference. diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 4e4daa4bc5ca..d2d9b28a0396 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -248,6 +248,44 @@ namespace llvm { // PTEST - Vector bitwise comparisons PTEST, + // TESTP - Vector packed fp sign bitwise comparisons + TESTP, + + // Several flavors of instructions with vector shuffle behaviors. + PALIGN, + PSHUFD, + PSHUFHW, + PSHUFLW, + PSHUFHW_LD, + PSHUFLW_LD, + SHUFPD, + SHUFPS, + MOVDDUP, + MOVSHDUP, + MOVSLDUP, + MOVSHDUP_LD, + MOVSLDUP_LD, + MOVLHPS, + MOVLHPD, + MOVHLPS, + MOVHLPD, + MOVLPS, + MOVLPD, + MOVSD, + MOVSS, + UNPCKLPS, + UNPCKLPD, + UNPCKHPS, + UNPCKHPD, + PUNPCKLBW, + PUNPCKLWD, + PUNPCKLDQ, + PUNPCKLQDQ, + PUNPCKHBW, + PUNPCKHWD, + PUNPCKHDQ, + PUNPCKHQDQ, + // VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack, // according to %al. An operator is needed so that this can be expanded // with control flow. @@ -265,7 +303,13 @@ namespace llvm { ATOMXOR64_DAG, ATOMAND64_DAG, ATOMNAND64_DAG, - ATOMSWAP64_DAG + ATOMSWAP64_DAG, + + // Memory barrier + MEMBARRIER, + MFENCE, + SFENCE, + LFENCE // WARNING: Do not add anything in the end unless you want the node to // have memop! In fact, starting from ATOMADD64_DAG all opcodes will be @@ -584,12 +628,19 @@ namespace llvm { /// getFunctionAlignment - Return the Log2 alignment of this function. 
virtual unsigned getFunctionAlignment(const Function *F) const; + unsigned getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const; + /// getStackCookieLocation - Return true if the target stores stack /// protector cookies at a fixed offset in some non-standard address /// space, and populates the address space and offset as /// appropriate. virtual bool getStackCookieLocation(unsigned &AddressSpace, unsigned &Offset) const; + protected: + std::pair<const TargetRegisterClass*, uint8_t> + findRepresentativeClass(EVT VT) const; + private: /// Subtarget - Keep a pointer to the X86Subtarget around so that we can /// make the right decision when generating code for different targets. @@ -710,11 +761,16 @@ namespace llvm { SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSHL(SDValue Op, SelectionDAG &DAG) const; SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const; SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const; + + // Utility functions to help LowerVECTOR_SHUFFLE + SDValue LowerVECTOR_SHUFFLEv8i16(SDValue Op, SelectionDAG &DAG) const; virtual SDValue LowerFormalArguments(SDValue Chain, diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index 42d0e7f9778a..0884b61425e9 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -73,11 +73,7 @@ def GetLo32XForm : SDNodeXForm<imm, [{ return getI32Imm((unsigned)N->getZExtValue()); }]>; -def i64immSExt32 : PatLeaf<(i64 imm), [{ - // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit - // sign extended field. - return (int64_t)N->getZExtValue() == (int32_t)N->getZExtValue(); -}]>; +def i64immSExt32 : PatLeaf<(i64 imm), [{ return i64immSExt32(N); }]>; def i64immZExt32 : PatLeaf<(i64 imm), [{ @@ -158,7 +154,7 @@ let isCall = 1 in // FIXME: We need to teach codegen about single list of call-clobbered // registers. -let isCall = 1 in +let isCall = 1, isCodeGenOnly = 1 in // All calls clobber the non-callee saved registers. RSP is marked as // a use to prevent stack-pointer assignments that appear immediately // before calls from potentially appearing dead. 
Uses for argument @@ -168,7 +164,7 @@ let isCall = 1 in MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS], Uses = [RSP] in { - def WINCALL64pcrel32 : I<0xE8, RawFrm, + def WINCALL64pcrel32 : Ii32PCRel<0xE8, RawFrm, (outs), (ins i64i32imm_pcrel:$dst, variable_ops), "call\t$dst", []>, Requires<[IsWin64]>; @@ -182,7 +178,8 @@ let isCall = 1 in } -let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, + isCodeGenOnly = 1 in let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1, MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, @@ -216,9 +213,9 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def JMP64pcrel32 : I<0xE9, RawFrm, (outs), (ins brtarget:$dst), "jmp{q}\t$dst", []>; def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst", - [(brind GR64:$dst)]>; + [(brind GR64:$dst)]>, Requires<[In64BitMode]>; def JMP64m : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst", - [(brind (loadi64 addr:$dst))]>; + [(brind (loadi64 addr:$dst))]>, Requires<[In64BitMode]>; def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst), "ljmp{q}\t{*}$dst", []>; } @@ -246,7 +243,7 @@ def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, neverHasSideEffects = 1 in def LEAVE64 : I<0xC9, RawFrm, - (outs), (ins), "leave", []>; + (outs), (ins), "leave", []>, Requires<[In64BitMode]>; let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in { let mayLoad = 1 in { def POP64r : I<0x58, AddRegFrm, @@ -330,7 +327,7 @@ def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", []>; // Fast system-call instructions def SYSEXIT64 : RI<0x35, RawFrm, - (outs), (ins), "sysexit", []>, TB; + (outs), (ins), "sysexit", []>, TB, Requires<[In64BitMode]>; //===----------------------------------------------------------------------===// // Move Instructions... @@ -374,6 +371,7 @@ def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src), [(store i64immSExt32:$src, addr:$dst)]>; /// Versions of MOV64rr, MOV64rm, and MOV64mr for i64mem_TC and GR64_TC. +let isCodeGenOnly = 1 in { let neverHasSideEffects = 1 in def MOV64rr_TC : RI<0x89, MRMDestReg, (outs GR64_TC:$dst), (ins GR64_TC:$src), "mov{q}\t{$src, $dst|$dst, $src}", []>; @@ -388,7 +386,13 @@ let mayStore = 1 in def MOV64mr_TC : RI<0x89, MRMDestMem, (outs), (ins i64mem_TC:$dst, GR64_TC:$src), "mov{q}\t{$src, $dst|$dst, $src}", []>; +} +// FIXME: These definitions are utterly broken +// Just leave them commented out for now because they're useless outside +// of the large code model, and most compilers won't generate the instructions +// in question. 
+/* def MOV64o8a : RIi8<0xA0, RawFrm, (outs), (ins offset8:$src), "mov{q}\t{$src, %rax|%rax, $src}", []>; def MOV64o64a : RIi32<0xA1, RawFrm, (outs), (ins offset64:$src), @@ -397,6 +401,7 @@ def MOV64ao8 : RIi8<0xA2, RawFrm, (outs offset8:$dst), (ins), "mov{q}\t{%rax, $dst|$dst, %rax}", []>; def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins), "mov{q}\t{%rax, $dst|$dst, %rax}", []>; +*/ // Moves to and from segment registers def MOV64rs : RI<0x8C, MRMDestReg, (outs GR64:$dst), (ins SEGMENT_REG:$src), @@ -1316,14 +1321,13 @@ def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), [] >, TB; -def BT64ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2), +def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB, - REX_W; + [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB; // Note that these instructions don't need FastBTMem because that // only applies when the other operand is in a register. When it's // an immediate, bt is still fast. -def BT64mi8 : Ii8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2), +def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt (loadi64 addr:$src1), i64immSExt8:$src2))]>, TB; @@ -1537,116 +1541,6 @@ def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), (SETB_C64r)>; //===----------------------------------------------------------------------===// -// Conversion Instructions... -// - -// f64 -> signed i64 -def CVTSD2SI64rr: RSDI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins FR64:$src), - "cvtsd2si{q}\t{$src, $dst|$dst, $src}", []>; -def CVTSD2SI64rm: RSDI<0x2D, MRMSrcMem, (outs GR64:$dst), (ins f64mem:$src), - "cvtsd2si{q}\t{$src, $dst|$dst, $src}", []>; -def Int_CVTSD2SI64rr: RSDI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), - "cvtsd2si{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, - (int_x86_sse2_cvtsd2si64 VR128:$src))]>; -def Int_CVTSD2SI64rm: RSDI<0x2D, MRMSrcMem, (outs GR64:$dst), - (ins f128mem:$src), - "cvtsd2si{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (int_x86_sse2_cvtsd2si64 - (load addr:$src)))]>; -def CVTTSD2SI64rr: RSDI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins FR64:$src), - "cvttsd2si{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (fp_to_sint FR64:$src))]>; -def CVTTSD2SI64rm: RSDI<0x2C, MRMSrcMem, (outs GR64:$dst), (ins f64mem:$src), - "cvttsd2si{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (fp_to_sint (loadf64 addr:$src)))]>; -def Int_CVTTSD2SI64rr: RSDI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), - "cvttsd2si{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, - (int_x86_sse2_cvttsd2si64 VR128:$src))]>; -def Int_CVTTSD2SI64rm: RSDI<0x2C, MRMSrcMem, (outs GR64:$dst), - (ins f128mem:$src), - "cvttsd2si{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, - (int_x86_sse2_cvttsd2si64 - (load addr:$src)))]>; - -// Signed i64 -> f64 -def CVTSI2SD64rr: RSDI<0x2A, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), - "cvtsi2sd{q}\t{$src, $dst|$dst, $src}", - [(set FR64:$dst, (sint_to_fp GR64:$src))]>; -def CVTSI2SD64rm: RSDI<0x2A, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), - "cvtsi2sd{q}\t{$src, $dst|$dst, $src}", - [(set FR64:$dst, (sint_to_fp (loadi64 addr:$src)))]>; - -let Constraints = "$src1 = $dst" in { -def Int_CVTSI2SD64rr: RSDI<0x2A, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, GR64:$src2), - "cvtsi2sd{q}\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - 
(int_x86_sse2_cvtsi642sd VR128:$src1, - GR64:$src2))]>; -def Int_CVTSI2SD64rm: RSDI<0x2A, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i64mem:$src2), - "cvtsi2sd{q}\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (int_x86_sse2_cvtsi642sd VR128:$src1, - (loadi64 addr:$src2)))]>; -} // Constraints = "$src1 = $dst" - -// Signed i64 -> f32 -def CVTSI2SS64rr: RSSI<0x2A, MRMSrcReg, (outs FR32:$dst), (ins GR64:$src), - "cvtsi2ss{q}\t{$src, $dst|$dst, $src}", - [(set FR32:$dst, (sint_to_fp GR64:$src))]>; -def CVTSI2SS64rm: RSSI<0x2A, MRMSrcMem, (outs FR32:$dst), (ins i64mem:$src), - "cvtsi2ss{q}\t{$src, $dst|$dst, $src}", - [(set FR32:$dst, (sint_to_fp (loadi64 addr:$src)))]>; - -let Constraints = "$src1 = $dst" in { - def Int_CVTSI2SS64rr : RSSI<0x2A, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, GR64:$src2), - "cvtsi2ss{q}\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (int_x86_sse_cvtsi642ss VR128:$src1, - GR64:$src2))]>; - def Int_CVTSI2SS64rm : RSSI<0x2A, MRMSrcMem, - (outs VR128:$dst), - (ins VR128:$src1, i64mem:$src2), - "cvtsi2ss{q}\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (int_x86_sse_cvtsi642ss VR128:$src1, - (loadi64 addr:$src2)))]>; -} // Constraints = "$src1 = $dst" - -// f32 -> signed i64 -def CVTSS2SI64rr: RSSI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins FR32:$src), - "cvtss2si{q}\t{$src, $dst|$dst, $src}", []>; -def CVTSS2SI64rm: RSSI<0x2D, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src), - "cvtss2si{q}\t{$src, $dst|$dst, $src}", []>; -def Int_CVTSS2SI64rr: RSSI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), - "cvtss2si{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, - (int_x86_sse_cvtss2si64 VR128:$src))]>; -def Int_CVTSS2SI64rm: RSSI<0x2D, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src), - "cvtss2si{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (int_x86_sse_cvtss2si64 - (load addr:$src)))]>; -def CVTTSS2SI64rr: RSSI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins FR32:$src), - "cvttss2si{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (fp_to_sint FR32:$src))]>; -def CVTTSS2SI64rm: RSSI<0x2C, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src), - "cvttss2si{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (fp_to_sint (loadf32 addr:$src)))]>; -def Int_CVTTSS2SI64rr: RSSI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), - "cvttss2si{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, - (int_x86_sse_cvttss2si64 VR128:$src))]>; -def Int_CVTTSS2SI64rm: RSSI<0x2C, MRMSrcMem, (outs GR64:$dst), - (ins f32mem:$src), - "cvttss2si{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, - (int_x86_sse_cvttss2si64 (load addr:$src)))]>; - // Descriptor-table support instructions // LLDT is not interpreted specially in 64-bit mode because there is no sign @@ -1726,6 +1620,14 @@ def MOV64FSrm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), // Atomic Instructions //===----------------------------------------------------------------------===// +// TODO: Get this to fold the constant into the instruction. +let hasSideEffects = 1, Defs = [ESP] in +def Int_MemBarrierNoSSE64 : RI<0x09, MRM1r, (outs), (ins GR64:$zero), + "lock\n\t" + "or{q}\t{$zero, (%rsp)|(%rsp), $zero}", + [(X86MemBarrierNoSSE GR64:$zero)]>, + Requires<[In64BitMode]>, LOCK; + let Defs = [RAX, EFLAGS], Uses = [RAX] in { def LCMPXCHG64 : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$ptr, GR64:$swap), "lock\n\t" @@ -1772,7 +1674,7 @@ def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src), // Optimized codegen when the non-memory output is not used. 
let Defs = [EFLAGS], mayLoad = 1, mayStore = 1 in { // FIXME: Use normal add / sub instructions and add lock prefix dynamically. -def LOCK_ADD64mr : RI<0x03, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), +def LOCK_ADD64mr : RI<0x01, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), "lock\n\t" "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK; def LOCK_ADD64mi8 : RIi8<0x83, MRM0m, (outs), diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td new file mode 100644 index 000000000000..d868773d2d69 --- /dev/null +++ b/lib/Target/X86/X86InstrFMA.td @@ -0,0 +1,60 @@ +//====- X86InstrFMA.td - Describe the X86 Instruction Set --*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes FMA (Fused Multiply-Add) instructions. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// FMA3 - Intel 3 operand Fused Multiply-Add instructions +//===----------------------------------------------------------------------===// + +multiclass fma_rm<bits<8> opc, string OpcodeStr> { + def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>; + def m : FMA3<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, f128mem:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>; + def rY : FMA3<opc, MRMSrcReg, (outs VR256:$dst), + (ins VR256:$src1, VR256:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>; + def mY : FMA3<opc, MRMSrcMem, (outs VR256:$dst), + (ins VR256:$src1, f256mem:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>; +} + +multiclass fma_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231, + string OpcodeStr, string PackTy> { + defm r132 : fma_rm<opc132, !strconcat(OpcodeStr, !strconcat("132", PackTy))>; + defm r213 : fma_rm<opc213, !strconcat(OpcodeStr, !strconcat("213", PackTy))>; + defm r231 : fma_rm<opc231, !strconcat(OpcodeStr, !strconcat("231", PackTy))>; +} + +let isAsmParserOnly = 1 in { + // Fused Multiply-Add + defm VFMADDPS : fma_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps">; + defm VFMADDPD : fma_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd">, VEX_W; + defm VFMADDSUBPS : fma_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps">; + defm VFMADDSUBPD : fma_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd">, VEX_W; + defm VFMSUBADDPS : fma_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps">; + defm VFMSUBADDPD : fma_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd">, VEX_W; + defm VFMSUBPS : fma_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps">; + defm VFMSUBPD : fma_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd">, VEX_W; + + // Fused Negative Multiply-Add + defm VFNMADDPS : fma_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps">; + defm VFNMADDPD : fma_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd">, VEX_W; + defm VFNMSUBPS : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps">; + defm VFNMSUBPD : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd">, VEX_W; +} diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index da93de988d50..9c9bcc7d0b6a 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -108,10 +108,6 @@ let usesCustomInserter = 1 in { // Expanded after 
instruction selection. [(X86fp_to_i64mem RFP80:$src, addr:$dst)]>; } -let isTerminator = 1 in - let Defs = [FP0, FP1, FP2, FP3, FP4, FP5, FP6] in - def FP_REG_KILL : I<0, Pseudo, (outs), (ins), "##FP_REG_KILL", []>; - // All FP Stack operations are represented with four instructions here. The // first three instructions, generated by the instruction selector, use "RFP32" // "RFP64" or "RFP80" registers: traditional register files to reference 32-bit, @@ -157,7 +153,7 @@ def FpSET_ST1_64 : FpI_<(outs), (ins RFP64:$src), SpecialFP, []>; // ST(1) = FPR def FpSET_ST1_80 : FpI_<(outs), (ins RFP80:$src), SpecialFP, []>; // ST(1) = FPR } -// FpIf32, FpIf64 - Floating Point Psuedo Instruction template. +// FpIf32, FpIf64 - Floating Point Pseudo Instruction template. // f32 instructions can use SSE1 and are predicated on FPStackf32 == !SSE1. // f64 instructions can use SSE2 and are predicated on FPStackf64 == !SSE2. // f80 instructions cannot use SSE and use neither of these. diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index cc3fdf1efd7b..79187e9a76d7 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -39,6 +39,7 @@ def MRM_E8 : Format<39>; def MRM_F0 : Format<40>; def MRM_F8 : Format<41>; def MRM_F9 : Format<42>; +def RawFrmImm16 : Format<43>; // ImmType - This specifies the immediate type used by an instruction. This is // part of the ad-hoc solution used to emit machine instruction encodings by our @@ -210,7 +211,7 @@ class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm, class FPI<bits<8> o, Format F, dag outs, dag ins, string asm> : I<o, F, outs, ins, asm, []> {} -// FpI_ - Floating Point Psuedo Instruction template. Not Predicated. +// FpI_ - Floating Point Pseudo Instruction template. Not Predicated. class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern> : X86Inst<0, Pseudo, NoImm, outs, ins, ""> { let FPForm = fp; @@ -224,13 +225,13 @@ class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern> // Iseg32 - 16-bit segment selector, 32-bit offset class Iseg16 <bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern> : X86Inst<o, f, NoImm, outs, ins, asm> { + list<dag> pattern> : X86Inst<o, f, Imm16, outs, ins, asm> { let Pattern = pattern; let CodeSize = 3; } class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern> : X86Inst<o, f, NoImm, outs, ins, asm> { + list<dag> pattern> : X86Inst<o, f, Imm32, outs, ins, asm> { let Pattern = pattern; let CodeSize = 3; } @@ -411,6 +412,20 @@ class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm, : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, Requires<[HasSSE42]>; +// AVX Instruction Templates: +// Instructions introduced in AVX (no SSE equivalent forms) +// +// AVX8I - AVX instructions with T8 and OpSize prefix. +// AVXAIi8 - AVX instructions with TA, OpSize prefix and ImmT = Imm8. 
+class AVX8I<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern> + : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, OpSize, + Requires<[HasAVX]>; +class AVXAIi8<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern> + : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, OpSize, + Requires<[HasAVX]>; + // AES Instruction Templates: // // AES8I @@ -425,6 +440,18 @@ class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm, : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, Requires<[HasAES]>; +// CLMUL Instruction Templates +class CLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag>pattern> + : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, + OpSize, VEX_4V, Requires<[HasAVX, HasCLMUL]>; + +// FMA3 Instruction Templates +class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag>pattern> + : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, + OpSize, VEX_4V, Requires<[HasFMA3]>; + // X86-64 Instruction templates... // diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 71c4e8bc147f..01149b699213 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -117,9 +117,67 @@ def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>; def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>; def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, - SDTCisVT<1, v4f32>, - SDTCisVT<2, v4f32>]>; + SDTCisVec<1>, + SDTCisSameAs<2, 1>]>; def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>; +def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>; + +// Specific shuffle nodes - At some point ISD::VECTOR_SHUFFLE will always get +// translated into one of the target nodes below during lowering. +// Note: this is a work in progress... 
+def SDTShuff1Op : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; +def SDTShuff2Op : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>]>; + +def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>, + SDTCisSameAs<0,1>, SDTCisInt<2>]>; +def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>, SDTCisInt<3>]>; + +def SDTShuff2OpLdI : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisPtrTy<1>, + SDTCisInt<2>]>; + +def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>; + +def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>; +def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>; +def X86PShuflw : SDNode<"X86ISD::PSHUFLW", SDTShuff2OpI>; + +def X86PShufhwLd : SDNode<"X86ISD::PSHUFHW_LD", SDTShuff2OpLdI>; +def X86PShuflwLd : SDNode<"X86ISD::PSHUFLW_LD", SDTShuff2OpLdI>; + +def X86Shufpd : SDNode<"X86ISD::SHUFPD", SDTShuff3OpI>; +def X86Shufps : SDNode<"X86ISD::SHUFPS", SDTShuff3OpI>; + +def X86Movddup : SDNode<"X86ISD::MOVDDUP", SDTShuff1Op>; +def X86Movshdup : SDNode<"X86ISD::MOVSHDUP", SDTShuff1Op>; +def X86Movsldup : SDNode<"X86ISD::MOVSLDUP", SDTShuff1Op>; + +def X86Movsd : SDNode<"X86ISD::MOVSD", SDTShuff2Op>; +def X86Movss : SDNode<"X86ISD::MOVSS", SDTShuff2Op>; + +def X86Movlhps : SDNode<"X86ISD::MOVLHPS", SDTShuff2Op>; +def X86Movlhpd : SDNode<"X86ISD::MOVLHPD", SDTShuff2Op>; +def X86Movhlps : SDNode<"X86ISD::MOVHLPS", SDTShuff2Op>; +def X86Movhlpd : SDNode<"X86ISD::MOVHLPD", SDTShuff2Op>; + +def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>; +def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>; + +def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>; +def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>; +def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>; +def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>; + +def X86Punpcklbw : SDNode<"X86ISD::PUNPCKLBW", SDTShuff2Op>; +def X86Punpcklwd : SDNode<"X86ISD::PUNPCKLWD", SDTShuff2Op>; +def X86Punpckldq : SDNode<"X86ISD::PUNPCKLDQ", SDTShuff2Op>; +def X86Punpcklqdq : SDNode<"X86ISD::PUNPCKLQDQ", SDTShuff2Op>; + +def X86Punpckhbw : SDNode<"X86ISD::PUNPCKHBW", SDTShuff2Op>; +def X86Punpckhwd : SDNode<"X86ISD::PUNPCKHWD", SDTShuff2Op>; +def X86Punpckhdq : SDNode<"X86ISD::PUNPCKHDQ", SDTShuff2Op>; +def X86Punpckhqdq : SDNode<"X86ISD::PUNPCKHQDQ", SDTShuff2Op>; //===----------------------------------------------------------------------===// // SSE Complex Patterns @@ -148,12 +206,13 @@ def sdmem : Operand<v2f64> { // SSE pattern fragments //===----------------------------------------------------------------------===// +// 128-bit load pattern fragments def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>; def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>; def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>; def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>; -// FIXME: move this to a more appropriate place after all AVX is done. 
+// 256-bit load pattern fragments def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>; def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>; def loadv8i32 : PatFrag<(ops node:$ptr), (v8i32 (load node:$ptr))>; @@ -174,6 +233,8 @@ def alignedloadfsf32 : PatFrag<(ops node:$ptr), (f32 (alignedload node:$ptr))>; def alignedloadfsf64 : PatFrag<(ops node:$ptr), (f64 (alignedload node:$ptr))>; + +// 128-bit aligned load pattern fragments def alignedloadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (alignedload node:$ptr))>; def alignedloadv2f64 : PatFrag<(ops node:$ptr), @@ -183,7 +244,7 @@ def alignedloadv4i32 : PatFrag<(ops node:$ptr), def alignedloadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (alignedload node:$ptr))>; -// FIXME: move this to a more appropriate place after all AVX is done. +// 256-bit aligned load pattern fragments def alignedloadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (alignedload node:$ptr))>; def alignedloadv4f64 : PatFrag<(ops node:$ptr), @@ -206,15 +267,20 @@ def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{ def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>; def memopfsf64 : PatFrag<(ops node:$ptr), (f64 (memop node:$ptr))>; + +// 128-bit memop pattern fragments def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>; def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>; def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>; def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>; def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>; -// FIXME: move this to a more appropriate place after all AVX is done. +// 256-bit memop pattern fragments +def memopv32i8 : PatFrag<(ops node:$ptr), (v32i8 (memop node:$ptr))>; def memopv8f32 : PatFrag<(ops node:$ptr), (v8f32 (memop node:$ptr))>; def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>; +def memopv4i64 : PatFrag<(ops node:$ptr), (v4i64 (memop node:$ptr))>; +def memopv8i32 : PatFrag<(ops node:$ptr), (v8i32 (memop node:$ptr))>; // SSSE3 uses MMX registers for some instructions. They aren't aligned on a // 16-byte boundary. 
@@ -254,6 +320,7 @@ def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr), return false; }]>; +// 128-bit bitconvert pattern fragments def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>; def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>; def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>; @@ -261,6 +328,9 @@ def bc_v8i16 : PatFrag<(ops node:$in), (v8i16 (bitconvert node:$in))>; def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>; def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>; +// 256-bit bitconvert pattern fragments +def bc_v8i32 : PatFrag<(ops node:$in), (v8i32 (bitconvert node:$in))>; + def vzmovl_v2i64 : PatFrag<(ops node:$src), (bitconvert (v2i64 (X86vzmovl (v2i64 (scalar_to_vector (loadi64 node:$src))))))>; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index ce471eadd78b..5280940cf437 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -235,6 +235,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::BT64ri8, X86::BT64mi8, 1, 0 }, { X86::CALL32r, X86::CALL32m, 1, 0 }, { X86::CALL64r, X86::CALL64m, 1, 0 }, + { X86::WINCALL64r, X86::WINCALL64m, 1, 0 }, { X86::CMP16ri, X86::CMP16mi, 1, 0 }, { X86::CMP16ri8, X86::CMP16mi8, 1, 0 }, { X86::CMP16rr, X86::CMP16mr, 1, 0 }, @@ -667,46 +668,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) assert(AmbEntries.empty() && "Duplicated entries in unfolding maps?"); } -bool X86InstrInfo::isMoveInstr(const MachineInstr& MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const { - switch (MI.getOpcode()) { - default: - return false; - case X86::MOV8rr: - case X86::MOV8rr_NOREX: - case X86::MOV16rr: - case X86::MOV32rr: - case X86::MOV64rr: - case X86::MOV32rr_TC: - case X86::MOV64rr_TC: - - // FP Stack register class copies - case X86::MOV_Fp3232: case X86::MOV_Fp6464: case X86::MOV_Fp8080: - case X86::MOV_Fp3264: case X86::MOV_Fp3280: - case X86::MOV_Fp6432: case X86::MOV_Fp8032: - - // Note that MOVSSrr and MOVSDrr are not considered copies. FR32 and FR64 - // copies are done with FsMOVAPSrr and FsMOVAPDrr. 
- - case X86::FsMOVAPSrr: - case X86::FsMOVAPDrr: - case X86::MOVAPSrr: - case X86::MOVAPDrr: - case X86::MOVDQArr: - case X86::MMX_MOVQ64rr: - assert(MI.getNumOperands() >= 2 && - MI.getOperand(0).isReg() && - MI.getOperand(1).isReg() && - "invalid register-register move instruction"); - SrcReg = MI.getOperand(1).getReg(); - DstReg = MI.getOperand(0).getReg(); - SrcSubIdx = MI.getOperand(1).getSubReg(); - DstSubIdx = MI.getOperand(0).getSubReg(); - return true; - } -} - bool X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, @@ -827,7 +788,7 @@ static bool isFrameStoreOpcode(int Opcode) { unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { if (isFrameLoadOpcode(MI->getOpcode())) - if (isFrameOperand(MI, 1, FrameIndex)) + if (MI->getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex)) return MI->getOperand(0).getReg(); return 0; } @@ -866,7 +827,8 @@ bool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI, unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const { if (isFrameStoreOpcode(MI->getOpcode())) - if (isFrameOperand(MI, 0, FrameIndex)) + if (MI->getOperand(X86::AddrNumOperands).getSubReg() == 0 && + isFrameOperand(MI, 0, FrameIndex)) return MI->getOperand(X86::AddrNumOperands).getReg(); return 0; } @@ -1664,14 +1626,6 @@ bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { return !isPredicated(MI); } -// For purposes of branch analysis do not count FP_REG_KILL as a terminator. -static bool isBrAnalysisUnpredicatedTerminator(const MachineInstr *MI, - const X86InstrInfo &TII) { - if (MI->getOpcode() == X86::FP_REG_KILL) - return false; - return TII.isUnpredicatedTerminator(MI); -} - bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, @@ -1688,7 +1642,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, // Working from the bottom, when we see a non-terminator instruction, we're // done. - if (!isBrAnalysisUnpredicatedTerminator(I, *this)) + if (!isUnpredicatedTerminator(I)) break; // A terminator that isn't a branch can't easily be handled by this @@ -1891,6 +1845,33 @@ static bool isHReg(unsigned Reg) { return X86::GR8_ABCD_HRegClass.contains(Reg); } +// Try and copy between VR128/VR64 and GR64 registers. +static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg) { + // SrcReg(VR128) -> DestReg(GR64) + // SrcReg(VR64) -> DestReg(GR64) + // SrcReg(GR64) -> DestReg(VR128) + // SrcReg(GR64) -> DestReg(VR64) + + if (X86::GR64RegClass.contains(DestReg)) { + if (X86::VR128RegClass.contains(SrcReg)) { + // Copy from a VR128 register to a GR64 register. + return X86::MOVPQIto64rr; + } else if (X86::VR64RegClass.contains(SrcReg)) { + // Copy from a VR64 register to a GR64 register. + return X86::MOVSDto64rr; + } + } else if (X86::GR64RegClass.contains(SrcReg)) { + // Copy from a GR64 register to a VR128 register. + if (X86::VR128RegClass.contains(DestReg)) + return X86::MOV64toPQIrr; + // Copy from a GR64 register to a VR64 register. 
+ else if (X86::VR64RegClass.contains(DestReg)) + return X86::MOV64toSDrr; + } + + return 0; +} + void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, DebugLoc DL, unsigned DestReg, unsigned SrcReg, @@ -1915,6 +1896,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opc = X86::MOVAPSrr; else if (X86::VR64RegClass.contains(DestReg, SrcReg)) Opc = X86::MMX_MOVQ64rr; + else + Opc = CopyToFromAsymmetricReg(DestReg, SrcReg); if (Opc) { BuildMI(MBB, MI, DL, get(Opc), DestReg) @@ -2046,6 +2029,8 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { const MachineFunction &MF = *MBB.getParent(); + assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() && + "Stack slot too small for store"); bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF); unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); DebugLoc DL = MBB.findDebugLoc(MI); @@ -2130,8 +2115,9 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, CalleeFrameSize += SlotSize; BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill); } else { + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), - &X86::VR128RegClass, &RI); + RC, &RI); } } @@ -2161,8 +2147,9 @@ bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, if (!X86::VR128RegClass.contains(Reg) && !isWin64) { BuildMI(MBB, MI, DL, get(Opc), Reg); } else { + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), - &X86::VR128RegClass, &RI); + RC, &RI); } } return true; @@ -2423,10 +2410,17 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, Alignment = (*LoadMI->memoperands_begin())->getAlignment(); else switch (LoadMI->getOpcode()) { + case X86::AVX_SET0PSY: + case X86::AVX_SET0PDY: + Alignment = 32; + break; case X86::V_SET0PS: case X86::V_SET0PD: case X86::V_SET0PI: case X86::V_SETALLONES: + case X86::AVX_SET0PS: + case X86::AVX_SET0PD: + case X86::AVX_SET0PI: Alignment = 16; break; case X86::FsFLD0SD: @@ -2453,12 +2447,22 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, } else if (Ops.size() != 1) return NULL; + // Make sure the subregisters match. + // Otherwise we risk changing the size of the load. + if (LoadMI->getOperand(0).getSubReg() != MI->getOperand(Ops[0]).getSubReg()) + return NULL; + SmallVector<MachineOperand,X86::AddrNumOperands> MOs; switch (LoadMI->getOpcode()) { case X86::V_SET0PS: case X86::V_SET0PD: case X86::V_SET0PI: case X86::V_SETALLONES: + case X86::AVX_SET0PS: + case X86::AVX_SET0PD: + case X86::AVX_SET0PI: + case X86::AVX_SET0PSY: + case X86::AVX_SET0PDY: case X86::FsFLD0SD: case X86::FsFLD0SS: { // Folding a V_SET0P? or V_SETALLONES as a load, to ease register pressure. @@ -2485,10 +2489,13 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // Create a constant-pool entry. 
MachineConstantPool &MCP = *MF.getConstantPool(); const Type *Ty; - if (LoadMI->getOpcode() == X86::FsFLD0SS) + unsigned Opc = LoadMI->getOpcode(); + if (Opc == X86::FsFLD0SS) Ty = Type::getFloatTy(MF.getFunction()->getContext()); - else if (LoadMI->getOpcode() == X86::FsFLD0SD) + else if (Opc == X86::FsFLD0SD) Ty = Type::getDoubleTy(MF.getFunction()->getContext()); + else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY) + Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8); else Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4); const Constant *C = LoadMI->getOpcode() == X86::V_SETALLONES ? @@ -2991,561 +2998,6 @@ bool X86InstrInfo::isX86_64ExtendedReg(unsigned RegNo) { return false; } - -/// determineREX - Determine if the MachineInstr has to be encoded with a X86-64 -/// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand -/// size, and 3) use of X86-64 extended registers. -unsigned X86InstrInfo::determineREX(const MachineInstr &MI) { - unsigned REX = 0; - const TargetInstrDesc &Desc = MI.getDesc(); - - // Pseudo instructions do not need REX prefix byte. - if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo) - return 0; - if (Desc.TSFlags & X86II::REX_W) - REX |= 1 << 3; - - unsigned NumOps = Desc.getNumOperands(); - if (NumOps) { - bool isTwoAddr = NumOps > 1 && - Desc.getOperandConstraint(1, TOI::TIED_TO) != -1; - - // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix. - unsigned i = isTwoAddr ? 1 : 0; - for (unsigned e = NumOps; i != e; ++i) { - const MachineOperand& MO = MI.getOperand(i); - if (MO.isReg()) { - unsigned Reg = MO.getReg(); - if (isX86_64NonExtLowByteReg(Reg)) - REX |= 0x40; - } - } - - switch (Desc.TSFlags & X86II::FormMask) { - case X86II::MRMInitReg: - if (isX86_64ExtendedReg(MI.getOperand(0))) - REX |= (1 << 0) | (1 << 2); - break; - case X86II::MRMSrcReg: { - if (isX86_64ExtendedReg(MI.getOperand(0))) - REX |= 1 << 2; - i = isTwoAddr ? 2 : 1; - for (unsigned e = NumOps; i != e; ++i) { - const MachineOperand& MO = MI.getOperand(i); - if (isX86_64ExtendedReg(MO)) - REX |= 1 << 0; - } - break; - } - case X86II::MRMSrcMem: { - if (isX86_64ExtendedReg(MI.getOperand(0))) - REX |= 1 << 2; - unsigned Bit = 0; - i = isTwoAddr ? 2 : 1; - for (; i != NumOps; ++i) { - const MachineOperand& MO = MI.getOperand(i); - if (MO.isReg()) { - if (isX86_64ExtendedReg(MO)) - REX |= 1 << Bit; - Bit++; - } - } - break; - } - case X86II::MRM0m: case X86II::MRM1m: - case X86II::MRM2m: case X86II::MRM3m: - case X86II::MRM4m: case X86II::MRM5m: - case X86II::MRM6m: case X86II::MRM7m: - case X86II::MRMDestMem: { - unsigned e = (isTwoAddr ? X86::AddrNumOperands+1 : X86::AddrNumOperands); - i = isTwoAddr ? 1 : 0; - if (NumOps > e && isX86_64ExtendedReg(MI.getOperand(e))) - REX |= 1 << 2; - unsigned Bit = 0; - for (; i != e; ++i) { - const MachineOperand& MO = MI.getOperand(i); - if (MO.isReg()) { - if (isX86_64ExtendedReg(MO)) - REX |= 1 << Bit; - Bit++; - } - } - break; - } - default: { - if (isX86_64ExtendedReg(MI.getOperand(0))) - REX |= 1 << 0; - i = isTwoAddr ? 
2 : 1; - for (unsigned e = NumOps; i != e; ++i) { - const MachineOperand& MO = MI.getOperand(i); - if (isX86_64ExtendedReg(MO)) - REX |= 1 << 2; - } - break; - } - } - } - return REX; -} - -/// sizePCRelativeBlockAddress - This method returns the size of a PC -/// relative block address instruction -/// -static unsigned sizePCRelativeBlockAddress() { - return 4; -} - -/// sizeGlobalAddress - Give the size of the emission of this global address -/// -static unsigned sizeGlobalAddress(bool dword) { - return dword ? 8 : 4; -} - -/// sizeConstPoolAddress - Give the size of the emission of this constant -/// pool address -/// -static unsigned sizeConstPoolAddress(bool dword) { - return dword ? 8 : 4; -} - -/// sizeExternalSymbolAddress - Give the size of the emission of this external -/// symbol -/// -static unsigned sizeExternalSymbolAddress(bool dword) { - return dword ? 8 : 4; -} - -/// sizeJumpTableAddress - Give the size of the emission of this jump -/// table address -/// -static unsigned sizeJumpTableAddress(bool dword) { - return dword ? 8 : 4; -} - -static unsigned sizeConstant(unsigned Size) { - return Size; -} - -static unsigned sizeRegModRMByte(){ - return 1; -} - -static unsigned sizeSIBByte(){ - return 1; -} - -static unsigned getDisplacementFieldSize(const MachineOperand *RelocOp) { - unsigned FinalSize = 0; - // If this is a simple integer displacement that doesn't require a relocation. - if (!RelocOp) { - FinalSize += sizeConstant(4); - return FinalSize; - } - - // Otherwise, this is something that requires a relocation. - if (RelocOp->isGlobal()) { - FinalSize += sizeGlobalAddress(false); - } else if (RelocOp->isCPI()) { - FinalSize += sizeConstPoolAddress(false); - } else if (RelocOp->isJTI()) { - FinalSize += sizeJumpTableAddress(false); - } else { - llvm_unreachable("Unknown value to relocate!"); - } - return FinalSize; -} - -static unsigned getMemModRMByteSize(const MachineInstr &MI, unsigned Op, - bool IsPIC, bool Is64BitMode) { - const MachineOperand &Op3 = MI.getOperand(Op+3); - int DispVal = 0; - const MachineOperand *DispForReloc = 0; - unsigned FinalSize = 0; - - // Figure out what sort of displacement we have to handle here. - if (Op3.isGlobal()) { - DispForReloc = &Op3; - } else if (Op3.isCPI()) { - if (Is64BitMode || IsPIC) { - DispForReloc = &Op3; - } else { - DispVal = 1; - } - } else if (Op3.isJTI()) { - if (Is64BitMode || IsPIC) { - DispForReloc = &Op3; - } else { - DispVal = 1; - } - } else { - DispVal = 1; - } - - const MachineOperand &Base = MI.getOperand(Op); - const MachineOperand &IndexReg = MI.getOperand(Op+2); - - unsigned BaseReg = Base.getReg(); - - // Is a SIB byte needed? - if ((!Is64BitMode || DispForReloc || BaseReg != 0) && - IndexReg.getReg() == 0 && - (BaseReg == 0 || X86RegisterInfo::getX86RegNum(BaseReg) != N86::ESP)) { - if (BaseReg == 0) { // Just a displacement? - // Emit special case [disp32] encoding - ++FinalSize; - FinalSize += getDisplacementFieldSize(DispForReloc); - } else { - unsigned BaseRegNo = X86RegisterInfo::getX86RegNum(BaseReg); - if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) { - // Emit simple indirect register encoding... [EAX] f.e. 
- ++FinalSize; - // Be pessimistic and assume it's a disp32, not a disp8 - } else { - // Emit the most general non-SIB encoding: [REG+disp32] - ++FinalSize; - FinalSize += getDisplacementFieldSize(DispForReloc); - } - } - - } else { // We need a SIB byte, so start by outputting the ModR/M byte first - assert(IndexReg.getReg() != X86::ESP && - IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!"); - - bool ForceDisp32 = false; - if (BaseReg == 0 || DispForReloc) { - // Emit the normal disp32 encoding. - ++FinalSize; - ForceDisp32 = true; - } else { - ++FinalSize; - } - - FinalSize += sizeSIBByte(); - - // Do we need to output a displacement? - if (DispVal != 0 || ForceDisp32) { - FinalSize += getDisplacementFieldSize(DispForReloc); - } - } - return FinalSize; -} - - -static unsigned GetInstSizeWithDesc(const MachineInstr &MI, - const TargetInstrDesc *Desc, - bool IsPIC, bool Is64BitMode) { - - unsigned Opcode = Desc->Opcode; - unsigned FinalSize = 0; - - // Emit the lock opcode prefix as needed. - if (Desc->TSFlags & X86II::LOCK) ++FinalSize; - - // Emit segment override opcode prefix as needed. - switch (Desc->TSFlags & X86II::SegOvrMask) { - case X86II::FS: - case X86II::GS: - ++FinalSize; - break; - default: llvm_unreachable("Invalid segment!"); - case 0: break; // No segment override! - } - - // Emit the repeat opcode prefix as needed. - if ((Desc->TSFlags & X86II::Op0Mask) == X86II::REP) ++FinalSize; - - // Emit the operand size opcode prefix as needed. - if (Desc->TSFlags & X86II::OpSize) ++FinalSize; - - // Emit the address size opcode prefix as needed. - if (Desc->TSFlags & X86II::AdSize) ++FinalSize; - - bool Need0FPrefix = false; - switch (Desc->TSFlags & X86II::Op0Mask) { - case X86II::TB: // Two-byte opcode prefix - case X86II::T8: // 0F 38 - case X86II::TA: // 0F 3A - Need0FPrefix = true; - break; - case X86II::TF: // F2 0F 38 - ++FinalSize; - Need0FPrefix = true; - break; - case X86II::REP: break; // already handled. - case X86II::XS: // F3 0F - ++FinalSize; - Need0FPrefix = true; - break; - case X86II::XD: // F2 0F - ++FinalSize; - Need0FPrefix = true; - break; - case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB: - case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF: - ++FinalSize; - break; // Two-byte opcode prefix - default: llvm_unreachable("Invalid prefix!"); - case 0: break; // No prefix! - } - - if (Is64BitMode) { - // REX prefix - unsigned REX = X86InstrInfo::determineREX(MI); - if (REX) - ++FinalSize; - } - - // 0x0F escape code must be emitted just before the opcode. - if (Need0FPrefix) - ++FinalSize; - - switch (Desc->TSFlags & X86II::Op0Mask) { - case X86II::T8: // 0F 38 - ++FinalSize; - break; - case X86II::TA: // 0F 3A - ++FinalSize; - break; - case X86II::TF: // F2 0F 38 - ++FinalSize; - break; - } - - // If this is a two-address instruction, skip one of the register operands. - unsigned NumOps = Desc->getNumOperands(); - unsigned CurOp = 0; - if (NumOps > 1 && Desc->getOperandConstraint(1, TOI::TIED_TO) != -1) - CurOp++; - else if (NumOps > 2 && Desc->getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0) - // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32 - --NumOps; - - switch (Desc->TSFlags & X86II::FormMask) { - default: llvm_unreachable("Unknown FormMask value in X86 MachineCodeEmitter!"); - case X86II::Pseudo: - // Remember the current PC offset, this is the PIC relocation - // base address. 
- switch (Opcode) { - default: - break; - case TargetOpcode::INLINEASM: { - const MachineFunction *MF = MI.getParent()->getParent(); - const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo(); - FinalSize += TII.getInlineAsmLength(MI.getOperand(0).getSymbolName(), - *MF->getTarget().getMCAsmInfo()); - break; - } - case TargetOpcode::DBG_LABEL: - case TargetOpcode::EH_LABEL: - case TargetOpcode::DBG_VALUE: - break; - case TargetOpcode::IMPLICIT_DEF: - case TargetOpcode::KILL: - case X86::FP_REG_KILL: - break; - case X86::MOVPC32r: { - // This emits the "call" portion of this pseudo instruction. - ++FinalSize; - FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags)); - break; - } - } - CurOp = NumOps; - break; - case X86II::RawFrm: - ++FinalSize; - - if (CurOp != NumOps) { - const MachineOperand &MO = MI.getOperand(CurOp++); - if (MO.isMBB()) { - FinalSize += sizePCRelativeBlockAddress(); - } else if (MO.isGlobal()) { - FinalSize += sizeGlobalAddress(false); - } else if (MO.isSymbol()) { - FinalSize += sizeExternalSymbolAddress(false); - } else if (MO.isImm()) { - FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags)); - } else { - llvm_unreachable("Unknown RawFrm operand!"); - } - } - break; - - case X86II::AddRegFrm: - ++FinalSize; - ++CurOp; - - if (CurOp != NumOps) { - const MachineOperand &MO1 = MI.getOperand(CurOp++); - unsigned Size = X86II::getSizeOfImm(Desc->TSFlags); - if (MO1.isImm()) - FinalSize += sizeConstant(Size); - else { - bool dword = false; - if (Opcode == X86::MOV64ri) - dword = true; - if (MO1.isGlobal()) { - FinalSize += sizeGlobalAddress(dword); - } else if (MO1.isSymbol()) - FinalSize += sizeExternalSymbolAddress(dword); - else if (MO1.isCPI()) - FinalSize += sizeConstPoolAddress(dword); - else if (MO1.isJTI()) - FinalSize += sizeJumpTableAddress(dword); - } - } - break; - - case X86II::MRMDestReg: { - ++FinalSize; - FinalSize += sizeRegModRMByte(); - CurOp += 2; - if (CurOp != NumOps) { - ++CurOp; - FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags)); - } - break; - } - case X86II::MRMDestMem: { - ++FinalSize; - FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode); - CurOp += X86::AddrNumOperands + 1; - if (CurOp != NumOps) { - ++CurOp; - FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags)); - } - break; - } - - case X86II::MRMSrcReg: - ++FinalSize; - FinalSize += sizeRegModRMByte(); - CurOp += 2; - if (CurOp != NumOps) { - ++CurOp; - FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags)); - } - break; - - case X86II::MRMSrcMem: { - ++FinalSize; - FinalSize += getMemModRMByteSize(MI, CurOp+1, IsPIC, Is64BitMode); - CurOp += X86::AddrNumOperands + 1; - if (CurOp != NumOps) { - ++CurOp; - FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags)); - } - break; - } - - case X86II::MRM0r: case X86II::MRM1r: - case X86II::MRM2r: case X86II::MRM3r: - case X86II::MRM4r: case X86II::MRM5r: - case X86II::MRM6r: case X86II::MRM7r: - ++FinalSize; - if (Desc->getOpcode() == X86::LFENCE || - Desc->getOpcode() == X86::MFENCE) { - // Special handling of lfence and mfence; - FinalSize += sizeRegModRMByte(); - } else if (Desc->getOpcode() == X86::MONITOR || - Desc->getOpcode() == X86::MWAIT) { - // Special handling of monitor and mwait. - FinalSize += sizeRegModRMByte() + 1; // +1 for the opcode. 
- } else { - ++CurOp; - FinalSize += sizeRegModRMByte(); - } - - if (CurOp != NumOps) { - const MachineOperand &MO1 = MI.getOperand(CurOp++); - unsigned Size = X86II::getSizeOfImm(Desc->TSFlags); - if (MO1.isImm()) - FinalSize += sizeConstant(Size); - else { - bool dword = false; - if (Opcode == X86::MOV64ri32) - dword = true; - if (MO1.isGlobal()) { - FinalSize += sizeGlobalAddress(dword); - } else if (MO1.isSymbol()) - FinalSize += sizeExternalSymbolAddress(dword); - else if (MO1.isCPI()) - FinalSize += sizeConstPoolAddress(dword); - else if (MO1.isJTI()) - FinalSize += sizeJumpTableAddress(dword); - } - } - break; - - case X86II::MRM0m: case X86II::MRM1m: - case X86II::MRM2m: case X86II::MRM3m: - case X86II::MRM4m: case X86II::MRM5m: - case X86II::MRM6m: case X86II::MRM7m: { - - ++FinalSize; - FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode); - CurOp += X86::AddrNumOperands; - - if (CurOp != NumOps) { - const MachineOperand &MO = MI.getOperand(CurOp++); - unsigned Size = X86II::getSizeOfImm(Desc->TSFlags); - if (MO.isImm()) - FinalSize += sizeConstant(Size); - else { - bool dword = false; - if (Opcode == X86::MOV64mi32) - dword = true; - if (MO.isGlobal()) { - FinalSize += sizeGlobalAddress(dword); - } else if (MO.isSymbol()) - FinalSize += sizeExternalSymbolAddress(dword); - else if (MO.isCPI()) - FinalSize += sizeConstPoolAddress(dword); - else if (MO.isJTI()) - FinalSize += sizeJumpTableAddress(dword); - } - } - break; - - case X86II::MRM_C1: - case X86II::MRM_C8: - case X86II::MRM_C9: - case X86II::MRM_E8: - case X86II::MRM_F0: - FinalSize += 2; - break; - } - - case X86II::MRMInitReg: - ++FinalSize; - // Duplicate register, used by things like MOV8r0 (aka xor reg,reg). - FinalSize += sizeRegModRMByte(); - ++CurOp; - break; - } - - if (!Desc->isVariadic() && CurOp != NumOps) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Cannot determine size: " << MI; - report_fatal_error(Msg.str()); - } - - - return FinalSize; -} - - -unsigned X86InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { - const TargetInstrDesc &Desc = MI->getDesc(); - bool IsPIC = TM.getRelocationModel() == Reloc::PIC_; - bool Is64BitMode = TM.getSubtargetImpl()->is64Bit(); - unsigned Size = GetInstSizeWithDesc(*MI, &Desc, IsPIC, Is64BitMode); - if (Desc.getOpcode() == X86::MOVPC32r) - Size += GetInstSizeWithDesc(*MI, &get(X86::POP32r), IsPIC, Is64BitMode); - return Size; -} - /// getGlobalBaseReg - Return a virtual register initialized with the /// the global base register value. Output instructions required to /// initialize the register in the function entry block, if necessary. @@ -3573,7 +3025,7 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const { // that we don't include here. We don't want to replace instructions selected // by intrinsics. 
static const unsigned ReplaceableInstrs[][3] = { - //PackedInt PackedSingle PackedDouble + //PackedSingle PackedDouble PackedInt { X86::MOVAPSmr, X86::MOVAPDmr, X86::MOVDQAmr }, { X86::MOVAPSrm, X86::MOVAPDrm, X86::MOVDQArm }, { X86::MOVAPSrr, X86::MOVAPDrr, X86::MOVDQArr }, @@ -3589,6 +3041,22 @@ static const unsigned ReplaceableInstrs[][3] = { { X86::V_SET0PS, X86::V_SET0PD, X86::V_SET0PI }, { X86::XORPSrm, X86::XORPDrm, X86::PXORrm }, { X86::XORPSrr, X86::XORPDrr, X86::PXORrr }, + // AVX 128-bit support + { X86::VMOVAPSmr, X86::VMOVAPDmr, X86::VMOVDQAmr }, + { X86::VMOVAPSrm, X86::VMOVAPDrm, X86::VMOVDQArm }, + { X86::VMOVAPSrr, X86::VMOVAPDrr, X86::VMOVDQArr }, + { X86::VMOVUPSmr, X86::VMOVUPDmr, X86::VMOVDQUmr }, + { X86::VMOVUPSrm, X86::VMOVUPDrm, X86::VMOVDQUrm }, + { X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr }, + { X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNrm }, + { X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNrr }, + { X86::VANDPSrm, X86::VANDPDrm, X86::VPANDrm }, + { X86::VANDPSrr, X86::VANDPDrr, X86::VPANDrr }, + { X86::VORPSrm, X86::VORPDrm, X86::VPORrm }, + { X86::VORPSrr, X86::VORPDrr, X86::VPORrr }, + { X86::AVX_SET0PS, X86::AVX_SET0PD, X86::AVX_SET0PI }, + { X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm }, + { X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr }, }; // FIXME: Some shuffle and unpack instructions have equivalents in different @@ -3627,7 +3095,7 @@ namespace { /// global base register for x86-32. struct CGBR : public MachineFunctionPass { static char ID; - CGBR() : MachineFunctionPass(&ID) {} + CGBR() : MachineFunctionPass(ID) {} virtual bool runOnMachineFunction(MachineFunction &MF) { const X86TargetMachine *TM = diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index ad0217adb475..f33620641e88 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -311,6 +311,12 @@ namespace X86II { MRM_F0 = 40, MRM_F8 = 41, MRM_F9 = 42, + + /// RawFrmImm16 - This is used for CALL FAR instructions, which have two + /// immediates, the first of which is a 16 or 32-bit immediate (specified by + /// the imm encoding) and the second is a 16-bit fixed value. In the AMD + /// manual, this operand is described as pntr16:32 and pntr16:16 + RawFrmImm16 = 43, FormMask = 63, @@ -439,27 +445,27 @@ namespace X86II { //===------------------------------------------------------------------===// // VEX - The opcode prefix used by AVX instructions - VEX = 1ULL << 32, + VEX = 1U << 0, // VEX_W - Has a opcode specific functionality, but is used in the same // way as REX_W is for regular SSE instructions. - VEX_W = 1ULL << 33, + VEX_W = 1U << 1, // VEX_4V - Used to specify an additional AVX/SSE register. Several 2 // address instructions in SSE are represented as 3 address ones in AVX // and the additional register is encoded in VEX_VVVV prefix. - VEX_4V = 1ULL << 34, + VEX_4V = 1U << 2, // VEX_I8IMM - Specifies that the last register used in a AVX instruction, // must be encoded in the i8 immediate field. This usually happens in // instructions with 4 operands. - VEX_I8IMM = 1ULL << 35, + VEX_I8IMM = 1U << 3, // VEX_L - Stands for a bit in the VEX opcode prefix meaning the current // instruction uses 256-bit wide registers. This is usually auto detected if // a VR256 register is used, but some AVX instructions also have this field // marked when using a f256 memory references. 
- VEX_L = 1ULL << 36 + VEX_L = 1U << 4 }; // getBaseOpcodeFor - This function returns the "base" X86 opcode for the @@ -522,11 +528,12 @@ namespace X86II { case X86II::AddRegFrm: case X86II::MRMDestReg: case X86II::MRMSrcReg: + case X86II::RawFrmImm16: return -1; case X86II::MRMDestMem: return 0; case X86II::MRMSrcMem: { - bool HasVEX_4V = TSFlags & X86II::VEX_4V; + bool HasVEX_4V = (TSFlags >> 32) & X86II::VEX_4V; unsigned FirstMemOp = 1; if (HasVEX_4V) ++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV). @@ -610,12 +617,6 @@ public: /// virtual const X86RegisterInfo &getRegisterInfo() const { return RI; } - /// Return true if the instruction is a register to register move and return - /// the source and dest operands and their sub-register indices by reference. - virtual bool isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const; - /// isCoalescableExtInstr - Return true if the instruction is a "coalescable" /// extension instruction. That is, it's like a copy where it's legal for the /// source to overlap the destination. e.g. X86::MOVSX64rr32. If this returns @@ -826,16 +827,11 @@ public: if (!MO.isReg()) return false; return isX86_64ExtendedReg(MO.getReg()); } - static unsigned determineREX(const MachineInstr &MI); /// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended (r8 or /// higher) register? e.g. r8, xmm8, xmm13, etc. static bool isX86_64ExtendedReg(unsigned RegNo); - /// GetInstSize - Returns the size of the specified MachineInstr. - /// - virtual unsigned GetInstSizeInBytes(const MachineInstr *MI) const; - /// getGlobalBaseReg - Return a virtual register initialized with the /// the global base register value. Output instructions required to /// initialize the register in the function entry block, if necessary. diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 1efef5a80b1b..09b7721a621d 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -80,6 +80,21 @@ def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>; +def SDT_X86MEMBARRIER : SDTypeProfile<0, 0, []>; +def SDT_X86MEMBARRIERNoSSE : SDTypeProfile<0, 1, [SDTCisInt<0>]>; + +def X86MemBarrier : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIER, + [SDNPHasChain]>; +def X86MemBarrierNoSSE : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIERNoSSE, + [SDNPHasChain]>; +def X86MFence : SDNode<"X86ISD::MFENCE", SDT_X86MEMBARRIER, + [SDNPHasChain]>; +def X86SFence : SDNode<"X86ISD::SFENCE", SDT_X86MEMBARRIER, + [SDNPHasChain]>; +def X86LFence : SDNode<"X86ISD::LFENCE", SDT_X86MEMBARRIER, + [SDNPHasChain]>; + + def X86bsf : SDNode<"X86ISD::BSF", SDTUnaryArithWithFlags>; def X86bsr : SDNode<"X86ISD::BSR", SDTUnaryArithWithFlags>; def X86shld : SDNode<"X86ISD::SHLD", SDTIntShiftDOp>; @@ -222,7 +237,7 @@ def i16mem : X86MemOperand<"printi16mem">; def i32mem : X86MemOperand<"printi32mem">; def i64mem : X86MemOperand<"printi64mem">; def i128mem : X86MemOperand<"printi128mem">; -//def i256mem : X86MemOperand<"printi256mem">; +def i256mem : X86MemOperand<"printi256mem">; def f32mem : X86MemOperand<"printf32mem">; def f64mem : X86MemOperand<"printf64mem">; def f80mem : X86MemOperand<"printf80mem">; @@ -333,15 +348,21 @@ def tls32addr : ComplexPattern<i32, 5, "SelectTLSADDRAddr", // X86 Instruction Predicate Definitions. 
def HasCMov : Predicate<"Subtarget->hasCMov()">; def NoCMov : Predicate<"!Subtarget->hasCMov()">; -def HasMMX : Predicate<"Subtarget->hasMMX()">; -def HasSSE1 : Predicate<"Subtarget->hasSSE1()">; -def HasSSE2 : Predicate<"Subtarget->hasSSE2()">; -def HasSSE3 : Predicate<"Subtarget->hasSSE3()">; -def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">; -def HasSSE41 : Predicate<"Subtarget->hasSSE41()">; -def HasSSE42 : Predicate<"Subtarget->hasSSE42()">; -def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">; + +// FIXME: temporary hack to let codegen assert or generate poor code in case +// no AVX version of the desired intructions is present, this is better for +// incremental dev (without fallbacks it's easier to spot what's missing) +def HasMMX : Predicate<"Subtarget->hasMMX() && !Subtarget->hasAVX()">; +def HasSSE1 : Predicate<"Subtarget->hasSSE1() && !Subtarget->hasAVX()">; +def HasSSE2 : Predicate<"Subtarget->hasSSE2() && !Subtarget->hasAVX()">; +def HasSSE3 : Predicate<"Subtarget->hasSSE3() && !Subtarget->hasAVX()">; +def HasSSSE3 : Predicate<"Subtarget->hasSSSE3() && !Subtarget->hasAVX()">; +def HasSSE41 : Predicate<"Subtarget->hasSSE41() && !Subtarget->hasAVX()">; +def HasSSE42 : Predicate<"Subtarget->hasSSE42() && !Subtarget->hasAVX()">; +def HasSSE4A : Predicate<"Subtarget->hasSSE4A() && !Subtarget->hasAVX()">; + def HasAVX : Predicate<"Subtarget->hasAVX()">; +def HasCLMUL : Predicate<"Subtarget->hasCLMUL()">; def HasFMA3 : Predicate<"Subtarget->hasFMA3()">; def HasFMA4 : Predicate<"Subtarget->hasFMA4()">; def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">; @@ -393,9 +414,7 @@ def X86_COND_O : PatLeaf<(i8 13)>; def X86_COND_P : PatLeaf<(i8 14)>; // alt. COND_PE def X86_COND_S : PatLeaf<(i8 15)>; -def immSext8 : PatLeaf<(imm), [{ - return N->getSExtValue() == (int8_t)N->getSExtValue(); -}]>; +def immSext8 : PatLeaf<(imm), [{ return immSext8(N); }]>; def i16immSExt8 : PatLeaf<(i16 immSext8)>; def i32immSExt8 : PatLeaf<(i32 immSext8)>; @@ -559,9 +578,10 @@ def VASTART_SAVE_XMM_REGS : I<0, Pseudo, // The main point of having separate instruction are extra unmodelled effects // (compared to ordinary calls) like stack pointer change. 
-def MINGW_ALLOCA : I<0, Pseudo, (outs), (ins), - "# dynamic stack allocation", - [(X86MingwAlloca)]>; +let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in + def MINGW_ALLOCA : I<0, Pseudo, (outs), (ins), + "# dynamic stack allocation", + [(X86MingwAlloca)]>; } // Nop @@ -574,10 +594,14 @@ let neverHasSideEffects = 1 in { } // Trap -def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>; -def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", []>; +let Uses = [EFLAGS] in { + def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>; +} +def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", + [(int_x86_int (i8 3))]>; // FIXME: need to make sure that "int $3" matches int3 -def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", []>; +def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", + [(int_x86_int imm:$trap)]>; def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", []>, OpSize; def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l}", []>; @@ -650,16 +674,16 @@ let Uses = [ECX], isBranch = 1, isTerminator = 1 in // Indirect branches let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst", - [(brind GR32:$dst)]>; + [(brind GR32:$dst)]>, Requires<[In32BitMode]>; def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst", - [(brind (loadi32 addr:$dst))]>; + [(brind (loadi32 addr:$dst))]>, Requires<[In32BitMode]>; - def FARJMP16i : Iseg16<0xEA, RawFrm, (outs), - (ins i16imm:$seg, i16imm:$off), - "ljmp{w}\t$seg, $off", []>, OpSize; - def FARJMP32i : Iseg32<0xEA, RawFrm, (outs), - (ins i16imm:$seg, i32imm:$off), - "ljmp{l}\t$seg, $off", []>; + def FARJMP16i : Iseg16<0xEA, RawFrmImm16, (outs), + (ins i16imm:$off, i16imm:$seg), + "ljmp{w}\t{$seg, $off|$off, $seg}", []>, OpSize; + def FARJMP32i : Iseg32<0xEA, RawFrmImm16, (outs), + (ins i32imm:$off, i16imm:$seg), + "ljmp{l}\t{$seg, $off|$off, $seg}", []>; def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst), "ljmp{w}\t{*}$dst", []>, OpSize; @@ -670,9 +694,9 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { // Loop instructions -def LOOP : I<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>; -def LOOPE : I<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", []>; -def LOOPNE : I<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", []>; +def LOOP : Ii8PCRel<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>; +def LOOPE : Ii8PCRel<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", []>; +def LOOPNE : Ii8PCRel<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", []>; //===----------------------------------------------------------------------===// // Call Instructions... 
@@ -695,12 +719,12 @@ let isCall = 1 in def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops), "call\t{*}$dst", [(X86call (loadi32 addr:$dst))]>; - def FARCALL16i : Iseg16<0x9A, RawFrm, (outs), - (ins i16imm:$seg, i16imm:$off), - "lcall{w}\t$seg, $off", []>, OpSize; - def FARCALL32i : Iseg32<0x9A, RawFrm, (outs), - (ins i16imm:$seg, i32imm:$off), - "lcall{l}\t$seg, $off", []>; + def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs), + (ins i16imm:$off, i16imm:$seg), + "lcall{w}\t{$seg, $off|$off, $seg}", []>, OpSize; + def FARCALL32i : Iseg32<0x9A, RawFrmImm16, (outs), + (ins i32imm:$off, i16imm:$seg), + "lcall{l}\t{$seg, $off|$off, $seg}", []>; def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaque32mem:$dst), "lcall{w}\t{*}$dst", []>, OpSize; @@ -721,7 +745,8 @@ def ENTER : I<0xC8, RawFrm, (outs), (ins i16imm:$len, i8imm:$lvl), // Tail call stuff. -let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, + isCodeGenOnly = 1 in let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, @@ -756,7 +781,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in // let Defs = [EBP, ESP], Uses = [EBP, ESP], mayLoad = 1, neverHasSideEffects=1 in def LEAVE : I<0xC9, RawFrm, - (outs), (ins), "leave", []>; + (outs), (ins), "leave", []>, Requires<[In32BitMode]>; def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS; @@ -934,7 +959,7 @@ def SYSRET : I<0x07, RawFrm, def SYSENTER : I<0x34, RawFrm, (outs), (ins), "sysenter", []>, TB; def SYSEXIT : I<0x35, RawFrm, - (outs), (ins), "sysexit", []>, TB; + (outs), (ins), "sysexit", []>, TB, Requires<[In32BitMode]>; def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", []>; @@ -1025,17 +1050,23 @@ def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src), /// moffs8, moffs16 and moffs32 versions of moves. The immediate is a /// 32-bit offset from the PC. These are only valid in x86-32 mode. def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src), - "mov{b}\t{$src, %al|%al, $src}", []>; + "mov{b}\t{$src, %al|%al, $src}", []>, + Requires<[In32BitMode]>; def MOV16o16a : Ii32 <0xA1, RawFrm, (outs), (ins offset16:$src), - "mov{w}\t{$src, %ax|%ax, $src}", []>, OpSize; + "mov{w}\t{$src, %ax|%ax, $src}", []>, OpSize, + Requires<[In32BitMode]>; def MOV32o32a : Ii32 <0xA1, RawFrm, (outs), (ins offset32:$src), - "mov{l}\t{$src, %eax|%eax, $src}", []>; + "mov{l}\t{$src, %eax|%eax, $src}", []>, + Requires<[In32BitMode]>; def MOV8ao8 : Ii32 <0xA2, RawFrm, (outs offset8:$dst), (ins), - "mov{b}\t{%al, $dst|$dst, %al}", []>; + "mov{b}\t{%al, $dst|$dst, %al}", []>, + Requires<[In32BitMode]>; def MOV16ao16 : Ii32 <0xA3, RawFrm, (outs offset16:$dst), (ins), - "mov{w}\t{%ax, $dst|$dst, %ax}", []>, OpSize; + "mov{w}\t{%ax, $dst|$dst, %ax}", []>, OpSize, + Requires<[In32BitMode]>; def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins), - "mov{l}\t{%eax, $dst|$dst, %eax}", []>; + "mov{l}\t{%eax, $dst|$dst, %eax}", []>, + Requires<[In32BitMode]>; // Moves to and from segment registers def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src), @@ -1087,6 +1118,7 @@ def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), [(store GR32:$src, addr:$dst)]>; /// Versions of MOV32rr, MOV32rm, and MOV32mr for i32mem_TC and GR32_TC. 
+let isCodeGenOnly = 1 in { let neverHasSideEffects = 1 in def MOV32rr_TC : I<0x89, MRMDestReg, (outs GR32_TC:$dst), (ins GR32_TC:$src), "mov{l}\t{$src, $dst|$dst, $src}", []>; @@ -1101,10 +1133,12 @@ let mayStore = 1 in def MOV32mr_TC : I<0x89, MRMDestMem, (outs), (ins i32mem_TC:$dst, GR32_TC:$src), "mov{l}\t{$src, $dst|$dst, $src}", []>; +} // Versions of MOV8rr, MOV8mr, and MOV8rm that use i8mem_NOREX and GR8_NOREX so // that they can be used for copying and storing h registers, which can't be // encoded when a REX prefix is present. +let isCodeGenOnly = 1 in { let neverHasSideEffects = 1 in def MOV8rr_NOREX : I<0x88, MRMDestReg, (outs GR8_NOREX:$dst), (ins GR8_NOREX:$src), @@ -1118,6 +1152,7 @@ let mayLoad = 1, def MOV8rm_NOREX : I<0x8A, MRMSrcMem, (outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src), "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>; +} // Moves to and from debug registers def MOV32rd : I<0x21, MRMDestReg, (outs GR32:$dst), (ins DEBUG_REG:$src), @@ -1137,7 +1172,7 @@ def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src), // Extra precision multiplication -// AL is really implied by AX, by the registers in Defs must match the +// AL is really implied by AX, but the registers in Defs must match the // SDNode results (i8, i32). let Defs = [AL,EFLAGS,AX], Uses = [AL] in def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src", @@ -3895,6 +3930,20 @@ def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr), // Atomic support // +// Memory barriers + +// TODO: Get this to fold the constant into the instruction. +def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero), + "lock\n\t" + "or{l}\t{$zero, $dst|$dst, $zero}", + []>, Requires<[In32BitMode]>, LOCK; + +let hasSideEffects = 1 in { +def Int_MemBarrier : I<0, Pseudo, (outs), (ins), + "#MEMBARRIER", + [(X86MemBarrier)]>, Requires<[HasSSE2]>; +} + // Atomic swap. These are just normal xchg instructions. But since a memory // operand is referenced, the atomicity is ensured. 
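Among the additions above are the first explicit memory-barrier definitions: a codegen-only pseudo (Int_MemBarrier) that gives the X86MemBarrier node something to select when SSE2 is available, and a locked OR to a stack slot (OR32mrLocked), the kind of sequence usable as a full fence on processors without SSE2. A rough source-level illustration of the two strategies, not part of the patch (the inline asm is valid for 32-bit x86 only; both function names are illustrative):

    #include <atomic>

    void full_fence_portable() {
      // What the barrier guarantees, expressed in standard C++; with SSE2 the
      // backend can use a dedicated fence instruction for this.
      std::atomic_thread_fence(std::memory_order_seq_cst);
    }

    void full_fence_locked_or() {
      // The OR32mrLocked idea: a locked read-modify-write of the top of the
      // stack acts as a full barrier even on pre-SSE2 processors.
      asm volatile("lock; orl $0, (%%esp)" ::: "memory", "cc");
    }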
let Constraints = "$val = $dst" in { @@ -4928,6 +4977,12 @@ include "X86Instr64bit.td" include "X86InstrFragmentsSIMD.td" //===----------------------------------------------------------------------===// +// FMA - Fused Multiply-Add support (requires FMA) +//===----------------------------------------------------------------------===// + +include "X86InstrFMA.td" + +//===----------------------------------------------------------------------===// // XMM Floating point support (requires SSE / SSE2) //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index 6cf7ac83620e..11d4179534dc 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -164,7 +164,7 @@ let neverHasSideEffects = 1 in def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst), (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", []>; -def MMX_MOVFR642Qrr: SSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins FR64:$src), +def MMX_MOVFR642Qrr: SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins FR64:$src), "movdq2q\t{$src, $dst|$dst, $src}", []>; def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src), diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index ebe161b46bdc..f5466f83f519 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -142,7 +142,7 @@ multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC, !if(Is2Addr, !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse", + [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_", !strconcat(SSEVer, !strconcat("_", !strconcat(OpcodeStr, FPSizeStr)))) RC:$src1, RC:$src2))], d>; @@ -150,7 +150,7 @@ multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC, !if(Is2Addr, !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse", + [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_", !strconcat(SSEVer, !strconcat("_", !strconcat(OpcodeStr, FPSizeStr)))) RC:$src1, (mem_frag addr:$src2)))], d>; @@ -256,10 +256,10 @@ def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), let isAsmParserOnly = 1 in { def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src), "movss\t{$src, $dst|$dst, $src}", - [(store FR32:$src, addr:$dst)]>, XS, VEX_4V; + [(store FR32:$src, addr:$dst)]>, XS, VEX; def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), "movsd\t{$src, $dst|$dst, $src}", - [(store FR64:$src, addr:$dst)]>, XD, VEX_4V; + [(store FR64:$src, addr:$dst)]>, XD, VEX; } // Extract and store. 
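The sse12_fp_packed_int change above drops the hard-coded "sse" from the intrinsic-name prefix so the same multiclass can also name the AVX 256-bit intrinsics by passing "avx" (and a "_256" size suffix) from the caller. A toy reconstruction of the !nameconcat/!strconcat chain in plain C++, not LLVM code; intrinsicName is a hypothetical helper:

    #include <iostream>
    #include <string>

    // Mirrors how the multiclass composes the intrinsic record name:
    // "int_x86_" + SSEVer + "_" + OpcodeStr + FPSizeStr.
    std::string intrinsicName(const std::string &SSEVer,
                              const std::string &OpcodeStr,
                              const std::string &FPSizeStr) {
      return "int_x86_" + SSEVer + "_" + OpcodeStr + FPSizeStr;
    }

    int main() {
      std::cout << intrinsicName("sse", "add", "_ps") << "\n";      // int_x86_sse_add_ps
      std::cout << intrinsicName("avx", "max", "_ps_256") << "\n";  // int_x86_avx_max_ps_256
      return 0;
    }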
@@ -340,6 +340,15 @@ def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movupd\t{$src, $dst|$dst, $src}", [(store (v4f64 VR256:$src), addr:$dst)]>, VEX; } + +def : Pat<(int_x86_avx_loadu_ps_256 addr:$src), (VMOVUPSYrm addr:$src)>; +def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src), + (VMOVUPSYmr addr:$dst, VR256:$src)>; + +def : Pat<(int_x86_avx_loadu_pd_256 addr:$src), (VMOVUPDYrm addr:$src)>; +def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src), + (VMOVUPDYmr addr:$dst, VR256:$src)>; + def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [(alignedstore (v4f32 VR128:$src), addr:$dst)]>; @@ -516,6 +525,14 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>; } +multiclass sse12_cvt_s_np<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, + X86MemOperand x86memop, string asm> { + def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, + []>; + def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, + []>; +} + multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag, string asm, Domain d> { @@ -526,35 +543,58 @@ multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, } multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, - SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag, - string asm> { + X86MemOperand x86memop, string asm> { def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src), - asm, []>; + !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>; def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), - (ins DstRC:$src1, x86memop:$src), asm, []>; + (ins DstRC:$src1, x86memop:$src), + !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>; } let isAsmParserOnly = 1 in { -defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, - "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX; -defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64, - "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX; -defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32, - "cvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}">, XS, - VEX_4V; -defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32, - "cvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}">, XD, - VEX_4V; +defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, + "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX; +defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32, + "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX, + VEX_W; +defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64, + "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX; +defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64, + "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, + VEX, VEX_W; + +// The assembler can recognize rr 64-bit instructions by seeing a rxx +// register, but the same isn't true when only using memory operands, +// provide other assembly "l" and "q" forms to address this explicitly +// where appropriate to do so. 
+defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss">, XS, + VEX_4V; +defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">, XS, + VEX_4V, VEX_W; +defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">, XD, + VEX_4V; +defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, XD, + VEX_4V; +defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD, + VEX_4V, VEX_W; } defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, "cvttss2si\t{$src, $dst|$dst, $src}">, XS; +defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32, + "cvttss2si{q}\t{$src, $dst|$dst, $src}">, XS, REX_W; defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64, "cvttsd2si\t{$src, $dst|$dst, $src}">, XD; +defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64, + "cvttsd2si{q}\t{$src, $dst|$dst, $src}">, XD, REX_W; defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32, "cvtsi2ss\t{$src, $dst|$dst, $src}">, XS; +defm CVTSI2SS64 : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64, + "cvtsi2ss{q}\t{$src, $dst|$dst, $src}">, XS, REX_W; defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32, "cvtsi2sd\t{$src, $dst|$dst, $src}">, XD; +defm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64, + "cvtsi2sd{q}\t{$src, $dst|$dst, $src}">, XD, REX_W; // Conversion Instructions Intrinsics - Match intrinsics which expect MM // and/or XMM operand(s). @@ -570,10 +610,12 @@ multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag, string asm> { - def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, - [(set DstRC:$dst, (Int SrcRC:$src))]>; - def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, - [(set DstRC:$dst, (Int (ld_frag addr:$src)))]>; + def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), + [(set DstRC:$dst, (Int SrcRC:$src))]>; + def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), + [(set DstRC:$dst, (Int (ld_frag addr:$src)))]>; } multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC, @@ -588,35 +630,79 @@ multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC, multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop, - PatFrag ld_frag, string asm> { + PatFrag ld_frag, string asm, bit Is2Addr = 1> { def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2), - asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))]>; + !if(Is2Addr, + !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))]>; def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), - (ins DstRC:$src1, x86memop:$src2), asm, + (ins DstRC:$src1, x86memop:$src2), + !if(Is2Addr, + !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))]>; } let isAsmParserOnly = 1 in { defm Int_VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si, - f32mem, load, "cvtss2si\t{$src, $dst|$dst, $src}">, XS, - 
VEX; + f32mem, load, "cvtss2si">, XS, VEX; + defm Int_VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, + int_x86_sse_cvtss2si64, f32mem, load, "cvtss2si">, + XS, VEX, VEX_W; defm Int_VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si, - f128mem, load, "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, - VEX; + f128mem, load, "cvtsd2si">, XD, VEX; + defm Int_VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, + int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si">, + XD, VEX, VEX_W; + + // FIXME: The asm matcher has a hack to ignore instructions with _Int and Int_ + // Get rid of this hack or rename the intrinsics, there are several + // intructions that only match with the intrinsic form, why create duplicates + // to let them be recognized by the assembler? + defm VCVTSD2SI_alt : sse12_cvt_s_np<0x2D, FR64, GR32, f64mem, + "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX; + defm VCVTSD2SI64 : sse12_cvt_s_np<0x2D, FR64, GR64, f64mem, + "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_W; } defm Int_CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si, - f32mem, load, "cvtss2si\t{$src, $dst|$dst, $src}">, XS; + f32mem, load, "cvtss2si">, XS; +defm Int_CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64, + f32mem, load, "cvtss2si{q}">, XS, REX_W; defm Int_CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si, - f128mem, load, "cvtsd2si\t{$src, $dst|$dst, $src}">, XD; + f128mem, load, "cvtsd2si">, XD; +defm Int_CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64, + f128mem, load, "cvtsd2si">, XD, REX_W; +defm CVTSD2SI64 : sse12_cvt_s_np<0x2D, VR128, GR64, f64mem, "cvtsd2si{q}">, XD, + REX_W; + +let isAsmParserOnly = 1 in { + defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, + int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss", 0>, XS, VEX_4V; + defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, + int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss", 0>, XS, VEX_4V, + VEX_W; + defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, + int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd", 0>, XD, VEX_4V; + defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, + int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd", 0>, XD, + VEX_4V, VEX_W; +} let Constraints = "$src1 = $dst" in { defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, int_x86_sse_cvtsi2ss, i32mem, loadi32, - "cvtsi2ss\t{$src2, $dst|$dst, $src2}">, XS; + "cvtsi2ss">, XS; + defm Int_CVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, + int_x86_sse_cvtsi642ss, i64mem, loadi64, + "cvtsi2ss{q}">, XS, REX_W; defm Int_CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, int_x86_sse2_cvtsi2sd, i32mem, loadi32, - "cvtsi2ss\t{$src2, $dst|$dst, $src2}">, XD; + "cvtsi2sd">, XD; + defm Int_CVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, + int_x86_sse2_cvtsi642sd, i64mem, loadi64, + "cvtsi2sd">, XD, REX_W; } // Instructions below don't have an AVX form. 
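The block above adds the GR64-producing variants of the scalar conversions and their intrinsic forms. At the source level these correspond to the convert intrinsics below: cvtss2si honours the current rounding mode, cvttss2si (the extra "t" is for truncate) always rounds toward zero, and the {q} forms are the REX.W/VEX.W encodings that yield a 64-bit result. A small sketch, not part of the patch:

    #include <xmmintrin.h>   // _mm_set_ss, _mm_cvtss_si32, _mm_cvttss_si32
    #include <cstdio>

    int main() {
      __m128 v = _mm_set_ss(2.75f);
      std::printf("cvtss2si : %d\n", _mm_cvtss_si32(v));   // 3 under round-to-nearest
      std::printf("cvttss2si: %d\n", _mm_cvttss_si32(v));  // 2, always truncates
    #if defined(__x86_64__)
      // Selects a 64-bit (REX.W) form such as CVTTSS2SI64.
      std::printf("cvttss2si{q}: %lld\n", (long long)_mm_cvttss_si64(v));
    #endif
      return 0;
    }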
@@ -645,35 +731,48 @@ let Constraints = "$src1 = $dst" in { /// SSE 1 Only // Aliases for intrinsics -let isAsmParserOnly = 1, Pattern = []<dag> in { -defm Int_VCVTTSS2SI : sse12_cvt_sint_3addr<0x2C, VR128, GR32, - int_x86_sse_cvttss2si, f32mem, load, - "cvttss2si\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS; -defm Int_VCVTTSD2SI : sse12_cvt_sint_3addr<0x2C, VR128, GR32, - int_x86_sse2_cvttsd2si, f128mem, load, - "cvttss2si\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD; +let isAsmParserOnly = 1 in { +defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si, + f32mem, load, "cvttss2si">, XS, VEX; +defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, + int_x86_sse_cvttss2si64, f32mem, load, + "cvttss2si">, XS, VEX, VEX_W; +defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si, + f128mem, load, "cvttss2si">, XD, VEX; +defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, + int_x86_sse2_cvttsd2si64, f128mem, load, + "cvttss2si">, XD, VEX, VEX_W; } defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si, - f32mem, load, "cvttss2si\t{$src, $dst|$dst, $src}">, - XS; + f32mem, load, "cvttss2si">, XS; +defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, + int_x86_sse_cvttss2si64, f32mem, load, + "cvttss2si{q}">, XS, REX_W; defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si, - f128mem, load, "cvttss2si\t{$src, $dst|$dst, $src}">, - XD; + f128mem, load, "cvttss2si">, XD; +defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, + int_x86_sse2_cvttsd2si64, f128mem, load, + "cvttss2si{q}">, XD, REX_W; let isAsmParserOnly = 1, Pattern = []<dag> in { -defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load, - "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS, VEX; -defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, f128mem, load, - "cvtdq2ps\t{$src, $dst|$dst, $src}", - SSEPackedSingle>, TB, VEX; -defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, f256mem, load, - "cvtdq2ps\t{$src, $dst|$dst, $src}", - SSEPackedSingle>, TB, VEX; +defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load, + "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS, VEX; +defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load, + "cvtss2si\t{$src, $dst|$dst, $src}">, XS, VEX, + VEX_W; +defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load, + "cvtdq2ps\t{$src, $dst|$dst, $src}", + SSEPackedSingle>, TB, VEX; +defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, i256mem, load, + "cvtdq2ps\t{$src, $dst|$dst, $src}", + SSEPackedSingle>, TB, VEX; } let Pattern = []<dag> in { defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/, "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS; -defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, f128mem, load /*dummy*/, +defm CVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load /*dummy*/, + "cvtss2si{q}\t{$src, $dst|$dst, $src}">, XS, REX_W; +defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load /*dummy*/, "cvtdq2ps\t{$src, $dst|$dst, $src}", SSEPackedSingle>, TB; /* PD SSE3 form is avaiable */ } @@ -701,13 +800,11 @@ def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src), let isAsmParserOnly = 1 in defm Int_VCVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128, - int_x86_sse2_cvtsd2ss, f64mem, load, - "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, - XS, VEX_4V; + int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss", 0>, + XS, VEX_4V; let Constraints = "$src1 = $dst" in defm 
Int_CVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128, - int_x86_sse2_cvtsd2ss, f64mem, load, - "cvtsd2ss\t{$src2, $dst|$dst, $src2}">, XS; + int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss">, XS; // Convert scalar single to scalar double let isAsmParserOnly = 1 in { // SSE2 instructions with XS prefix @@ -806,6 +903,7 @@ def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), (bitconvert (memopv2i64 addr:$src))))]>, XS, Requires<[HasSSE2]>; + // Convert packed single/double fp to doubleword let isAsmParserOnly = 1 in { def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), @@ -964,11 +1062,11 @@ def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), let isAsmParserOnly = 1 in { def Int_VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtps2pd\t{$src, $dst|$dst, $src}", + "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>, VEX, Requires<[HasAVX]>; def Int_VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), - "cvtps2pd\t{$src, $dst|$dst, $src}", + "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2pd (load addr:$src)))]>, VEX, Requires<[HasAVX]>; @@ -1029,6 +1127,39 @@ def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), [(set VR128:$dst, (int_x86_sse2_cvtpd2ps (memop addr:$src)))]>; +// AVX 256-bit register conversion intrinsics +// FIXME: Migrate SSE conversion intrinsics matching to use patterns as below +// whenever possible to avoid declaring two versions of each one. +def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src), + (VCVTDQ2PSYrr VR256:$src)>; +def : Pat<(int_x86_avx_cvtdq2_ps_256 (memopv8i32 addr:$src)), + (VCVTDQ2PSYrm addr:$src)>; + +def : Pat<(int_x86_avx_cvt_pd2_ps_256 VR256:$src), + (VCVTPD2PSYrr VR256:$src)>; +def : Pat<(int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)), + (VCVTPD2PSYrm addr:$src)>; + +def : Pat<(int_x86_avx_cvt_ps2dq_256 VR256:$src), + (VCVTPS2DQYrr VR256:$src)>; +def : Pat<(int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)), + (VCVTPS2DQYrm addr:$src)>; + +def : Pat<(int_x86_avx_cvt_ps2_pd_256 VR128:$src), + (VCVTPS2PDYrr VR128:$src)>; +def : Pat<(int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)), + (VCVTPS2PDYrm addr:$src)>; + +def : Pat<(int_x86_avx_cvtt_pd2dq_256 VR256:$src), + (VCVTTPD2DQYrr VR256:$src)>; +def : Pat<(int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)), + (VCVTTPD2DQYrm addr:$src)>; + +def : Pat<(int_x86_avx_cvtt_ps2dq_256 VR256:$src), + (VCVTTPS2DQYrr VR256:$src)>; +def : Pat<(int_x86_avx_cvtt_ps2dq_256 (memopv8f32 addr:$src)), + (VCVTTPS2DQYrm addr:$src)>; + //===----------------------------------------------------------------------===// // SSE 1 & 2 - Compare Instructions //===----------------------------------------------------------------------===// @@ -1193,16 +1324,14 @@ let isAsmParserOnly = 1 in { "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}", "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}", SSEPackedDouble>, OpSize, VEX_4V; - let Pattern = []<dag> in { - defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_sse_cmp_ps, - "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}", - "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}", - SSEPackedSingle>, VEX_4V; - defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_sse2_cmp_pd, - "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}", - "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}", - SSEPackedDouble>, OpSize, VEX_4V; - } + defm VCMPPSY : 
sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_ps_256, + "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}", + "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}", + SSEPackedSingle>, VEX_4V; + defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_pd_256, + "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}", + "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}", + SSEPackedDouble>, OpSize, VEX_4V; } let Constraints = "$src1 = $dst" in { defm CMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps, @@ -1232,24 +1361,30 @@ def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)), multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop, ValueType vt, string asm, PatFrag mem_frag, Domain d, bit IsConvertibleToThreeAddress = 0> { - def rmi : PIi8<0xC6, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, f128mem:$src2, i8imm:$src3), asm, - [(set VR128:$dst, (vt (shufp:$src3 - VR128:$src1, (mem_frag addr:$src2))))], d>; + def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, f128mem:$src2, i8imm:$src3), asm, + [(set RC:$dst, (vt (shufp:$src3 + RC:$src1, (mem_frag addr:$src2))))], d>; let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in - def rri : PIi8<0xC6, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, i8imm:$src3), asm, - [(set VR128:$dst, - (vt (shufp:$src3 VR128:$src1, VR128:$src2)))], d>; + def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, RC:$src2, i8imm:$src3), asm, + [(set RC:$dst, + (vt (shufp:$src3 RC:$src1, RC:$src2)))], d>; } let isAsmParserOnly = 1 in { - defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32, - "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - memopv4f32, SSEPackedSingle>, VEX_4V; - defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64, - "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}", - memopv2f64, SSEPackedDouble>, OpSize, VEX_4V; + defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32, + "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + memopv4f32, SSEPackedSingle>, VEX_4V; + defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32, + "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + memopv8f32, SSEPackedSingle>, VEX_4V; + defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64, + "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}", + memopv2f64, SSEPackedDouble>, OpSize, VEX_4V; + defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64, + "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}", + memopv4f64, SSEPackedDouble>, OpSize, VEX_4V; } let Constraints = "$src1 = $dst" in { @@ -1351,12 +1486,23 @@ let isAsmParserOnly = 1 in { defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd", SSEPackedDouble>, OpSize, VEX; - // FIXME: merge with multiclass above when the intrinsics come. 
- def VMOVMSKPSYrr : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR256:$src), + defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256, + "movmskps", SSEPackedSingle>, VEX; + defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256, + "movmskpd", SSEPackedDouble>, OpSize, + VEX; + + // Assembler Only + def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), + "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX; + def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), + "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize, + VEX; + def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX; - def VMOVMSKPDYrr : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR256:$src), + def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize, - VEX; + VEX; } //===----------------------------------------------------------------------===// @@ -1536,6 +1682,9 @@ let isCommutable = 0 in /// /// These three forms can each be reg+reg or reg+mem. /// + +/// FIXME: once all 256-bit intrinsics are matched, cleanup and refactor those +/// classes below multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode, bit Is2Addr = 1> { defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"), @@ -1565,7 +1714,7 @@ multiclass basic_sse12_fp_binop_p_y<bits<8> opc, string OpcodeStr, } multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr, - bit Is2Addr = 1> { + bit Is2Addr = 1> { defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128, !strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32, Is2Addr>, XS; defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128, @@ -1573,37 +1722,57 @@ multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr, } multiclass basic_sse12_fp_binop_p_int<bits<8> opc, string OpcodeStr, - bit Is2Addr = 1> { + bit Is2Addr = 1> { defm PS : sse12_fp_packed_int<opc, OpcodeStr, VR128, - !strconcat(OpcodeStr, "ps"), "", "_ps", f128mem, memopv4f32, + !strconcat(OpcodeStr, "ps"), "sse", "_ps", f128mem, memopv4f32, SSEPackedSingle, Is2Addr>, TB; defm PD : sse12_fp_packed_int<opc, OpcodeStr, VR128, - !strconcat(OpcodeStr, "pd"), "2", "_pd", f128mem, memopv2f64, + !strconcat(OpcodeStr, "pd"), "sse2", "_pd", f128mem, memopv2f64, SSEPackedDouble, Is2Addr>, TB, OpSize; } +multiclass basic_sse12_fp_binop_p_y_int<bits<8> opc, string OpcodeStr> { + defm PSY : sse12_fp_packed_int<opc, OpcodeStr, VR256, + !strconcat(OpcodeStr, "ps"), "avx", "_ps_256", f256mem, memopv8f32, + SSEPackedSingle, 0>, TB; + + defm PDY : sse12_fp_packed_int<opc, OpcodeStr, VR256, + !strconcat(OpcodeStr, "pd"), "avx", "_pd_256", f256mem, memopv4f64, + SSEPackedDouble, 0>, TB, OpSize; +} + // Binary Arithmetic instructions let isAsmParserOnly = 1 in { defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, 0>, + basic_sse12_fp_binop_s_int<0x58, "add", 0>, basic_sse12_fp_binop_p<0x58, "add", fadd, 0>, basic_sse12_fp_binop_p_y<0x58, "add", fadd>, VEX_4V; defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, 0>, + basic_sse12_fp_binop_s_int<0x59, "mul", 0>, basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>, basic_sse12_fp_binop_p_y<0x59, "mul", fmul>, VEX_4V; let isCommutable = 0 in { defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, 0>, + basic_sse12_fp_binop_s_int<0x5C, "sub", 0>, basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>, 
basic_sse12_fp_binop_p_y<0x5C, "sub", fsub>, VEX_4V; defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, 0>, + basic_sse12_fp_binop_s_int<0x5E, "div", 0>, basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>, basic_sse12_fp_binop_p_y<0x5E, "div", fdiv>, VEX_4V; defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, 0>, + basic_sse12_fp_binop_s_int<0x5F, "max", 0>, basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>, - basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>, VEX_4V; + basic_sse12_fp_binop_p_int<0x5F, "max", 0>, + basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>, + basic_sse12_fp_binop_p_y_int<0x5F, "max">, VEX_4V; defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, 0>, + basic_sse12_fp_binop_s_int<0x5D, "min", 0>, basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>, + basic_sse12_fp_binop_p_int<0x5D, "min", 0>, + basic_sse12_fp_binop_p_y_int<0x5D, "min">, basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin>, VEX_4V; } } @@ -1668,20 +1837,20 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr, SDNode OpNode, Intrinsic F32Int> { def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2), - !strconcat(!strconcat("v", OpcodeStr), + !strconcat(OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2), - !strconcat(!strconcat("v", OpcodeStr), + !strconcat(OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, XS, Requires<[HasAVX, OptForSize]>; - def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(!strconcat("v", OpcodeStr), - "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; - def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, ssmem:$src2), - !strconcat(!strconcat("v", OpcodeStr), - "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; + def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + !strconcat(OpcodeStr, + "ss\t{$src, $dst, $dst|$dst, $dst, $src}"), + [(set VR128:$dst, (F32Int VR128:$src))]>; + def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src), + !strconcat(OpcodeStr, + "ss\t{$src, $dst, $dst|$dst, $dst, $src}"), + [(set VR128:$dst, (F32Int sse_load_f32:$src))]>; } /// sse1_fp_unop_p - SSE1 unops in packed form. @@ -1715,6 +1884,16 @@ multiclass sse1_fp_unop_p_int<bits<8> opc, string OpcodeStr, [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))]>; } +/// sse1_fp_unop_p_y_int - AVX 256-bit intrinsics unops in packed forms. +multiclass sse1_fp_unop_p_y_int<bits<8> opc, string OpcodeStr, + Intrinsic V4F32Int> { + def PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), + !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), + [(set VR256:$dst, (V4F32Int VR256:$src))]>; + def PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), + !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), + [(set VR256:$dst, (V4F32Int (memopv8f32 addr:$src)))]>; +} /// sse2_fp_unop_s - SSE2 unops in scalar form. multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, @@ -1738,21 +1917,19 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, /// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form. 
multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr, SDNode OpNode, Intrinsic F64Int> { - def SDr : VSDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2), - !strconcat(OpcodeStr, - "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; - def SDm : VSDI<opc, MRMSrcMem, (outs FR64:$dst), - (ins FR64:$src1, f64mem:$src2), - !strconcat(OpcodeStr, - "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; - def SDr_Int : VSDI<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>; - def SDm_Int : VSDI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, sdmem:$src2), - !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>; + def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2), + !strconcat(OpcodeStr, + "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; + def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), + (ins FR64:$src1, f64mem:$src2), + !strconcat(OpcodeStr, + "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; + def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + !strconcat(OpcodeStr, "sd\t{$src, $dst, $dst|$dst, $dst, $src}"), + [(set VR128:$dst, (F64Int VR128:$src))]>; + def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src), + !strconcat(OpcodeStr, "sd\t{$src, $dst, $dst|$dst, $dst, $src}"), + [(set VR128:$dst, (F64Int sse_load_f64:$src))]>; } /// sse2_fp_unop_p - SSE2 unops in vector forms. @@ -1787,29 +1964,48 @@ multiclass sse2_fp_unop_p_int<bits<8> opc, string OpcodeStr, [(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))]>; } +/// sse2_fp_unop_p_y_int - AVX 256-bit intrinsic unops in vector forms. +multiclass sse2_fp_unop_p_y_int<bits<8> opc, string OpcodeStr, + Intrinsic V2F64Int> { + def PDYr_Int : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), + !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), + [(set VR256:$dst, (V2F64Int VR256:$src))]>; + def PDYm_Int : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), + !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), + [(set VR256:$dst, (V2F64Int (memopv4f64 addr:$src)))]>; +} + let isAsmParserOnly = 1, Predicates = [HasAVX] in { // Square root. - defm VSQRT : sse1_fp_unop_s_avx<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss>, - sse2_fp_unop_s_avx<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd>, + defm VSQRT : sse1_fp_unop_s_avx<0x51, "vsqrt", fsqrt, int_x86_sse_sqrt_ss>, + sse2_fp_unop_s_avx<0x51, "vsqrt", fsqrt, int_x86_sse2_sqrt_sd>, VEX_4V; defm VSQRT : sse1_fp_unop_p<0x51, "vsqrt", fsqrt>, sse2_fp_unop_p<0x51, "vsqrt", fsqrt>, sse1_fp_unop_p_y<0x51, "vsqrt", fsqrt>, sse2_fp_unop_p_y<0x51, "vsqrt", fsqrt>, + sse1_fp_unop_p_int<0x51, "vsqrt", int_x86_sse_sqrt_ps>, + sse2_fp_unop_p_int<0x51, "vsqrt", int_x86_sse2_sqrt_pd>, + sse1_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_ps_256>, + sse2_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_pd_256>, VEX; // Reciprocal approximations. Note that these typically require refinement // in order to obtain suitable precision. 
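The comment above about refinement deserves a concrete example: RCPPS/RSQRTPS return roughly 12-bit approximations, and one Newton-Raphson step roughly doubles the number of accurate bits. A small sketch with SSE intrinsics, not part of the patch; rcp_refined is an illustrative helper:

    #include <xmmintrin.h>
    #include <cstdio>

    // One Newton-Raphson step for the reciprocal: x1 = x0 * (2 - a * x0).
    static __m128 rcp_refined(__m128 a) {
      __m128 x0 = _mm_rcp_ps(a);
      return _mm_mul_ps(x0, _mm_sub_ps(_mm_set1_ps(2.0f), _mm_mul_ps(a, x0)));
    }

    int main() {
      __m128 a = _mm_set1_ps(3.0f);
      float rough, refined;
      _mm_store_ss(&rough, _mm_rcp_ps(a));
      _mm_store_ss(&refined, rcp_refined(a));
      std::printf("rcpps: %.9f  refined: %.9f  exact: %.9f\n",
                  rough, refined, 1.0f / 3.0f);
      return 0;
    }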
- defm VRSQRT : sse1_fp_unop_s_avx<0x52, "rsqrt", X86frsqrt, + defm VRSQRT : sse1_fp_unop_s_avx<0x52, "vrsqrt", X86frsqrt, int_x86_sse_rsqrt_ss>, VEX_4V; defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt>, - sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt>, VEX; + sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt>, + sse1_fp_unop_p_y_int<0x52, "vrsqrt", int_x86_avx_rsqrt_ps_256>, + sse1_fp_unop_p_int<0x52, "vrsqrt", int_x86_sse_rsqrt_ps>, VEX; - defm VRCP : sse1_fp_unop_s_avx<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>, + defm VRCP : sse1_fp_unop_s_avx<0x53, "vrcp", X86frcp, int_x86_sse_rcp_ss>, VEX_4V; defm VRCP : sse1_fp_unop_p<0x53, "vrcp", X86frcp>, - sse1_fp_unop_p_y<0x53, "vrcp", X86frcp>, VEX; + sse1_fp_unop_p_y<0x53, "vrcp", X86frcp>, + sse1_fp_unop_p_y_int<0x53, "vrcp", int_x86_avx_rcp_ps_256>, + sse1_fp_unop_p_int<0x53, "vrcp", int_x86_sse_rcp_ps>, VEX; } // Square root. @@ -1898,6 +2094,13 @@ let isAsmParserOnly = 1 in { } } +def : Pat<(int_x86_avx_movnt_dq_256 addr:$dst, VR256:$src), + (VMOVNTDQYmr addr:$dst, VR256:$src)>; +def : Pat<(int_x86_avx_movnt_pd_256 addr:$dst, VR256:$src), + (VMOVNTPDYmr addr:$dst, VR256:$src)>; +def : Pat<(int_x86_avx_movnt_ps_256 addr:$dst, VR256:$src), + (VMOVNTPSYmr addr:$dst, VR256:$src)>; + def MOVNTPSmr_Int : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "movntps\t{$src, $dst|$dst, $src}", [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>; @@ -1961,11 +2164,14 @@ def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src), // Load, store, and memory fence def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>, TB, Requires<[HasSSE1]>; +def : Pat<(X86SFence), (SFENCE)>; // Alias instructions that map zero vector to pxor / xorp* for sse. // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-zeros value if folding it would be beneficial. -// FIXME: Change encoding to pseudo! +// FIXME: Change encoding to pseudo! This is blocked right now by the x86 +// JIT implementatioan, it does not expand the instructions below like +// X86MCInstLower does. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, isCodeGenOnly = 1 in { def V_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "", @@ -1977,6 +2183,26 @@ def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4i32 immAllZerosV))]>; } +// The same as done above but for AVX. The 128-bit versions are the +// same, but re-encoded. The 256-bit does not support PI version. +// FIXME: Change encoding to pseudo! This is blocked right now by the x86 +// JIT implementatioan, it does not expand the instructions below like +// X86MCInstLower does. 
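For context on the V_SET0* definitions above and the AVX counterparts introduced next: the all-zeros vector is materialized with a self-XOR rather than a constant-pool load, which is shorter and is typically treated as a dependency-breaking zeroing idiom. A one-line illustration, not part of the patch (zero_vec is an arbitrary name):

    #include <xmmintrin.h>

    __m128 zero_vec() {
      return _mm_setzero_ps();   // usually compiles to "xorps %xmm0, %xmm0"
    }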
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, + isCodeGenOnly = 1, Predicates = [HasAVX] in { +def AVX_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "", + [(set VR128:$dst, (v4f32 immAllZerosV))]>, VEX_4V; +def AVX_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "", + [(set VR128:$dst, (v2f64 immAllZerosV))]>, VEX_4V; +def AVX_SET0PSY : PSI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "", + [(set VR256:$dst, (v8f32 immAllZerosV))]>, VEX_4V; +def AVX_SET0PDY : PDI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "", + [(set VR256:$dst, (v4f64 immAllZerosV))]>, VEX_4V; +let ExeDomain = SSEPackedInt in +def AVX_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "", + [(set VR128:$dst, (v4i32 immAllZerosV))]>; +} + def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>; def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>; def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>; @@ -2003,35 +2229,47 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst), //===---------------------------------------------------------------------===// // SSE2 - Move Aligned/Unaligned Packed Integer Instructions //===---------------------------------------------------------------------===// + let ExeDomain = SSEPackedInt in { // SSE integer instructions let isAsmParserOnly = 1 in { - let neverHasSideEffects = 1 in - def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; - def VMOVDQUrr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX; + let neverHasSideEffects = 1 in { + def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; + def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), + "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; + } + def VMOVDQUrr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX; + def VMOVDQUYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), + "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX; let canFoldAsLoad = 1, mayLoad = 1 in { - def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), - "movdqa\t{$src, $dst|$dst, $src}", - [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>, - VEX; - def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), - "vmovdqu\t{$src, $dst|$dst, $src}", - [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>, - XS, VEX, Requires<[HasAVX]>; + def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; + def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), + "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; + let Predicates = [HasAVX] in { + def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX; + def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), + "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX; + } } let mayStore = 1 in { - def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs), - (ins i128mem:$dst, VR128:$src), - "movdqa\t{$src, $dst|$dst, $src}", - [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>, VEX; - def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), - "vmovdqu\t{$src, $dst|$dst, $src}", - [/*(store (v2i64 VR128:$src), addr:$dst)*/]>, - XS, VEX, Requires<[HasAVX]>; + def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs), + (ins i128mem:$dst, 
VR128:$src), + "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; + def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs), + (ins i256mem:$dst, VR256:$src), + "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; + let Predicates = [HasAVX] in { + def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), + "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX; + def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src), + "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX; + } } } @@ -2084,6 +2322,10 @@ def MOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), } // ExeDomain = SSEPackedInt +def : Pat<(int_x86_avx_loadu_dq_256 addr:$src), (VMOVDQUYrm addr:$src)>; +def : Pat<(int_x86_avx_storeu_dq_256 addr:$dst, VR256:$src), + (VMOVDQUYmr addr:$dst, VR256:$src)>; + //===---------------------------------------------------------------------===// // SSE2 - Packed Integer Arithmetic Instructions //===---------------------------------------------------------------------===// @@ -2376,6 +2618,25 @@ let ExeDomain = SSEPackedInt in { } } // Constraints = "$src1 = $dst" +let Predicates = [HasAVX] in { + def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2), + (v2i64 (VPSLLDQri VR128:$src1, (BYTE_imm imm:$src2)))>; + def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2), + (v2i64 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>; + def : Pat<(int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2), + (v2i64 (VPSLLDQri VR128:$src1, imm:$src2))>; + def : Pat<(int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2), + (v2i64 (VPSRLDQri VR128:$src1, imm:$src2))>; + def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)), + (v2f64 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>; + + // Shift up / down and insert zero's. + def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))), + (v2i64 (VPSLLDQri VR128:$src, (BYTE_imm imm:$amt)))>; + def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))), + (v2i64 (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>; +} + let Predicates = [HasSSE2] in { def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2), (v2i64 (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2)))>; @@ -2662,11 +2923,16 @@ def PEXTRWri : PDIi8<0xC5, MRMSrcReg, imm:$src2))]>; // Insert -let isAsmParserOnly = 1, Predicates = [HasAVX] in - defm PINSRW : sse2_pinsrw<0>, OpSize, VEX_4V; +let isAsmParserOnly = 1, Predicates = [HasAVX] in { + defm VPINSRW : sse2_pinsrw<0>, OpSize, VEX_4V; + def VPINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, GR64:$src2, i32i8imm:$src3), + "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + []>, OpSize, VEX_4V; +} let Constraints = "$src1 = $dst" in - defm VPINSRW : sse2_pinsrw, TB, OpSize; + defm PINSRW : sse2_pinsrw, TB, OpSize, Requires<[HasSSE2]>; } // ExeDomain = SSEPackedInt @@ -2676,10 +2942,13 @@ let Constraints = "$src1 = $dst" in let ExeDomain = SSEPackedInt in { -let isAsmParserOnly = 1 in -def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), +let isAsmParserOnly = 1 in { +def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), "pmovmskb\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>, VEX; +def VPMOVMSKBr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), + "pmovmskb\t{$src, $dst|$dst, $src}", []>, VEX; +} def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), "pmovmskb\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>; @@ -2939,18 +3208,20 @@ def : Pat<(v2i64 (X86vzmovl 
(bc_v2i64 (loadv4i32 addr:$src)))), // Instructions to match in the assembler let isAsmParserOnly = 1 in { -// This instructions is in fact an alias to movd with 64 bit dst def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W; def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W; +// Recognize "movd" with GR64 destination, but encode as a "movq" +def VMOVQd64rr_alt : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), + "movd\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W; } // Instructions for the disassembler // xr = XMM register // xm = mem64 -let isAsmParserOnly = 1 in +let isAsmParserOnly = 1, Predicates = [HasAVX] in def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS; def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), @@ -2970,19 +3241,14 @@ def LFENCE : I<0xAE, MRM_E8, (outs), (ins), "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>; def MFENCE : I<0xAE, MRM_F0, (outs), (ins), "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>; +def : Pat<(X86LFence), (LFENCE)>; +def : Pat<(X86MFence), (MFENCE)>; + // Pause. This "instruction" is encoded as "rep; nop", so even though it // was introduced with SSE2, it's backward compatible. def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP; -//TODO: custom lower this so as to never even generate the noop -def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), - (i8 0)), (NOOP)>; -def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>; -def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>; -def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), - (i8 1)), (MFENCE)>; - // Alias instructions that map zero vector to pxor / xorp* for sse. // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-ones value if folding it would be beneficial. 
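One detail above is worth a usage note: because PAUSE is encoded as "rep; nop", pre-SSE2 processors simply execute a NOP, which is why the definition carries no SSE2 predicate. Its intended use is spin-wait loops, as in this sketch (not part of the patch; spin_until_set is an illustrative helper):

    #include <emmintrin.h>   // _mm_pause
    #include <atomic>

    void spin_until_set(const std::atomic<int> &flag) {
      while (flag.load(std::memory_order_acquire) == 0)
        _mm_pause();   // hint to the CPU that this is a spin-wait loop
    }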
@@ -3027,13 +3293,13 @@ def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), // Convert Packed DW Integers to Packed Double FP let isAsmParserOnly = 1, Predicates = [HasAVX] in { def VCVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; + "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; def VCVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; + "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; def VCVTDQ2PDYrm : S3SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; + "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; def VCVTDQ2PDYrr : S3SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; + "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; } def CVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), @@ -3041,6 +3307,17 @@ def CVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), def CVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtdq2pd\t{$src, $dst|$dst, $src}", []>; +// AVX 256-bit register conversion intrinsics +def : Pat<(int_x86_avx_cvtdq2_pd_256 VR128:$src), + (VCVTDQ2PDYrr VR128:$src)>; +def : Pat<(int_x86_avx_cvtdq2_pd_256 (memopv4i32 addr:$src)), + (VCVTDQ2PDYrm addr:$src)>; + +def : Pat<(int_x86_avx_cvt_pd2dq_256 VR256:$src), + (VCVTPD2DQYrr VR256:$src)>; +def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)), + (VCVTPD2DQYrm addr:$src)>; + //===---------------------------------------------------------------------===// // SSE3 - Move Instructions //===---------------------------------------------------------------------===// @@ -3057,9 +3334,20 @@ def rm : S3SI<op, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), (memopv4f32 addr:$src), (undef)))]>; } +multiclass sse3_replicate_sfp_y<bits<8> op, PatFrag rep_frag, + string OpcodeStr> { +def rr : S3SI<op, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>; +def rm : S3SI<op, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>; +} + let isAsmParserOnly = 1, Predicates = [HasAVX] in { -defm VMOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "vmovshdup">, VEX; -defm VMOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "vmovsldup">, VEX; + // FIXME: Merge above classes when we have patterns for the ymm version + defm VMOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "vmovshdup">, VEX; + defm VMOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "vmovsldup">, VEX; + defm VMOVSHDUPY : sse3_replicate_sfp_y<0x16, movshdup, "vmovshdup">, VEX; + defm VMOVSLDUPY : sse3_replicate_sfp_y<0x12, movsldup, "vmovsldup">, VEX; } defm MOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "movshdup">; defm MOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "movsldup">; @@ -3076,15 +3364,31 @@ def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), (undef))))]>; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in - defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX; +multiclass sse3_replicate_dfp_y<string OpcodeStr> { +def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + []>; +def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + []>; +} + +let isAsmParserOnly = 1, Predicates = [HasAVX] 
in { + // FIXME: Merge above classes when we have patterns for the ymm version + defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX; + defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX; +} defm MOVDDUP : sse3_replicate_dfp<"movddup">; // Move Unaligned Integer -let isAsmParserOnly = 1 in +let isAsmParserOnly = 1, Predicates = [HasAVX] in { def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), - "vlddqu\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX; + "vlddqu\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX; + def VLDDQUYrm : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), + "vlddqu\t{$src, $dst|$dst, $src}", + [(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>, VEX; +} def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "lddqu\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>; @@ -3125,35 +3429,39 @@ let AddedComplexity = 20 in // SSE3 - Arithmetic //===---------------------------------------------------------------------===// -multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, bit Is2Addr = 1> { +multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC, + X86MemOperand x86memop, bit Is2Addr = 1> { def rr : I<0xD0, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + (outs RC:$dst), (ins RC:$src1, RC:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set VR128:$dst, (Int VR128:$src1, - VR128:$src2))]>; + [(set RC:$dst, (Int RC:$src1, RC:$src2))]>; def rm : I<0xD0, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), + (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set VR128:$dst, (Int VR128:$src1, - (memop addr:$src2)))]>; - + [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))]>; } let isAsmParserOnly = 1, Predicates = [HasAVX], ExeDomain = SSEPackedDouble in { - defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", 0>, XD, - VEX_4V; - defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", 0>, OpSize, - VEX_4V; + defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128, + f128mem, 0>, XD, VEX_4V; + defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128, + f128mem, 0>, OpSize, VEX_4V; + defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256, + f256mem, 0>, XD, VEX_4V; + defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256, + f256mem, 0>, OpSize, VEX_4V; } let Constraints = "$src1 = $dst", Predicates = [HasSSE3], ExeDomain = SSEPackedDouble in { - defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps">, XD; - defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd">, TB, OpSize; + defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps", VR128, + f128mem>, XD; + defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd", VR128, + f128mem>, TB, OpSize; } //===---------------------------------------------------------------------===// @@ -3161,61 +3469,72 @@ let Constraints = "$src1 = $dst", Predicates = [HasSSE3], //===---------------------------------------------------------------------===// // Horizontal ops -class S3D_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1> - : S3DI<o, MRMSrcReg, (outs 
VR128:$dst), (ins VR128:$src1, VR128:$src2), +multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, + X86MemOperand x86memop, Intrinsic IntId, bit Is2Addr = 1> { + def rr : S3DI<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set VR128:$dst, (v4f32 (IntId VR128:$src1, VR128:$src2)))]>; -class S3D_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1> - : S3DI<o, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), + [(set RC:$dst, (vt (IntId RC:$src1, RC:$src2)))]>; + + def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set VR128:$dst, (v4f32 (IntId VR128:$src1, (memop addr:$src2))))]>; -class S3_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1> - : S3I<o, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + [(set RC:$dst, (vt (IntId RC:$src1, (memop addr:$src2))))]>; +} +multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, + X86MemOperand x86memop, Intrinsic IntId, bit Is2Addr = 1> { + def rr : S3I<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set VR128:$dst, (v2f64 (IntId VR128:$src1, VR128:$src2)))]>; -class S3_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1> - : S3I<o, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), + [(set RC:$dst, (vt (IntId RC:$src1, RC:$src2)))]>; + + def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set VR128:$dst, (v2f64 (IntId VR128:$src1, (memopv2f64 addr:$src2))))]>; + [(set RC:$dst, (vt (IntId RC:$src1, (memop addr:$src2))))]>; +} let isAsmParserOnly = 1, Predicates = [HasAVX] in { - def VHADDPSrr : S3D_Intrr<0x7C, "vhaddps", int_x86_sse3_hadd_ps, 0>, VEX_4V; - def VHADDPSrm : S3D_Intrm<0x7C, "vhaddps", int_x86_sse3_hadd_ps, 0>, VEX_4V; - def VHADDPDrr : S3_Intrr <0x7C, "vhaddpd", int_x86_sse3_hadd_pd, 0>, VEX_4V; - def VHADDPDrm : S3_Intrm <0x7C, "vhaddpd", int_x86_sse3_hadd_pd, 0>, VEX_4V; - def VHSUBPSrr : S3D_Intrr<0x7D, "vhsubps", int_x86_sse3_hsub_ps, 0>, VEX_4V; - def VHSUBPSrm : S3D_Intrm<0x7D, "vhsubps", int_x86_sse3_hsub_ps, 0>, VEX_4V; - def VHSUBPDrr : S3_Intrr <0x7D, "vhsubpd", int_x86_sse3_hsub_pd, 0>, VEX_4V; - def VHSUBPDrm : S3_Intrm <0x7D, "vhsubpd", int_x86_sse3_hsub_pd, 0>, VEX_4V; + defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem, + int_x86_sse3_hadd_ps, 0>, VEX_4V; + defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem, + int_x86_sse3_hadd_pd, 0>, VEX_4V; + defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem, + int_x86_sse3_hsub_ps, 0>, VEX_4V; + defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem, + int_x86_sse3_hsub_pd, 0>, VEX_4V; + defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem, + int_x86_avx_hadd_ps_256, 0>, VEX_4V; + defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem, + int_x86_avx_hadd_pd_256, 0>, VEX_4V; + defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem, + int_x86_avx_hsub_ps_256, 0>, 
VEX_4V; + defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem, + int_x86_avx_hsub_pd_256, 0>, VEX_4V; } let Constraints = "$src1 = $dst" in { - def HADDPSrr : S3D_Intrr<0x7C, "haddps", int_x86_sse3_hadd_ps>; - def HADDPSrm : S3D_Intrm<0x7C, "haddps", int_x86_sse3_hadd_ps>; - def HADDPDrr : S3_Intrr <0x7C, "haddpd", int_x86_sse3_hadd_pd>; - def HADDPDrm : S3_Intrm <0x7C, "haddpd", int_x86_sse3_hadd_pd>; - def HSUBPSrr : S3D_Intrr<0x7D, "hsubps", int_x86_sse3_hsub_ps>; - def HSUBPSrm : S3D_Intrm<0x7D, "hsubps", int_x86_sse3_hsub_ps>; - def HSUBPDrr : S3_Intrr <0x7D, "hsubpd", int_x86_sse3_hsub_pd>; - def HSUBPDrm : S3_Intrm <0x7D, "hsubpd", int_x86_sse3_hsub_pd>; + defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, + int_x86_sse3_hadd_ps>; + defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, + int_x86_sse3_hadd_pd>; + defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, + int_x86_sse3_hsub_ps>; + defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, + int_x86_sse3_hsub_pd>; } //===---------------------------------------------------------------------===// // SSSE3 - Packed Absolute Instructions //===---------------------------------------------------------------------===// -/// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. -multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr, - PatFrag mem_frag64, PatFrag mem_frag128, - Intrinsic IntId64, Intrinsic IntId128> { +/// SS3I_unop_rm_int_mm - Simple SSSE3 unary whose type can be v*{i8,i16,i32}. +multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr, + PatFrag mem_frag64, Intrinsic IntId64> { def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR64:$dst, (IntId64 VR64:$src))]>; @@ -3224,7 +3543,11 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR64:$dst, (IntId64 (bitconvert (mem_frag64 addr:$src))))]>; +} +/// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. 
+multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr, + PatFrag mem_frag128, Intrinsic IntId128> { def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), @@ -3240,26 +3563,28 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr, } let isAsmParserOnly = 1, Predicates = [HasAVX] in { - defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", memopv8i8, memopv16i8, - int_x86_ssse3_pabs_b, + defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", memopv16i8, int_x86_ssse3_pabs_b_128>, VEX; - defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw", memopv4i16, memopv8i16, - int_x86_ssse3_pabs_w, + defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw", memopv8i16, int_x86_ssse3_pabs_w_128>, VEX; - defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", memopv2i32, memopv4i32, - int_x86_ssse3_pabs_d, + defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", memopv4i32, int_x86_ssse3_pabs_d_128>, VEX; } -defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv8i8, memopv16i8, - int_x86_ssse3_pabs_b, - int_x86_ssse3_pabs_b_128>; -defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", memopv4i16, memopv8i16, - int_x86_ssse3_pabs_w, - int_x86_ssse3_pabs_w_128>; -defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv2i32, memopv4i32, - int_x86_ssse3_pabs_d, - int_x86_ssse3_pabs_d_128>; +defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv16i8, + int_x86_ssse3_pabs_b_128>, + SS3I_unop_rm_int_mm<0x1C, "pabsb", memopv8i8, + int_x86_ssse3_pabs_b>; + +defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", memopv8i16, + int_x86_ssse3_pabs_w_128>, + SS3I_unop_rm_int_mm<0x1D, "pabsw", memopv4i16, + int_x86_ssse3_pabs_w>; + +defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv4i32, + int_x86_ssse3_pabs_d_128>, + SS3I_unop_rm_int_mm<0x1E, "pabsd", memopv2i32, + int_x86_ssse3_pabs_d>; //===---------------------------------------------------------------------===// // SSSE3 - Packed Binary Operator Instructions @@ -3267,26 +3592,9 @@ defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv2i32, memopv4i32, /// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}. 
multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr, - PatFrag mem_frag64, PatFrag mem_frag128, - Intrinsic IntId64, Intrinsic IntId128, + PatFrag mem_frag128, Intrinsic IntId128, bit Is2Addr = 1> { let isCommutable = 1 in - def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), - (ins VR64:$src1, VR64:$src2), - !if(Is2Addr, - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>; - def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), - (ins VR64:$src1, i64mem:$src2), - !if(Is2Addr, - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set VR64:$dst, - (IntId64 VR64:$src1, - (bitconvert (memopv8i8 addr:$src2))))]>; - - let isCommutable = 1 in def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), !if(Is2Addr, @@ -3303,88 +3611,102 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr, (IntId128 VR128:$src1, (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; } +multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr, + PatFrag mem_frag64, Intrinsic IntId64> { + let isCommutable = 1 in + def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), + (ins VR64:$src1, VR64:$src2), + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>; + def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), + (ins VR64:$src1, i64mem:$src2), + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + [(set VR64:$dst, + (IntId64 VR64:$src1, + (bitconvert (memopv8i8 addr:$src2))))]>; +} let isAsmParserOnly = 1, Predicates = [HasAVX] in { let isCommutable = 0 in { - defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", memopv4i16, memopv8i16, - int_x86_ssse3_phadd_w, + defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", memopv8i16, int_x86_ssse3_phadd_w_128, 0>, VEX_4V; - defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd", memopv2i32, memopv4i32, - int_x86_ssse3_phadd_d, + defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd", memopv4i32, int_x86_ssse3_phadd_d_128, 0>, VEX_4V; - defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", memopv4i16, memopv8i16, - int_x86_ssse3_phadd_sw, + defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", memopv8i16, int_x86_ssse3_phadd_sw_128, 0>, VEX_4V; - defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw", memopv4i16, memopv8i16, - int_x86_ssse3_phsub_w, + defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw", memopv8i16, int_x86_ssse3_phsub_w_128, 0>, VEX_4V; - defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd", memopv2i32, memopv4i32, - int_x86_ssse3_phsub_d, + defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd", memopv4i32, int_x86_ssse3_phsub_d_128, 0>, VEX_4V; - defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", memopv4i16, memopv8i16, - int_x86_ssse3_phsub_sw, + defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", memopv8i16, int_x86_ssse3_phsub_sw_128, 0>, VEX_4V; - defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", memopv8i8, memopv16i8, - int_x86_ssse3_pmadd_ub_sw, + defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", memopv16i8, int_x86_ssse3_pmadd_ub_sw_128, 0>, VEX_4V; - defm VPSHUFB : SS3I_binop_rm_int<0x00, "vpshufb", memopv8i8, memopv16i8, - int_x86_ssse3_pshuf_b, + defm VPSHUFB : SS3I_binop_rm_int<0x00, "vpshufb", memopv16i8, int_x86_ssse3_pshuf_b_128, 0>, VEX_4V; - defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", memopv8i8, memopv16i8, - int_x86_ssse3_psign_b, + defm VPSIGNB : SS3I_binop_rm_int<0x08, 
"vpsignb", memopv16i8, int_x86_ssse3_psign_b_128, 0>, VEX_4V; - defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw", memopv4i16, memopv8i16, - int_x86_ssse3_psign_w, + defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw", memopv8i16, int_x86_ssse3_psign_w_128, 0>, VEX_4V; - defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd", memopv2i32, memopv4i32, - int_x86_ssse3_psign_d, + defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd", memopv4i32, int_x86_ssse3_psign_d_128, 0>, VEX_4V; } -defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv4i16, memopv8i16, - int_x86_ssse3_pmul_hr_sw, +defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv8i16, int_x86_ssse3_pmul_hr_sw_128, 0>, VEX_4V; } // None of these have i8 immediate fields. let ImmT = NoImm, Constraints = "$src1 = $dst" in { let isCommutable = 0 in { - defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw", memopv4i16, memopv8i16, - int_x86_ssse3_phadd_w, - int_x86_ssse3_phadd_w_128>; - defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd", memopv2i32, memopv4i32, - int_x86_ssse3_phadd_d, - int_x86_ssse3_phadd_d_128>; - defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", memopv4i16, memopv8i16, - int_x86_ssse3_phadd_sw, - int_x86_ssse3_phadd_sw_128>; - defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw", memopv4i16, memopv8i16, - int_x86_ssse3_phsub_w, - int_x86_ssse3_phsub_w_128>; - defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd", memopv2i32, memopv4i32, - int_x86_ssse3_phsub_d, - int_x86_ssse3_phsub_d_128>; - defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", memopv4i16, memopv8i16, - int_x86_ssse3_phsub_sw, - int_x86_ssse3_phsub_sw_128>; - defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", memopv8i8, memopv16i8, - int_x86_ssse3_pmadd_ub_sw, - int_x86_ssse3_pmadd_ub_sw_128>; - defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", memopv8i8, memopv16i8, - int_x86_ssse3_pshuf_b, - int_x86_ssse3_pshuf_b_128>; - defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", memopv8i8, memopv16i8, - int_x86_ssse3_psign_b, - int_x86_ssse3_psign_b_128>; - defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", memopv4i16, memopv8i16, - int_x86_ssse3_psign_w, - int_x86_ssse3_psign_w_128>; - defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", memopv2i32, memopv4i32, - int_x86_ssse3_psign_d, - int_x86_ssse3_psign_d_128>; -} -defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv4i16, memopv8i16, - int_x86_ssse3_pmul_hr_sw, - int_x86_ssse3_pmul_hr_sw_128>; + defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw", memopv8i16, + int_x86_ssse3_phadd_w_128>, + SS3I_binop_rm_int_mm<0x01, "phaddw", memopv4i16, + int_x86_ssse3_phadd_w>; + defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd", memopv4i32, + int_x86_ssse3_phadd_d_128>, + SS3I_binop_rm_int_mm<0x02, "phaddd", memopv2i32, + int_x86_ssse3_phadd_d>; + defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", memopv8i16, + int_x86_ssse3_phadd_sw_128>, + SS3I_binop_rm_int_mm<0x03, "phaddsw", memopv4i16, + int_x86_ssse3_phadd_sw>; + defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw", memopv8i16, + int_x86_ssse3_phsub_w_128>, + SS3I_binop_rm_int_mm<0x05, "phsubw", memopv4i16, + int_x86_ssse3_phsub_w>; + defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd", memopv4i32, + int_x86_ssse3_phsub_d_128>, + SS3I_binop_rm_int_mm<0x06, "phsubd", memopv2i32, + int_x86_ssse3_phsub_d>; + defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", memopv8i16, + int_x86_ssse3_phsub_sw_128>, + SS3I_binop_rm_int_mm<0x07, "phsubsw", memopv4i16, + int_x86_ssse3_phsub_sw>; + defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", memopv16i8, + int_x86_ssse3_pmadd_ub_sw_128>, + SS3I_binop_rm_int_mm<0x04, 
"pmaddubsw", memopv8i8, + int_x86_ssse3_pmadd_ub_sw>; + defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", memopv8i8, + int_x86_ssse3_pshuf_b_128>, + SS3I_binop_rm_int_mm<0x00, "pshufb", memopv8i8, + int_x86_ssse3_pshuf_b>; + defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", memopv16i8, + int_x86_ssse3_psign_b_128>, + SS3I_binop_rm_int_mm<0x08, "psignb", memopv8i8, + int_x86_ssse3_psign_b>; + defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", memopv8i16, + int_x86_ssse3_psign_w_128>, + SS3I_binop_rm_int_mm<0x09, "psignw", memopv4i16, + int_x86_ssse3_psign_w>; + defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", memopv4i32, + int_x86_ssse3_psign_d_128>, + SS3I_binop_rm_int_mm<0x0A, "psignd", memopv2i32, + int_x86_ssse3_psign_d>; +} +defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv8i16, + int_x86_ssse3_pmul_hr_sw_128>, + SS3I_binop_rm_int_mm<0x0B, "pmulhrsw", memopv4i16, + int_x86_ssse3_pmul_hr_sw>; } def : Pat<(X86pshufb VR128:$src, VR128:$mask), @@ -3396,22 +3718,16 @@ def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))), // SSSE3 - Packed Align Instruction Patterns //===---------------------------------------------------------------------===// -multiclass sse3_palign<string asm, bit Is2Addr = 1> { +multiclass ssse3_palign_mm<string asm> { def R64rr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2, i8imm:$src3), - !if(Is2Addr, - !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - !strconcat(asm, - "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - []>; + !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>; def R64rm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2, i8imm:$src3), - !if(Is2Addr, - !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - !strconcat(asm, - "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - []>; + !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>; +} +multiclass ssse3_palign<string asm, bit Is2Addr = 1> { def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3), !if(Is2Addr, @@ -3429,9 +3745,10 @@ multiclass sse3_palign<string asm, bit Is2Addr = 1> { } let isAsmParserOnly = 1, Predicates = [HasAVX] in - defm VPALIGN : sse3_palign<"vpalignr", 0>, VEX_4V; + defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V; let Constraints = "$src1 = $dst" in - defm PALIGN : sse3_palign<"palignr">; + defm PALIGN : ssse3_palign<"palignr">, + ssse3_palign_mm<"palignr">; let AddedComplexity = 5 in { @@ -3732,31 +4049,62 @@ def : Pat<(v2i32 (fp_to_sint (v2f64 VR128:$src))), (Int_CVTTPD2PIrr VR128:$src)>, Requires<[HasSSE2]>; // Use movaps / movups for SSE integer load / store (one byte shorter). 
-def : Pat<(alignedloadv4i32 addr:$src), - (MOVAPSrm addr:$src)>; -def : Pat<(loadv4i32 addr:$src), - (MOVUPSrm addr:$src)>; -def : Pat<(alignedloadv2i64 addr:$src), - (MOVAPSrm addr:$src)>; -def : Pat<(loadv2i64 addr:$src), - (MOVUPSrm addr:$src)>; - -def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>; -def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>; -def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>; -def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>; -def : Pat<(store (v2i64 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>; -def : Pat<(store (v4i32 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>; -def : Pat<(store (v8i16 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>; -def : Pat<(store (v16i8 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>; +let Predicates = [HasSSE1] in { + def : Pat<(alignedloadv4i32 addr:$src), + (MOVAPSrm addr:$src)>; + def : Pat<(loadv4i32 addr:$src), + (MOVUPSrm addr:$src)>; + def : Pat<(alignedloadv2i64 addr:$src), + (MOVAPSrm addr:$src)>; + def : Pat<(loadv2i64 addr:$src), + (MOVUPSrm addr:$src)>; + + def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst), + (MOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst), + (MOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst), + (MOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst), + (MOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v2i64 VR128:$src), addr:$dst), + (MOVUPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v4i32 VR128:$src), addr:$dst), + (MOVUPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v8i16 VR128:$src), addr:$dst), + (MOVUPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v16i8 VR128:$src), addr:$dst), + (MOVUPSmr addr:$dst, VR128:$src)>; +} + +// Use vmovaps/vmovups for AVX 128-bit integer load/store (one byte shorter). 
+let Predicates = [HasAVX] in { + def : Pat<(alignedloadv4i32 addr:$src), + (VMOVAPSrm addr:$src)>; + def : Pat<(loadv4i32 addr:$src), + (VMOVUPSrm addr:$src)>; + def : Pat<(alignedloadv2i64 addr:$src), + (VMOVAPSrm addr:$src)>; + def : Pat<(loadv2i64 addr:$src), + (VMOVUPSrm addr:$src)>; + + def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst), + (VMOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst), + (VMOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst), + (VMOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst), + (VMOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v2i64 VR128:$src), addr:$dst), + (VMOVUPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v4i32 VR128:$src), addr:$dst), + (VMOVUPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v8i16 VR128:$src), addr:$dst), + (VMOVUPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v16i8 VR128:$src), addr:$dst), + (VMOVUPSmr addr:$dst, VR128:$src)>; +} //===----------------------------------------------------------------------===// // SSE4.1 - Packed Move with Sign/Zero Extend @@ -3923,8 +4271,12 @@ multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> { // (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), addr:$dst) } -let isAsmParserOnly = 1, Predicates = [HasAVX] in +let isAsmParserOnly = 1, Predicates = [HasAVX] in { defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX; + def VPEXTRBrr64 : SS4AIi8<0x14, MRMDestReg, (outs GR64:$dst), + (ins VR128:$src1, i32i8imm:$src2), + "vpextrb\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, OpSize, VEX; +} defm PEXTRB : SS41I_extract8<0x14, "pextrb">; @@ -4007,8 +4359,13 @@ multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> { addr:$dst)]>, OpSize; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in +let isAsmParserOnly = 1, Predicates = [HasAVX] in { defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX; + def VEXTRACTPSrr64 : SS4AIi8<0x17, MRMDestReg, (outs GR64:$dst), + (ins VR128:$src1, i32i8imm:$src2), + "vextractps \t{$src2, $src1, $dst|$dst, $src1, $src2}", + []>, OpSize, VEX; +} defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">; // Also match an EXTRACTPS store when the store is done as f32 instead of i32. @@ -4131,80 +4488,84 @@ let isAsmParserOnly = 1, Predicates = [HasAVX] in defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V; def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3), - (INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>; + (VINSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>, + Requires<[HasAVX]>; +def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3), + (INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>, + Requires<[HasSSE41]>; //===----------------------------------------------------------------------===// // SSE4.1 - Round Instructions //===----------------------------------------------------------------------===// -multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, - string OpcodeStr, - Intrinsic V4F32Int, - Intrinsic V2F64Int> { +multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr, + X86MemOperand x86memop, RegisterClass RC, + PatFrag mem_frag32, PatFrag mem_frag64, + Intrinsic V4F32Int, Intrinsic V2F64Int> { // Intrinsic operation, reg. 
// Vector intrinsic operation, reg def PSr_Int : SS4AIi8<opcps, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), + (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR128:$dst, (V4F32Int VR128:$src1, imm:$src2))]>, + [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))]>, OpSize; // Vector intrinsic operation, mem def PSm_Int : Ii8<opcps, MRMSrcMem, - (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2), + (outs RC:$dst), (ins f256mem:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR128:$dst, - (V4F32Int (memopv4f32 addr:$src1),imm:$src2))]>, + [(set RC:$dst, + (V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>, TA, OpSize, Requires<[HasSSE41]>; // Vector intrinsic operation, reg def PDr_Int : SS4AIi8<opcpd, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), + (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR128:$dst, (V2F64Int VR128:$src1, imm:$src2))]>, + [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))]>, OpSize; // Vector intrinsic operation, mem def PDm_Int : SS4AIi8<opcpd, MRMSrcMem, - (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2), + (outs RC:$dst), (ins f256mem:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR128:$dst, - (V2F64Int (memopv2f64 addr:$src1),imm:$src2))]>, + [(set RC:$dst, + (V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>, OpSize; } -multiclass sse41_fp_unop_rm_avx<bits<8> opcps, bits<8> opcpd, - string OpcodeStr> { +multiclass sse41_fp_unop_rm_avx_p<bits<8> opcps, bits<8> opcpd, + RegisterClass RC, X86MemOperand x86memop, string OpcodeStr> { // Intrinsic operation, reg. // Vector intrinsic operation, reg def PSr : SS4AIi8<opcps, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), + (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, OpSize; // Vector intrinsic operation, mem def PSm : Ii8<opcps, MRMSrcMem, - (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2), + (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, TA, OpSize, Requires<[HasSSE41]>; // Vector intrinsic operation, reg def PDr : SS4AIi8<opcpd, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), + (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, OpSize; // Vector intrinsic operation, mem def PDm : SS4AIi8<opcpd, MRMSrcMem, - (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2), + (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, OpSize; @@ -4261,8 +4622,8 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd, OpSize; } -multiclass sse41_fp_binop_rm_avx<bits<8> opcss, bits<8> opcsd, - string OpcodeStr> { +multiclass sse41_fp_binop_rm_avx_s<bits<8> opcss, bits<8> opcsd, + string OpcodeStr> { // Intrinsic operation, reg. 
def SSr : SS4AIi8<opcss, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), @@ -4295,24 +4656,90 @@ multiclass sse41_fp_binop_rm_avx<bits<8> opcss, bits<8> opcsd, // FP round - roundss, roundps, roundsd, roundpd let isAsmParserOnly = 1, Predicates = [HasAVX] in { // Intrinsic form - defm VROUND : sse41_fp_unop_rm<0x08, 0x09, "vround", - int_x86_sse41_round_ps, int_x86_sse41_round_pd>, - VEX; + defm VROUND : sse41_fp_unop_rm<0x08, 0x09, "vround", f128mem, VR128, + memopv4f32, memopv2f64, + int_x86_sse41_round_ps, + int_x86_sse41_round_pd>, VEX; + defm VROUNDY : sse41_fp_unop_rm<0x08, 0x09, "vround", f256mem, VR256, + memopv8f32, memopv4f64, + int_x86_avx_round_ps_256, + int_x86_avx_round_pd_256>, VEX; defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround", - int_x86_sse41_round_ss, int_x86_sse41_round_sd, - 0>, VEX_4V; + int_x86_sse41_round_ss, + int_x86_sse41_round_sd, 0>, VEX_4V; + // Instructions for the assembler - defm VROUND : sse41_fp_unop_rm_avx<0x08, 0x09, "vround">, VEX; - defm VROUND : sse41_fp_binop_rm_avx<0x0A, 0x0B, "vround">, VEX_4V; + defm VROUND : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR128, f128mem, "vround">, + VEX; + defm VROUNDY : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR256, f256mem, "vround">, + VEX; + defm VROUND : sse41_fp_binop_rm_avx_s<0x0A, 0x0B, "vround">, VEX_4V; } -defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", +defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128, + memopv4f32, memopv2f64, int_x86_sse41_round_ps, int_x86_sse41_round_pd>; let Constraints = "$src1 = $dst" in defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round", int_x86_sse41_round_ss, int_x86_sse41_round_sd>; //===----------------------------------------------------------------------===// +// SSE4.1 - Packed Bit Test +//===----------------------------------------------------------------------===// + +// ptest instruction we'll lower to this in X86ISelLowering primarily from +// the intel intrinsic that corresponds to this. 
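The "intel intrinsic" mentioned in the comment above is, in typical use, one of the SSE4.1 bit-test intrinsics from <smmintrin.h>; calls such as _mm_testz_si128 are the usual source-level input that reaches X86ISelLowering as an X86ptest node and is then matched by the PTEST/VPTEST definitions that follow. A minimal C sketch (function and variable names are illustrative; assumes a compiler invoked with -msse4.1):

    #include <smmintrin.h>   /* SSE4.1: PTEST-based bit-test intrinsics */

    /* Returns 1 when (v & mask) is all zero; PTEST computes the AND and
       sets ZF, and _mm_testz_si128 hands that flag back as an int.      */
    static int bits_all_clear(__m128i v, __m128i mask) {
      return _mm_testz_si128(v, mask);
    }

    int main(void) {
      __m128i v    = _mm_set1_epi32(0x0f0f0f0f);
      __m128i mask = _mm_set1_epi32((int)0xf0f0f0f0);
      return bits_all_clear(v, mask) ? 0 : 1;   /* disjoint bits -> exit 0 */
    }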
+let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in { +def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), + "vptest\t{$src2, $src1|$src1, $src2}", + [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>, + OpSize, VEX; +def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), + "vptest\t{$src2, $src1|$src1, $src2}", + [(set EFLAGS,(X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>, + OpSize, VEX; + +def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2), + "vptest\t{$src2, $src1|$src1, $src2}", + [(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>, + OpSize, VEX; +def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2), + "vptest\t{$src2, $src1|$src1, $src2}", + [(set EFLAGS,(X86ptest VR256:$src1, (memopv4i64 addr:$src2)))]>, + OpSize, VEX; +} + +let Defs = [EFLAGS] in { +def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), + "ptest \t{$src2, $src1|$src1, $src2}", + [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>, + OpSize; +def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), + "ptest \t{$src2, $src1|$src1, $src2}", + [(set EFLAGS, (X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>, + OpSize; +} + +// The bit test instructions below are AVX only +multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC, + X86MemOperand x86memop, PatFrag mem_frag, ValueType vt> { + def rr : SS48I<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), + [(set EFLAGS, (X86testp RC:$src1, (vt RC:$src2)))]>, OpSize, VEX; + def rm : SS48I<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), + [(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>, + OpSize, VEX; +} + +let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in { +defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, memopv4f32, v4f32>; +defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, memopv8f32, v8f32>; +defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, memopv2f64, v2f64>; +defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>; +} + +//===----------------------------------------------------------------------===// // SSE4.1 - Misc Instructions //===----------------------------------------------------------------------===// @@ -4431,79 +4858,104 @@ let Constraints = "$src1 = $dst" in /// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr, - Intrinsic IntId128, bit Is2Addr = 1> { + Intrinsic IntId, RegisterClass RC, PatFrag memop_frag, + X86MemOperand x86memop, bit Is2Addr = 1> { let isCommutable = 1 in - def rri : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), + def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, RC:$src2, i32i8imm:$src3), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [(set VR128:$dst, - (IntId128 VR128:$src1, VR128:$src2, imm:$src3))]>, + [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>, OpSize; - def rmi : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2, i32i8imm:$src3), + def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, x86memop:$src2, 
i32i8imm:$src3), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [(set VR128:$dst, - (IntId128 VR128:$src1, - (bitconvert (memopv16i8 addr:$src2)), imm:$src3))]>, + [(set RC:$dst, + (IntId RC:$src1, + (bitconvert (memop_frag addr:$src2)), imm:$src3))]>, OpSize; } let isAsmParserOnly = 1, Predicates = [HasAVX] in { let isCommutable = 0 in { defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps, - 0>, VEX_4V; + VR128, memopv16i8, i128mem, 0>, VEX_4V; defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd, - 0>, VEX_4V; + VR128, memopv16i8, i128mem, 0>, VEX_4V; + defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps", + int_x86_avx_blend_ps_256, VR256, memopv32i8, i256mem, 0>, VEX_4V; + defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd", + int_x86_avx_blend_pd_256, VR256, memopv32i8, i256mem, 0>, VEX_4V; defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw, - 0>, VEX_4V; + VR128, memopv16i8, i128mem, 0>, VEX_4V; defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw, - 0>, VEX_4V; + VR128, memopv16i8, i128mem, 0>, VEX_4V; } defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps, - 0>, VEX_4V; + VR128, memopv16i8, i128mem, 0>, VEX_4V; defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd, - 0>, VEX_4V; + VR128, memopv16i8, i128mem, 0>, VEX_4V; + defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256, + VR256, memopv32i8, i256mem, 0>, VEX_4V; } let Constraints = "$src1 = $dst" in { let isCommutable = 0 in { - defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps>; - defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd>; - defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw>; - defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw>; + defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps, + VR128, memopv16i8, i128mem>; + defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd, + VR128, memopv16i8, i128mem>; + defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw, + VR128, memopv16i8, i128mem>; + defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw, + VR128, memopv16i8, i128mem>; } - defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps>; - defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd>; + defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps, + VR128, memopv16i8, i128mem>; + defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd, + VR128, memopv16i8, i128mem>; } /// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators let isAsmParserOnly = 1, Predicates = [HasAVX] in { - multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr> { - def rr : I<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, VR128:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [], SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM; - - def rm : I<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2, VR128:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [], SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM; - } -} - -defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd">; -defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps">; -defm 
VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb">; +multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr, + RegisterClass RC, X86MemOperand x86memop, + PatFrag mem_frag, Intrinsic IntId> { + def rr : I<opc, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, RC:$src2, RC:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + [(set RC:$dst, (IntId RC:$src1, RC:$src2, RC:$src3))], + SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM; + + def rm : I<opc, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, x86memop:$src2, RC:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + [(set RC:$dst, + (IntId RC:$src1, (bitconvert (mem_frag addr:$src2)), + RC:$src3))], + SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM; +} +} + +defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, i128mem, + memopv16i8, int_x86_sse41_blendvpd>; +defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, i128mem, + memopv16i8, int_x86_sse41_blendvps>; +defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem, + memopv16i8, int_x86_sse41_pblendvb>; +defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem, + memopv32i8, int_x86_avx_blendv_pd_256>; +defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem, + memopv32i8, int_x86_avx_blendv_ps_256>; /// SS41I_ternary_int - SSE 4.1 ternary operator let Uses = [XMM0], Constraints = "$src1 = $dst" in { @@ -4529,30 +4981,6 @@ defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>; defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>; defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>; -// ptest instruction we'll lower to this in X86ISelLowering primarily from -// the intel intrinsic that corresponds to this. 
-let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in { -def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), - "vptest\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>, - OpSize, VEX; -def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2), - "vptest\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>, - OpSize, VEX; -} - -let Defs = [EFLAGS] in { -def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), - "ptest \t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>, - OpSize; -def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2), - "ptest \t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>, - OpSize; -} - let isAsmParserOnly = 1, Predicates = [HasAVX] in def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}", @@ -4603,17 +5031,20 @@ def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))), //===----------------------------------------------------------------------===// // Packed Compare Implicit Length Strings, Return Mask -let Defs = [EFLAGS], usesCustomInserter = 1 in { - def PCMPISTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, i8imm:$src3), - "#PCMPISTRM128rr PSEUDO!", +multiclass pseudo_pcmpistrm<string asm> { + def REG : Ii8<0, Pseudo, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), !strconcat(asm, "rr PSEUDO"), [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2, - imm:$src3))]>, OpSize; - def PCMPISTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2, i8imm:$src3), - "#PCMPISTRM128rm PSEUDO!", + imm:$src3))]>; + def MEM : Ii8<0, Pseudo, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), !strconcat(asm, "rm PSEUDO"), [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 - VR128:$src1, (load addr:$src2), imm:$src3))]>, OpSize; + VR128:$src1, (load addr:$src2), imm:$src3))]>; +} + +let Defs = [EFLAGS], usesCustomInserter = 1 in { + defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128">, Requires<[HasSSE42]>; + defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>; } let Defs = [XMM0, EFLAGS], isAsmParserOnly = 1, @@ -4636,20 +5067,20 @@ let Defs = [XMM0, EFLAGS] in { } // Packed Compare Explicit Length Strings, Return Mask -let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in { - def PCMPESTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src3, i8imm:$src5), - "#PCMPESTRM128rr PSEUDO!", - [(set VR128:$dst, - (int_x86_sse42_pcmpestrm128 - VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>, OpSize; - - def PCMPESTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src3, i8imm:$src5), - "#PCMPESTRM128rm PSEUDO!", +multiclass pseudo_pcmpestrm<string asm> { + def REG : Ii8<0, Pseudo, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src3, i8imm:$src5), !strconcat(asm, "rr PSEUDO"), + [(set VR128:$dst, (int_x86_sse42_pcmpestrm128 + VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>; + def MEM : Ii8<0, Pseudo, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src3, i8imm:$src5), !strconcat(asm, "rm PSEUDO"), [(set VR128:$dst, (int_x86_sse42_pcmpestrm128 - VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>, - OpSize; + VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>; +} + 
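The PCMPISTRM/PCMPESTRM pseudos defined here are expanded later by a custom inserter; at the source level they are normally reached through the SSE4.2 string-compare intrinsics from <nmmintrin.h>. A small C sketch of the implicit-length form (names are illustrative; assumes a compiler invoked with -msse4.2):

    #include <nmmintrin.h>   /* SSE4.2: PCMPxSTRx string-compare intrinsics */

    /* Bit i of the low 16 bits of the result is set when byte i of `text`
       matches any byte of `set`; both operands are implicit-length, i.e.
       terminated by their first zero byte.                                 */
    static __m128i any_of_mask(__m128i set, __m128i text) {
      return _mm_cmpistrm(set, text,
                          _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);
    }

    int main(void) {
      __m128i vowels = _mm_setr_epi8('a','e','i','o','u',0,0,0,0,0,0,0,0,0,0,0);
      __m128i text   = _mm_loadu_si128((const __m128i *)"the quick brown ");
      __m128i m      = any_of_mask(vowels, text);
      return _mm_cvtsi128_si32(m) != 0 ? 0 : 1;  /* vowels present -> exit 0 */
    }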
+let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in { + defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128">, Requires<[HasSSE42]>; + defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>; } let isAsmParserOnly = 1, Predicates = [HasAVX], @@ -4941,3 +5372,579 @@ def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), (int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)), imm:$src2))]>, OpSize; + +//===----------------------------------------------------------------------===// +// CLMUL Instructions +//===----------------------------------------------------------------------===// + +// Only the AVX version of CLMUL instructions are described here. + +// Carry-less Multiplication instructions +let isAsmParserOnly = 1 in { +def VPCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + []>; + +def VPCLMULQDQrm : CLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + []>; + +// Assembler Only +multiclass avx_vpclmul<string asm> { + def rr : I<0, Pseudo, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>; + + def rm : I<0, Pseudo, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), + !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>; +} +defm VPCLMULHQHQDQ : avx_vpclmul<"vpclmulhqhqdq">; +defm VPCLMULHQLQDQ : avx_vpclmul<"vpclmulhqlqdq">; +defm VPCLMULLQHQDQ : avx_vpclmul<"vpclmullqhqdq">; +defm VPCLMULLQLQDQ : avx_vpclmul<"vpclmullqlqdq">; + +} // isAsmParserOnly + +//===----------------------------------------------------------------------===// +// AVX Instructions +//===----------------------------------------------------------------------===// + +let isAsmParserOnly = 1 in { + +// Load from memory and broadcast to all elements of the destination operand +class avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC, + X86MemOperand x86memop, Intrinsic Int> : + AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set RC:$dst, (Int addr:$src))]>, VEX; + +def VBROADCASTSS : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem, + int_x86_avx_vbroadcastss>; +def VBROADCASTSSY : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem, + int_x86_avx_vbroadcastss_256>; +def VBROADCASTSD : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem, + int_x86_avx_vbroadcast_sd_256>; +def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem, + int_x86_avx_vbroadcastf128_pd_256>; + +// Insert packed floating-point values +def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst), + (ins VR256:$src1, VR128:$src2, i8imm:$src3), + "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + []>, VEX_4V; +def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst), + (ins VR256:$src1, f128mem:$src2, i8imm:$src3), + "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + []>, VEX_4V; + +// Extract packed floating-point values +def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst), + (ins VR256:$src1, i8imm:$src2), + "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}", + []>, VEX; +def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs), + (ins f128mem:$dst, VR256:$src1, i8imm:$src2), + "vextractf128\t{$src2, 
$src1, $dst|$dst, $src1, $src2}", + []>, VEX; + +// Conditional SIMD Packed Loads and Stores +multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr, + Intrinsic IntLd, Intrinsic IntLd256, + Intrinsic IntSt, Intrinsic IntSt256, + PatFrag pf128, PatFrag pf256> { + def rm : AVX8I<opc_rm, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, f128mem:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR128:$dst, (IntLd addr:$src2, VR128:$src1))]>, + VEX_4V; + def Yrm : AVX8I<opc_rm, MRMSrcMem, (outs VR256:$dst), + (ins VR256:$src1, f256mem:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>, + VEX_4V; + def mr : AVX8I<opc_mr, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src1, VR128:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(IntSt addr:$dst, VR128:$src1, VR128:$src2)]>, VEX_4V; + def Ymr : AVX8I<opc_mr, MRMDestMem, (outs), + (ins f256mem:$dst, VR256:$src1, VR256:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V; +} + +defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps", + int_x86_avx_maskload_ps, + int_x86_avx_maskload_ps_256, + int_x86_avx_maskstore_ps, + int_x86_avx_maskstore_ps_256, + memopv4f32, memopv8f32>; +defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd", + int_x86_avx_maskload_pd, + int_x86_avx_maskload_pd_256, + int_x86_avx_maskstore_pd, + int_x86_avx_maskstore_pd_256, + memopv2f64, memopv4f64>; + +// Permute Floating-Point Values +multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr, + RegisterClass RC, X86MemOperand x86memop_f, + X86MemOperand x86memop_i, PatFrag f_frag, PatFrag i_frag, + Intrinsic IntVar, Intrinsic IntImm> { + def rr : AVX8I<opc_rm, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, RC:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set RC:$dst, (IntVar RC:$src1, RC:$src2))]>, VEX_4V; + def rm : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, x86memop_i:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set RC:$dst, (IntVar RC:$src1, (i_frag addr:$src2)))]>, VEX_4V; + + def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, i8imm:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set RC:$dst, (IntImm RC:$src1, imm:$src2))]>, VEX; + def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst), + (ins x86memop_f:$src1, i8imm:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set RC:$dst, (IntImm (f_frag addr:$src1), imm:$src2))]>, VEX; +} + +defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem, + memopv4f32, memopv4i32, + int_x86_avx_vpermilvar_ps, + int_x86_avx_vpermil_ps>; +defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem, + memopv8f32, memopv8i32, + int_x86_avx_vpermilvar_ps_256, + int_x86_avx_vpermil_ps_256>; +defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem, + memopv2f64, memopv2i64, + int_x86_avx_vpermilvar_pd, + int_x86_avx_vpermil_pd>; +defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem, + memopv4f64, memopv4i64, + int_x86_avx_vpermilvar_pd_256, + int_x86_avx_vpermil_pd_256>; + +def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst), + (ins VR256:$src1, VR256:$src2, i8imm:$src3), + "vperm2f128\t{$src3, $src2, $src1, 
$dst|$dst, $src1, $src2, $src3}", + []>, VEX_4V; +def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst), + (ins VR256:$src1, f256mem:$src2, i8imm:$src3), + "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + []>, VEX_4V; + +// Zero All YMM registers +def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall", + [(int_x86_avx_vzeroall)]>, VEX, VEX_L, Requires<[HasAVX]>; + +// Zero Upper bits of YMM registers +def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper", + [(int_x86_avx_vzeroupper)]>, VEX, Requires<[HasAVX]>; + +} // isAsmParserOnly + +def : Pat<(int_x86_avx_vinsertf128_pd_256 VR256:$src1, VR128:$src2, imm:$src3), + (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>; +def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3), + (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>; +def : Pat<(int_x86_avx_vinsertf128_si_256 VR256:$src1, VR128:$src2, imm:$src3), + (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>; + +def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2), + (VEXTRACTF128rr VR256:$src1, imm:$src2)>; +def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2), + (VEXTRACTF128rr VR256:$src1, imm:$src2)>; +def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2), + (VEXTRACTF128rr VR256:$src1, imm:$src2)>; + +def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src), + (VBROADCASTF128 addr:$src)>; + +def : Pat<(int_x86_avx_vperm2f128_ps_256 VR256:$src1, VR256:$src2, imm:$src3), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>; +def : Pat<(int_x86_avx_vperm2f128_pd_256 VR256:$src1, VR256:$src2, imm:$src3), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>; +def : Pat<(int_x86_avx_vperm2f128_si_256 VR256:$src1, VR256:$src2, imm:$src3), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>; + +def : Pat<(int_x86_avx_vperm2f128_ps_256 + VR256:$src1, (memopv8f32 addr:$src2), imm:$src3), + (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>; +def : Pat<(int_x86_avx_vperm2f128_pd_256 + VR256:$src1, (memopv4f64 addr:$src2), imm:$src3), + (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>; +def : Pat<(int_x86_avx_vperm2f128_si_256 + VR256:$src1, (memopv8i32 addr:$src2), imm:$src3), + (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>; + +//===----------------------------------------------------------------------===// +// SSE Shuffle pattern fragments +//===----------------------------------------------------------------------===// + +// This is part of a "work in progress" refactoring. The idea is that all +// vector shuffles are going to be translated into target specific nodes and +// directly matched by the patterns below (which can be changed along the way) +// The AVX version of some but not all of them are described here, and more +// should come in a near future. + +// Shuffle with PSHUFD instruction folding loads. The first two patterns match +// SSE2 loads, which are always promoted to v2i64. The last one should match +// the SSE1 case, where the only legal load is v4f32, but there is no PSHUFD +// in SSE2, how does it ever worked? Anyway, the pattern will remain here until +// we investigate further. 
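As a concrete reference for the node/pattern pairing described above, an ordinary source-level shuffle such as _mm_shuffle_epi32 is lowered to an X86PShufd node and then matched directly by the PSHUFD patterns below. A minimal C sketch (assumes SSE2; the helper name is illustrative, and single-PSHUFD selection is the typical, not guaranteed, outcome):

    #include <emmintrin.h>   /* SSE2: PSHUFD via _mm_shuffle_epi32 */

    /* Reverse the four 32-bit lanes; typically selected as a single
       "pshufd $0x1b, %xmm0, %xmm0".                                  */
    static __m128i reverse_lanes(__m128i v) {
      return _mm_shuffle_epi32(v, _MM_SHUFFLE(0, 1, 2, 3));
    }

    int main(void) {
      __m128i v = _mm_setr_epi32(1, 2, 3, 4);
      __m128i r = reverse_lanes(v);
      return _mm_cvtsi128_si32(r) == 4 ? 0 : 1;  /* low lane is now 4 */
    }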
+def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)), + (i8 imm:$imm))), + (VPSHUFDmi addr:$src1, imm:$imm)>, Requires<[HasAVX]>; +def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)), + (i8 imm:$imm))), + (PSHUFDmi addr:$src1, imm:$imm)>; +def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv4f32 addr:$src1)), + (i8 imm:$imm))), + (PSHUFDmi addr:$src1, imm:$imm)>; // FIXME: has this ever worked? + +// Shuffle with PSHUFD instruction. +def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))), + (VPSHUFDri VR128:$src1, imm:$imm)>, Requires<[HasAVX]>; +def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))), + (PSHUFDri VR128:$src1, imm:$imm)>; + +def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))), + (VPSHUFDri VR128:$src1, imm:$imm)>, Requires<[HasAVX]>; +def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))), + (PSHUFDri VR128:$src1, imm:$imm)>; + +// Shuffle with SHUFPD instruction. +def : Pat<(v2f64 (X86Shufps VR128:$src1, + (memopv2f64 addr:$src2), (i8 imm:$imm))), + (VSHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>, Requires<[HasAVX]>; +def : Pat<(v2f64 (X86Shufps VR128:$src1, + (memopv2f64 addr:$src2), (i8 imm:$imm))), + (SHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>; + +def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>; +def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>; + +def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>; +def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>; + +// Shuffle with SHUFPS instruction. 
+def : Pat<(v4f32 (X86Shufps VR128:$src1, + (memopv4f32 addr:$src2), (i8 imm:$imm))), + (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>, Requires<[HasAVX]>; +def : Pat<(v4f32 (X86Shufps VR128:$src1, + (memopv4f32 addr:$src2), (i8 imm:$imm))), + (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>; + +def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>; +def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>; + +def : Pat<(v4i32 (X86Shufps VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))), + (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>, Requires<[HasAVX]>; +def : Pat<(v4i32 (X86Shufps VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))), + (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>; + +def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>; +def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>; + +// Shuffle with MOVHLPS instruction +def : Pat<(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)), + (MOVHLPSrr VR128:$src1, VR128:$src2)>; +def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)), + (MOVHLPSrr VR128:$src1, VR128:$src2)>; + +// Shuffle with MOVDDUP instruction +def : Pat<(X86Movddup (memopv2f64 addr:$src)), + (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; +def : Pat<(X86Movddup (memopv2f64 addr:$src)), + (MOVDDUPrm addr:$src)>; + +def : Pat<(X86Movddup (bc_v4f32 (memopv2f64 addr:$src))), + (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; +def : Pat<(X86Movddup (bc_v4f32 (memopv2f64 addr:$src))), + (MOVDDUPrm addr:$src)>; + +def : Pat<(X86Movddup (memopv2i64 addr:$src)), + (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; +def : Pat<(X86Movddup (memopv2i64 addr:$src)), + (MOVDDUPrm addr:$src)>; + +def : Pat<(X86Movddup (bc_v4i32 (memopv2i64 addr:$src))), + (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; +def : Pat<(X86Movddup (bc_v4i32 (memopv2i64 addr:$src))), + (MOVDDUPrm addr:$src)>; + +def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))), + (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; +def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))), + (MOVDDUPrm addr:$src)>; + +def : Pat<(X86Movddup (bc_v2f64 + (v2i64 (scalar_to_vector (loadi64 addr:$src))))), + (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; +def : Pat<(X86Movddup (bc_v2f64 + (v2i64 (scalar_to_vector (loadi64 addr:$src))))), + (MOVDDUPrm addr:$src)>; + +// Shuffle with UNPCKLPS +def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))), + (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; +def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))), + (UNPCKLPSrm VR128:$src1, addr:$src2)>; + +def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)), + (VUNPCKLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>; +def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)), + (UNPCKLPSrr VR128:$src1, VR128:$src2)>; + +// Shuffle with UNPCKHPS +def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))), + (VUNPCKHPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; +def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))), + (UNPCKHPSrm VR128:$src1, addr:$src2)>; + +def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)), + (VUNPCKHPSrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>; +def : Pat<(v4f32 (X86Unpckhps VR128:$src1, 
VR128:$src2)), + (UNPCKHPSrr VR128:$src1, VR128:$src2)>; + +// Shuffle with UNPCKLPD +def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))), + (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; +def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))), + (UNPCKLPSrm VR128:$src1, addr:$src2)>; + +def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)), + (VUNPCKLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>; +def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)), + (UNPCKLPDrr VR128:$src1, VR128:$src2)>; + +// Shuffle with UNPCKHPD +def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))), + (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; +def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))), + (UNPCKLPSrm VR128:$src1, addr:$src2)>; + +def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)), + (VUNPCKHPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>; +def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)), + (UNPCKHPDrr VR128:$src1, VR128:$src2)>; + +// Shuffle with PUNPCKLBW +def : Pat<(v16i8 (X86Punpcklbw VR128:$src1, + (bc_v16i8 (memopv2i64 addr:$src2)))), + (PUNPCKLBWrm VR128:$src1, addr:$src2)>; +def : Pat<(v16i8 (X86Punpcklbw VR128:$src1, VR128:$src2)), + (PUNPCKLBWrr VR128:$src1, VR128:$src2)>; + +// Shuffle with PUNPCKLWD +def : Pat<(v8i16 (X86Punpcklwd VR128:$src1, + (bc_v8i16 (memopv2i64 addr:$src2)))), + (PUNPCKLWDrm VR128:$src1, addr:$src2)>; +def : Pat<(v8i16 (X86Punpcklwd VR128:$src1, VR128:$src2)), + (PUNPCKLWDrr VR128:$src1, VR128:$src2)>; + +// Shuffle with PUNPCKLDQ +def : Pat<(v4i32 (X86Punpckldq VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2)))), + (PUNPCKLDQrm VR128:$src1, addr:$src2)>; +def : Pat<(v4i32 (X86Punpckldq VR128:$src1, VR128:$src2)), + (PUNPCKLDQrr VR128:$src1, VR128:$src2)>; + +// Shuffle with PUNPCKLQDQ +def : Pat<(v2i64 (X86Punpcklqdq VR128:$src1, (memopv2i64 addr:$src2))), + (PUNPCKLQDQrm VR128:$src1, addr:$src2)>; +def : Pat<(v2i64 (X86Punpcklqdq VR128:$src1, VR128:$src2)), + (PUNPCKLQDQrr VR128:$src1, VR128:$src2)>; + +// Shuffle with PUNPCKHBW +def : Pat<(v16i8 (X86Punpckhbw VR128:$src1, + (bc_v16i8 (memopv2i64 addr:$src2)))), + (PUNPCKHBWrm VR128:$src1, addr:$src2)>; +def : Pat<(v16i8 (X86Punpckhbw VR128:$src1, VR128:$src2)), + (PUNPCKHBWrr VR128:$src1, VR128:$src2)>; + +// Shuffle with PUNPCKHWD +def : Pat<(v8i16 (X86Punpckhwd VR128:$src1, + (bc_v8i16 (memopv2i64 addr:$src2)))), + (PUNPCKHWDrm VR128:$src1, addr:$src2)>; +def : Pat<(v8i16 (X86Punpckhwd VR128:$src1, VR128:$src2)), + (PUNPCKHWDrr VR128:$src1, VR128:$src2)>; + +// Shuffle with PUNPCKHDQ +def : Pat<(v4i32 (X86Punpckhdq VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2)))), + (PUNPCKHDQrm VR128:$src1, addr:$src2)>; +def : Pat<(v4i32 (X86Punpckhdq VR128:$src1, VR128:$src2)), + (PUNPCKHDQrr VR128:$src1, VR128:$src2)>; + +// Shuffle with PUNPCKHQDQ +def : Pat<(v2i64 (X86Punpckhqdq VR128:$src1, (memopv2i64 addr:$src2))), + (PUNPCKHQDQrm VR128:$src1, addr:$src2)>; +def : Pat<(v2i64 (X86Punpckhqdq VR128:$src1, VR128:$src2)), + (PUNPCKHQDQrr VR128:$src1, VR128:$src2)>; + +// Shuffle with MOVLHPS +def : Pat<(X86Movlhps VR128:$src1, + (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))), + (MOVHPSrm VR128:$src1, addr:$src2)>; +def : Pat<(X86Movlhps VR128:$src1, + (bc_v4i32 (v2i64 (X86vzload addr:$src2)))), + (MOVHPSrm VR128:$src1, addr:$src2)>; +def : Pat<(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)), + (MOVLHPSrr VR128:$src1, VR128:$src2)>; +def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)), + 
(MOVLHPSrr VR128:$src1, VR128:$src2)>; +def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)), + (MOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>; + +// Shuffle with MOVLHPD +def : Pat<(v2f64 (X86Movlhpd VR128:$src1, + (scalar_to_vector (loadf64 addr:$src2)))), + (MOVHPDrm VR128:$src1, addr:$src2)>; +// FIXME: Instead of X86Unpcklpd, there should be a X86Movlhpd here, the problem +// is during lowering, where it's not possible to recognize the load fold cause +// it has two uses through a bitcast. One use disappears at isel time and the +// fold opportunity reappears. +def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, + (scalar_to_vector (loadf64 addr:$src2)))), + (MOVHPDrm VR128:$src1, addr:$src2)>; + +// Shuffle with MOVSS +def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))), + (MOVSSrr VR128:$src1, FR32:$src2)>; +def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)), + (MOVSSrr (v4i32 VR128:$src1), + (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>; +def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)), + (MOVSSrr (v4f32 VR128:$src1), + (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>; +// FIXME: Instead of a X86Movss there should be a X86Movlps here, the problem +// is during lowering, where it's not possible to recognize the load fold cause +// it has two uses through a bitcast. One use disappears at isel time and the +// fold opportunity reappears. +def : Pat<(X86Movss VR128:$src1, + (bc_v4i32 (v2i64 (load addr:$src2)))), + (MOVLPSrm VR128:$src1, addr:$src2)>; + +// Shuffle with MOVSD +def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))), + (MOVSDrr VR128:$src1, FR64:$src2)>; +def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)), + (MOVSDrr (v2i64 VR128:$src1), + (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>; +def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)), + (MOVSDrr (v2f64 VR128:$src1), + (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>; +def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)), + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_sd))>; +def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)), + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_sd))>; + +// Shuffle with MOVSHDUP +def : Pat<(v4i32 (X86Movshdup VR128:$src)), + (MOVSHDUPrr VR128:$src)>; +def : Pat<(X86Movshdup (bc_v4i32 (memopv2i64 addr:$src))), + (MOVSHDUPrm addr:$src)>; + +def : Pat<(v4f32 (X86Movshdup VR128:$src)), + (MOVSHDUPrr VR128:$src)>; +def : Pat<(X86Movshdup (memopv4f32 addr:$src)), + (MOVSHDUPrm addr:$src)>; + +// Shuffle with MOVSLDUP +def : Pat<(v4i32 (X86Movsldup VR128:$src)), + (MOVSLDUPrr VR128:$src)>; +def : Pat<(X86Movsldup (bc_v4i32 (memopv2i64 addr:$src))), + (MOVSLDUPrm addr:$src)>; + +def : Pat<(v4f32 (X86Movsldup VR128:$src)), + (MOVSLDUPrr VR128:$src)>; +def : Pat<(X86Movsldup (memopv4f32 addr:$src)), + (MOVSLDUPrm addr:$src)>; + +// Shuffle with PSHUFHW +def : Pat<(v8i16 (X86PShufhwLd addr:$src, (i8 imm:$imm))), + (PSHUFHWmi addr:$src, imm:$imm)>; +def : Pat<(v8i16 (X86PShufhw VR128:$src, (i8 imm:$imm))), + (PSHUFHWri VR128:$src, imm:$imm)>; +def : Pat<(v8i16 (X86PShufhw (bc_v8i16 (memopv2i64 addr:$src)), (i8 imm:$imm))), + (PSHUFHWmi addr:$src, imm:$imm)>; + +// Shuffle with PSHUFLW +def : Pat<(v8i16 (X86PShuflwLd addr:$src, (i8 imm:$imm))), + (PSHUFLWmi addr:$src, imm:$imm)>; +def : Pat<(v8i16 (X86PShuflw VR128:$src, (i8 imm:$imm))), + (PSHUFLWri VR128:$src, imm:$imm)>; +def : Pat<(v8i16 (X86PShuflw (bc_v8i16 (memopv2i64 addr:$src)), (i8 imm:$imm))), + (PSHUFLWmi addr:$src, imm:$imm)>; + +// Shuffle with PALIGN +def : 
Pat<(v1i64 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))), + (PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>; +def : Pat<(v2i32 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))), + (PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>; +def : Pat<(v4i16 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))), + (PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>; +def : Pat<(v8i8 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))), + (PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>; + +def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; +def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; +def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; +def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; + +// Shuffle with MOVLPS +def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))), + (MOVLPSrm VR128:$src1, addr:$src2)>; +def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))), + (MOVLPSrm VR128:$src1, addr:$src2)>; +def : Pat<(X86Movlps VR128:$src1, + (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))), + (MOVLPSrm VR128:$src1, addr:$src2)>; + +// Shuffle with MOVLPD +def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))), + (MOVLPDrm VR128:$src1, addr:$src2)>; +def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))), + (MOVLPDrm VR128:$src1, addr:$src2)>; +def : Pat<(v2f64 (X86Movlpd VR128:$src1, + (scalar_to_vector (loadf64 addr:$src2)))), + (MOVLPDrm VR128:$src1, addr:$src2)>; + +// Extra patterns to match stores with MOVHPS/PD and MOVLPS/PD +def : Pat<(store (f64 (vector_extract + (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))),addr:$dst), + (MOVHPSmr addr:$dst, VR128:$src)>; +def : Pat<(store (f64 (vector_extract + (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))),addr:$dst), + (MOVHPDmr addr:$dst, VR128:$src)>; + +def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),addr:$src1), + (MOVLPSmr addr:$src1, VR128:$src2)>; +def : Pat<(store (v4i32 (X86Movlps + (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), addr:$src1), + (MOVLPSmr addr:$src1, VR128:$src2)>; + +def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),addr:$src1), + (MOVLPDmr addr:$src1, VR128:$src2)>; +def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)),addr:$src1), + (MOVLPDmr addr:$src1, VR128:$src2)>; diff --git a/lib/Target/X86/X86MCAsmInfo.cpp b/lib/Target/X86/X86MCAsmInfo.cpp index 2b8720bac343..36badb403e81 100644 --- a/lib/Target/X86/X86MCAsmInfo.cpp +++ b/lib/Target/X86/X86MCAsmInfo.cpp @@ -103,6 +103,9 @@ getNonexecutableStackSection(MCContext &Ctx) const { } X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) { + if (Triple.getArch() == Triple::x86_64) + GlobalPrefix = ""; + AsmTransCBE = x86_asm_table; AssemblerDialect = AsmWriterFlavor; diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp index 23b0666f5f30..9564fe0b92d4 100644 --- a/lib/Target/X86/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/X86MCCodeEmitter.cpp @@ -365,7 +365,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, const TargetInstrDesc &Desc, raw_ostream &OS) const { bool HasVEX_4V = false; - if (TSFlags & X86II::VEX_4V) + if ((TSFlags >> 32) & X86II::VEX_4V) HasVEX_4V = true; // VEX_R: opcode externsion equivalent to REX.R in 
@@ -429,10 +429,10 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, if (TSFlags & X86II::OpSize) VEX_PP = 0x01; - if (TSFlags & X86II::VEX_W) + if ((TSFlags >> 32) & X86II::VEX_W) VEX_W = 1; - if (TSFlags & X86II::VEX_L) + if ((TSFlags >> 32) & X86II::VEX_L) VEX_L = 1; switch (TSFlags & X86II::Op0Mask) { @@ -469,33 +469,39 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, unsigned NumOps = MI.getNumOperands(); unsigned CurOp = 0; + bool IsDestMem = false; switch (TSFlags & X86II::FormMask) { case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!"); + case X86II::MRMDestMem: + IsDestMem = true; + // The important info for the VEX prefix is never beyond the address + // registers. Don't check beyond that. + NumOps = CurOp = X86::AddrNumOperands; case X86II::MRM0m: case X86II::MRM1m: case X86II::MRM2m: case X86II::MRM3m: case X86II::MRM4m: case X86II::MRM5m: case X86II::MRM6m: case X86II::MRM7m: - case X86II::MRMDestMem: - NumOps = CurOp = X86::AddrNumOperands; case X86II::MRMSrcMem: case X86II::MRMSrcReg: if (MI.getNumOperands() > CurOp && MI.getOperand(CurOp).isReg() && X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_R = 0x0; - - // CurOp and NumOps are equal when VEX_R represents a register used - // to index a memory destination (which is the last operand) - CurOp = (CurOp == NumOps) ? 0 : CurOp+1; + CurOp++; if (HasVEX_4V) { - VEX_4V = getVEXRegisterEncoding(MI, CurOp); + VEX_4V = getVEXRegisterEncoding(MI, IsDestMem ? CurOp-1 : CurOp); CurOp++; } + // To only check operands before the memory address ones, start + // the search from the begining + if (IsDestMem) + CurOp = 0; + // If the last register should be encoded in the immediate field // do not use any bit from VEX prefix to this register, ignore it - if (TSFlags & X86II::VEX_I8IMM) + if ((TSFlags >> 32) & X86II::VEX_I8IMM) NumOps--; for (; CurOp != NumOps; ++CurOp) { @@ -508,7 +514,10 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, VEX_X = 0x0; } break; - default: // MRMDestReg, MRM0r-MRM7r + default: // MRMDestReg, MRM0r-MRM7r, RawFrm + if (!MI.getNumOperands()) + break; + if (MI.getOperand(CurOp).isReg() && X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_B = 0; @@ -524,7 +533,6 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, VEX_R = 0x0; } break; - assert(0 && "Not implemented!"); } // Emit segment override opcode prefix as needed. @@ -793,9 +801,9 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, // It uses the VEX.VVVV field? bool HasVEX_4V = false; - if (TSFlags & X86II::VEX) + if ((TSFlags >> 32) & X86II::VEX) HasVEXPrefix = true; - if (TSFlags & X86II::VEX_4V) + if ((TSFlags >> 32) & X86II::VEX_4V) HasVEX_4V = true; // Determine where the memory operand starts, if present. 
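The repeated change in the hunks above from (TSFlags & X86II::VEX_*) to ((TSFlags >> 32) & X86II::VEX_*) reflects that the VEX-related flags (VEX, VEX_4V, VEX_W, VEX_L, VEX_I8IMM) are now kept in the upper half of the 64-bit TSFlags word, so a plain mask test against the low 32 bits would always read zero. A minimal sketch of the idea, using hypothetical flag values rather than the real X86II bit positions:

    #include <cstdint>

    // Hypothetical layout: VEX flags packed into the upper 32 bits of TSFlags.
    enum : uint64_t {
      SKETCH_VEX    = 1ULL << 0,
      SKETCH_VEX_4V = 1ULL << 1,
    };

    inline bool hasVEX4V(uint64_t TSFlags) {
      // Bring the upper half down before masking, mirroring the patch above.
      return ((TSFlags >> 32) & SKETCH_VEX_4V) != 0;
    }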
@@ -819,6 +827,14 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, case X86II::RawFrm: EmitByte(BaseOpcode, CurByte, OS); break; + + case X86II::RawFrmImm16: + EmitByte(BaseOpcode, CurByte, OS); + EmitImmediate(MI.getOperand(CurOp++), + X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags), + CurByte, OS, Fixups); + EmitImmediate(MI.getOperand(CurOp++), 2, FK_Data_2, CurByte, OS, Fixups); + break; case X86II::AddRegFrm: EmitByte(BaseOpcode + GetX86RegNum(MI.getOperand(CurOp++)), CurByte, OS); @@ -833,10 +849,15 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, case X86II::MRMDestMem: EmitByte(BaseOpcode, CurByte, OS); + SrcRegNum = CurOp + X86::AddrNumOperands; + + if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) + SrcRegNum++; + EmitMemModRMByte(MI, CurOp, - GetX86RegNum(MI.getOperand(CurOp + X86::AddrNumOperands)), + GetX86RegNum(MI.getOperand(SrcRegNum)), TSFlags, CurByte, OS, Fixups); - CurOp += X86::AddrNumOperands + 1; + CurOp = SrcRegNum + 1; break; case X86II::MRMSrcReg: @@ -934,7 +955,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, if (CurOp != NumOps) { // The last source register of a 4 operand instruction in AVX is encoded // in bits[7:4] of a immediate byte, and bits[3:0] are ignored. - if (TSFlags & X86II::VEX_I8IMM) { + if ((TSFlags >> 32) & X86II::VEX_I8IMM) { const MCOperand &MO = MI.getOperand(CurOp++); bool IsExtReg = X86InstrInfo::isX86_64ExtendedReg(MO.getReg()); diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index e67fc06a6cd7..8c4620f92177 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -16,7 +16,6 @@ #include "X86AsmPrinter.h" #include "X86COFFMachineModuleInfo.h" #include "X86MCAsmInfo.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -29,21 +28,19 @@ #include "llvm/Type.h" using namespace llvm; - -const X86Subtarget &X86MCInstLower::getSubtarget() const { - return AsmPrinter.getSubtarget(); -} +X86MCInstLower::X86MCInstLower(Mangler *mang, const MachineFunction &mf, + X86AsmPrinter &asmprinter) +: Ctx(mf.getContext()), Mang(mang), MF(mf), TM(mf.getTarget()), + MAI(*TM.getMCAsmInfo()), AsmPrinter(asmprinter) {} MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const { - assert(getSubtarget().isTargetDarwin() &&"Can only get MachO info on darwin"); - return AsmPrinter.MMI->getObjFileInfo<MachineModuleInfoMachO>(); + return MF.getMMI().getObjFileInfo<MachineModuleInfoMachO>(); } MCSymbol *X86MCInstLower::GetPICBaseSymbol() const { - const TargetLowering *TLI = AsmPrinter.TM.getTargetLowering(); - return static_cast<const X86TargetLowering*>(TLI)-> - getPICBaseSymbol(AsmPrinter.MF, Ctx); + return static_cast<const X86TargetLowering*>(TM.getTargetLowering())-> + getPICBaseSymbol(&MF, Ctx); } /// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol @@ -56,7 +53,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const { if (!MO.isGlobal()) { assert(MO.isSymbol()); - Name += AsmPrinter.MAI->getGlobalPrefix(); + Name += MAI.getGlobalPrefix(); Name += MO.getSymbolName(); } else { const GlobalValue *GV = MO.getGlobal(); @@ -91,7 +88,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const { assert(MO.isGlobal() && "Extern symbol not handled yet"); StubSym = MachineModuleInfoImpl:: - StubValueTy(AsmPrinter.Mang->getSymbol(MO.getGlobal()), + StubValueTy(Mang->getSymbol(MO.getGlobal()), 
!MO.getGlobal()->hasInternalLinkage()); } return Sym; @@ -105,7 +102,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const { assert(MO.isGlobal() && "Extern symbol not handled yet"); StubSym = MachineModuleInfoImpl:: - StubValueTy(AsmPrinter.Mang->getSymbol(MO.getGlobal()), + StubValueTy(Mang->getSymbol(MO.getGlobal()), !MO.getGlobal()->hasInternalLinkage()); } return Sym; @@ -121,7 +118,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const { if (MO.isGlobal()) { StubSym = MachineModuleInfoImpl:: - StubValueTy(AsmPrinter.Mang->getSymbol(MO.getGlobal()), + StubValueTy(Mang->getSymbol(MO.getGlobal()), !MO.getGlobal()->hasInternalLinkage()); } else { Name.erase(Name.end()-5, Name.end()); @@ -178,7 +175,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, Expr = MCBinaryExpr::CreateSub(Expr, MCSymbolRefExpr::Create(GetPICBaseSymbol(), Ctx), Ctx); - if (MO.isJTI() && AsmPrinter.MAI->hasSetDirective()) { + if (MO.isJTI() && MAI.hasSetDirective()) { // If .set directive is supported, use it to reduce the number of // relocations the assembler will generate for differences between // local labels. This is only safe when the symbols are in the same @@ -255,7 +252,13 @@ static void SimplifyShortImmForm(MCInst &Inst, unsigned Opcode) { } /// \brief Simplify things like MOV32rm to MOV32o32a. -static void SimplifyShortMoveForm(MCInst &Inst, unsigned Opcode) { +static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst, + unsigned Opcode) { + // Don't make these simplifications in 64-bit mode; other assemblers don't + // perform them because they make the code larger. + if (Printer.getSubtarget().is64Bit()) + return; + bool IsStore = Inst.getOperand(0).isReg() && Inst.getOperand(1).isReg(); unsigned AddrBase = IsStore; unsigned RegOp = IsStore ? 
0 : 5; @@ -336,7 +339,7 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { break; case MachineOperand::MO_BlockAddress: MCOp = LowerSymbolOperand(MO, - AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress())); + AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress())); break; } @@ -377,12 +380,17 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { case X86::MMX_V_SET0: LowerUnaryToTwoAddr(OutMI, X86::MMX_PXORrr); break; case X86::MMX_V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::MMX_PCMPEQDrr); break; - case X86::FsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break; - case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break; - case X86::V_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break; - case X86::V_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::XORPDrr); break; - case X86::V_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break; - case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break; + case X86::FsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break; + case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break; + case X86::V_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break; + case X86::V_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::XORPDrr); break; + case X86::V_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break; + case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break; + case X86::AVX_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::VXORPSrr); break; + case X86::AVX_SET0PSY: LowerUnaryToTwoAddr(OutMI, X86::VXORPSYrr); break; + case X86::AVX_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::VXORPDrr); break; + case X86::AVX_SET0PDY: LowerUnaryToTwoAddr(OutMI, X86::VXORPDYrr); break; + case X86::AVX_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break; case X86::MOV16r0: LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV16r0 -> MOV32r0 @@ -393,12 +401,14 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr break; - // TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have + // TAILJMPr64, [WIN]CALL64r, [WIN]CALL64pcrel32 - These instructions have // register inputs modeled as normal uses instead of implicit uses. As such, // truncate off all but the first operand (the callee). FIXME: Change isel. 
case X86::TAILJMPr64: case X86::CALL64r: - case X86::CALL64pcrel32: { + case X86::CALL64pcrel32: + case X86::WINCALL64r: + case X86::WINCALL64pcrel32: { unsigned Opcode = OutMI.getOpcode(); MCOperand Saved = OutMI.getOperand(0); OutMI = MCInst(); @@ -456,15 +466,13 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { // MOV64ao8, MOV64o8a // XCHG16ar, XCHG32ar, XCHG64ar case X86::MOV8mr_NOREX: - case X86::MOV8mr: SimplifyShortMoveForm(OutMI, X86::MOV8ao8); break; + case X86::MOV8mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV8ao8); break; case X86::MOV8rm_NOREX: - case X86::MOV8rm: SimplifyShortMoveForm(OutMI, X86::MOV8o8a); break; - case X86::MOV16mr: SimplifyShortMoveForm(OutMI, X86::MOV16ao16); break; - case X86::MOV16rm: SimplifyShortMoveForm(OutMI, X86::MOV16o16a); break; - case X86::MOV32mr: SimplifyShortMoveForm(OutMI, X86::MOV32ao32); break; - case X86::MOV32rm: SimplifyShortMoveForm(OutMI, X86::MOV32o32a); break; - case X86::MOV64mr: SimplifyShortMoveForm(OutMI, X86::MOV64ao64); break; - case X86::MOV64rm: SimplifyShortMoveForm(OutMI, X86::MOV64o64a); break; + case X86::MOV8rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV8o8a); break; + case X86::MOV16mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV16ao16); break; + case X86::MOV16rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV16o16a); break; + case X86::MOV32mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV32ao32); break; + case X86::MOV32rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV32o32a); break; case X86::ADC8ri: SimplifyShortImmForm(OutMI, X86::ADC8i8); break; case X86::ADC16ri: SimplifyShortImmForm(OutMI, X86::ADC16i16); break; @@ -505,46 +513,9 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { } } -void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, - raw_ostream &O) { - // Only the target-dependent form of DBG_VALUE should get here. - // Referencing the offset and metadata as NOps-2 and NOps-1 is - // probably portable to other targets; frame pointer location is not. - unsigned NOps = MI->getNumOperands(); - assert(NOps==7); - O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; - // cast away const; DIetc do not take const operands for some reason. - DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata())); - if (V.getContext().isSubprogram()) - O << DISubprogram(V.getContext()).getDisplayName() << ":"; - O << V.getName(); - O << " <- "; - // Frame address. Currently handles register +- offset only. - O << '['; - if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg()) - printOperand(MI, 0, O); - else - O << "undef"; - O << '+'; printOperand(MI, 3, O); - O << ']'; - O << "+"; - printOperand(MI, NOps-2, O); -} - -MachineLocation -X86AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const { - MachineLocation Location; - assert (MI->getNumOperands() == 7 && "Invalid no. of machine operands!"); - // Frame address. Currently handles register +- offset only. 
- - if (MI->getOperand(0).isReg() && MI->getOperand(3).isImm()) - Location.set(MI->getOperand(0).getReg(), MI->getOperand(3).getImm()); - return Location; -} - void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { - X86MCInstLower MCInstLowering(OutContext, Mang, *this); + X86MCInstLower MCInstLowering(Mang, *MF, *this); switch (MI->getOpcode()) { case TargetOpcode::DBG_VALUE: if (isVerbose() && OutStreamer.hasRawTextSupport()) { @@ -555,6 +526,12 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { } return; + // Emit nothing here but a comment if we can. + case X86::Int_MemBarrier: + if (OutStreamer.hasRawTextSupport()) + OutStreamer.EmitRawText(StringRef("\t#MEMBARRIER")); + return; + case X86::TAILJMPr: case X86::TAILJMPd: case X86::TAILJMPd64: diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.h b/lib/Target/X86/X86MCInstLower.h index 9e5474fc81b3..539b09be6fd7 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.h +++ b/lib/Target/X86/X86MCInstLower.h @@ -13,27 +13,30 @@ #include "llvm/Support/Compiler.h" namespace llvm { + class MCAsmInfo; class MCContext; class MCInst; class MCOperand; class MCSymbol; class MachineInstr; + class MachineFunction; class MachineModuleInfoMachO; class MachineOperand; class Mangler; + class TargetMachine; class X86AsmPrinter; - class X86Subtarget; /// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst. class LLVM_LIBRARY_VISIBILITY X86MCInstLower { MCContext &Ctx; Mangler *Mang; + const MachineFunction &MF; + const TargetMachine &TM; + const MCAsmInfo &MAI; X86AsmPrinter &AsmPrinter; - - const X86Subtarget &getSubtarget() const; public: - X86MCInstLower(MCContext &ctx, Mangler *mang, X86AsmPrinter &asmprinter) - : Ctx(ctx), Mang(mang), AsmPrinter(asmprinter) {} + X86MCInstLower(Mangler *mang, const MachineFunction &MF, + X86AsmPrinter &asmprinter); void Lower(const MachineInstr *MI, MCInst &OutMI) const; diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 5f31e00ebabd..fedd49ebb540 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -38,8 +38,15 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/CommandLine.h" using namespace llvm; +static cl::opt<bool> +ForceStackAlign("force-align-stack", + cl::desc("Force align the stack to the minimum alignment" + " needed for the function."), + cl::init(false), cl::Hidden); + X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii) : X86GenRegisterInfo(tm.getSubtarget<X86Subtarget>().is64Bit() ? 
@@ -193,6 +200,12 @@ unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) { case X86::DR7: return 7; + // Pseudo index registers are equivalent to a "none" + // scaled index (See Intel Manual 2A, table 2-3) + case X86::EIZ: + case X86::RIZ: + return 4; + default: assert(isVirtualRegister(RegNo) && "Unknown physical register!"); llvm_unreachable("Register allocator hasn't allocated reg correctly yet!"); @@ -456,26 +469,29 @@ bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const { bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *F = MF.getFunction(); - bool requiresRealignment = - RealignStack && ((MFI->getMaxAlignment() > StackAlign) || - F->hasFnAttr(Attribute::StackAlignment)); + bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || + F->hasFnAttr(Attribute::StackAlignment)); // FIXME: Currently we don't support stack realignment for functions with // variable-sized allocas. - // FIXME: Temporary disable the error - it seems to be too conservative. + // FIXME: It's more complicated than this... if (0 && requiresRealignment && MFI->hasVarSizedObjects()) report_fatal_error( "Stack realignment in presense of dynamic allocas is not supported"); - - return (requiresRealignment && !MFI->hasVarSizedObjects()); + + // If we've requested that we force align the stack do so now. + if (ForceStackAlign) + return canRealignStack(MF); + + return requiresRealignment && canRealignStack(MF); } -bool X86RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const { +bool X86RegisterInfo::hasReservedCallFrame(const MachineFunction &MF) const { return !MF.getFrameInfo()->hasVarSizedObjects(); } -bool X86RegisterInfo::hasReservedSpillSlot(MachineFunction &MF, unsigned Reg, - int &FrameIdx) const { +bool X86RegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, + unsigned Reg, int &FrameIdx) const { if (Reg == FramePtr && hasFP(MF)) { FrameIdx = MF.getFrameInfo()->getObjectIndexBegin(); return true; @@ -610,10 +626,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MBB.erase(I); } -unsigned +void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value, - RegScavenger *RS) const{ + int SPAdj, RegScavenger *RS) const{ assert(SPAdj == 0 && "Unexpected"); unsigned i = 0; @@ -660,7 +675,6 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, uint64_t Offset = FIOffset + (uint64_t)MI.getOperand(i+3).getOffset(); MI.getOperand(i+3).setOffset(Offset); } - return 0; } void @@ -750,7 +764,7 @@ void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, } } -/// mergeSPUpdatesUp - Merge two stack-manipulating instructions lower iterator. +/// mergeSPUpdatesDown - Merge two stack-manipulating instructions lower iterator. static void mergeSPUpdatesDown(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, @@ -901,6 +915,17 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { bool HasFP = hasFP(MF); DebugLoc DL; + // If we're forcing a stack realignment we can't rely on just the frame + // info, we need to know the ABI stack alignment as well in case we + // have a call out. Otherwise just make sure we have some alignment - we'll + // go with the minimum SlotSize. + if (ForceStackAlign) { + if (MFI->hasCalls()) + MaxAlign = (StackAlign > MaxAlign) ? 
StackAlign : MaxAlign; + else if (MaxAlign < SlotSize) + MaxAlign = SlotSize; + } + // Add RETADDR move area to callee saved frame size. int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); if (TailCallReturnAddrDelta < 0) @@ -979,7 +1004,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { if (needsFrameMoves) { // Mark the place where EBP/RBP was saved. MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addSym(FrameLabel); + BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel); // Define the current CFA rule to use the provided offset. if (StackSize) { @@ -1007,7 +1032,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { if (needsFrameMoves) { // Mark effective beginning of when frame pointer becomes valid. MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addSym(FrameLabel); + BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel); // Define the current CFA to use the EBP/RBP register. MachineLocation FPDst(FramePtr); @@ -1047,7 +1072,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { if (!HasFP && needsFrameMoves) { // Mark callee-saved push instruction. MCSymbol *Label = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addSym(Label); + BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label); // Define the current CFA rule to use the provided offset. unsigned Ptr = StackSize ? @@ -1062,7 +1087,17 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { DL = MBB.findDebugLoc(MBBI); // Adjust stack pointer: ESP -= numbytes. - if (NumBytes >= 4096 && Subtarget->isTargetCygMing()) { + + // Windows and cygwin/mingw require a prologue helper routine when allocating + // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw + // uses __alloca. __alloca and the 32-bit version of __chkstk will probe + // the stack and adjust the stack pointer in one go. The 64-bit version + // of __chkstk is only responsible for probing the stack. The 64-bit + // prologue is responsible for adjusting the stack pointer. Touching the + // stack at 4K increments is necessary to ensure that the guard pages used + // by the OS virtual memory manager are allocated in correct sequence. + if (NumBytes >= 4096 && + (Subtarget->isTargetCygMing() || Subtarget->isTargetWin32())) { // Check, whether EAX is livein for this function. bool isEAXAlive = false; for (MachineRegisterInfo::livein_iterator @@ -1073,16 +1108,16 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { Reg == X86::AH || Reg == X86::AL); } - // Function prologue calls _alloca to probe the stack when allocating more - // than 4k bytes in one go. Touching the stack at 4K increments is necessary - // to ensure that the guard pages used by the OS virtual memory manager are - // allocated in correct sequence. + + const char *StackProbeSymbol = + Subtarget->isTargetWindows() ? 
"_chkstk" : "_alloca"; if (!isEAXAlive) { BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) .addImm(NumBytes); BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32)) - .addExternalSymbol("_alloca") - .addReg(StackPtr, RegState::Define | RegState::Implicit); + .addExternalSymbol(StackProbeSymbol) + .addReg(StackPtr, RegState::Define | RegState::Implicit) + .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); } else { // Save EAX BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r)) @@ -1093,8 +1128,9 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) .addImm(NumBytes - 4); BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32)) - .addExternalSymbol("_alloca") - .addReg(StackPtr, RegState::Define | RegState::Implicit); + .addExternalSymbol(StackProbeSymbol) + .addReg(StackPtr, RegState::Define | RegState::Implicit) + .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); // Restore EAX MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), @@ -1119,7 +1155,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { if ((NumBytes || PushedRegs) && needsFrameMoves) { // Mark end of stack pointer adjustment. MCSymbol *Label = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addSym(Label); + BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label); if (!HasFP && NumBytes) { // Define the current CFA rule to use the provided offset. @@ -1172,6 +1208,17 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF, unsigned CSSize = X86FI->getCalleeSavedFrameSize(); uint64_t NumBytes = 0; + // If we're forcing a stack realignment we can't rely on just the frame + // info, we need to know the ABI stack alignment as well in case we + // have a call out. Otherwise just make sure we have some alignment - we'll + // go with the minimum. + if (ForceStackAlign) { + if (MFI->hasCalls()) + MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign; + else + MaxAlign = MaxAlign ? MaxAlign : 4; + } + if (hasFP(MF)) { // Calculate required stack adjustment. 
uint64_t FrameSize = StackSize - SlotSize; @@ -1519,7 +1566,7 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) { namespace { struct MSAH : public MachineFunctionPass { static char ID; - MSAH() : MachineFunctionPass(&ID) {} + MSAH() : MachineFunctionPass(ID) {} virtual bool runOnMachineFunction(MachineFunction &MF) { const X86TargetMachine *TM = diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index d852bcd2011c..527df05c58fc 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -117,18 +117,17 @@ public: bool needsStackRealignment(const MachineFunction &MF) const; - bool hasReservedCallFrame(MachineFunction &MF) const; + bool hasReservedCallFrame(const MachineFunction &MF) const; - bool hasReservedSpillSlot(MachineFunction &MF, unsigned Reg, + bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg, int &FrameIdx) const; void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const; - unsigned eliminateFrameIndex(MachineBasicBlock::iterator MI, - int SPAdj, FrameIndexValue *Value = NULL, - RegScavenger *RS = NULL) const; + void eliminateFrameIndex(MachineBasicBlock::iterator MI, + int SPAdj, RegScavenger *RS = NULL) const; void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = NULL) const; diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index 9f0382e3fae9..95269b15760e 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -241,6 +241,10 @@ let Namespace = "X86" in { def CR6 : Register<"cr6">; def CR7 : Register<"cr7">; def CR8 : Register<"cr8">; + + // Pseudo index registers + def EIZ : Register<"eiz">; + def RIZ : Register<"riz">; } @@ -804,7 +808,7 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128, }]; } -def VR256 : RegisterClass<"X86", [v8i32, v4i64, v8f32, v4f64], 256, +def VR256 : RegisterClass<"X86", [v32i8, v8i32, v4i64, v8f32, v4f64], 256, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15]> { @@ -829,4 +833,15 @@ def VR256 : RegisterClass<"X86", [v8i32, v4i64, v8f32, v4f64], 256, // Status flags registers. def CCR : RegisterClass<"X86", [i32], 32, [EFLAGS]> { let CopyCost = -1; // Don't allow copying of status registers. + + // EFLAGS is not allocatable. + let MethodProtos = [{ + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + CCRClass::iterator + CCRClass::allocation_order_end(const MachineFunction &MF) const { + return allocation_order_begin(MF); + } + }]; } diff --git a/lib/Target/X86/X86ShuffleDecode.h b/lib/Target/X86/X86ShuffleDecode.h new file mode 100644 index 000000000000..df040520bc8f --- /dev/null +++ b/lib/Target/X86/X86ShuffleDecode.h @@ -0,0 +1,155 @@ +//===-- X86ShuffleDecode.h - X86 shuffle decode logic ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Define several functions to decode x86 specific shuffle semantics into a +// generic vector mask. 
+// +//===----------------------------------------------------------------------===// + +#ifndef X86_SHUFFLE_DECODE_H +#define X86_SHUFFLE_DECODE_H + +#include "llvm/ADT/SmallVector.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Vector Mask Decoding +//===----------------------------------------------------------------------===// + +enum { + SM_SentinelZero = ~0U +}; + +static inline +void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) { + // Defaults the copying the dest value. + ShuffleMask.push_back(0); + ShuffleMask.push_back(1); + ShuffleMask.push_back(2); + ShuffleMask.push_back(3); + + // Decode the immediate. + unsigned ZMask = Imm & 15; + unsigned CountD = (Imm >> 4) & 3; + unsigned CountS = (Imm >> 6) & 3; + + // CountS selects which input element to use. + unsigned InVal = 4+CountS; + // CountD specifies which element of destination to update. + ShuffleMask[CountD] = InVal; + // ZMask zaps values, potentially overriding the CountD elt. + if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero; + if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero; + if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero; + if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero; +} + +// <3,1> or <6,7,2,3> +static void DecodeMOVHLPSMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask) { + for (unsigned i = NElts/2; i != NElts; ++i) + ShuffleMask.push_back(NElts+i); + + for (unsigned i = NElts/2; i != NElts; ++i) + ShuffleMask.push_back(i); +} + +// <0,2> or <0,1,4,5> +static void DecodeMOVLHPSMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask) { + for (unsigned i = 0; i != NElts/2; ++i) + ShuffleMask.push_back(i); + + for (unsigned i = 0; i != NElts/2; ++i) + ShuffleMask.push_back(NElts+i); +} + +static void DecodePSHUFMask(unsigned NElts, unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask) { + for (unsigned i = 0; i != NElts; ++i) { + ShuffleMask.push_back(Imm % NElts); + Imm /= NElts; + } +} + +static void DecodePSHUFHWMask(unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask) { + ShuffleMask.push_back(0); + ShuffleMask.push_back(1); + ShuffleMask.push_back(2); + ShuffleMask.push_back(3); + for (unsigned i = 0; i != 4; ++i) { + ShuffleMask.push_back(4+(Imm & 3)); + Imm >>= 2; + } +} + +static void DecodePSHUFLWMask(unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask) { + for (unsigned i = 0; i != 4; ++i) { + ShuffleMask.push_back((Imm & 3)); + Imm >>= 2; + } + ShuffleMask.push_back(4); + ShuffleMask.push_back(5); + ShuffleMask.push_back(6); + ShuffleMask.push_back(7); +} + +static void DecodePUNPCKLMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask) { + for (unsigned i = 0; i != NElts/2; ++i) { + ShuffleMask.push_back(i); + ShuffleMask.push_back(i+NElts); + } +} + +static void DecodePUNPCKHMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask) { + for (unsigned i = 0; i != NElts/2; ++i) { + ShuffleMask.push_back(i+NElts/2); + ShuffleMask.push_back(i+NElts+NElts/2); + } +} + +static void DecodeSHUFPSMask(unsigned NElts, unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask) { + // Part that reads from dest. + for (unsigned i = 0; i != NElts/2; ++i) { + ShuffleMask.push_back(Imm % NElts); + Imm /= NElts; + } + // Part that reads from src. 
+ for (unsigned i = 0; i != NElts/2; ++i) { + ShuffleMask.push_back(Imm % NElts + NElts); + Imm /= NElts; + } +} + +static void DecodeUNPCKHPMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask) { + for (unsigned i = 0; i != NElts/2; ++i) { + ShuffleMask.push_back(i+NElts/2); // Reads from dest + ShuffleMask.push_back(i+NElts+NElts/2); // Reads from src + } +} + + +/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd +/// etc. NElts indicates the number of elements in the vector allowing it to +/// handle different datatypes and vector widths. +static void DecodeUNPCKLPMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask) { + for (unsigned i = 0; i != NElts/2; ++i) { + ShuffleMask.push_back(i); // Reads from dest + ShuffleMask.push_back(i+NElts); // Reads from src + } +} + +#endif diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 4a10be518f03..0d02e5ee472b 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -73,7 +73,7 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { if (GV->hasDefaultVisibility() && (isDecl || GV->isWeakForLinker())) return X86II::MO_GOTPCREL; - } else { + } else if (!isTargetWin64()) { assert(isTargetELF() && "Unknown rip-relative target"); // Extra load is needed for all externally visible. @@ -260,9 +260,10 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0; bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0; - HasFMA3 = IsIntel && ((ECX >> 12) & 0x1); - HasAVX = ((ECX >> 28) & 0x1); - HasAES = IsIntel && ((ECX >> 25) & 0x1); + HasCLMUL = IsIntel && ((ECX >> 1) & 0x1); + HasFMA3 = IsIntel && ((ECX >> 12) & 0x1); + HasAVX = ((ECX >> 28) & 0x1); + HasAES = IsIntel && ((ECX >> 25) & 0x1); if (IsIntel || IsAMD) { // Determine if bit test memory instructions are slow. 
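The AutoDetectSubtargetFeatures hunk above adds CLMUL detection alongside the existing FMA3, AVX and AES checks; each feature is a single bit of the ECX word returned by CPUID leaf 1, at the positions the patch tests (bits 1, 12, 28 and 25 respectively), with CLMUL, FMA3 and AES additionally gated on the vendor being GenuineIntel. A self-contained sketch of that bit-testing style, assuming the ECX value has already been fetched by the subtarget's cpuid helper:

    struct Leaf1Features {
      bool HasCLMUL, HasFMA3, HasAES, HasAVX;
    };

    // Decode the feature bits the same way the patch does; ECX is assumed to
    // come from executing CPUID with EAX = 1 (vendor gating omitted here).
    inline Leaf1Features decodeLeaf1ECX(unsigned ECX) {
      Leaf1Features F;
      F.HasCLMUL = (ECX >> 1)  & 0x1;
      F.HasFMA3  = (ECX >> 12) & 0x1;
      F.HasAES   = (ECX >> 25) & 0x1;
      F.HasAVX   = (ECX >> 28) & 0x1;
      return F;
    }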
@@ -291,6 +292,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, , HasSSE4A(false) , HasAVX(false) , HasAES(false) + , HasCLMUL(false) , HasFMA3(false) , HasFMA4(false) , IsBTMemSlow(false) diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 486dbc4e2e90..0ee91abe21f4 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -74,6 +74,9 @@ protected: /// HasAES - Target has AES instructions bool HasAES; + /// HasCLMUL - Target has carry-less multiplication + bool HasCLMUL; + /// HasFMA3 - Target has 3-operand fused multiply-add bool HasFMA3; @@ -149,6 +152,7 @@ public: bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; } bool hasAVX() const { return HasAVX; } bool hasAES() const { return HasAES; } + bool hasCLMUL() const { return HasCLMUL; } bool hasFMA3() const { return HasFMA3; } bool hasFMA4() const { return HasFMA4; } bool isBTMemSlow() const { return IsBTMemSlow; } @@ -182,6 +186,10 @@ public: return Is64Bit && (isTargetMingw() || isTargetWindows()); } + bool isTargetWin32() const { + return !Is64Bit && (isTargetMingw() || isTargetWindows()); + } + std::string getDataLayout() const { const char *p; if (is64Bit()) diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index df00d3ffcc79..ce8636eb72b5 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -46,8 +46,15 @@ static MCStreamer *createMCStreamer(const Target &T, const std::string &TT, bool RelaxAll) { Triple TheTriple(TT); switch (TheTriple.getOS()) { - default: + case Triple::Darwin: return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll); + case Triple::MinGW32: + case Triple::MinGW64: + case Triple::Cygwin: + case Triple::Win32: + return createWinCOFFStreamer(Ctx, TAB, *_Emitter, _OS, RelaxAll); + default: + return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll); } } @@ -105,15 +112,21 @@ X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT, InstrInfo(*this), JITInfo(*this), TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this) { DefRelocModel = getRelocationModel(); - + // If no relocation model was picked, default as appropriate for the target. if (getRelocationModel() == Reloc::Default) { - if (!Subtarget.isTargetDarwin()) - setRelocationModel(Reloc::Static); - else if (Subtarget.is64Bit()) + // Darwin defaults to PIC in 64 bit mode and dynamic-no-pic in 32 bit mode. + // Win64 requires rip-rel addressing, thus we force it to PIC. Otherwise we + // use static relocation model by default. + if (Subtarget.isTargetDarwin()) { + if (Subtarget.is64Bit()) + setRelocationModel(Reloc::PIC_); + else + setRelocationModel(Reloc::DynamicNoPIC); + } else if (Subtarget.isTargetWin64()) setRelocationModel(Reloc::PIC_); else - setRelocationModel(Reloc::DynamicNoPIC); + setRelocationModel(Reloc::Static); } assert(getRelocationModel() != Reloc::Default && @@ -136,29 +149,27 @@ X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT, Subtarget.isTargetDarwin() && is64Bit) setRelocationModel(Reloc::PIC_); - + // Determine the PICStyle based on the target selected. if (getRelocationModel() == Reloc::Static) { // Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None. Subtarget.setPICStyle(PICStyles::None); + } else if (Subtarget.is64Bit()) { + // PIC in 64 bit mode is always rip-rel. 
+ Subtarget.setPICStyle(PICStyles::RIPRel); } else if (Subtarget.isTargetCygMing()) { Subtarget.setPICStyle(PICStyles::None); } else if (Subtarget.isTargetDarwin()) { - if (Subtarget.is64Bit()) - Subtarget.setPICStyle(PICStyles::RIPRel); - else if (getRelocationModel() == Reloc::PIC_) + if (getRelocationModel() == Reloc::PIC_) Subtarget.setPICStyle(PICStyles::StubPIC); else { assert(getRelocationModel() == Reloc::DynamicNoPIC); Subtarget.setPICStyle(PICStyles::StubDynamicNoPIC); } } else if (Subtarget.isTargetELF()) { - if (Subtarget.is64Bit()) - Subtarget.setPICStyle(PICStyles::RIPRel); - else - Subtarget.setPICStyle(PICStyles::GOT); + Subtarget.setPICStyle(PICStyles::GOT); } - + // Finally, if we have "none" as our PIC style, force to static mode. if (Subtarget.getPICStyle() == PICStyles::None) setRelocationModel(Reloc::Static); @@ -182,9 +193,6 @@ bool X86TargetMachine::addInstSelector(PassManagerBase &PM, bool X86TargetMachine::addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { - // Install a pass to insert x87 FP_REG_KILL instructions, as needed. - PM.add(createX87FPRegKillInserterPass()); - PM.add(createX86MaxStackAlignmentHeuristicPass()); return false; // -print-machineinstr shouldn't print after this. } diff --git a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp index 6656bdc10eae..8f06dd32662f 100644 --- a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp +++ b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp @@ -264,15 +264,13 @@ bool XCoreAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, void XCoreAsmPrinter::EmitInstruction(const MachineInstr *MI) { SmallString<128> Str; raw_svector_ostream O(Str); - + // Check for mov mnemonic - unsigned src, dst, srcSR, dstSR; - if (TM.getInstrInfo()->isMoveInstr(*MI, src, dst, srcSR, dstSR)) { - O << "\tmov " << getRegisterName(dst) << ", "; - O << getRegisterName(src); - } else { + if (MI->getOpcode() == XCore::ADD_2rus && !MI->getOperand(2).getImm()) + O << "\tmov " << getRegisterName(MI->getOperand(0).getReg()) << ", " + << getRegisterName(MI->getOperand(1).getReg()); + else printInstruction(MI, O); - } OutStreamer.EmitRawText(O.str()); } diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt index 1b8e7edfc7ca..38b35d7666c0 100644 --- a/lib/Target/XCore/CMakeLists.txt +++ b/lib/Target/XCore/CMakeLists.txt @@ -10,7 +10,7 @@ tablegen(XCoreGenDAGISel.inc -gen-dag-isel) tablegen(XCoreGenCallingConv.inc -gen-callingconv) tablegen(XCoreGenSubtarget.inc -gen-subtarget) -add_llvm_target(XCore +add_llvm_target(XCoreCodeGen XCoreFrameInfo.cpp XCoreInstrInfo.cpp XCoreISelDAGToDAG.cpp diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp index 5564ddf133ea..755ece7e9aba 100644 --- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp +++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp @@ -56,6 +56,17 @@ namespace { return CurDAG->getTargetConstant(Imm, MVT::i32); } + inline bool immMskBitp(SDNode *inN) const { + ConstantSDNode *N = cast<ConstantSDNode>(inN); + uint32_t value = (uint32_t)N->getZExtValue(); + if (!isMask_32(value)) { + return false; + } + int msksize = 32 - CountLeadingZeros_32(value); + return (msksize >= 1 && msksize <= 8) || + msksize == 16 || msksize == 24 || msksize == 32; + } + // Complex Pattern Selectors. 
bool SelectADDRspii(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Offset); @@ -151,17 +162,15 @@ SDNode *XCoreDAGToDAGISel::Select(SDNode *N) { switch (N->getOpcode()) { default: break; case ISD::Constant: { - if (Predicate_immMskBitp(N)) { + uint64_t Val = cast<ConstantSDNode>(N)->getZExtValue(); + if (immMskBitp(N)) { // Transformation function: get the size of a mask - int64_t MaskVal = cast<ConstantSDNode>(N)->getZExtValue(); - assert(isMask_32(MaskVal)); // Look for the first non-zero bit - SDValue MskSize = getI32Imm(32 - CountLeadingZeros_32(MaskVal)); + SDValue MskSize = getI32Imm(32 - CountLeadingZeros_32(Val)); return CurDAG->getMachineNode(XCore::MKMSK_rus, dl, MVT::i32, MskSize); } - else if (! Predicate_immU16(N)) { - unsigned Val = cast<ConstantSDNode>(N)->getZExtValue(); + else if (!isUInt<16>(Val)) { SDValue CPIdx = CurDAG->getTargetConstantPool(ConstantInt::get( Type::getInt32Ty(*CurDAG->getContext()), Val), diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp index dd90ea976770..ad00046af17d 100644 --- a/lib/Target/XCore/XCoreInstrInfo.cpp +++ b/lib/Target/XCore/XCoreInstrInfo.cpp @@ -46,33 +46,6 @@ static bool isZeroImm(const MachineOperand &op) { return op.isImm() && op.getImm() == 0; } -/// Return true if the instruction is a register to register move and -/// leave the source and dest operands in the passed parameters. -/// -bool XCoreInstrInfo::isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSR, unsigned &DstSR) const { - SrcSR = DstSR = 0; // No sub-registers. - - // We look for 4 kinds of patterns here: - // add dst, src, 0 - // sub dst, src, 0 - // or dst, src, src - // and dst, src, src - if ((MI.getOpcode() == XCore::ADD_2rus || MI.getOpcode() == XCore::SUB_2rus) - && isZeroImm(MI.getOperand(2))) { - DstReg = MI.getOperand(0).getReg(); - SrcReg = MI.getOperand(1).getReg(); - return true; - } else if ((MI.getOpcode() == XCore::OR_3r || MI.getOpcode() == XCore::AND_3r) - && MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) { - DstReg = MI.getOperand(0).getReg(); - SrcReg = MI.getOperand(1).getReg(); - return true; - } - return false; -} - /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of /// the destination along with the FrameIndex of the loaded stack slot. If @@ -437,7 +410,7 @@ bool XCoreInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, it->getFrameIdx(), RC, &RI); if (emitFrameMoves) { MCSymbol *SaveLabel = MF->getContext().CreateTempSymbol(); - BuildMI(MBB, MI, DL, get(XCore::DBG_LABEL)).addSym(SaveLabel); + BuildMI(MBB, MI, DL, get(XCore::PROLOG_LABEL)).addSym(SaveLabel); XFI->getSpillLabels().push_back(std::make_pair(SaveLabel, *it)); } } diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h index e5b0171579fc..d2b116eef0d8 100644 --- a/lib/Target/XCore/XCoreInstrInfo.h +++ b/lib/Target/XCore/XCoreInstrInfo.h @@ -30,12 +30,6 @@ public: /// virtual const TargetRegisterInfo &getRegisterInfo() const { return RI; } - /// Return true if the instruction is a register to register move and return - /// the source and dest operands and their sub-register indices by reference. 
- virtual bool isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const; - /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of /// the destination along with the FrameIndex of the loaded stack slot. If diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index 19b9b1f8c00c..6b3b39ba1d49 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -140,17 +140,7 @@ def immU20 : PatLeaf<(imm), [{ return (uint32_t)N->getZExtValue() < (1 << 20); }]>; -def immMskBitp : PatLeaf<(imm), [{ - uint32_t value = (uint32_t)N->getZExtValue(); - if (!isMask_32(value)) { - return false; - } - int msksize = 32 - CountLeadingZeros_32(value); - return (msksize >= 1 && msksize <= 8) - || msksize == 16 - || msksize == 24 - || msksize == 32; -}]>; +def immMskBitp : PatLeaf<(imm), [{ return immMskBitp(N); }]>; def immBitp : PatLeaf<(imm), [{ uint32_t value = (uint32_t)N->getZExtValue(); diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp index 2a88342180e4..f82e59814e77 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.cpp +++ b/lib/Target/XCore/XCoreRegisterInfo.cpp @@ -155,10 +155,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MBB.erase(I); } -unsigned +void XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value, - RegScavenger *RS) const { + int SPAdj, RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); MachineInstr &MI = *II; DebugLoc dl = MI.getDebugLoc(); @@ -291,7 +290,6 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } // Erase old instruction. MBB.erase(II); - return 0; } void @@ -420,7 +418,7 @@ void XCoreRegisterInfo::emitPrologue(MachineFunction &MF) const { // Show update of SP. MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addSym(FrameLabel); + BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(FrameLabel); MachineLocation SPDst(MachineLocation::VirtualFP); MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize * 4); @@ -439,7 +437,7 @@ void XCoreRegisterInfo::emitPrologue(MachineFunction &MF) const { if (emitFrameMoves) { MCSymbol *SaveLRLabel = MMI->getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addSym(SaveLRLabel); + BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(SaveLRLabel); MachineLocation CSDst(MachineLocation::VirtualFP, LRSpillOffset); MachineLocation CSSrc(XCore::LR); MMI->getFrameMoves().push_back(MachineMove(SaveLRLabel, CSDst, CSSrc)); @@ -455,7 +453,7 @@ void XCoreRegisterInfo::emitPrologue(MachineFunction &MF) const { MBB.addLiveIn(XCore::R10); if (emitFrameMoves) { MCSymbol *SaveR10Label = MMI->getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addSym(SaveR10Label); + BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(SaveR10Label); MachineLocation CSDst(MachineLocation::VirtualFP, FPSpillOffset); MachineLocation CSSrc(XCore::R10); MMI->getFrameMoves().push_back(MachineMove(SaveR10Label, CSDst, CSSrc)); @@ -467,7 +465,7 @@ void XCoreRegisterInfo::emitPrologue(MachineFunction &MF) const { if (emitFrameMoves) { // Show FP is now valid. 
MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addSym(FrameLabel); + BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(FrameLabel); MachineLocation SPDst(FramePtr); MachineLocation SPSrc(MachineLocation::VirtualFP); MMI->getFrameMoves().push_back(MachineMove(FrameLabel, SPDst, SPSrc)); diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h index 66132ba8ff66..e636c1c7298a 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.h +++ b/lib/Target/XCore/XCoreRegisterInfo.h @@ -54,9 +54,8 @@ public: MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value = NULL, - RegScavenger *RS = NULL) const; + void eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS = NULL) const; void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = NULL) const; |
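On the XCore hunks: the immMskBitp check that moves from a TableGen PatLeaf into XCoreISelDAGToDAG accepts only immediates that are contiguous low-bit masks of width 1-8, 16, 24 or 32, which is the form the MKMSK_rus selection above relies on. A standalone sketch of the same test, with LLVM's isMask_32/CountLeadingZeros_32 replaced by plain loops so it compiles on its own:

    #include <cstdint>

    // Sketch of the immMskBitp predicate from the patch: true only for
    // values of the form 2^n - 1 where n is 1..8, 16, 24 or 32.
    inline bool isMskBitp(uint32_t value) {
      if (value == 0 || (value & (value + 1)) != 0)
        return false;                        // not a contiguous low mask
      int msksize = 0;
      for (uint32_t v = value; v != 0; v >>= 1)
        ++msksize;                           // count the set low bits
      return (msksize >= 1 && msksize <= 8) ||
             msksize == 16 || msksize == 24 || msksize == 32;
    }

    // Example: isMskBitp(0xFF) and isMskBitp(0xFFFFFF) return true,
    // while isMskBitp(0xF0) and isMskBitp(0x3FF) return false.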